In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from scipy.sparse.linalg import eigsh
#from pretrainedModel import pretrainedModel
from tensorflow import keras
from PIL import Image
from sklearn.preprocessing import StandardScaler
import torch
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix
import time
import warnings
from sklearn.cluster import KMeans
import sys
import os

from copy import deepcopy

In [2]:
import random
import torch.nn as nn
import numpy as np
import torch
import math
import torch.optim as optim
from torch.utils.data import DataLoader

# Relative path from this notebook to the project root; the data loaders and
# the results path below prepend it to their file names.
root = '../../'
# Default hash-code length.
# NOTE(review): HPO() reads its own `bits` from the hyperparameter dict, so this
# module-level value looks unused in the visible cells — confirm before removing.
bits = 16

In [3]:
# Scratch cell: load the CIFAR HPO label file and display it as a tensor.
torch.tensor( np.load( root + r"Features/HPO og Validering/CIFAR/y_hpo_CIfar.npy" ) )

tensor([6, 3, 2,  ..., 1, 5, 3])

In [4]:
def get_cifar():
    """Load the CIFAR HPO (training) and validation feature/label arrays.

    Labels are read as integer class ids and one-hot encoded. Both splits use
    the same number of classes (largest class id seen in either split, plus
    one), so the two label matrices always have the same width even when one
    split is missing a class.

    Returns:
        X_train, y_train, X_val, y_val -- torch tensors; ``y_train`` and
        ``y_val`` are one-hot encoded.
    """
    data_dir = root + "Features/HPO og Validering/CIFAR/"

    X_train = torch.tensor(np.load(data_dir + "X_hpo_Cifar.npy"))
    # NOTE: the label file really is spelled "CIfar" (capital I) on disk.
    y_train = torch.tensor(np.load(data_dir + "y_hpo_CIfar.npy")).long()

    X_val = torch.tensor(np.load(data_dir + "X_val_Cifar.npy"))
    y_val = torch.tensor(np.load(data_dir + "y_val_Cifar.npy")).long()

    # one_hot needs int64 indices and a class count larger than every id;
    # sharing the count keeps the train and validation encodings aligned.
    label_amnt = int(max(y_train.max(), y_val.max())) + 1
    y_train = torch.nn.functional.one_hot(y_train, label_amnt)
    y_val = torch.nn.functional.one_hot(y_val, label_amnt)

    return X_train, y_train, X_val, y_val


In [None]:
def get_imgNet():
    """Load the ImageNet HPO (training) and validation feature/label arrays.

    Labels are cast to int64 and one-hot encoded with a class count shared by
    both splits, so the two label matrices always have the same width.

    Returns:
        X_train, y_train, X_val, y_val -- torch tensors; ``y_train`` and
        ``y_val`` are one-hot encoded.
    """
    # Forward slashes work on every platform (the original backslash path for
    # X_train only resolved on Windows).
    data_dir = root + "Features/HPO og Validering/Imagenet/"

    X_train = torch.tensor(np.load(data_dir + "X_hpo_Img.npy"))
    # .long() converts in place of re-wrapping the tensor with torch.tensor(),
    # which copies and emits a UserWarning.
    y_train = torch.tensor(np.load(data_dir + "y_hpo_Img.npy")).long()

    X_val = torch.tensor(np.load(data_dir + "X_val_Img.npy"))
    y_val = torch.tensor(np.load(data_dir + "y_val_Img.npy")).long()

    # Without an explicit class count one_hot infers it per call, which can
    # give the two splits different widths.
    label_amnt = int(max(y_train.max(), y_val.max())) + 1
    y_train = torch.nn.functional.one_hot(y_train, label_amnt)
    y_val = torch.nn.functional.one_hot(y_val, label_amnt)

    return X_train, y_train, X_val, y_val

In [None]:
def get_nus():
    """Load the NUS-WIDE HPO (training) and validation feature/label arrays.

    Labels are cast to int64 and returned as stored; no one-hot encoding is
    applied here.
    NOTE(review): presumably the label files already hold multi-hot rows --
    confirm against the feature-extraction step.

    Returns:
        X_train, y_train, X_val, y_val -- torch tensors.
    """
    # Forward slashes work on every platform (the original backslash path for
    # X_train only resolved on Windows).
    data_dir = root + "Features/HPO og Validering/Nus_Wide/"

    X_train = torch.tensor(np.load(data_dir + "X_hpo_Nus.npy"))
    # .long() converts in place of re-wrapping the tensor with torch.tensor(),
    # which copies and emits a UserWarning.
    y_train = torch.tensor(np.load(data_dir + "y_hpo_Nus.npy")).long()

    X_val = torch.tensor(np.load(data_dir + "X_val_Nus.npy"))
    y_val = torch.tensor(np.load(data_dir + "y_val_Nus.npy")).long()

    return X_train, y_train, X_val, y_val

In [None]:
def get_dataloader(file, batchSize = 100):
    """Build a shuffled training DataLoader for one of the feature sets.

    Args:
        file: dataset name, case-insensitive; one of "cifar", "imgnet", "nus".
        batchSize: number of samples per batch.

    Returns:
        A DataLoader over [feature, label] pairs of the training split.

    Raises:
        ValueError: if ``file`` is not one of the supported names.
    """
    file = file.lower()
    legal_files = ["cifar", "imgnet", "nus"]
    if not any(file == name for name in legal_files):
        raise ValueError(f'The given file name was "{file}", expected from {legal_files}')

    # Only the training split is needed here; the validation half is dropped.
    if file == "cifar":
        features, labels = get_cifar()[:2]
    elif file == "imgnet":
        features, labels = get_imgNet()[:2]
    elif file == "nus":
        features, labels = get_nus()[:2]

    # One [x, y] pair per sample; the default collate stacks them per batch.
    pairs = [[features[k], labels[k]] for k in range(len(features))]

    return DataLoader(pairs, batch_size=batchSize, shuffle=True)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


In [6]:
class DTSHLoss(torch.nn.Module):
    """Triplet-style hashing loss plus a quantization penalty.

    For every row of the batch that has at least one similar and one
    dissimilar partner, all (similar, dissimilar) pairs are compared through
    their inner products; the per-row means are averaged over the qualifying
    rows. A second term pulls every output towards its sign.
    """

    def __init__(self):
        super().__init__()

    def forward(self, u, y, LAMBDA=1, ALPHA=1):
        """Compute the loss for one batch.

        Args:
            u: 2-D tensor of network outputs, one row per sample.
            y: 2-D label tensor, one row per sample; two samples count as
               similar when the inner product of their label rows is > 0.
            LAMBDA: weight of the quantization term.
            ALPHA: margin subtracted from every (similar - dissimilar) gap.

        Returns:
            The triplet term (0 when no row has both kinds of partner) plus
            the weighted quantization term.
        """
        similarity = u @ u.t()
        same_label = (y @ y.t()) > 0

        row_losses = []
        for sims, mask in zip(similarity, same_label):
            # A row contributes only if it has both similar and dissimilar partners.
            if mask.sum() == 0 or (~mask).sum() == 0:
                continue

            positives = sims[mask]
            negatives = sims[~mask]

            # Every positive against every negative, clamped to keep exp() finite.
            gap = positives.unsqueeze(1) - negatives.unsqueeze(0) - ALPHA
            gap = gap.clamp(min=-100, max=50)
            row_losses.append(-(gap - torch.log(1 + torch.exp(gap))).mean())

        triplet_term = sum(row_losses) / len(row_losses) if row_losses else 0

        quantization_term = LAMBDA * (u - u.sign()).pow(2).mean()

        return triplet_term + quantization_term

In [7]:
from tqdm import tqdm

def one_hot_encode(a):
    """One-hot encode a 1-D collection of non-negative integer class ids.

    Args:
        a: 1-D integer array-like (NumPy array, list, CPU tensor, ...).

    Returns:
        Float array of shape ``(len(a), max(a) + 1)`` with a single 1 per row;
        an empty ``(0, 0)`` array when ``a`` is empty.
    """
    # Coerce first: lists have no .size and on tensors .size is a method.
    a = np.asarray(a)
    if a.size == 0:
        # max() of an empty array raises; there is nothing to encode.
        return np.zeros((0, 0))

    b = np.zeros((a.size, a.max() + 1))
    b[np.arange(a.size), a] = 1
    return b

def mean_average_precision(test_hashes, training_hashes, test_labels, training_labels):
    """Mean average precision of ranking the training set for every query.

    For each query hash the training hashes are ranked by L1 distance (the
    Hamming distance for 0/1 codes), ties broken by original position. A
    training item is relevant when it shares a label with the query.

    Args:
        test_hashes: 2-D array of query codes, one row per query.
        training_hashes: 2-D array of database codes, one row per item.
        test_labels: labels of the queries, either 1-D class ids or 2-D
            one-hot / multi-hot rows.
        training_labels: labels of the database items, same layout.

    Returns:
        The mean of the per-query average precisions. A query with no relevant
        database item contributes 0 instead of turning the result into NaN.
    """
    # 1-D labels are class ids: relevance is plain equality, which also avoids
    # one-hot encoding the two splits to different widths.
    single_label = len(training_labels.shape) == 1

    aps = []
    for i, test_hash in enumerate(tqdm(test_hashes)):
        label = test_labels[i]
        distances = np.abs(training_hashes - test_hash).sum(axis=1)

        if single_label:
            relevant = np.asarray(training_labels == label)
        else:
            relevant = np.asarray((training_labels * label).sum(axis=1) > 0)

        # A stable sort keeps equal distances in their original order, i.e.
        # the same ranking as sorting by (distance, index).
        order = np.argsort(distances, kind="stable")
        hit_ranks = np.flatnonzero(relevant[order]) + 1  # 1-based rank of every hit

        if hit_ranks.size == 0:
            aps.append(0.0)
            continue

        # Precision at the k-th hit is k / rank of that hit.
        precision = np.arange(1, hit_ranks.size + 1) / hit_ranks
        aps.append(precision.mean())

    return np.array(aps).mean()

In [8]:
def earlyStop(LossList, n = 10):
    """Decide whether training should stop because the loss stopped improving.

    Args:
        LossList: list of losses recorded so far, oldest first.
        n: patience -- how many entries must follow the best one.

    Returns:
        True when at least ``n`` entries were recorded after the first
        occurrence of the lowest loss, otherwise False (also for an empty
        list).
    """
    if not LossList:
        # Nothing recorded yet, so there is nothing to stop on.
        return False

    bestVal_i = LossList.index(min(LossList))

    # Always an explicit bool; the original fell through and returned None.
    return bestVal_i < len(LossList) - n

    

In [9]:
def res(X, model):
    """Run ``model`` on ``X`` and binarize the outputs into 0/1 hash codes.

    Args:
        X: input tensor accepted by ``model``.
        model: callable returning a tensor.

    Returns:
        Integer NumPy array with 1 where the model output is > 0, else 0.
    """
    # Inference only: skip building the autograd graph for the whole dataset.
    with torch.no_grad():
        results = model(X)
    results = results.detach().cpu().numpy()

    results = (results > 0).astype(int)

    return results

In [None]:
def HPO(HP):
    """Train and evaluate one hashing network on NUS-WIDE for one setting.

    Args:
        HP: dict with the keys "lambda", "alpha", "lr", "wd" and "bits".

    Returns:
        dict with "loss" (mean loss per epoch) and "map" (validation mean
        average precision), plus "earlyStop" or "time stopage" set to True
        when the corresponding stop condition ended training.
    """
    started_at = time.time()

    data = {}

    # Unpack the hyperparameters of this run.
    LAMBDA = HP["lambda"]
    ALPHA = HP["alpha"]
    lr = HP["lr"]
    weight_decay = HP["wd"]
    bits = HP["bits"]

    # Two-layer network mapping 4096-dimensional features to `bits` outputs.
    layers = [nn.Linear(4096, 256), nn.ReLU(), nn.Linear(256, bits)]
    model = nn.Sequential(*layers)

    criterion = DTSHLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)

    dataloader = get_dataloader("nus")
    epoch_losses = []
    for epoch in range(1500):
        batch_losses = []
        for X_batch, y_batch in dataloader:
            optimizer.zero_grad()

            codes = model(X_batch)
            loss = criterion(codes, y_batch.float(), LAMBDA=LAMBDA, ALPHA=ALPHA)
            loss.backward()
            optimizer.step()

            batch_losses.append(float(loss))

        mean_loss = sum(batch_losses) / len(batch_losses)
        if epoch % 10 == 1:
            print(epoch, mean_loss)
        epoch_losses.append(mean_loss)

        # Stop once the loss has not improved for 10 epochs.
        if earlyStop(epoch_losses, n=10):
            print(epoch, mean_loss)
            print("Early Stop!!!")
            data["earlyStop"] = True
            break

        # Hard wall-clock budget of 30 minutes per run.
        if time.time() - started_at > 60 * 30:
            data["time stopage"] = True
            break

    # === EVALUATE ===
    X_train, y_train, X_val, y_val = get_nus()

    data["loss"] = epoch_losses
    val_codes = res(X_val, model)
    train_codes = res(X_train, model)
    data["map"] = mean_average_precision(
        test_hashes=val_codes,
        training_hashes=train_codes,
        test_labels=y_val,
        training_labels=y_train,
    )
    return data

In [11]:
def get_combos(D):
    """Expand a grid of options into every combination (Cartesian product).

    Args:
        D: dict mapping each key to a sequence of candidate values.

    Returns:
        List of dicts, one per combination, each with the same keys (in the
        same order) as ``D``. The first key varies fastest. Values are deep
        copies, so mutating a combination never touches ``D`` or another
        combination. An empty option list yields no combinations; an empty
        grid yields the single empty combination ``[{}]``.
    """
    from itertools import product

    keys = list(D)

    combos = []
    # product() varies its LAST argument fastest, so feed the keys reversed to
    # make the FIRST key vary fastest, then flip each pick back into key order.
    for picked in product(*(D[key] for key in reversed(keys))):
        combos.append({key: deepcopy(value) for key, value in zip(keys, reversed(picked))})

    return combos

In [12]:
import json
import datetime

def save_dict(d, path):
    """Write ``d`` as JSON to ``<path>/<machine name>/<timestamp>.json``.

    Args:
        d: JSON-serializable object.
        path: base folder; backslashes are treated as path separators.

    The machine name is the COMPUTERNAME environment variable when set,
    otherwise the host name reported by the platform. The timestamp has
    one-second resolution (``YYYY-MM-DD_HH;MM;SS``), so two calls within the
    same second write to the same file.
    """
    import platform  # local import: only needed for the host-name fallback

    # === CLEAN PATH ===
    path = path.replace("\\", "/")  # always "/" and never "\"
    if path and not path.endswith("/"):
        path += "/"

    # === DEFINE FOLDER ===
    # COMPUTERNAME only exists on Windows; fall back instead of raising KeyError.
    machine = os.environ.get("COMPUTERNAME") or platform.node() or "unknown-host"
    folder_path = f"{path}{machine}"

    # exist_ok avoids the check-then-create race between parallel workers.
    os.makedirs(folder_path, exist_ok=True)

    # === DEFINE FILE NAME/PATH ===
    # strftime gives the same text as trimming str(now) at the ".", but does
    # not fail when the microsecond field happens to be 0.
    now = datetime.datetime.now().strftime("%Y-%m-%d_%H;%M;%S")

    filePath = f"{folder_path}/{now}.json"

    # === SAVE FILE ===
    with open(filePath, "w") as fp:
        json.dump(d, fp)

In [13]:
from copy import deepcopy
import os
import json
import datetime
import time
from joblib import Parallel, delayed
import random

def _get_combos(D):
    l = list(D)
    curr_i = dict(zip(l,[0]*len(l)))
    combos = []

    while True:
        for i in range(len(l)):
            val = l[i]

            if curr_i[val] > (len(D[val])-1):
                curr_i[val] = 0
                
                if i+1 != len(l):
                    curr_i[ l[i+1] ] += 1                    
                else:
                    return combos                    

        combo = deepcopy(D)
        for key in curr_i:
            list_ = combo[key]
            index = curr_i[key] 
             
            combo[key] = list_[index]

        combos.append( combo )
        curr_i[l[0]] += 1

def save_dict(d, path, indentifier=""):
    """Write ``d`` as JSON to ``<path>/<machine name>/<timestamp>[_<id>].json``.

    Args:
        d: JSON-serializable object.
        path: base folder; backslashes are treated as path separators.
        indentifier: optional suffix appended to the file name (after an
            underscore) so that files written within the same second do not
            overwrite each other. Skipped when it equals "".

    The machine name is the COMPUTERNAME environment variable when set,
    otherwise the host name reported by the platform. The timestamp has
    one-second resolution (``YYYY-MM-DD_HH;MM;SS``).
    """
    import platform  # local import: only needed for the host-name fallback

    # === CLEAN PATH ===
    path = path.replace("\\", "/")  # always "/" and never "\"
    if path and not path.endswith("/"):
        path += "/"

    # === DEFINE FOLDER ===
    # COMPUTERNAME only exists on Windows; fall back instead of raising KeyError.
    machine = os.environ.get("COMPUTERNAME") or platform.node() or "unknown-host"
    folder_path = f"{path}{machine}"

    # exist_ok avoids the check-then-create race between parallel workers.
    os.makedirs(folder_path, exist_ok=True)

    # === DEFINE FILE NAME/PATH ===
    # strftime gives the same text as trimming str(now) at the ".", but does
    # not fail when the microsecond field happens to be 0.
    now = datetime.datetime.now().strftime("%Y-%m-%d_%H;%M;%S")

    if indentifier == "":
        filePath = f"{folder_path}/{now}.json"
    else:
        filePath = f"{folder_path}/{now}_{indentifier}.json"

    # === SAVE FILE ===
    with open(filePath, "w") as fp:
        json.dump(d, fp)

def read_folder(folder_path):
    """Read every JSON result file one level below ``folder_path``.

    Every sub-folder of ``folder_path`` is scanned for ``*.json`` files; each
    file is parsed and the path it was read from is stored in the parsed dict
    under the key "filePath". Entries of ``folder_path`` that are not folders,
    and entries of the sub-folders that are not ``.json`` files, are skipped.

    Args:
        folder_path: folder whose sub-folders hold the JSON files.

    Returns:
        List of the parsed dicts, ordered by sub-folder name and file name.
    """
    data_list = []

    # sorted(): os.listdir returns entries in arbitrary order.
    for folder in sorted(os.listdir(folder_path)):
        folderPath = folder_path+"/"+folder

        if not os.path.isdir(folderPath):
            # A stray file next to the result folders would make listdir fail.
            continue

        for file in sorted(os.listdir(folderPath)):
            filePath = f"{folderPath}/{file}"

            if not (file.endswith(".json") and os.path.isfile(filePath)):
                continue

            with open( filePath ) as json_file:
                data = json.load(json_file)

            data["filePath"] = filePath
            data_list.append( data )

    return data_list

def work(task, path, func, n_jobs=10, taskName="task", shuffle_tasks=True, verbose=1): 
    """Run ``func`` on every not-yet-finished combination of a grid, in parallel.

    The grid is expanded with ``_get_combos``. Combinations already stored
    under ``taskName`` in a result file below ``path`` are skipped, so an
    interrupted run can be resumed. Each finished task is written to disk with
    ``save_dict`` as soon as it completes.

    Args:
        task: dict mapping each parameter name to its list of candidate values.
        path: results folder; backslashes are treated as path separators.
        func: callable taking one combination (dict) and returning a
            JSON-serializable result.
        n_jobs: number of parallel joblib workers.
        taskName: key under which the combination is stored in each result.
        shuffle_tasks: process the remaining combinations in random order.
        verbose: joblib verbosity level.

    Returns:
        List with one dict per task run in this call, holding the combination
        (under ``taskName``), "result" and "time" (seconds).
    """
    import platform  # local import: only needed for the host-name fallback

    # === CLEAN PATH + MAKE FOLDER ===
    path = path.replace("\\", "/")  # always "/" and never "\"
    if path and not path.endswith("/"):
        path += "/"
    # COMPUTERNAME only exists on Windows; fall back instead of raising KeyError.
    machine = os.environ.get("COMPUTERNAME") or platform.node() or "unknown-host"
    # exist_ok avoids the check-then-create race when several runs start together.
    os.makedirs(f"{path}{machine}", exist_ok=True)

    # === DEFINE COMBOS / TASKS ===
    tasks = _get_combos( task )

    # Result files without the task key (e.g. written with another taskName)
    # are ignored instead of raising KeyError.
    done_tasks = [ d[taskName] for d in read_folder( path ) if taskName in d ]

    remaining_tasks = [t for t in tasks if t not in done_tasks]

    if shuffle_tasks:
        random.shuffle(remaining_tasks)

    # === WRAPPER FOR "FUNC" ===
    def moddified_func(t, i):
        """Run ``func`` on one task, time it, and persist the outcome."""
        start_time = time.time()

        result_dict = {}
        result_dict[taskName] = t           # the task
        result_dict["result"] = func(t)     # the result of doing the task
        result_dict["time"] = time.time() - start_time

        # The task index keeps file names unique within the same second.
        save_dict(result_dict, path, indentifier=i)

        return result_dict

    # === PARALLEL PROCESSING ===
    func_results = Parallel(n_jobs=n_jobs, verbose=verbose)(
        [ delayed(moddified_func)(t, i) for i, t in enumerate(remaining_tasks) ]
    )

    return func_results

In [None]:
# Hyperparameter grid for the NUS-WIDE search: work() expands it into every
# combination (3 * 3 * 3 * 3 * 4 = 324 tasks) and calls HPO() once per task.
HP = {
    "lambda" : [0.5 , 1, 2],           # passed to DTSHLoss as LAMBDA
    "alpha"  : [1, 3, 5],              # passed to DTSHLoss as ALPHA
    "lr"     : [1e-4 ,1e-5, 1e-6],     # RMSprop learning rate
    "wd"     : [1e-4 ,1e-5, 1e-6],     # RMSprop weight decay
    "bits"   : [12, 24, 32, 48]        # output size of the last linear layer
}


# Long-running cell (the recorded run below took about 577 minutes).
# work() normalizes the backslashes in the results path to "/".
work(HP, root+r"Results\HPO\DTSH2\Nus", HPO)

[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed: 68.1min
[Parallel(n_jobs=10)]: Done 180 tasks      | elapsed: 344.7min
[Parallel(n_jobs=10)]: Done 324 out of 324 | elapsed: 577.3min finished


[{'task': {'lambda': 0.5, 'alpha': 3, 'lr': 1e-06, 'wd': 1e-05, 'bits': 32},
  'result': {'earlyStop': True,
   'loss': [2.6661347198486327,
    2.1348473715782164,
    1.8568380784988403,
    1.701100823879242,
    1.6041195583343506,
    1.5324396729469298,
    1.4788545918464662,
    1.4304435849189758,
    1.372380039691925,
    1.3413821411132814,
    1.305715970993042,
    1.2820958852767945,
    1.2583160018920898,
    1.2444797348976135,
    1.2191516065597534,
    1.2059733772277832,
    1.1797338342666626,
    1.1686817526817321,
    1.157115308046341,
    1.1446496999263764,
    1.1290300285816193,
    1.1188615047931671,
    1.109255883693695,
    1.0950919902324676,
    1.0808906090259551,
    1.076414645910263,
    1.0625843107700348,
    1.0570900332927704,
    1.0497659480571746,
    1.0430785083770753,
    1.0311715376377106,
    1.0267965948581697,
    1.0249136734008788,
    1.0139085698127746,
    1.0055706548690795,
    0.9934258389472962,
    0.9955564069747925,
 