In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from scipy.sparse.linalg import eigsh
#from pretrainedModel import pretrainedModel
from tensorflow import keras
from PIL import Image
from sklearn.preprocessing import StandardScaler
import torch
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix
import time
import warnings
from sklearn.cluster import KMeans
import sys
import os

from copy import deepcopy

In [2]:
import random
import torch.nn as nn
import numpy as np
import torch
import math
import torch.optim as optim
from torch.utils.data import DataLoader

# Relative path prefix that the visible cells prepend to data and result paths.
root = '../../'
# Module-level hash length.
# NOTE(review): HPO() assigns its own local `bits` from the hyper-parameter dict,
# so this global does not drive the visible training code — confirm it is still needed.
bits = 16

In [3]:
# Load the HPO label file and display it as a tensor (quick look at how labels are stored).
torch.tensor( np.load( root + r"Features/HPO og Validering/CIFAR/y_hpo_CIfar.npy" ) )

tensor([6, 3, 2,  ..., 1, 5, 3])

In [4]:
def get_cifar():
    """Load the CIFAR arrays used for hyper-parameter optimisation.

    Reads four ``.npy`` files below ``root + "Features/HPO og Validering/CIFAR/"``
    and one-hot encodes both label vectors.

    The two label sets are encoded with ONE shared class count (largest label
    found in either split, plus one). Encoding each split with its own
    ``len(unique())`` produces matrices of different widths as soon as one
    split misses a class, which breaks any later train/validation label
    comparison.

    Returns
    -------
    X_train, y_train, X_val, y_val : torch.Tensor
        Feature tensors as stored on disk, and one-hot label tensors.
    """
    data_dir = root + r"Features/HPO og Validering/CIFAR/"

    X_train = torch.tensor(np.load(data_dir + "X_hpo_Cifar.npy"))
    y_train = torch.tensor(np.load(data_dir + "y_hpo_CIfar.npy")).long()

    X_val = torch.tensor(np.load(data_dir + "X_val_Cifar.npy"))
    y_val = torch.tensor(np.load(data_dir + "y_val_Cifar.npy")).long()

    # one_hot needs every label to be < num_classes, so derive the width from
    # the maximum label across both splits rather than from a unique-count.
    label_amnt = max(int(y_train.max()), int(y_val.max())) + 1
    y_train = torch.nn.functional.one_hot(y_train, label_amnt)
    y_val = torch.nn.functional.one_hot(y_val, label_amnt)

    return X_train, y_train, X_val, y_val


In [5]:
def get_dataloader(file, batch_size=100, shuffle=True):
    """Build a training ``DataLoader`` for one of the supported datasets.

    Parameters
    ----------
    file : str
        Dataset name, case-insensitive. Currently only ``"cifar"``.
    batch_size : int, default 100
        Number of samples per batch.
    shuffle : bool, default True
        Whether the loader reshuffles the samples on every pass.

    Returns
    -------
    torch.utils.data.DataLoader
        Each batch is a pair ``[X_batch, y_batch]``.

    Raises
    ------
    ValueError
        If ``file`` is not a supported dataset name.
    """
    file = file.lower()
    loaders = {"cifar": get_cifar}
    legal_files = list(loaders)
    if file not in legal_files:
        raise ValueError(f'The given file name was "{file}", expected from {legal_files}')

    # == LOAD IN THE DATA ==
    # Only the training split is needed here; the validation tensors are dropped.
    X_train, y_train, _, _ = loaders[file]()

    # == MAKE DATA LOADER ==
    # TensorDataset pairs X_train[i] with y_train[i] without materialising a
    # Python list holding one entry per sample.
    dataset = torch.utils.data.TensorDataset(X_train, y_train)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

# Smoke test: iterate once over the CIFAR loader and print one index per batch.
for i,batch  in enumerate(get_dataloader("cifar")):
    print(i)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


In [6]:
class DTSHLoss(torch.nn.Module):
    """Triplet-style hashing loss evaluated on one batch of real-valued codes.

    The loss is the sum of two terms:

    * a triplet term that, for every anchor row, compares the inner product
      with each same-label sample against the inner product with each
      different-label sample, shifted by the margin ``ALPHA``;
    * a quantisation term, weighted by ``LAMBDA``, that penalises the squared
      distance between each code entry and its sign.
    """

    def __init__(self):
        super().__init__()

    def forward(self, u, y, LAMBDA=1, ALPHA=1):
        """Return the scalar loss for codes ``u`` and label matrix ``y``.

        ``u`` and ``y`` are 2-D with one row per sample (both are multiplied by
        their own transpose). Two samples count as similar when the product of
        their label rows is positive.
        """
        similarity = u @ u.t()            # pairwise inner products of the codes
        same_label = (y @ y.t()) > 0      # True where two samples share a label

        per_anchor = []
        for anchor in range(same_label.shape[0]):
            positives = same_label[anchor]
            negatives = ~positives

            # An anchor contributes only if it has both positives and negatives.
            if positives.sum() == 0 or negatives.sum() == 0:
                continue

            theta_pos = similarity[anchor][positives]
            theta_neg = similarity[anchor][negatives]

            # All (positive, negative) combinations for this anchor; the clamp
            # keeps exp() inside a finite range.
            margin = theta_pos.unsqueeze(1) - theta_neg.unsqueeze(0) - ALPHA
            margin = margin.clamp(min=-100, max=50)
            per_anchor.append(-(margin - torch.log(1 + torch.exp(margin))).mean())

        if per_anchor:
            triplet_term = sum(per_anchor) / len(per_anchor)
        else:
            # No usable anchor in this batch: only the quantisation term remains.
            triplet_term = 0

        quantisation_term = LAMBDA * (u - u.sign()).pow(2).mean()

        return triplet_term + quantisation_term

In [7]:
from tqdm import tqdm

def one_hot_encode(a, num_classes=None):
    """One-hot encode a 1-D array of non-negative integer labels.

    Parameters
    ----------
    a : array-like of int
        Class indices, one per sample.
    num_classes : int, optional
        Width of the encoding. Defaults to ``a.max() + 1``. Pass it explicitly
        when several arrays must share the same width (e.g. train and test
        labels that do not contain the same set of classes).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(a.size, num_classes)`` with a single 1 per row.
    """
    a = np.asarray(a)
    if a.size == 0:
        # Nothing to encode; max() of an empty array would raise.
        return np.zeros((0, num_classes or 0))
    if num_classes is None:
        num_classes = int(a.max()) + 1
    b = np.zeros((a.size, num_classes))
    b[np.arange(a.size), a] = 1
    return b

def mean_average_precision(test_hashes, training_hashes, test_labels, training_labels):
    """Mean average precision of ranking the training codes for every test code.

    For each test code the training codes are ranked by L1 distance (which is
    the Hamming distance for 0/1 codes); ties keep the original training
    order. A training item is relevant when it shares a label with the query.

    Parameters
    ----------
    test_hashes, training_hashes : array-like, shape (n, bits)
        Hash codes of the queries and of the database items.
    test_labels, training_labels : array-like
        Either 1-D class indices or 2-D one-hot / multi-hot matrices. Anything
        ``np.asarray`` can convert is accepted.

    Returns
    -------
    numpy.float64
        Mean of the per-query average precisions. A query without any relevant
        training item contributes 0.0 instead of NaN, so one such query no
        longer turns the whole score into NaN.
    """
    training_hashes = np.asarray(training_hashes)
    test_hashes = np.asarray(test_hashes)
    training_labels = np.asarray(training_labels)
    test_labels = np.asarray(test_labels)

    # 1-D labels are compared directly. One-hot encoding the two arrays
    # separately gives different widths whenever their label ranges differ,
    # and the product below would then broadcast into wrong relevance flags.
    single_label = training_labels.ndim == 1

    aps = []
    for i, test_hash in enumerate(tqdm(test_hashes)):
        label = test_labels[i]
        distances = np.abs(training_hashes - test_hash).sum(axis=1)

        if single_label:
            relevant = training_labels == label
        else:
            relevant = (training_labels * label).sum(axis=1) > 0

        # Stable sort == sort by (distance, original index).
        order = np.argsort(distances, kind="stable")
        hit_ranks = np.flatnonzero(relevant[order]) + 1  # 1-based ranks of the relevant items

        if hit_ranks.size == 0:
            aps.append(0.0)
            continue

        # Precision at the k-th relevant item is k / rank_k.
        precision = np.arange(1, hit_ranks.size + 1) / hit_ranks
        aps.append(precision.mean())

    return np.array(aps).mean()

In [8]:
def earlyStop(LossList, n = 10):
    """Tell whether training should stop because the loss stopped improving.

    Parameters
    ----------
    LossList : list of float
        Loss history, one entry per epoch, oldest first.
    n : int, default 10
        Patience: number of epochs that must follow the best epoch.

    Returns
    -------
    bool
        True when the first occurrence of the minimum loss is followed by at
        least ``n`` further entries, otherwise False. An empty history returns
        False.
    """
    if not LossList:
        return False

    bestVal_i = LossList.index(min(LossList))
    return bestVal_i < len(LossList) - n

    

In [9]:
def res(X, model):
    """Run ``model`` on ``X`` and binarise the outputs into 0/1 hash codes.

    Parameters
    ----------
    X : torch.Tensor
        Input batch handed to the model unchanged.
    model : callable
        Returns a tensor when called with ``X``.

    Returns
    -------
    numpy.ndarray of int
        1 where the model output is strictly positive, 0 elsewhere.
    """
    # Pure inference: skip autograd bookkeeping so no graph is kept for the
    # whole input set.
    with torch.no_grad():
        results = model(X)

    # .cpu() keeps the NumPy conversion working when the output is not on the CPU.
    results = results.detach().cpu().numpy()

    return (results > 0).astype(int)

In [None]:
def HPO(HP, max_epochs=1500, patience=20):
    """Train one hashing network for a hyper-parameter setting and score it.

    Parameters
    ----------
    HP : dict
        One configuration with the keys ``"lambda"`` and ``"alpha"`` (passed to
        the loss), ``"lr"`` and ``"wd"`` (RMSprop learning rate and weight
        decay) and ``"bits"`` (width of the output layer).
    max_epochs : int, default 1500
        Upper bound on the number of training epochs.
    patience : int, default 20
        Forwarded to ``earlyStop`` as ``n``.

    Returns
    -------
    dict
        ``{"hp": HP, "loss": [mean loss per epoch], "map": float}`` where
        ``"map"`` is the mean average precision of validation codes retrieved
        against training codes.
    """
    LAMBDA = HP["lambda"]
    ALPHA = HP["alpha"]
    lr = HP["lr"]
    weight_decay = HP["wd"]
    bits = HP["bits"]

    # Loaded before the model is built so the input width comes from the data
    # instead of being hard-coded; the same tensors are reused for evaluation.
    X_train, y_train, X_val, y_val = get_cifar()

    model = nn.Sequential(
        nn.Linear(X_train.shape[-1], 256),
        nn.ReLU(),
        nn.Linear(256, bits),
    )

    criterion = DTSHLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)

    dataloader = get_dataloader("cifar")
    epoch_losses = []
    for epoch in range(max_epochs):
        batch_losses = []
        for X_batch, y_batch in dataloader:
            optimizer.zero_grad()

            u = model(X_batch)
            # The loss multiplies the label matrix with itself, so cast it to float.
            loss = criterion(u, y_batch.float(), LAMBDA=LAMBDA, ALPHA=ALPHA)
            loss.backward()
            optimizer.step()

            batch_losses.append(float(loss))

        mean_loss = sum(batch_losses) / len(batch_losses)
        if epoch % 10 == 1:
            print(epoch, mean_loss)
        epoch_losses.append(mean_loss)

        if earlyStop(epoch_losses, n=patience):
            print(epoch, mean_loss)
            print("Early Stop!!!")
            break

    # === EVALUATE ===
    data = {}
    data["hp"] = HP
    data["loss"] = epoch_losses
    # Plain float so the result prints cleanly and serialises to JSON.
    data["map"] = float(
        mean_average_precision(
            test_hashes=res(X_val, model),
            training_hashes=res(X_train, model),
            test_labels=y_val,
            training_labels=y_train,
        )
    )
    return data

In [11]:
def get_combos(D):
    """Expand a grid of candidate values into every single-value combination.

    Parameters
    ----------
    D : dict
        Maps each name to an iterable of candidate values.

    Returns
    -------
    list of dict
        One dict per combination, with the same keys (and key order) as ``D``.
        The FIRST key varies fastest, the last key slowest. Values are deep
        copies, so editing a combination never touches ``D``. An empty ``D``
        yields ``[{}]``; an empty candidate list yields ``[]``.
    """
    from itertools import product

    keys = list(D)
    combos = []
    # product() varies its LAST argument fastest, so feed the candidate lists
    # in reverse key order and flip every tuple back afterwards.
    for picked in product(*(D[key] for key in reversed(keys))):
        values = picked[::-1]
        combos.append({key: deepcopy(value) for key, value in zip(keys, values)})
    return combos

In [12]:
import json
import datetime

def save_dict(d, path):
    """Write ``d`` as JSON to ``<path>/<host name>/<timestamp>.json``.

    Parameters
    ----------
    d : dict
        JSON-serialisable content.
    path : str
        Base directory; ``\\`` and ``/`` separators are both accepted. Missing
        directories are created.

    Notes
    -----
    * The host folder comes from the ``COMPUTERNAME`` environment variable when
      it is set and otherwise from ``platform.node()``, so the function also
      works where that variable does not exist.
    * The file name is ``YYYY-MM-DD_HH;MM;SS.json``; two calls within the same
      second write to the same file.
    """
    import platform

    # === CLEAN PATH ===
    path = path.replace("\\", "/")  # Ensures that it's always "/" and not "\"
    if path and not path.endswith("/"):  # Ensures that a non-empty path ends with "/"
        path += "/"

    # === DEFINE FOLDER ===
    host = os.environ.get("COMPUTERNAME") or platform.node() or "unknown-host"
    folder_path = f"{path}{host}"
    os.makedirs(folder_path, exist_ok=True)

    # === DEFINE FILE NAME/PATH ===
    # strftime instead of slicing str(now) at ".": the string form has no "."
    # at all when the microsecond field happens to be 0.
    now = datetime.datetime.now().strftime("%Y-%m-%d_%H;%M;%S")
    filePath = f"{folder_path}/{now}.json"

    # === SAVE FILE ===
    with open(filePath, "w") as fp:
        json.dump(d, fp)

In [13]:
# Hyper-parameter grid: every key maps to the list of candidate values to try.
# get_combos() expands it into 4 * 4 * 3 * 3 * 4 = 576 configurations.
HP = {
    "lambda" : [0.5 , 1, 2, 4],
    "alpha"  : [0.5 , 1, 2, 4],
    "lr"     : [1e-4 ,1e-5, 1e-6],
    "wd"     : [1e-4 ,1e-5, 1e-6],
    "bits"   : [12, 24, 32, 48]
}


# Train and score one model per configuration, print the result and write it
# to its own JSON file, so results from finished runs are already on disk.
for hp in get_combos(HP):
    print("\n\n\n")
    print(hp)
    result_dict =  HPO(hp)
    print(result_dict)

    save_dict(result_dict, root+r"Results\HPO\DTSH")





{'lambda': 0.5, 'alpha': 0.5, 'lr': 0.0001, 'wd': 0.0001, 'bits': 12}
1 0.45304609298706056
11 0.27259749233722685
21 0.20287711828947066
31 0.16110509708523751
41 0.12687018796801566
51 0.1100686851143837
61 0.08842064931988716
71 0.08461065165698528
81 0.06515292994678021
91 0.06334872767329217
101 0.05832636781036854
111 0.05740471601486206
121 0.046654238626360894
131 0.051035604998469354
141 0.038449977338314054
151 0.04534109212458134
161 0.041299344524741176
171 0.0390989600494504
181 0.03217027191072702
191 0.03219738736748695


100%|██████████| 500/500 [00:00<00:00, 760.76it/s]


{'hp': {'lambda': 0.5, 'alpha': 0.5, 'lr': 0.0001, 'wd': 0.0001, 'bits': 12}, 'loss': [1.1490629774332046, 0.45304609298706056, 0.40761515974998475, 0.38615551829338074, 0.3645141625404358, 0.34951023042201995, 0.33676176190376284, 0.3246812230348587, 0.3109441050887108, 0.2945237347483635, 0.2846969649195671, 0.27259749233722685, 0.2687932774424553, 0.2561223044991493, 0.24275815099477768, 0.24823377937078475, 0.23421849578619003, 0.22583379328250885, 0.22064979910850524, 0.21170886874198913, 0.20482995897531509, 0.20287711828947066, 0.20210463106632232, 0.19022259175777434, 0.18752935588359831, 0.19024583399295808, 0.1812731795012951, 0.17033196598291397, 0.1704340758919716, 0.16369929611682893, 0.16525931149721146, 0.16110509708523751, 0.1543874190747738, 0.1514212094247341, 0.14926185443997383, 0.1482391183078289, 0.14124933242797852, 0.13699621111154556, 0.13631986305117608, 0.13470687717199326, 0.13430650115013124, 0.12687018796801566, 0.12234171003103256, 0.12551231041550637, 0.

KeyboardInterrupt: 