In [1]:
import os

import numpy as np

from tensorflow.keras.models import load_model

from autoencoder import Autoencoder
from helper_funcs import *

import pandas

import optuna
from optuna.visualization import plot_pareto_front, plot_optimization_history, plot_slice

from params import lsh_test, hypercube_test, kmeans_test, gnn_test, mrng_test, nsg_test, get_aaf

2023-12-31 02:00:30.476603: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-31 02:00:30.909116: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-31 02:00:30.909952: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
models = os.listdir('./models/')

dataset = b'MNIST/input.dat'
query   = b'MNIST/query.dat'

model_to_files = {}
for i, model in enumerate(models):
    normalized_dataset = b'MNIST/' + models[i].removesuffix('.keras').encode() + b'_normalized_dataset.dat'
    normalized_query   = b'MNIST/' + models[i].removesuffix('.keras').encode() + b'_normalized_query.dat'
    encoded_dataset    = b'MNIST/' + models[i].removesuffix('.keras').encode() + b'_encoded_dataset.dat'
    encoded_query      = b'MNIST/' + models[i].removesuffix('.keras').encode() + b'_encoded_query.dat'
    decoded_dataset    = b'MNIST/' + models[i].removesuffix('.keras').encode() + b'_decoded_dataset.dat'
    decoded_query      = b'MNIST/' + models[i].removesuffix('.keras').encode() + b'_decoded_query.dat'

    model_to_files.update({models[i] : [normalized_dataset, normalized_query,
                                        encoded_dataset, encoded_query,
                                        decoded_dataset, decoded_query]})

n = 60000

In [3]:
for model in model_to_files:
    normalized_dataset, normalized_query, encoded_dataset, encoded_query, decoded_dataset, decoded_query = model_to_files[model]

    model = b'models/' + model.encode()

    # load model
    autoencoder = load_model(model.decode())
    shape = autoencoder.layers[-2].output_shape[1:] # get shape of encoded layer

    # load dataset
    x_train = load_dataset(dataset)
    x_train = x_train.astype('float32') / 255.
    x_test = load_dataset(query)
    x_test = x_test.astype('float32') / 255.
    if len(shape) == 3: # if model type is convolutional
        x_train = np.reshape(x_train, (len(x_train), 28, 28, 1))
        x_test = np.reshape(x_test, (len(x_test), 28, 28, 1))
    else:
        x_train = np.reshape(x_train, (len(x_train), 784))
        x_test = np.reshape(x_test, (len(x_test), 784))

    encoded_train = autoencoder.encode(x_train)
    encoded_test = autoencoder.encode(x_test)

    # deflatten encoded datasets
    encoded_train = deflatten_encoded(encoded_train, shape)
    encoded_test = deflatten_encoded(encoded_test, shape)

    # decode encoded datasets
    decoded_train = autoencoder.decode(encoded_train)
    decoded_test = autoencoder.decode(encoded_test)

    # save original datasets normalized
    save_decoded_binary(x_train, normalized_dataset)
    save_decoded_binary(x_test, normalized_query)

    # normalize encoded datasets
    encoded_train = normalize(encoded_train)
    encoded_test = normalize(encoded_test)

    # save encoded datasets
    save_encoded_binary(encoded_train, encoded_dataset)
    save_encoded_binary(encoded_test, encoded_query)

    # normalize decoded datasets
    decoded_train = normalize(decoded_train)
    decoded_test = normalize(decoded_test)

    # save decoded datasets
    save_decoded_binary(decoded_train, decoded_dataset)
    save_decoded_binary(decoded_test, decoded_query)



In [6]:
def objective_lsh(trial):
    model = trial.suggest_categorical('model', model_to_files.keys())
    param_dict = {'k': trial.suggest_int('k', 1, 10),
                  'L': trial.suggest_int('L', 1, 10),
                  'table_size':  trial.suggest_categorical('table_size', [int(n/32), int(n/16), int(n/8)]),
                  'window_size': trial.suggest_float('window_size', 0.01, 1),
                  'query_trick': trial.suggest_categorical('query_trick', [True, False])
                 }
    
    print("Trial parameters:", param_dict)

    encoded_dataset, encoded_query = model_to_files[model][2:4]

    average_time, aaf_latent, min_neighbors = lsh_test(encoded_dataset, encoded_query, queries_num=100, **param_dict, N=60)

    # trial should return at least 60 neighbors to be used in GNNS
    # penalize model if slower than brute force
    c0 = - min_neighbors.value + 60
    c1 = average_time.value - 1e-4
    trial.set_user_attr('constraint', (c0, c1))

    return aaf_latent.value, average_time.value

def constraints(trial):
    return trial.user_attrs['constraint']

In [7]:
%%time
sampler = optuna.samplers.NSGAIISampler(constraints_func=constraints)
lsh_study = optuna.create_study(study_name='lsh', directions=['minimize', 'minimize'], sampler=sampler)
lsh_study.optimize(objective_lsh, n_trials=100, n_jobs=-1)
print("-------------------- Best trials --------------------")
trials = sorted(lsh_study.best_trials, key=lambda x: x.values)
# print feasible trials only
for trial in trials:
    print("Trial no. {}".format(trial.number))
    print(" Values = {}, Constraints = {}".format(trial.values, trial.user_attrs["constraint"]))
    print(" Params = {}".format(trial.params))

[I 2023-12-31 02:02:48,229] A new study created in memory with name: lsh


Trial parameters: {'k': 6, 'L': 5, 'table_size': 1875, 'window_size': 1, 'query_trick': True}
Trial parameters: {'k': 4, 'L': 6, 'table_size': 3750, 'window_size': 0, 'query_trick': False}
Trial parameters: {'k': 1, 'L': 8, 'table_size': 1875, 'window_size': 1, 'query_trick': False}
Trial parameters: {'k': 2, 'L': 3, 'table_size': 3750, 'window_size': 0, 'query_trick': True}
Trial parameters: {'k': 8, 'L': 10, 'table_size': 1875, 'window_size': 1, 'query_trick': True}
Trial parameters: {'k': 8, 'L': 4, 'table_size': 7500, 'window_size': 0, 'query_trick': False}
Trial parameters: {'k': 3, 'L': 2, 'table_size': 7500, 'window_size': 0, 'query_trick': False}
Trial parameters: {'k': 8, 'L': 5, 'table_size': 7500, 'window_size': 0, 'query_trick': False}
Trial parameters: {'k': 5, 'L': 7, 'table_size': 7500, 'window_size': 1, 'query_trick': True}
Trial parameters: {'k': 6, 'L': 2, 'table_size': 1875, 'window_size': 0, 'query_trick': False}
Trial parameters: {'k': 4, 'L': 7, 'table_size': 1875

[I 2023-12-31 02:02:50,577] Trial 2 finished with values: [1.5032889047955507, 0.009155769999999999] and parameters: {'model': 'model_conv_46.keras', 'k': 1, 'L': 8, 'table_size': 1875, 'window_size': 1, 'query_trick': False}. 


Trial parameters: {'k': 10, 'L': 10, 'table_size': 3750, 'window_size': 0, 'query_trick': True}


[W 2023-12-31 02:02:52,447] Trial 0 failed with parameters: {'model': 'model_conv_46.keras', 'k': 6, 'L': 5, 'table_size': 1875, 'window_size': 1, 'query_trick': True} because of the following error: The value nan is not acceptable.
[W 2023-12-31 02:02:52,450] Trial 0 failed with value (nan, 0.0016034000000000005).


Trial parameters: {'k': 10, 'L': 9, 'table_size': 1875, 'window_size': 1, 'query_trick': False}


[W 2023-12-31 02:02:53,246] Trial 11 failed with parameters: {'model': 'model_dense_43.keras', 'k': 5, 'L': 7, 'table_size': 7500, 'window_size': 1, 'query_trick': True} because of the following error: The value nan is not acceptable.
[W 2023-12-31 02:02:53,247] Trial 11 failed with value (nan, 0.00114518).


Trial parameters: {'k': 1, 'L': 10, 'table_size': 1875, 'window_size': 0, 'query_trick': True}


[I 2023-12-31 02:02:55,093] Trial 8 finished with values: [0.999908201704201, 0.46837748999999973] and parameters: {'model': 'model_conv_46.keras', 'k': 6, 'L': 2, 'table_size': 1875, 'window_size': 0, 'query_trick': False}. 
[W 2023-12-31 02:02:55,113] Trial 9 failed with parameters: {'model': 'model_dense_1.keras', 'k': 4, 'L': 7, 'table_size': 1875, 'window_size': 1, 'query_trick': True} because of the following error: The value nan is not acceptable.
[W 2023-12-31 02:02:55,114] Trial 9 failed with value (nan, 0.0034774800000000024).


Trial parameters: {'k': 8, 'L': 2, 'table_size': 7500, 'window_size': 0, 'query_trick': True}
Trial parameters: {'k': 6, 'L': 10, 'table_size': 7500, 'window_size': 1, 'query_trick': False}


[W 2023-12-31 02:02:55,945] Trial 4 failed with parameters: {'model': 'model_conv_12.keras', 'k': 8, 'L': 10, 'table_size': 1875, 'window_size': 1, 'query_trick': True} because of the following error: The value nan is not acceptable.
[W 2023-12-31 02:02:55,946] Trial 4 failed with value (nan, 0.005300389999999999).


Trial parameters: {'k': 2, 'L': 5, 'table_size': 7500, 'window_size': 1, 'query_trick': True}


[I 2023-12-31 02:02:56,172] Trial 6 finished with values: [0.999951183835149, 0.5095927000000001] and parameters: {'model': 'model_dense_43.keras', 'k': 3, 'L': 2, 'table_size': 7500, 'window_size': 0, 'query_trick': False}. 


Trial parameters: {'k': 3, 'L': 3, 'table_size': 1875, 'window_size': 0, 'query_trick': False}


[W 2023-12-31 02:02:57,307] Trial 10 failed with parameters: {'model': 'model_dense_1.keras', 'k': 9, 'L': 10, 'table_size': 3750, 'window_size': 1, 'query_trick': True} because of the following error: The value nan is not acceptable.
[W 2023-12-31 02:02:57,309] Trial 10 failed with value (nan, 0.0032336699999999997).


Trial parameters: {'k': 1, 'L': 10, 'table_size': 3750, 'window_size': 0, 'query_trick': True}


[I 2023-12-31 02:02:58,884] Trial 13 finished with values: [1.5375706518476722, 0.0045239600000000005] and parameters: {'model': 'model_conv_12.keras', 'k': 10, 'L': 9, 'table_size': 1875, 'window_size': 1, 'query_trick': False}. 


Trial parameters: {'k': 3, 'L': 5, 'table_size': 7500, 'window_size': 0, 'query_trick': False}


[W 2023-12-31 02:02:59,591] Trial 17 failed with parameters: {'model': 'model_dense_26.keras', 'k': 2, 'L': 5, 'table_size': 7500, 'window_size': 1, 'query_trick': True} because of the following error: The value nan is not acceptable.
[W 2023-12-31 02:02:59,594] Trial 17 failed with value (nan, 0.00110546).


Trial parameters: {'k': 4, 'L': 4, 'table_size': 3750, 'window_size': 0, 'query_trick': False}


[I 2023-12-31 02:03:00,351] Trial 3 finished with values: [0.9999845110546619, 0.85213341] and parameters: {'model': 'model_dense_1.keras', 'k': 2, 'L': 3, 'table_size': 3750, 'window_size': 0, 'query_trick': True}. 


Trial parameters: {'k': 1, 'L': 7, 'table_size': 3750, 'window_size': 1, 'query_trick': False}


[I 2023-12-31 02:03:00,781] Trial 5 finished with values: [0.999951183835149, 0.9263925799999997] and parameters: {'model': 'model_dense_43.keras', 'k': 8, 'L': 4, 'table_size': 7500, 'window_size': 0, 'query_trick': False}. 


Trial parameters: {'k': 7, 'L': 10, 'table_size': 7500, 'window_size': 1, 'query_trick': False}


[I 2023-12-31 02:03:01,272] Trial 16 finished with values: [1.8048059634390878, 0.0038952999999999982] and parameters: {'model': 'model_conv_12.keras', 'k': 6, 'L': 10, 'table_size': 7500, 'window_size': 1, 'query_trick': False}. 


Trial parameters: {'k': 1, 'L': 1, 'table_size': 1875, 'window_size': 1, 'query_trick': True}


[I 2023-12-31 02:03:01,881] Trial 7 finished with values: [0.9998639081922337, 1.1055373900000003] and parameters: {'model': 'model_dense_26.keras', 'k': 8, 'L': 5, 'table_size': 7500, 'window_size': 0, 'query_trick': False}. 
[I 2023-12-31 02:03:01,963] Trial 15 finished with values: [0.9998639081922337, 0.45592603] and parameters: {'model': 'model_dense_26.keras', 'k': 8, 'L': 2, 'table_size': 7500, 'window_size': 0, 'query_trick': True}. 


Trial parameters: {'k': 10, 'L': 3, 'table_size': 1875, 'window_size': 0, 'query_trick': True}
Trial parameters: {'k': 2, 'L': 3, 'table_size': 3750, 'window_size': 0, 'query_trick': False}


[I 2023-12-31 02:03:03,780] Trial 22 finished with values: [1.4050596036710312, 0.009946059999999998] and parameters: {'model': 'model_conv_12.keras', 'k': 1, 'L': 7, 'table_size': 3750, 'window_size': 1, 'query_trick': False}. 


Trial parameters: {'k': 7, 'L': 4, 'table_size': 1875, 'window_size': 0, 'query_trick': False}


[W 2023-12-31 02:03:04,522] Trial 24 failed with parameters: {'model': 'model_dense_43.keras', 'k': 1, 'L': 1, 'table_size': 1875, 'window_size': 1, 'query_trick': True} because of the following error: The value nan is not acceptable.
[W 2023-12-31 02:03:04,523] Trial 24 failed with value (nan, 0.0012284099999999999).


Trial parameters: {'k': 1, 'L': 5, 'table_size': 7500, 'window_size': 1, 'query_trick': True}


[I 2023-12-31 02:03:05,183] Trial 1 finished with values: [0.9999829171257432, 1.4241466599999995] and parameters: {'model': 'model_conv_19.keras', 'k': 4, 'L': 6, 'table_size': 3750, 'window_size': 0, 'query_trick': False}. 


Trial parameters: {'k': 8, 'L': 4, 'table_size': 7500, 'window_size': 0, 'query_trick': True}


[I 2023-12-31 02:03:07,454] Trial 23 finished with values: [2.6058060111555226, 0.0019794000000000005] and parameters: {'model': 'model_dense_26.keras', 'k': 7, 'L': 10, 'table_size': 7500, 'window_size': 1, 'query_trick': False}. 


Trial parameters: {'k': 8, 'L': 5, 'table_size': 7500, 'window_size': 0, 'query_trick': True}


[I 2023-12-31 02:03:08,215] Trial 18 finished with values: [0.999951183835149, 0.8364890399999999] and parameters: {'model': 'model_dense_43.keras', 'k': 3, 'L': 3, 'table_size': 1875, 'window_size': 0, 'query_trick': False}. 


Trial parameters: {'k': 2, 'L': 7, 'table_size': 3750, 'window_size': 1, 'query_trick': True}


[I 2023-12-31 02:03:09,968] Trial 28 finished with values: [1.396509954755074, 0.003735940000000002] and parameters: {'model': 'model_dense_1.keras', 'k': 1, 'L': 5, 'table_size': 7500, 'window_size': 1, 'query_trick': True}. 


Trial parameters: {'k': 1, 'L': 4, 'table_size': 1875, 'window_size': 1, 'query_trick': True}


[I 2023-12-31 02:03:10,996] Trial 26 finished with values: [0.999908201704201, 0.6448856699999999] and parameters: {'model': 'model_conv_46.keras', 'k': 2, 'L': 3, 'table_size': 3750, 'window_size': 0, 'query_trick': False}. 


Trial parameters: {'k': 1, 'L': 4, 'table_size': 1875, 'window_size': 1, 'query_trick': False}


[I 2023-12-31 02:03:12,044] Trial 25 finished with values: [0.999908201704201, 0.7130813599999999] and parameters: {'model': 'model_conv_46.keras', 'k': 10, 'L': 3, 'table_size': 1875, 'window_size': 0, 'query_trick': True}. 


Trial parameters: {'k': 8, 'L': 10, 'table_size': 7500, 'window_size': 1, 'query_trick': True}


[W 2023-12-31 02:03:14,639] Trial 31 failed with parameters: {'model': 'model_conv_19.keras', 'k': 2, 'L': 7, 'table_size': 3750, 'window_size': 1, 'query_trick': True} because of the following error: The value nan is not acceptable.
[W 2023-12-31 02:03:14,641] Trial 31 failed with value (nan, 0.002631430000000002).
[I 2023-12-31 02:03:14,645] Trial 32 finished with values: [1.3990318679469138, 0.003999400000000002] and parameters: {'model': 'model_dense_1.keras', 'k': 1, 'L': 4, 'table_size': 1875, 'window_size': 1, 'query_trick': True}. 
[I 2023-12-31 02:03:15,173] Trial 33 finished with values: [1.5373577126413358, 0.005075040000000002] and parameters: {'model': 'model_dense_43.keras', 'k': 1, 'L': 4, 'table_size': 1875, 'window_size': 1, 'query_trick': False}. 
[I 2023-12-31 02:03:16,028] Trial 21 finished with values: [0.9999845110546619, 1.2037005800000002] and parameters: {'model': 'model_dense_1.keras', 'k': 4, 'L': 4, 'table_size': 3750, 'window_size': 0, 'query_trick': False}

KeyboardInterrupt: 

In [None]:
df = lsh_study.trials_dataframe()

df_sorted = df.copy(deep=True)
df_sorted = df_sorted.dropna(subset=['value'])
df_sorted = df_sorted.sort_values(by=['value'], ascending=True)
df_sorted = df_sorted.reset_index(drop=True)
df_sorted

In [None]:
plot_pareto_front(lsh_study, target_names=['aaf', 'average_time'])

In [None]:
plot_optimization_history(lsh_study, target = lambda t: t.values[0], target_name = 'aaf')

In [None]:
plot_optimization_history(lsh_study, target = lambda t: t.values[1], target_name = 'average_time')

In [None]:
plot_slice(lsh_study, target = lambda t: t.values[0], target_name = 'aaf')

In [None]:
plot_slice(lsh_study, target = lambda t: t.values[1], target_name = 'average_time')