# Comparison of results in initial and latent space

[1. Optimize LSH](#1-optimize-lsh)

[2. Optimize Hypercube](#2-optimize-hypercube)

[3. Optimize K-Means](#3-optimize-k-means)

[4. Optimize GNNS](#4-optimize-gnns)

[5. Optimize MRNG](#5-optimize-mrng)

[6. Optimize NSG](#6-optimize-nsg)

[7. Results](#7-results)

[8. Conclusions](#8-conclusions)

# Import libraries

In [1]:
import os

import numpy as np

from tensorflow.keras.models import load_model

from autoencoder import Autoencoder
from helper_funcs import *

import optuna
from optuna.visualization import plot_pareto_front, plot_optimization_history, plot_slice

2023-12-27 13:35:58.236079: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-27 13:35:58.280702: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-27 13:35:58.281270: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Original-space MNIST files that every autoencoder encodes.
dataset = 'MNIST/input.dat'
query = 'MNIST/query.dat'

# File names of the saved autoencoders. os.listdir() returns entries in
# arbitrary order and may contain non-model entries, so keep only `.keras`
# files and sort them to make the processing order reproducible.
model_names = sorted(
    name for name in os.listdir('./models/') if name.endswith('.keras')
)

# Paths of the saved models, relative to the notebook directory.
models = ['models/' + name for name in model_names]

# Map each model path to the files that will receive its latent-space output:
# [encoded dataset path, encoded query path].
model_to_files = {
    'models/' + name: [
        'MNIST/' + name.removesuffix('.keras') + '_output_dataset.dat',
        'MNIST/' + name.removesuffix('.keras') + '_query_dataset.dat',
    ]
    for name in model_names
}

In [3]:
# Encode the dataset and the query set with every saved autoencoder and write
# the encoded vectors to the per-model output files.

# The raw data does not depend on the model, so read and normalise it once
# instead of re-loading both files on every iteration.
raw_train = load_dataset(dataset).astype('float32') / 255.
raw_test = load_dataset(query).astype('float32') / 255.

for model, (output_dataset, output_query) in model_to_files.items():
    # load model
    autoencoder = load_model(model)
    shape = autoencoder.layers[-2].output_shape[1:]  # get shape of encoded layer

    # A 3-D layer output marks a convolutional model, which is fed
    # image-shaped input; every other model is fed flat 784-vectors.
    input_shape = (28, 28, 1) if len(shape) == 3 else (784,)
    x_train = np.reshape(raw_train, (len(raw_train),) + input_shape)
    x_test = np.reshape(raw_test, (len(raw_test),) + input_shape)

    encoded_train = autoencoder.encode(x_train)
    encoded_test = autoencoder.encode(x_test)

    # save encoded datasets
    save_encoded_binary(encoded_train, output_dataset)
    save_encoded_binary(encoded_test, output_query)

2023-12-27 13:36:08.994509: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-27 13:36:09.001451: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


  36/1875 [..............................] - ETA: 2s  

2023-12-27 13:36:09.635966: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 188160000 exceeds 10% of free system memory.


  1/313 [..............................] - ETA: 21s

2023-12-27 13:36:19.795947: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 188160000 exceeds 10% of free system memory.


  10/1875 [..............................] - ETA: 11s 

2023-12-27 13:36:21.423428: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 188160000 exceeds 10% of free system memory.




2023-12-27 13:36:51.690319: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 188160000 exceeds 10% of free system memory.


  11/1875 [..............................] - ETA: 10s 

2023-12-27 13:36:55.261238: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 188160000 exceeds 10% of free system memory.




# 1. Optimize LSH

To skip logs, click [here](#visualize-lsh-study-results).

In [None]:
def objective_lsh(trial):
    # code

In [None]:
%%time
# Optimize LSH with the constrained NSGA-II sampler. A failed attempt discards
# its study and starts over with a fresh one, up to 10 attempts in total.
for attempt in range(10):
    try:
        sampler = optuna.samplers.NSGAIISampler(constraints_func=constraints)
        lsh_study = optuna.create_study(study_name='lsh', directions=['minimize', 'minimize'], sampler=sampler)
        lsh_study.optimize(objective_lsh, n_trials=50)
    except Exception as exc:
        # Catch Exception instead of using a bare `except:` so that
        # KeyboardInterrupt/SystemExit still stop the cell rather than
        # silently starting another attempt; also report what went wrong.
        print("Trial failed, trying again...")
        print(" Error = {!r}".format(exc))
        continue

    # Reporting runs outside the `try`: an error while printing is raised
    # instead of triggering a full re-run of the optimization.
    print("-------------------- Best trials --------------------")
    trials = sorted(lsh_study.best_trials, key=lambda x: x.values)
    # NOTE(review): every entry of best_trials is printed without filtering;
    # whether infeasible trials are already excluded depends on Optuna's
    # best_trials behaviour - confirm for the installed version.
    for trial in trials:
        print("Trial no. {}".format(trial.number))
        print(" Values = {}, Constraints = {}".format(trial.values, trial.user_attrs["constraint"]))
        print(" Params = {}".format(trial.params))
    break

In [None]:
# All trials of the LSH study as a DataFrame.
df = lsh_study.trials_dataframe()

# The study is multi-objective, so the objectives are exported as `values_<i>`
# columns instead of a single `value` column; selecting `value` directly
# raises a KeyError. Pick whichever objective columns are present.
value_cols = [col for col in df.columns if col == 'value' or col.startswith('values_')]

# Drop trials without recorded objective values and rank the rest by the
# objectives in order, smallest first. dropna() already returns a new frame,
# so no explicit copy is needed.
df_sorted = (
    df.dropna(subset=value_cols)
      .sort_values(by=value_cols, ascending=True)
      .reset_index(drop=True)
)
df_sorted

## Visualize LSH study results

In [None]:
# Pareto front of the LSH study; objectives are labelled 'aaf' and 'average_time'.
plot_pareto_front(
    lsh_study,
    target_names=['aaf', 'average_time'],
)

In [None]:
# Optimization history of the first objective (values[0]), labelled 'aaf'.
plot_optimization_history(
    lsh_study,
    target=lambda trial: trial.values[0],
    target_name='aaf',
)

In [None]:
# Optimization history of the second objective (values[1]), labelled 'average_time'.
plot_optimization_history(
    lsh_study,
    target=lambda trial: trial.values[1],
    target_name='average_time',
)

In [None]:
# Slice plot of the first objective (values[0]), labelled 'aaf'.
plot_slice(
    lsh_study,
    target=lambda trial: trial.values[0],
    target_name='aaf',
)

In [None]:
# Slice plot of the second objective (values[1]), labelled 'average_time'.
plot_slice(
    lsh_study,
    target=lambda trial: trial.values[1],
    target_name='average_time',
)

# 2. Optimize Hypercube

To skip logs, click [here](#visualize-hypercube-study-results).

In [None]:
def objective_hypercube(trial):
    # code

In [None]:
%%time
# Run the Hypercube study: 50 trials, both objectives minimised.
hypercube_study = optuna.create_study(
    study_name='hypercube',
    directions=['minimize', 'minimize'],
)
hypercube_study.optimize(objective_hypercube, n_trials=50)
print("-----------------------------------------------------")

# Report the study's best trials, ordered by their objective values.
trials = list(hypercube_study.best_trials)
trials.sort(key=lambda t: t.values)
for trial in trials:
    print(
        "Trial no. {}".format(trial.number),
        " Values = {}".format(trial.values),
        " Params = {}".format(trial.params),
        sep="\n",
    )

In [None]:
# All trials of the Hypercube study as a DataFrame.
df = hypercube_study.trials_dataframe()

# The study is multi-objective, so the objectives are exported as `values_<i>`
# columns instead of a single `value` column; selecting `value` directly
# raises a KeyError. Pick whichever objective columns are present.
value_cols = [col for col in df.columns if col == 'value' or col.startswith('values_')]

# Drop trials without recorded objective values and rank the rest by the
# objectives in order, smallest first. dropna() already returns a new frame,
# so no explicit copy is needed.
df_sorted = (
    df.dropna(subset=value_cols)
      .sort_values(by=value_cols, ascending=True)
      .reset_index(drop=True)
)
df_sorted

## Visualize Hypercube study results

In [None]:
# Pareto front of the Hypercube study; objectives are labelled 'aaf' and 'average_time'.
plot_pareto_front(
    hypercube_study,
    target_names=['aaf', 'average_time'],
)

In [None]:
# Optimization history of the first objective (values[0]), labelled 'aaf'.
plot_optimization_history(
    hypercube_study,
    target=lambda trial: trial.values[0],
    target_name='aaf',
)

In [None]:
# Optimization history of the second objective (values[1]), labelled 'average_time'.
plot_optimization_history(
    hypercube_study,
    target=lambda trial: trial.values[1],
    target_name='average_time',
)

In [None]:
# Slice plot of the first objective (values[0]), labelled 'aaf'.
plot_slice(
    hypercube_study,
    target=lambda trial: trial.values[0],
    target_name='aaf',
)

In [None]:
# Slice plot of the second objective (values[1]), labelled 'average_time'.
plot_slice(
    hypercube_study,
    target=lambda trial: trial.values[1],
    target_name='average_time',
)

# 3. Optimize K-Means

To skip logs, click [here](#visualize-k-means-study-results).

In [None]:
def objective_kmeans(trial):
    # code

In [None]:
%%time
# Optimize K-Means. A failed attempt discards its study and starts over with a
# fresh one, up to 10 attempts in total.
for attempt in range(10):
    try:
        kmeans_study = optuna.create_study(study_name='kmeans', directions=['minimize', 'minimize'])
        kmeans_study.optimize(objective_kmeans, n_trials=50)
    except Exception as exc:
        # Catch Exception instead of using a bare `except:` so that
        # KeyboardInterrupt/SystemExit still stop the cell rather than
        # silently starting another attempt; also report what went wrong.
        print("Trial failed, trying again...")
        print(" Error = {!r}".format(exc))
        continue

    # Reporting runs outside the `try`: an error while printing is raised
    # instead of triggering a full re-run of the optimization.
    print("-------------------- Best trials --------------------")
    trials = sorted(kmeans_study.best_trials, key=lambda x: x.values)
    for trial in trials:
        print("Trial no. {}".format(trial.number))
        print(" Values = {}".format(trial.values))
        print(" Params = {}".format(trial.params))
    break

In [None]:
# All trials of the K-Means study as a DataFrame.
df = kmeans_study.trials_dataframe()

# The study is multi-objective, so the objectives are exported as `values_<i>`
# columns instead of a single `value` column; selecting `value` directly
# raises a KeyError. Pick whichever objective columns are present.
value_cols = [col for col in df.columns if col == 'value' or col.startswith('values_')]

# Drop trials without recorded objective values and rank the rest by the
# objectives in order, smallest first. dropna() already returns a new frame,
# so no explicit copy is needed.
df_sorted = (
    df.dropna(subset=value_cols)
      .sort_values(by=value_cols, ascending=True)
      .reset_index(drop=True)
)
df_sorted

## Visualize K-Means study results

In [None]:
# Pareto front of the K-Means study; objectives are labelled 'aaf' and 'average_time'.
plot_pareto_front(
    kmeans_study,
    target_names=['aaf', 'average_time'],
)

In [None]:
# Optimization history of the first objective (values[0]), labelled 'aaf'.
plot_optimization_history(
    kmeans_study,
    target=lambda trial: trial.values[0],
    target_name='aaf',
)

In [None]:
# Optimization history of the second objective (values[1]), labelled 'average_time'.
plot_optimization_history(
    kmeans_study,
    target=lambda trial: trial.values[1],
    target_name='average_time',
)

In [None]:
# Slice plot of the first objective (values[0]), labelled 'aaf'.
plot_slice(
    kmeans_study,
    target=lambda trial: trial.values[0],
    target_name='aaf',
)

In [None]:
# Slice plot of the second objective (values[1]), labelled 'average_time'.
plot_slice(
    kmeans_study,
    target=lambda trial: trial.values[1],
    target_name='average_time',
)

# 4. Optimize GNNS

To skip logs, click [here](#visualize-gnns-study-results).

In [None]:
def objective_gnns(trial):
    # code

In [None]:
%%time
# Optimize GNNS. A failed attempt discards its study and starts over with a
# fresh one, up to 10 attempts in total.
for attempt in range(10):
    try:
        gnns_study = optuna.create_study(study_name='gnns', directions=['minimize', 'minimize'])
        gnns_study.optimize(objective_gnns, n_trials=50)
    except Exception as exc:
        # Catch Exception instead of using a bare `except:` so that
        # KeyboardInterrupt/SystemExit still stop the cell rather than
        # silently starting another attempt; also report what went wrong.
        print("Trial failed, trying again...")
        print(" Error = {!r}".format(exc))
        continue

    # Reporting runs outside the `try`: an error while printing is raised
    # instead of triggering a full re-run of the optimization.
    print("-------------------- Best trials --------------------")
    trials = sorted(gnns_study.best_trials, key=lambda x: x.values)
    for trial in trials:
        print("Trial no. {}".format(trial.number))
        print(" Values = {}".format(trial.values))
        print(" Params = {}".format(trial.params))
    break

In [None]:
# All trials of the GNNS study as a DataFrame.
df = gnns_study.trials_dataframe()

# The study is multi-objective, so the objectives are exported as `values_<i>`
# columns instead of a single `value` column; selecting `value` directly
# raises a KeyError. Pick whichever objective columns are present.
value_cols = [col for col in df.columns if col == 'value' or col.startswith('values_')]

# Drop trials without recorded objective values and rank the rest by the
# objectives in order, smallest first. dropna() already returns a new frame,
# so no explicit copy is needed.
df_sorted = (
    df.dropna(subset=value_cols)
      .sort_values(by=value_cols, ascending=True)
      .reset_index(drop=True)
)
df_sorted

## Visualize GNNS study results

In [None]:
# Pareto front of the GNNS study; objectives are labelled 'aaf' and 'average_time'.
plot_pareto_front(
    gnns_study,
    target_names=['aaf', 'average_time'],
)

In [None]:
# Optimization history of the first objective (values[0]), labelled 'aaf'.
plot_optimization_history(
    gnns_study,
    target=lambda trial: trial.values[0],
    target_name='aaf',
)

In [None]:
# Optimization history of the second objective (values[1]), labelled 'average_time'.
plot_optimization_history(
    gnns_study,
    target=lambda trial: trial.values[1],
    target_name='average_time',
)

In [None]:
# Slice plot of the first objective (values[0]), labelled 'aaf'.
plot_slice(
    gnns_study,
    target=lambda trial: trial.values[0],
    target_name='aaf',
)

In [None]:
# Slice plot of the second objective (values[1]), labelled 'average_time'.
plot_slice(
    gnns_study,
    target=lambda trial: trial.values[1],
    target_name='average_time',
)

# 5. Optimize MRNG

To skip logs, click [here](#visualize-mrng-study-results).

In [None]:
def objective_mrng(trial):
    # code

In [None]:
%%time
# Optimize MRNG. A failed attempt discards its study and starts over with a
# fresh one, up to 10 attempts in total.
for attempt in range(10):
    try:
        mrng_study = optuna.create_study(study_name='mrng', directions=['minimize', 'minimize'])
        mrng_study.optimize(objective_mrng, n_trials=50)
    except Exception as exc:
        # Catch Exception instead of using a bare `except:` so that
        # KeyboardInterrupt/SystemExit still stop the cell rather than
        # silently starting another attempt; also report what went wrong.
        print("Trial failed, trying again...")
        print(" Error = {!r}".format(exc))
        continue

    # Reporting runs outside the `try`: an error while printing is raised
    # instead of triggering a full re-run of the optimization.
    print("-------------------- Best trials --------------------")
    trials = sorted(mrng_study.best_trials, key=lambda x: x.values)
    for trial in trials:
        print("Trial no. {}".format(trial.number))
        print(" Values = {}".format(trial.values))
        print(" Params = {}".format(trial.params))
    break

In [None]:
# All trials of the MRNG study as a DataFrame.
df = mrng_study.trials_dataframe()

# The study is multi-objective, so the objectives are exported as `values_<i>`
# columns instead of a single `value` column; selecting `value` directly
# raises a KeyError. Pick whichever objective columns are present.
value_cols = [col for col in df.columns if col == 'value' or col.startswith('values_')]

# Drop trials without recorded objective values and rank the rest by the
# objectives in order, smallest first. dropna() already returns a new frame,
# so no explicit copy is needed.
df_sorted = (
    df.dropna(subset=value_cols)
      .sort_values(by=value_cols, ascending=True)
      .reset_index(drop=True)
)
df_sorted

## Visualize MRNG study results

In [None]:
# Pareto front of the MRNG study; objectives are labelled 'aaf' and 'average_time'.
plot_pareto_front(
    mrng_study,
    target_names=['aaf', 'average_time'],
)

In [None]:
# Optimization history of the first objective (values[0]), labelled 'aaf'.
plot_optimization_history(
    mrng_study,
    target=lambda trial: trial.values[0],
    target_name='aaf',
)

In [None]:
# Optimization history of the second objective (values[1]), labelled 'average_time'.
plot_optimization_history(
    mrng_study,
    target=lambda trial: trial.values[1],
    target_name='average_time',
)

In [None]:
# Slice plot of the first objective (values[0]), labelled 'aaf'.
plot_slice(
    mrng_study,
    target=lambda trial: trial.values[0],
    target_name='aaf',
)

In [None]:
# Slice plot of the second objective (values[1]), labelled 'average_time'.
plot_slice(
    mrng_study,
    target=lambda trial: trial.values[1],
    target_name='average_time',
)

# 6. Optimize NSG

To skip logs, click [here](#visualize-nsg-study-results).

In [None]:
def objective_nsg(trial):
    # code

In [None]:
%%time
# Optimize NSG (100 trials, n_jobs=-1). A failed attempt discards its study
# and starts over with a fresh one, up to 10 attempts in total.
for attempt in range(10):
    try:
        nsg_study = optuna.create_study(study_name='nsg', directions=['minimize', 'minimize'])
        nsg_study.optimize(objective_nsg, n_trials=100, n_jobs=-1)
    except Exception as exc:
        # Catch Exception instead of using a bare `except:` so that
        # KeyboardInterrupt/SystemExit still stop the cell rather than
        # silently starting another attempt; also report what went wrong.
        print("Trial failed, trying again...")
        print(" Error = {!r}".format(exc))
        continue

    # Reporting runs outside the `try`: an error while printing is raised
    # instead of triggering a full re-run of the optimization.
    print("-------------------- Best trials --------------------")
    trials = sorted(nsg_study.best_trials, key=lambda x: x.values)
    for trial in trials:
        print("Trial no. {}".format(trial.number))
        print(" Values = {}".format(trial.values))
        print(" Params = {}".format(trial.params))
    break

In [None]:
# All trials of the NSG study as a DataFrame.
df = nsg_study.trials_dataframe()

# The study is multi-objective, so the objectives are exported as `values_<i>`
# columns instead of a single `value` column; selecting `value` directly
# raises a KeyError. Pick whichever objective columns are present.
value_cols = [col for col in df.columns if col == 'value' or col.startswith('values_')]

# Drop trials without recorded objective values and rank the rest by the
# objectives in order, smallest first. dropna() already returns a new frame,
# so no explicit copy is needed.
df_sorted = (
    df.dropna(subset=value_cols)
      .sort_values(by=value_cols, ascending=True)
      .reset_index(drop=True)
)
df_sorted

## Visualize NSG study results

In [None]:
# Pareto front of the NSG study; objectives are labelled 'aaf' and 'average_time'.
plot_pareto_front(
    nsg_study,
    target_names=['aaf', 'average_time'],
)

In [None]:
# Optimization history of the first objective (values[0]), labelled 'aaf'.
plot_optimization_history(
    nsg_study,
    target=lambda trial: trial.values[0],
    target_name='aaf',
)

In [None]:
# Optimization history of the second objective (values[1]), labelled 'average_time'.
plot_optimization_history(
    nsg_study,
    target=lambda trial: trial.values[1],
    target_name='average_time',
)

In [None]:
# Slice plot of the first objective (values[0]), labelled 'aaf'.
plot_slice(
    nsg_study,
    target=lambda trial: trial.values[0],
    target_name='aaf',
)

In [None]:
# Slice plot of the second objective (values[1]), labelled 'average_time'.
plot_slice(
    nsg_study,
    target=lambda trial: trial.values[1],
    target_name='average_time',
)

# 7. Results

# 8. Conclusions