# Grid Search for non-Clustering Algorithms in Latent Space (+ Comparison to Initial Space)

In [None]:
import os

from multiprocessing.pool import ThreadPool

import ctypes
from ctypes import *

import numpy as np

from tensorflow.keras.models import load_model

from autoencoder import Autoencoder

from helper_funcs import *

import pandas
pandas.set_option('display.max_rows', None)

from params import get_aaf

2024-01-04 22:03:29.997994: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-04 22:03:30.062941: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-04 22:03:30.063789: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
models = os.listdir('./models/')

dataset = b'MNIST/input.dat'
query   = b'MNIST/query.dat'

model_to_files = {}
for i, model in enumerate(models):
    normalized_dataset = b'MNIST/' + models[i].removesuffix('.keras').encode() + b'_normalized_dataset.dat'
    normalized_query   = b'MNIST/' + models[i].removesuffix('.keras').encode() + b'_normalized_query.dat'
    encoded_dataset    = b'MNIST/' + models[i].removesuffix('.keras').encode() + b'_encoded_dataset.dat'
    encoded_query      = b'MNIST/' + models[i].removesuffix('.keras').encode() + b'_encoded_query.dat'
    decoded_dataset    = b'MNIST/' + models[i].removesuffix('.keras').encode() + b'_decoded_dataset.dat'
    decoded_query      = b'MNIST/' + models[i].removesuffix('.keras').encode() + b'_decoded_query.dat'

    model_to_files.update({models[i] : [normalized_dataset, normalized_query,
                                        encoded_dataset, encoded_query,
                                        decoded_dataset, decoded_query]})

n = 60000

In [3]:
for model in model_to_files:
    normalized_dataset, normalized_query, encoded_dataset, encoded_query, decoded_dataset, decoded_query = model_to_files[model]

    model = b'models/' + model.encode()

    # load model
    autoencoder = load_model(model.decode())
    shape = autoencoder.layers[-2].output_shape[1:] # get shape of encoded layer

    # load dataset
    x_train = load_dataset(dataset)
    x_train = x_train.astype('float32') / 255.
    x_test = load_dataset(query)
    x_test = x_test.astype('float32') / 255.
    if len(shape) == 3: # if model type is convolutional
        x_train = np.reshape(x_train, (len(x_train), 28, 28, 1))
        x_test = np.reshape(x_test, (len(x_test), 28, 28, 1))
    else:
        x_train = np.reshape(x_train, (len(x_train), 784))
        x_test = np.reshape(x_test, (len(x_test), 784))

    encoded_train = autoencoder.encode(x_train)
    encoded_test = autoencoder.encode(x_test)

    # deflatten encoded datasets
    encoded_train = deflatten_encoded(encoded_train, shape)
    encoded_test = deflatten_encoded(encoded_test, shape)

    # decode encoded datasets
    decoded_train = autoencoder.decode(encoded_train)
    decoded_test = autoencoder.decode(encoded_test)

    # save original datasets normalized
    save_decoded_binary(x_train, normalized_dataset)
    save_decoded_binary(x_test, normalized_query)

    # normalize encoded datasets
    encoded_train = normalize(encoded_train)
    encoded_test = normalize(encoded_test)

    # save encoded datasets
    save_encoded_binary(encoded_train, encoded_dataset)
    save_encoded_binary(encoded_test, encoded_query)

    # normalize decoded datasets
    decoded_train = normalize(decoded_train)
    decoded_test = normalize(decoded_test)

    # save decoded datasets
    save_decoded_binary(decoded_train, decoded_dataset)
    save_decoded_binary(decoded_test, decoded_query)



In [4]:
best_params_lsh = [7, 4, 1875, 0, 0.6]      # k, L, table_size, query_trick, window
best_params_hypercube = [3, 67, 1000, 0.42] # k, M, probes, window

# k, E, R
best_params_gnns = {'model_conv_46.keras':  [92, 44, 9],
                    'model_conv_12.keras':  [86, 59, 8],
                    'model_conv_19.keras':  [96, 87, 7],
                    'model_dense_26.keras': [70, 69, 9],
                    'model_dense_43.keras': [100, 66, 10], 
                    'model_dense_1.keras':  [64, 58, 10]}

# l
best_params_mrng = {'model_dense_26.keras': [883],
                    'model_conv_46.keras':  [859],
                    'model_conv_19.keras':  [507],
                    'model_dense_1.keras':  [428],
                    'model_dense_43.keras': [818], 
                    'model_conv_12.keras':  [814]}

# l, m, k, lq
best_params_nsg = {'model_dense_26.keras': [987, 52, 69, 667],
                   'model_conv_46.keras':  [987, 43, 87, 476],
                   'model_dense_43.keras': [960, 102, 86, 768],
                   'model_dense_1.keras':  [996, 11, 98, 657],
                   'model_conv_12.keras':  [635, 197, 50, 907],
                   'model_conv_19.keras':  [663, 99, 41, 533]}

rows = []

In [5]:
def run_brute_force(model):
    normalized_dataset, normalized_query, encoded_dataset, encoded_query, decoded_dataset, decoded_query = model_to_files[model]

    conf = {
        'model': b'BRUTE',
        'vals': [],
        'dataset': normalized_dataset,
        'query': normalized_query,
        'encoded_dataset': encoded_dataset,
        'decoded_dataset': decoded_dataset,
    }

    aaf_lat_init, average_time = get_aaf(100, conf)

    print("Average time           :", average_time.value)
    print("AAF (latent to initial):", aaf_lat_init.value)
    print("-------------------------")

    rows.append([model, 'BRUTE', average_time.value, aaf_lat_init.value])

def run_lsh(model):
    normalized_dataset, normalized_query, encoded_dataset, encoded_query, decoded_dataset, decoded_query = model_to_files[model]
    
    conf = {
        'model': b'LSH',
        'vals': best_params_lsh[:-1],
        'window': best_params_lsh[-1],
        'dataset': normalized_dataset,
        'query': normalized_query,
        'encoded_dataset': encoded_dataset,
        'decoded_dataset': decoded_dataset,
    }

    aaf_lat_init, average_time = get_aaf(100, conf)

    print("Average time           :", average_time.value)
    print("AAF (latent to initial):", aaf_lat_init.value)
    print("-------------------------")

    rows.append([model, 'LSH', average_time.value, aaf_lat_init.value])

def run_hypercube(model):
    normalized_dataset, normalized_query, encoded_dataset, encoded_query, decoded_dataset, decoded_query = model_to_files[model]
    
    conf = {
        'model': b'CUBE',
        'vals': best_params_hypercube[:-1],
        'window': best_params_hypercube[-1],
        'dataset': normalized_dataset,
        'query': normalized_query,
        'encoded_dataset': encoded_dataset,
        'decoded_dataset': decoded_dataset,
    }

    aaf_lat_init, average_time = get_aaf(100, conf)

    print("Average time           :", average_time.value)
    print("AAF (latent to initial):", aaf_lat_init.value)
    print("-------------------------")

    rows.append([model, 'CUBE', average_time.value, aaf_lat_init.value])

def run_gnns(model):
    normalized_dataset, normalized_query, encoded_dataset, encoded_query, decoded_dataset, decoded_query = model_to_files[model]
    
    conf = {
        'model': b'GNNS',
        'vals': best_params_gnns[model],
        'dataset': normalized_dataset,
        'query': normalized_query,
        'encoded_dataset': encoded_dataset,
        'decoded_dataset': decoded_dataset,
    }

    aaf_lat_init, average_time = get_aaf(100, conf)

    print("Average time           :", average_time.value)
    print("AAF (latent to initial):", aaf_lat_init.value)
    print("-------------------------")

    rows.append([model, 'GNNS', average_time.value, aaf_lat_init.value])

def run_mrng(model):
    normalized_dataset, normalized_query, encoded_dataset, encoded_query, decoded_dataset, decoded_query = model_to_files[model]
    
    conf = {
        'model': b'MRNG',
        'vals': best_params_mrng[model],
        'dataset': normalized_dataset,
        'query': normalized_query,
        'encoded_dataset': encoded_dataset,
        'decoded_dataset': decoded_dataset,
    }

    aaf_lat_init, average_time = get_aaf(100, conf)

    print("Average time           :", average_time.value)
    print("AAF (latent to initial):", aaf_lat_init.value)
    print("-------------------------")

    rows.append([model, 'MRNG', average_time.value, aaf_lat_init.value])

def run_nsg(model):
    normalized_dataset, normalized_query, encoded_dataset, encoded_query, decoded_dataset, decoded_query = model_to_files[model]
    
    conf = {
        'model': b'NSG',
        'vals': best_params_nsg[model],
        'dataset': normalized_dataset,
        'query': normalized_query,
        'encoded_dataset': encoded_dataset,
        'decoded_dataset': decoded_dataset,
    }

    aaf_lat_init, average_time = get_aaf(100, conf)

    print("Average time           :", average_time.value)
    print("AAF (latent to initial):", aaf_lat_init.value)
    print("-------------------------")

    rows.append([model, 'NSG', average_time.value, aaf_lat_init.value])

In [6]:
algorithms_to_functions = {'BRUTE': run_brute_force,
                           'LSH'  : run_lsh,
                           'CUBE' : run_hypercube,
                           'GNNS' : run_gnns,
                           'MRNG' : run_mrng,
                           'NSG'  : run_nsg}

pool = ThreadPool(processes=4)

for model in models:
    for algorithm in algorithms_to_functions:
        pool.apply_async(algorithms_to_functions[algorithm], (model,))

pool.close()
pool.join()

Average time           : 0.00015760000000000004
AAF (latent to initial): 2.6849539011470784
-------------------------
Average time           : 0.022484710000000012
AAF (latent to initial): 2.7069796529387666
-------------------------
Average time           : 0.004404469999999999
AAF (latent to initial): 2.6927031712536427
-------------------------
Average time           : 0.004235639999999999
AAF (latent to initial): 2.696657502556476
-------------------------
Average time           : 0.04212557
AAF (latent to initial): 2.7011758102939525
-------------------------
Average time           : 0.004493399999999998
AAF (latent to initial): 2.7018941933902223
-------------------------
Average time           : 0.00018291
AAF (latent to initial): 2.6878864962969096
-------------------------
Average time           : 0.006621910000000004
AAF (latent to initial): 2.693092026981012
-------------------------
Average time           : 0.00391214
AAF (latent to initial): 2.701824108480082
-------------

In [7]:
col_models, col_algorithms, col_average_time, col_aaf_lat_init = [], [], [], []

for row in rows:
    model, algorithm, average_time, aaf_lat_init = row

    col_models.append(model)
    col_algorithms.append(algorithm)
    col_average_time.append(average_time)
    col_aaf_lat_init.append(aaf_lat_init)

col_dict = {'model': col_models, 'algorithm': col_algorithms,
            'average time': col_average_time, 'AAF (latent to initial)': col_aaf_lat_init}

df = pandas.DataFrame(data=col_dict)
df

Unnamed: 0,model,algorithm,average time,AAF (latent to initial)
0,model_conv_46.keras,CUBE,0.000158,2.684954
1,model_conv_46.keras,BRUTE,0.022485,2.70698
2,model_conv_46.keras,LSH,0.004404,2.692703
3,model_conv_46.keras,GNNS,0.004236,2.696658
4,model_conv_12.keras,BRUTE,0.042126,2.701176
5,model_conv_12.keras,LSH,0.004493,2.701894
6,model_conv_12.keras,CUBE,0.000183,2.687886
7,model_conv_12.keras,GNNS,0.006622,2.693092
8,model_conv_46.keras,NSG,0.003912,2.701824
9,model_dense_43.keras,BRUTE,0.036824,2.70048


In [8]:
df_sorted = df.copy(deep=True)
df_sorted = df_sorted.sort_values(by='model', ascending=True)
df_sorted = df_sorted.reset_index(drop=True)
df_sorted

Unnamed: 0,model,algorithm,average time,AAF (latent to initial)
0,model_conv_12.keras,NSG,0.008182,2.703686
1,model_conv_12.keras,BRUTE,0.042126,2.701176
2,model_conv_12.keras,LSH,0.004493,2.701894
3,model_conv_12.keras,CUBE,0.000183,2.687886
4,model_conv_12.keras,GNNS,0.006622,2.693092
5,model_conv_12.keras,MRNG,0.005959,2.687886
6,model_conv_19.keras,CUBE,0.0004,2.687194
7,model_conv_19.keras,BRUTE,0.035867,2.68182
8,model_conv_19.keras,LSH,0.003861,2.671276
9,model_conv_19.keras,MRNG,0.002522,2.687194


In [9]:
df_sorted = df.copy(deep=True)
df_sorted = df_sorted.sort_values(by='algorithm', ascending=True)
df_sorted = df_sorted.reset_index(drop=True)
df_sorted

Unnamed: 0,model,algorithm,average time,AAF (latent to initial)
0,model_conv_46.keras,BRUTE,0.022485,2.70698
1,model_dense_1.keras,BRUTE,0.04731,2.687364
2,model_conv_12.keras,BRUTE,0.042126,2.701176
3,model_dense_26.keras,BRUTE,0.029702,2.6896
4,model_dense_43.keras,BRUTE,0.036824,2.70048
5,model_conv_19.keras,BRUTE,0.035867,2.68182
6,model_conv_46.keras,CUBE,0.000158,2.684954
7,model_dense_1.keras,CUBE,0.000204,2.687194
8,model_conv_12.keras,CUBE,0.000183,2.687886
9,model_conv_19.keras,CUBE,0.0004,2.687194
