In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
from utils.experiment import Experiment

In [None]:
# Read the training CSV. Column 0 is taken as the label vector `y`; every
# remaining column is taken as the flattened per-sample feature row.
df_train = pd.read_csv('../dataset/pointcloud_mnist_2d/train.csv')

y = df_train.iloc[:, 0].to_numpy()
X = df_train.iloc[:, 1:].to_numpy()

# Regroup each flat row into consecutive triplets, giving an array of shape
# (num_samples, row_length // 3, 3).
X = X.reshape(len(X), -1, 3)

In [None]:
# For every sample, count the triplets whose third component is strictly
# positive (presumably the non-padding points -- TODO confirm against the
# dataset format).
num_points = (X[..., 2] > 0).sum(axis=1)

In [None]:
# Median of the per-sample counts, cast to an integer (any fractional part is
# dropped by the cast). Passed as `ref_size` to every experiment below.
median_points = np.median(num_points)
set_size_median = median_points.astype(int)

### SWE — for a hash code length of 1024, `num_slices = 16` seems to be a good choice.

In [None]:
# Sweep over the neighbourhood sizes in `ks` for the 'swe' pooling
# (presumably sliced-Wasserstein embedding -- confirm in utils.experiment)
# combined with the 'faiss-lsh' index, printing one report per run.
dataset, ref = 'point_mnist', 'rand'
code_length, n_slices = 1024, 16
ks = [4, 8, 16]

for k in ks:
    # Keyword arguments are listed in the same order as the original call.
    swe_kwargs = dict(
        ref_func=ref,
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
        num_slices=n_slices,
    )
    exp = Experiment(dataset, 'swe', 'faiss-lsh', **swe_kwargs)
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:09<00:00, 1036.87it/s]


{'dataset': 'point_mnist', 'pooling': 'swe', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'emb_time_per_sample': 0.001001107931137085, 'inf_time_per_sample': 0.007409391689300537, 'acc': 0.9239, 'precision_k': 0.8998}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:09<00:00, 1041.30it/s]


{'dataset': 'point_mnist', 'pooling': 'swe', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'emb_time_per_sample': 0.0009994057178497313, 'inf_time_per_sample': 0.00728784453868866, 'acc': 0.9242, 'precision_k': 0.8871375}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:09<00:00, 1042.93it/s]


{'dataset': 'point_mnist', 'pooling': 'swe', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'emb_time_per_sample': 0.0009984215021133422, 'inf_time_per_sample': 0.008004561114311219, 'acc': 0.9173, 'precision_k': 0.870675}


### WE

In [None]:
# Sweep over the neighbourhood sizes in `ks` for the 'we' pooling with the
# 'randn' reference function, printing one report per run.
dataset, code_length = 'point_mnist', 1024
ks = [4, 8, 16]
ref = 'randn'

for k in ks:
    # Keyword arguments are listed in the same order as the original call.
    we_kwargs = dict(
        ref_func=ref,
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
    )
    exp = Experiment(dataset, 'we', 'faiss-lsh', **we_kwargs)
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
compute base embedding...


100%|██████████| 60000/60000 [05:10<00:00, 193.04it/s]


compute query embedding...


100%|██████████| 10000/10000 [00:49<00:00, 201.14it/s]


{'dataset': 'point_mnist', 'pooling': 'we', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'emb_time_per_sample': 0.005010288023948669, 'inf_time_per_sample': 0.007546841073036194, 'acc': 0.8935, 'precision_k': 0.851825}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:49<00:00, 201.97it/s]


{'dataset': 'point_mnist', 'pooling': 'we', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'emb_time_per_sample': 0.004987445116043091, 'inf_time_per_sample': 0.007168257474899292, 'acc': 0.8957, 'precision_k': 0.83725}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:49<00:00, 202.01it/s]


{'dataset': 'point_mnist', 'pooling': 'we', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'emb_time_per_sample': 0.004981817150115967, 'inf_time_per_sample': 0.007156570935249329, 'acc': 0.8917, 'precision_k': 0.82075625}


In [None]:
# Same 'we' sweep as the previous cell, but with the 'rand' reference
# function instead of 'randn'.
dataset, code_length = 'point_mnist', 1024
ks = [4, 8, 16]
ref = 'rand'

for k in ks:
    # Keyword arguments are listed in the same order as the original call.
    we_kwargs = {
        'ref_func': ref,
        'k': k,
        'ref_size': set_size_median,
        'code_length': code_length,
    }
    exp = Experiment(dataset, 'we', 'faiss-lsh', **we_kwargs)
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:44<00:00, 226.42it/s]


{'dataset': 'point_mnist', 'pooling': 'we', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'emb_time_per_sample': 0.004448658466339112, 'inf_time_per_sample': 0.007181220746040344, 'acc': 0.9176, 'precision_k': 0.8842}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:42<00:00, 232.96it/s]


{'dataset': 'point_mnist', 'pooling': 'we', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'emb_time_per_sample': 0.004326587533950805, 'inf_time_per_sample': 0.007151295781135559, 'acc': 0.9153, 'precision_k': 0.8714875}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:42<00:00, 233.69it/s]


{'dataset': 'point_mnist', 'pooling': 'we', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'emb_time_per_sample': 0.004333140254020691, 'inf_time_per_sample': 0.0071627659797668455, 'acc': 0.9099, 'precision_k': 0.85620625}


### FS

In [None]:
# Sweep over the neighbourhood sizes in `ks` for the 'fs' pooling (presumably
# featurewise-sort pooling -- confirm in utils.experiment) with 'faiss-lsh'.
#
# The configuration is restated here rather than inherited from whichever
# cell ran last: earlier cells assign different `ref` values ('randn' and
# 'rand'), so the reference function used by this cell would otherwise depend
# on execution order. 'rand' is the value a top-to-bottom run supplies.
dataset = 'point_mnist'
code_length = 1024
ks = [4, 8, 16]
ref = 'rand'

# NOTE(review): the accuracies recorded in this cell's saved output are about
# 0.06-0.07, which would be below chance if the dataset has 10 balanced
# classes -- worth checking the 'fs' embedding path before trusting them.
for k in ks:
    exp = Experiment(dataset, 'fs', 'faiss-lsh', ref_func=ref,
                     k=k, ref_size=set_size_median, code_length=code_length)
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:02<00:00, 3746.93it/s]


{'dataset': 'point_mnist', 'pooling': 'fs', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'emb_time_per_sample': 0.0003043778419494629, 'inf_time_per_sample': 0.007164075183868408, 'acc': 0.0581, 'precision_k': 0.07025}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:02<00:00, 3839.20it/s]


{'dataset': 'point_mnist', 'pooling': 'fs', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'emb_time_per_sample': 0.0002974188804626465, 'inf_time_per_sample': 0.007396276664733887, 'acc': 0.0748, 'precision_k': 0.076275}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:02<00:00, 3650.82it/s]


{'dataset': 'point_mnist', 'pooling': 'fs', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'emb_time_per_sample': 0.0003101985454559326, 'inf_time_per_sample': 0.007460631322860718, 'acc': 0.0699, 'precision_k': 0.07785}


### Cov

In [None]:
for k in ks:
    exp = Experiment(dataset, 'cov', 'faiss-lsh',
                     k=k, ref_size=set_size_median, code_length=code_length)
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
compute base embedding...


100%|██████████| 60000/60000 [00:04<00:00, 13198.13it/s]


compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 13314.78it/s]


{'dataset': 'point_mnist', 'pooling': 'cov', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'emb_time_per_sample': 0.00011116304397583008, 'inf_time_per_sample': 0.007299674415588379, 'acc': 0.2647, 'precision_k': 0.247275}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 14727.10it/s]


{'dataset': 'point_mnist', 'pooling': 'cov', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'emb_time_per_sample': 0.0001014611005783081, 'inf_time_per_sample': 0.007206280159950256, 'acc': 0.276, 'precision_k': 0.2481}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 14466.60it/s]


{'dataset': 'point_mnist', 'pooling': 'cov', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'emb_time_per_sample': 0.00010209002494812011, 'inf_time_per_sample': 0.007218893051147461, 'acc': 0.2813, 'precision_k': 0.24806875}


### GeM-1

In [None]:
for k in ks:
    exp = Experiment(dataset, 'gem', 'faiss-lsh',
                     k=k, ref_size=set_size_median, code_length=code_length, power=1)
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
compute base embedding...


100%|██████████| 60000/60000 [00:01<00:00, 35956.04it/s]


compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 39929.78it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'power': 1, 'emb_time_per_sample': 5.6887125968933106e-05, 'inf_time_per_sample': 0.007195631670951844, 'acc': 0.1084, 'precision_k': 0.104175}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 39843.45it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'power': 1, 'emb_time_per_sample': 5.9602665901184084e-05, 'inf_time_per_sample': 0.00717870876789093, 'acc': 0.1041, 'precision_k': 0.1022375}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 40766.04it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'power': 1, 'emb_time_per_sample': 6.200997829437256e-05, 'inf_time_per_sample': 0.0072134280443191525, 'acc': 0.0999, 'precision_k': 0.1011875}


### GeM-2

In [None]:
for k in ks:
    exp = Experiment(dataset, 'gem', 'faiss-lsh',
                     k=k, ref_size=set_size_median, code_length=code_length, power=2)
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
compute base embedding...


100%|██████████| 60000/60000 [00:03<00:00, 19558.42it/s]


compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 21621.83it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'power': 2, 'emb_time_per_sample': 8.054089546203613e-05, 'inf_time_per_sample': 0.007240815949440003, 'acc': 0.3216, 'precision_k': 0.286925}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 21879.59it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'power': 2, 'emb_time_per_sample': 8.032679557800293e-05, 'inf_time_per_sample': 0.00720102207660675, 'acc': 0.347, 'precision_k': 0.285625}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 20293.66it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'power': 2, 'emb_time_per_sample': 8.609297275543213e-05, 'inf_time_per_sample': 0.007226097083091736, 'acc': 0.3706, 'precision_k': 0.28910625}


### GeM-4

In [None]:
for k in ks:
    exp = Experiment(dataset, 'gem', 'faiss-lsh',
                     k=k, ref_size=set_size_median, code_length=code_length, power=4)
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
compute base embedding...


100%|██████████| 60000/60000 [00:05<00:00, 11095.30it/s]


compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 11822.00it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'power': 4, 'emb_time_per_sample': 0.00012045366764068603, 'inf_time_per_sample': 0.007203473114967347, 'acc': 0.4459, 'precision_k': 0.393425}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 11918.05it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'power': 4, 'emb_time_per_sample': 0.00011752259731292725, 'inf_time_per_sample': 0.007463881540298462, 'acc': 0.4712, 'precision_k': 0.3903625}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 11706.83it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'power': 4, 'emb_time_per_sample': 0.00011959459781646729, 'inf_time_per_sample': 0.007261830496788025, 'acc': 0.4872, 'precision_k': 0.38295625}
