In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
from utils.experiment import Experiment

In [None]:
# Read the training split of the 2D point-cloud MNIST CSV.
df_train = pd.read_csv('../dataset/pointcloud_mnist_2d/train.csv')

# Column 0 becomes the target vector (presumably the digit label -- confirm
# against the dataset description); every remaining column is feature data.
y = df_train.iloc[:, 0].to_numpy()

# Regroup each flat feature row into consecutive groups of three values,
# giving an array of shape (n_rows, n_groups, 3).
X = df_train.iloc[:, 1:].to_numpy().reshape(len(df_train), -1, 3)

In [None]:
# Per sample, count how many entries have a strictly positive third component.
# NOTE(review): presumably non-positive values in that channel mark padding -- confirm.
num_points = (X[:, :, 2] > 0).sum(axis=1)

In [None]:
# Median of the per-sample point counts, cast to an integer (fractional part
# is dropped); passed as `ref_size` to every experiment below.
set_size_median = np.median(num_points).astype(np.int_)

### SWE: for hash code length = 1024, `num_slices = 16` seems to be a good choice.

In [None]:
# Settings for the 'swe' pooling runs with the 'faiss-lsh' ANN backend.
dataset = 'point_mnist'
ref = 'rand'
code_length = 1024
n_slices = 16
ks = [4, 8, 16]

# One experiment per k; each run prints its report dict
# (timings, 'acc' and 'precision_k').
for k in ks:
    exp = Experiment(
        dataset,
        'swe',
        'faiss-lsh',
        ref_func=ref,
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
        num_slices=n_slices,
    )
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:09<00:00, 1087.15it/s]


{'dataset': 'point_mnist', 'pooling': 'swe', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'emb_time_per_sample': 0.0009572587966918945, 'inf_time_per_sample': 0.00693941912651062, 'acc': 0.9239, 'precision_k': 0.8998}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:08<00:00, 1159.26it/s]


{'dataset': 'point_mnist', 'pooling': 'swe', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'emb_time_per_sample': 0.0009122352838516236, 'inf_time_per_sample': 0.006888640093803406, 'acc': 0.9242, 'precision_k': 0.8871375}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:08<00:00, 1134.44it/s]


{'dataset': 'point_mnist', 'pooling': 'swe', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'emb_time_per_sample': 0.0009334187030792236, 'inf_time_per_sample': 0.006827449154853821, 'acc': 0.9173, 'precision_k': 0.870675}


### WE

In [None]:
# Settings for the 'we' pooling runs using the 'randn' reference function.
dataset = 'point_mnist'
ref = 'randn'
code_length = 1024
ks = [4, 8, 16]

# Evaluate once per k and print the resulting report dict.
for k in ks:
    exp = Experiment(
        dataset,
        'we',
        'faiss-lsh',
        ref_func=ref,
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
    )
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:50<00:00, 199.81it/s]


{'dataset': 'point_mnist', 'pooling': 'we', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'emb_time_per_sample': 0.005045134782791138, 'inf_time_per_sample': 0.006749906206130981, 'acc': 0.9221, 'precision_k': 0.895575}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:50<00:00, 198.24it/s]


{'dataset': 'point_mnist', 'pooling': 'we', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'emb_time_per_sample': 0.005081389307975769, 'inf_time_per_sample': 0.006766121625900268, 'acc': 0.9231, 'precision_k': 0.8844875}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:50<00:00, 197.52it/s]


{'dataset': 'point_mnist', 'pooling': 'we', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'emb_time_per_sample': 0.005097668766975403, 'inf_time_per_sample': 0.006824886250495911, 'acc': 0.9191, 'precision_k': 0.86925}


In [None]:
# Same 'we' pooling sweep as the 'randn' run, but with the 'rand'
# reference function.
dataset = 'point_mnist'
ref = 'rand'
code_length = 1024
ks = [4, 8, 16]

# Evaluate once per k and print the resulting report dict.
for k in ks:
    exp = Experiment(
        dataset,
        'we',
        'faiss-lsh',
        ref_func=ref,
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
    )
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
compute base embedding...


100%|██████████| 60000/60000 [04:19<00:00, 231.01it/s]


compute query embedding...


100%|██████████| 10000/10000 [00:43<00:00, 229.90it/s]


{'dataset': 'point_mnist', 'pooling': 'we', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'emb_time_per_sample': 0.004383328247070313, 'inf_time_per_sample': 0.006869648504257202, 'acc': 0.9207, 'precision_k': 0.88995}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:43<00:00, 229.45it/s]


{'dataset': 'point_mnist', 'pooling': 'we', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'emb_time_per_sample': 0.004387899017333985, 'inf_time_per_sample': 0.0068446012735366825, 'acc': 0.9224, 'precision_k': 0.877675}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:43<00:00, 232.14it/s]


{'dataset': 'point_mnist', 'pooling': 'we', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'emb_time_per_sample': 0.004337923049926758, 'inf_time_per_sample': 0.006674224042892456, 'acc': 0.9163, 'precision_k': 0.8617375}


### FS

In [None]:
# 'fs' pooling runs with the 'faiss-lsh' ANN backend.
#
# The configuration is pinned in this cell instead of being inherited from
# whichever cell happened to run last: earlier cells assign `ref` both
# 'randn' and 'rand', so relying on the leftover value makes the result
# depend on execution order. The values below are the ones a top-to-bottom
# run leaves in scope at this point.
dataset = 'point_mnist'
code_length = 1024
ks = [4, 8, 16]
ref = 'rand'

# One experiment per k; each run prints its report dict
# (timings, 'acc' and 'precision_k').
for k in ks:
    exp = Experiment(dataset, 'fs', 'faiss-lsh', ref_func=ref,
                     k=k, ref_size=set_size_median, code_length=code_length)
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:02<00:00, 3737.56it/s]


{'dataset': 'point_mnist', 'pooling': 'fs', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'emb_time_per_sample': 0.00029875319004058837, 'inf_time_per_sample': 0.006773207664489746, 'acc': 0.0581, 'precision_k': 0.07025}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:02<00:00, 4179.29it/s]


{'dataset': 'point_mnist', 'pooling': 'fs', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'emb_time_per_sample': 0.0002708813667297363, 'inf_time_per_sample': 0.006771250128746032, 'acc': 0.0748, 'precision_k': 0.076275}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:02<00:00, 3925.05it/s]


{'dataset': 'point_mnist', 'pooling': 'fs', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'emb_time_per_sample': 0.0002839841365814209, 'inf_time_per_sample': 0.006753718757629395, 'acc': 0.0699, 'precision_k': 0.07785}


### Cov

In [None]:
# 'cov' pooling sweep over k; no reference function is passed for this
# pooling. Prints one report dict per run.
for k in ks:
    exp = Experiment(
        dataset,
        'cov',
        'faiss-lsh',
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
    )
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 13738.31it/s]


{'dataset': 'point_mnist', 'pooling': 'cov', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'emb_time_per_sample': 0.00010192229747772217, 'inf_time_per_sample': 0.006724641036987305, 'acc': 0.2647, 'precision_k': 0.247275}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 16419.65it/s]


{'dataset': 'point_mnist', 'pooling': 'cov', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'emb_time_per_sample': 8.825936317443848e-05, 'inf_time_per_sample': 0.006788952469825744, 'acc': 0.276, 'precision_k': 0.2481}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 12328.51it/s]


{'dataset': 'point_mnist', 'pooling': 'cov', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'emb_time_per_sample': 0.00011181581020355225, 'inf_time_per_sample': 0.00672577064037323, 'acc': 0.2813, 'precision_k': 0.24806875}


### GeM-1

In [None]:
# 'gem' pooling sweep over k with power=1. Prints one report dict per run.
for k in ks:
    exp = Experiment(
        dataset,
        'gem',
        'faiss-lsh',
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
        power=1,
    )
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 39215.12it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'power': 1, 'emb_time_per_sample': 5.385570526123047e-05, 'inf_time_per_sample': 0.006747601437568664, 'acc': 0.1084, 'precision_k': 0.104175}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 40601.84it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'power': 1, 'emb_time_per_sample': 5.332963466644287e-05, 'inf_time_per_sample': 0.006728264784812928, 'acc': 0.1041, 'precision_k': 0.1022375}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 41483.98it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'power': 1, 'emb_time_per_sample': 5.30287504196167e-05, 'inf_time_per_sample': 0.006730451941490173, 'acc': 0.0999, 'precision_k': 0.1011875}


### GeM-2

In [None]:
# 'gem' pooling sweep over k with power=2. Prints one report dict per run.
for k in ks:
    exp = Experiment(
        dataset,
        'gem',
        'faiss-lsh',
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
        power=2,
    )
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 24658.52it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'power': 2, 'emb_time_per_sample': 6.860001087188721e-05, 'inf_time_per_sample': 0.006760328674316406, 'acc': 0.3216, 'precision_k': 0.286925}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 21465.04it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'power': 2, 'emb_time_per_sample': 7.587172985076905e-05, 'inf_time_per_sample': 0.006759798049926758, 'acc': 0.347, 'precision_k': 0.285625}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 23305.97it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'power': 2, 'emb_time_per_sample': 7.175891399383545e-05, 'inf_time_per_sample': 0.006735078716278076, 'acc': 0.3706, 'precision_k': 0.28910625}


### GeM-4

In [None]:
# 'gem' pooling sweep over k with power=4. Prints one report dict per run.
for k in ks:
    exp = Experiment(
        dataset,
        'gem',
        'faiss-lsh',
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
        power=4,
    )
    exp.test()
    report = exp.get_exp_report()
    print(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 10338.38it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'power': 4, 'emb_time_per_sample': 0.00012458126544952393, 'inf_time_per_sample': 0.0067163489341735836, 'acc': 0.4459, 'precision_k': 0.393425}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 13317.65it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 8, 'code_length': 1024, 'power': 4, 'emb_time_per_sample': 0.00010380027294158935, 'inf_time_per_sample': 0.006785089993476868, 'acc': 0.4712, 'precision_k': 0.3903625}
loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 12673.06it/s]


{'dataset': 'point_mnist', 'pooling': 'gem', 'ann': 'faiss-lsh', 'k': 16, 'code_length': 1024, 'power': 4, 'emb_time_per_sample': 0.00010741360187530518, 'inf_time_per_sample': 0.006788885045051575, 'acc': 0.4872, 'precision_k': 0.38295625}
