In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
from utils.experiment import Experiment

In [2]:
# Read the training split. Column 0 goes to `y` (presumably the digit label —
# TODO confirm against the dataset docs); every remaining column goes to `X`.
df_train = pd.read_csv('../dataset/pointcloud_mnist_2d/train.csv')

y = df_train.iloc[:, 0].to_numpy()
X = df_train.iloc[:, 1:].to_numpy()

# Regroup each flat feature row into consecutive triples:
# (n_samples, n_features) -> (n_samples, n_features // 3, 3).
X = X.reshape(len(X), -1, 3)

In [3]:
# Per-sample count of triples whose third component is strictly positive
# (presumably the real, non-padded points of each cloud — TODO confirm).
num_points = (X[:, :, 2] > 0).sum(axis=1)

In [4]:
# NOTE(review): despite the "median" in its name, this holds the MINIMUM of
# `num_points`. Confirm whether min or median is the intended reference-set
# size; behaviour is deliberately left unchanged here.
set_size_median = num_points.min().astype(int)

In [5]:
# Settings shared by every experiment cell below.
dataset = 'point_mnist'
code_length = 1024
ks = [4]  # values of k each experiment is run with

# One report dict is appended here per experiment run. Re-running an experiment
# cell without re-running this one will append duplicate rows.
reports = list()

### SWE: hash code length = 1024, num_slices = 16

In [6]:
n_slices = 16
ref = 'rand'

# SWE pooling with the faiss-lsh index: run once per k and collect each report.
for k in ks:
    exp = Experiment(
        dataset,
        'swe',
        'faiss-lsh',
        ref_func=ref,
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
        num_slices=n_slices,
    )
    exp.test()
    report = exp.get_exp_report()
    print(report)
    reports.append(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:05<00:00, 1722.89it/s]


{'dataset': 'point_mnist', 'pooling': 'swe', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'emb_time_per_sample': 0.0006262514114379883, 'inf_time_per_sample': 0.0003870807886123657, 'acc': 0.9247, 'precision_k': 0.899125}


### WE

In [7]:
# These repeat the values set in earlier cells; they are re-assigned here so
# this cell's settings are visible in one place.
dataset = 'point_mnist'
code_length = 1024
ref = 'rand'

# WE pooling with the faiss-lsh index: run once per k and collect each report.
for k in ks:
    exp = Experiment(
        dataset,
        'we',
        'faiss-lsh',
        ref_func=ref,
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
    )
    exp.test()
    report = exp.get_exp_report()
    print(report)
    reports.append(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:10<00:00, 973.91it/s]


{'dataset': 'point_mnist', 'pooling': 'we', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'emb_time_per_sample': 0.0010608826875686645, 'inf_time_per_sample': 0.0003666826009750366, 'acc': 0.92, 'precision_k': 0.8942}


### FS

In [8]:
# FS pooling with the faiss-lsh index: run once per k and collect each report.
# Relies on `ref` (reference function name) assigned in an earlier cell.
for k in ks:
    exp = Experiment(
        dataset,
        'fs',
        'faiss-lsh',
        ref_func=ref,
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
    )
    exp.test()
    report = exp.get_exp_report()
    print(report)
    reports.append(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:01<00:00, 5949.09it/s]


{'dataset': 'point_mnist', 'pooling': 'fs', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'emb_time_per_sample': 0.00023378560543060303, 'inf_time_per_sample': 0.0003498089075088501, 'acc': 0.7898, 'precision_k': 0.74325}


### Cov

In [9]:
# Cov pooling with the faiss-lsh index: run once per k and collect each report.
# No `ref_func` is passed for this pooling method.
for k in ks:
    exp = Experiment(
        dataset,
        'cov',
        'faiss-lsh',
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
    )
    exp.test()
    report = exp.get_exp_report()
    print(report)
    reports.append(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 17117.87it/s]


{'dataset': 'point_mnist', 'pooling': 'cov', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'emb_time_per_sample': 8.51360321044922e-05, 'inf_time_per_sample': 0.00036351778507232664, 'acc': 0.2649, 'precision_k': 0.247375}


### GeM-1

In [10]:
# GeM pooling with power=1 and the faiss-lsh index: run once per k and
# collect each report.
for k in ks:
    exp = Experiment(
        dataset,
        'gem',
        'faiss-lsh',
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
        power=1,
    )
    exp.test()
    report = exp.get_exp_report()
    print(report)
    reports.append(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 38430.92it/s]


{'dataset': 'point_mnist', 'pooling': 'gem-1', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'power': 1, 'emb_time_per_sample': 8.306031227111816e-05, 'inf_time_per_sample': 0.00047839798927307126, 'acc': 0.1087, 'precision_k': 0.104325}


### GeM-2

In [11]:
# GeM pooling with power=2 and the faiss-lsh index: run once per k and
# collect each report.
for k in ks:
    exp = Experiment(
        dataset,
        'gem',
        'faiss-lsh',
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
        power=2,
    )
    exp.test()
    report = exp.get_exp_report()
    print(report)
    reports.append(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 30696.15it/s]


{'dataset': 'point_mnist', 'pooling': 'gem-2', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'power': 2, 'emb_time_per_sample': 6.64353609085083e-05, 'inf_time_per_sample': 0.0003809842348098755, 'acc': 0.3215, 'precision_k': 0.28705}


### GeM-4

In [12]:
# GeM pooling with power=4 and the faiss-lsh index: run once per k and
# collect each report.
for k in ks:
    exp = Experiment(
        dataset,
        'gem',
        'faiss-lsh',
        k=k,
        ref_size=set_size_median,
        code_length=code_length,
        power=4,
    )
    exp.test()
    report = exp.get_exp_report()
    print(report)
    reports.append(report)

loading dataset...
loading cached base embedding...
compute query embedding...


100%|██████████| 10000/10000 [00:00<00:00, 15045.94it/s]


{'dataset': 'point_mnist', 'pooling': 'gem-4', 'ann': 'faiss-lsh', 'k': 4, 'code_length': 1024, 'power': 4, 'emb_time_per_sample': 0.00014939179420471192, 'inf_time_per_sample': 0.000393113112449646, 'acc': 0.446, 'precision_k': 0.393575}


In [13]:
import altair as alt

In [14]:
# One row per collected report; keys missing from a report (e.g. 'power' for
# the non-GeM runs) become NaN in that row.
data = pd.DataFrame(data=reports)

In [15]:
# Scatter of accuracy against embedding time per sample, one point per report,
# coloured by pooling method. (The variable is called `bar`, but the chart uses
# point marks.)
bar = (
    alt.Chart(data)
    .mark_point()
    .encode(
        x=alt.X('emb_time_per_sample:Q'),
        y=alt.Y('acc:Q', title='acc'),
        color=alt.Color('pooling:N'),
    )
)

In [16]:
# Bare last expression: the notebook renders the chart object as the cell output.
bar