In [1]:
import time
import math
import numpy as np
from snnpy import *
from sklearn.neighbors import BallTree
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree
from bf_search import *

In [2]:
def bvecs_read(fname):
    """Load a ``.bvecs`` file into a 2-D ``uint8`` array, one vector per row.

    The vector dimension is read as a single int32 from the start of the
    file. The whole file is then reinterpreted as bytes, split into records
    of ``dimension + 4`` bytes, and the leading 4 bytes of every record (the
    per-vector dimension field) are dropped.

    Parameters
    ----------
    fname : str or path-like
        Path of the file to read.

    Returns
    -------
    numpy.ndarray
        A fresh (copied) ``uint8`` array of shape ``(n_vectors, dimension)``.
    """
    dim = np.fromfile(fname, dtype=np.int32, count=1)[0]
    raw_bytes = np.fromfile(fname, dtype=np.uint8)
    records = raw_bytes.reshape(-1, dim + 4)
    return records[:, 4:].copy()


def ivecs_read(fname):
    """Load an ``.ivecs`` file into a 2-D ``int32`` array, one vector per row.

    The file is read as a flat int32 stream. Its first value is taken as the
    vector dimension, the stream is split into records of ``dimension + 1``
    values, and the leading dimension field of every record is dropped.

    Parameters
    ----------
    fname : str or path-like
        Path of the file to read.

    Returns
    -------
    numpy.ndarray
        A fresh (copied) ``int32`` array of shape ``(n_vectors, dimension)``.
    """
    flat = np.fromfile(fname, dtype=np.int32)
    dim = flat[0]
    records = flat.reshape(-1, dim + 1)
    return records[:, 1:].copy()


def fvecs_read2(fname):
    """Load an ``.fvecs`` file as a 2-D ``float32`` array.

    The file is parsed with ``ivecs_read`` (same record layout, 4-byte
    elements) and the resulting int32 buffer is reinterpreted bit-for-bit as
    float32; no numeric conversion takes place.
    """
    int_rows = ivecs_read(fname)
    return int_rows.view(np.float32)


def sigificant_digit(number, digits=2):
    """Round ``number`` to ``digits`` significant figures.

    Parameters
    ----------
    number : int or float
        Value to round.
    digits : int, optional
        Number of significant figures to keep (default 2).

    Returns
    -------
    int or float
        ``number`` rounded with the built-in ``round``. Zero, NaN and
        infinities are returned unchanged, because they have no decimal
        exponent to round against.
    """
    # log10(0) raises ValueError and floor()/int() fail on NaN/inf, so these
    # inputs are passed through instead of crashing the caller's print.
    if number == 0 or not math.isfinite(number):
        return number
    # Position of the leading digit: 0 for 1..9.99, 1 for 10..99.9, -1 for 0.1..0.99.
    exponent = int(math.floor(math.log10(abs(number))))
    return round(number, digits - exponent - 1)


### fashion mnist

In [3]:
# Load the Fashion-MNIST training points and query points from disk.
fmn_train = np.load("fashion_mnist/train.npy")
fmn_query = np.load("fashion_mnist/queries.npy")

# Standardise both sets with the per-feature mean/std of the training set.
mu = fmn_train.mean(axis=0)
scl = fmn_train.std(axis=0)
# A constant feature has zero standard deviation; dividing by it would turn
# that column into NaN/inf. Such features are only centred (scale of 1).
scl = np.where(scl == 0, 1, scl)
fmn_train = (fmn_train - mu) / scl
fmn_query = (fmn_query - mu) / scl

# Containers for index-construction times, one list per method.
# NOTE(review): only the *_sn_* lists are filled by the cells visible here;
# the others are presumably used by the other benchmarks - confirm.
fmn_kdtree_index_timing = []
fmn_balltree_index_timing = []
fmn_sn_index_timing = []

# Containers for query times, one list per method.
fmn_bf_run_timing1 = []
fmn_bf_run_timing2 = []
fmn_kdtree_run_timing = []
fmn_balltree_run_timing = []
fmn_sn_run_timing = []


In [4]:
radius = list(range(10, 20, 2))  # radii to sweep: 10, 12, 14, 16, 18

In [5]:
# SNN: time the index construction on the standardised training set.
# perf_counter() is used instead of time() because it is a monotonic,
# high-resolution clock meant for measuring intervals.
st = time.perf_counter()
snn = build_snn_model(fmn_train)
et = time.perf_counter() - st
fmn_sn_index_timing.append(et)
print("snn index time:", et)


# For every radius, run one radius query per query point, then report the
# mean number of returned neighbours and the total time for the whole batch.
for R in radius:
    fmn_sn_nn_num = list()  # neighbour counts for this radius only
    st = time.perf_counter()
    for j in range(fmn_query.shape[0]):
        ind = snn.radius_single_query(fmn_query[j], R)
        fmn_sn_nn_num.append(len(ind))
    et = time.perf_counter() - st
    print("Return: ", sigificant_digit(np.mean(fmn_sn_nn_num)), " neighbors")
    fmn_sn_run_timing.append(et)
    print("snn query time:", et)

snn index time: 1.1902434825897217
Return:  9.3  neighbors
snn query time: 81.41821265220642
Return:  39.0  neighbors
snn query time: 96.53143572807312
Return:  130.0  neighbors
snn query time: 110.18136858940125
Return:  340.0  neighbors
snn query time: 121.65405797958374
Return:  800.0  neighbors
snn query time: 133.60131239891052


### siftsmall

In [6]:
# Load the siftsmall "learn" vectors as the indexed set and the query vectors.
sift_train = fvecs_read2('siftsmall/siftsmall_learn.fvecs')
sift_query = fvecs_read2('siftsmall/siftsmall_query.fvecs')

# Standardise both sets with the per-feature mean/std of the training set.
mu = sift_train.mean(axis=0)
scl = sift_train.std(axis=0)
# A constant feature has zero standard deviation; dividing by it would turn
# that column into NaN/inf. Such features are only centred (scale of 1).
scl = np.where(scl == 0, 1, scl)
sift_train = (sift_train - mu) / scl
sift_query = (sift_query - mu) / scl

# Containers for index-construction times, one list per method.
# NOTE(review): only the *_sn_* lists are filled by the cells visible here;
# the others are presumably used by the other benchmarks - confirm.
sift_kdtree_index_timing = []
sift_balltree_index_timing = []
sift_sn_index_timing = []

# Containers for query times, one list per method.
sift_bf_run_timing1 = []
sift_bf_run_timing2 = []
sift_kdtree_run_timing = []
sift_balltree_run_timing = []
sift_sn_run_timing = []

# Containers for the number of neighbours returned per query.
sift_bf_nn_num1 = []
sift_bf_nn_num2 = []
sift_kdtree_nn_num = []
sift_balltree_nn_num = []
sift_sn_nn_num = []

In [7]:
radius = list(range(10, 20, 2))  # radii to sweep: 10, 12, 14, 16, 18

In [8]:
# SNN: time the index construction on the standardised training set.
# perf_counter() is used instead of time() because it is a monotonic,
# high-resolution clock meant for measuring intervals.
st = time.perf_counter()
snn = build_snn_model(sift_train)
et = time.perf_counter() - st
sift_sn_index_timing.append(et)
print("snn build time:", et)


# For every radius, run one radius query per query point, then report the
# mean number of returned neighbours and the total time for the whole batch.
for R in radius:
    sift_sn_nn_num = list()  # neighbour counts for this radius only
    st = time.perf_counter()
    for j in range(sift_query.shape[0]):
        ind = snn.radius_single_query(sift_query[j], R)
        sift_sn_nn_num.append(len(ind))
    et = time.perf_counter() - st
    print("Return: ", sigificant_digit(np.mean(sift_sn_nn_num)), " neighbors")
    sift_sn_run_timing.append(et)
    print("snn query time:", et)

snn build time: 0.03547072410583496
Return:  350.0  neighbors
snn query time: 0.08532047271728516
Return:  2000.0  neighbors
snn query time: 0.11270689964294434
Return:  6100.0  neighbors
snn query time: 0.12796497344970703
Return:  11000.0  neighbors
snn query time: 0.13000798225402832
Return:  20000.0  neighbors
snn query time: 0.13477563858032227


### sift

In [9]:
# Load the sift "learn" vectors as the indexed set and the query vectors.
# These reuse the sift_* names of the siftsmall section, replacing its data.
sift_train = fvecs_read2('sift/sift_learn.fvecs')
sift_query = fvecs_read2('sift/sift_query.fvecs')

# Standardise both sets with the per-feature mean/std of the training set.
mu = sift_train.mean(axis=0)
scl = sift_train.std(axis=0)
# A constant feature has zero standard deviation; dividing by it would turn
# that column into NaN/inf. Such features are only centred (scale of 1).
scl = np.where(scl == 0, 1, scl)
sift_train = (sift_train - mu) / scl
sift_query = (sift_query - mu) / scl

# Containers for index-construction times, one list per method.
# NOTE(review): only the *_sn_* lists are filled by the cells visible here;
# the others are presumably used by the other benchmarks - confirm.
sift_kdtree_index_timing = []
sift_balltree_index_timing = []
sift_sn_index_timing = []

# Containers for query times, one list per method.
sift_bf_run_timing1 = []
sift_bf_run_timing2 = []
sift_kdtree_run_timing = []
sift_balltree_run_timing = []
sift_sn_run_timing = []

# Containers for the number of neighbours returned per query.
sift_bf_nn_num1 = []
sift_bf_nn_num2 = []
sift_kdtree_nn_num = []
sift_balltree_nn_num = []
sift_sn_nn_num = []

In [10]:
radius = list(range(10, 20, 2))  # radii to sweep: 10, 12, 14, 16, 18

In [11]:



# SNN: time the index construction on the standardised training set.
# perf_counter() is used instead of time() because it is a monotonic,
# high-resolution clock meant for measuring intervals.
st = time.perf_counter()
snn = build_snn_model(sift_train)
et = time.perf_counter() - st
sift_sn_index_timing.append(et)
print("snn build time:", et)


# For every radius, run one radius query per query point, then report the
# mean number of returned neighbours and the total time for the whole batch.
for R in radius:
    sift_sn_nn_num = list()  # neighbour counts for this radius only
    st = time.perf_counter()
    for j in range(sift_query.shape[0]):
        ind = snn.radius_single_query(sift_query[j], R)
        sift_sn_nn_num.append(len(ind))
    et = time.perf_counter() - st
    print("Return: ", sigificant_digit(np.mean(sift_sn_nn_num)), " neighbors")
    sift_sn_run_timing.append(et)
    print("snn query time:", et)

snn build time: 0.17121434211730957
Return:  1300.0  neighbors
snn query time: 58.20711398124695
Return:  7700.0  neighbors
snn query time: 80.18133807182312
Return:  24000.0  neighbors
snn query time: 91.30198049545288
Return:  45000.0  neighbors
snn query time: 92.79111409187317
Return:  82000.0  neighbors
snn query time: 93.82447719573975


### gist

In [12]:
# Load the GIST training points and query points from disk.
gist_train = np.load("gist/train.npy")
gist_query = np.load("gist/queries.npy")

# Standardise both sets with the per-feature mean/std of the training set.
mu = gist_train.mean(axis=0)
scl = gist_train.std(axis=0)
# A constant feature has zero standard deviation; dividing by it would turn
# that column into NaN/inf. Such features are only centred (scale of 1).
scl = np.where(scl == 0, 1, scl)
gist_train = (gist_train - mu) / scl
gist_query = (gist_query - mu) / scl

# Containers for index-construction times, one list per method.
# NOTE(review): only the *_sn_* lists are filled by the cells visible here;
# the others are presumably used by the other benchmarks - confirm.
gist_kdtree_index_timing = []
gist_balltree_index_timing = []
gist_sn_index_timing = []

# Containers for query times, one list per method.
gist_bf_run_timing1 = []
gist_bf_run_timing2 = []
gist_kdtree_run_timing = []
gist_balltree_run_timing = []
gist_sn_run_timing = []

# Containers for the number of neighbours returned per query.
gist_bf_nn_num1 = []
gist_bf_nn_num2 = []
gist_kdtree_nn_num = []
gist_balltree_nn_num = []
gist_sn_nn_num = []

In [13]:
radius = list(range(10, 20, 2))  # radii to sweep: 10, 12, 14, 16, 18

In [14]:
# SNN: time the index construction on the standardised training set.
# perf_counter() is used instead of time() because it is a monotonic,
# high-resolution clock meant for measuring intervals.
st = time.perf_counter()
snn = build_snn_model(gist_train)
et = time.perf_counter() - st
gist_sn_index_timing.append(et)
print("snn build time:", et)


# For every radius, run one radius query per query point, then report the
# mean number of returned neighbours and the total time for the whole batch.
for R in radius:
    gist_sn_nn_num = list()  # neighbour counts for this radius only
    st = time.perf_counter()
    for j in range(gist_query.shape[0]):
        ind = snn.radius_single_query(gist_query[j], R)
        gist_sn_nn_num.append(len(ind))
    et = time.perf_counter() - st
    print("Return: ", sigificant_digit(np.mean(gist_sn_nn_num)), " neighbors")
    gist_sn_run_timing.append(et)
    print("snn query time:", et)

snn build time: 25.67221212387085
Return:  42.0  neighbors
snn query time: 167.64955687522888
Return:  180.0  neighbors
snn query time: 196.65022659301758
Return:  470.0  neighbors
snn query time: 223.76151728630066
Return:  930.0  neighbors
snn query time: 248.23459839820862
Return:  1700.0  neighbors
snn query time: 271.54558396339417
