In [1]:
import os
import time

import numpy as np
from snnpy import *
from sklearn.neighbors import BallTree
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree
from bf_search import *

In [2]:
def bvecs_read(fname):
    """Read a byte-vector file into a 2-D uint8 array.

    The first four bytes of the file are interpreted as an int32 giving the
    vector dimension d. The whole file is then viewed as uint8 records of
    d + 4 bytes each, and the 4 leading bytes of every record are dropped.

    Returns a fresh (copied) array with one row per record and d columns.
    """
    dim = np.fromfile(fname, dtype=np.int32, count=1)[0]
    raw_bytes = np.fromfile(fname, dtype=np.uint8)
    records = raw_bytes.reshape(-1, dim + 4)
    # Strip the per-record 4-byte header and detach from the raw buffer.
    return records[:, 4:].copy()


def ivecs_read(fname):
    """Read an int32 vector file into a 2-D int32 array.

    The file is loaded as a flat run of int32 values. The first value is
    taken as the vector dimension d; the data is then viewed as rows of
    d + 1 values and the leading value of each row is dropped.

    Returns a fresh (copied) array with one row per vector and d columns.
    """
    raw = np.fromfile(fname, dtype=np.int32)
    dim = raw[0]
    rows = raw.reshape(-1, dim + 1)
    # Column 0 of every row is the per-vector dimension field.
    return rows[:, 1:].copy()


def fvecs_read2(fname):
    """Read a float vector file by reusing the int32 reader.

    The file is parsed with ivecs_read and the resulting int32 array is
    reinterpreted (not converted) as float32 via a view, so the returned
    array has the same shape as the int32 one.
    """
    as_int32 = ivecs_read(fname)
    return as_int32.view(np.float32)

### Fashion-MNIST

In [3]:
# Load the Fashion-MNIST base points and the query points.
fmn_train = np.load("fashion_mnist/train.npy")
fmn_query = np.load("fashion_mnist/queries.npy")

# Standardise each feature with the training-set mean and standard deviation;
# the queries are scaled with the same training statistics.
mu = np.mean(fmn_train, axis=0)
scl = np.std(fmn_train, axis=0)
fmn_train = (fmn_train - mu) / scl
fmn_query = (fmn_query - mu) / scl

# Index build times, filled in by the benchmark cell.
fmn_kdtree_index_timing = []
fmn_balltree_index_timing = []
fmn_sn_index_timing = []

# Query times, one entry per tested radius, filled in by the benchmark cell.
fmn_bf_run_timing1 = []
fmn_bf_run_timing2 = []
fmn_kdtree_run_timing = []
fmn_balltree_run_timing = []
fmn_sn_run_timing = []

In [4]:
# Query radii to benchmark: 10, 12, 14, 16, 18.
radius = list(range(10, 19, 2))

In [5]:
# Fashion-MNIST benchmark: for every radius in `radius`, time fixed-radius
# queries issued one query point at a time with each search method.
# Intervals are measured with time.perf_counter(), a monotonic high-resolution
# clock, so long runs are not distorted by wall-clock adjustments.

# Brute force 1: scikit-learn NearestNeighbors with algorithm='brute'.
# fit() is inside the timed region, so it is part of each recorded run time.
for R in radius:
    st = time.perf_counter()
    neigh = NearestNeighbors(radius=R, algorithm='brute')
    neigh.fit(fmn_train)
    for j in range(fmn_query.shape[0]):
        ind = neigh.radius_neighbors(
            fmn_query[j:j+1], radius=R, return_distance=False
        )
    et = time.perf_counter() - st
    fmn_bf_run_timing1.append(et)
    print("brute force 1 query time:", et)
print()

# Brute force 2: bf_radius_fairness (presumably supplied by the bf_search
# star import -- confirm), called directly on the training array.
for R in radius:
    st = time.perf_counter()
    for j in range(fmn_query.shape[0]):
        ind = bf_radius_fairness(fmn_query[j], fmn_train, R, return_distance=False)
    et = time.perf_counter() - st
    fmn_bf_run_timing2.append(et)
    print("brute force 2 query time:", et)
print()


# KDtree: the tree is built once (timed separately) and reused for all radii.
st = time.perf_counter()
tree = KDTree(fmn_train)
et = time.perf_counter() - st
fmn_kdtree_index_timing.append(et)
print("kdtree index time:", et)

for R in radius:
    st = time.perf_counter()
    for j in range(fmn_query.shape[0]):
        ind = tree.query_radius(fmn_query[j:j+1], r=R, return_distance=False)
    et = time.perf_counter() - st
    fmn_kdtree_run_timing.append(et)
    print("kdtree query time:", et)
print()


# Balltree: same protocol as the KD-tree; note that `tree` is rebound here.
st = time.perf_counter()
tree = BallTree(fmn_train)
et = time.perf_counter() - st
fmn_balltree_index_timing.append(et)
print("ball tree index time:", et)

for R in radius:
    st = time.perf_counter()
    for j in range(fmn_query.shape[0]):
        ind = tree.query_radius(fmn_query[j:j+1], r=R, return_distance=False)
    et = time.perf_counter() - st
    fmn_balltree_run_timing.append(et)
    print("ball tree query time:", et)
print()

# SNN: build_snn_model (presumably supplied by the snnpy star import --
# confirm) is timed once, then queried point by point for every radius.
st = time.perf_counter()
snn = build_snn_model(fmn_train)
et = time.perf_counter() - st
fmn_sn_index_timing.append(et)
print("snn index time:", et)


for R in radius:
    st = time.perf_counter()
    for j in range(fmn_query.shape[0]):
        ind = snn.radius_single_query(fmn_query[j], R)
    et = time.perf_counter() - st
    fmn_sn_run_timing.append(et)
    print("snn query time:", et)

brute force 1 query time: 1937.1840212345123
brute force 1 query time: 1920.8855392932892
brute force 1 query time: 1920.8190817832947
brute force 1 query time: 1919.7405717372894
brute force 1 query time: 1919.4307796955109

brute force 2 query time: 500.7676362991333
brute force 2 query time: 500.928995847702
brute force 2 query time: 500.9469265937805
brute force 2 query time: 500.91677927970886
brute force 2 query time: 501.09615755081177

kdtree index time: 7.5076985359191895
kdtree query time: 1327.5464997291565
kdtree query time: 1341.8920352458954
kdtree query time: 1353.900962114334
kdtree query time: 1364.1208710670471
kdtree query time: 1370.7692666053772

ball tree index time: 5.743338108062744
ball tree query time: 904.4928455352783
ball tree query time: 905.2608087062836
ball tree query time: 905.8919825553894
ball tree query time: 906.6556673049927
ball tree query time: 907.5693707466125

snn index time: 1.132098913192749
snn query time: 78.17794728279114
snn query time:

In [6]:
# Persist every Fashion-MNIST timing list as its own .npy file.
fmn_outputs = [
    ('result/real_world/fmn_kdtree_index_timing_norm.npy', fmn_kdtree_index_timing),
    ('result/real_world/fmn_balltree_index_timing_norm.npy', fmn_balltree_index_timing),
    ('result/real_world/fmn_sn_index_timing_norm.npy', fmn_sn_index_timing),
    ('result/real_world/fmn_bf_run_timing1_norm.npy', fmn_bf_run_timing1),
    ('result/real_world/fmn_bf_run_timing2_norm.npy', fmn_bf_run_timing2),
    ('result/real_world/fmn_kdtree_run_timing_norm.npy', fmn_kdtree_run_timing),
    ('result/real_world/fmn_balltree_run_timing_norm.npy', fmn_balltree_run_timing),
    ('result/real_world/fmn_sn_run_timing_norm.npy', fmn_sn_run_timing),
]

# Create the output folder up front so a long benchmark run is not followed
# by a FileNotFoundError on a fresh checkout.
os.makedirs('result/real_world', exist_ok=True)

for path, timings in fmn_outputs:
    with open(path, 'wb') as f:
        np.save(f, np.array(timings))
    


### siftsmall

In [7]:
# Load the siftsmall learn set (used as base points) and the query set.
sift_train = fvecs_read2('siftsmall/siftsmall_learn.fvecs')
sift_query = fvecs_read2('siftsmall/siftsmall_query.fvecs')

# Standardise each feature with the training-set mean and standard deviation;
# the queries are scaled with the same training statistics.
mu = np.mean(sift_train, axis=0)
scl = np.std(sift_train, axis=0)
sift_train = (sift_train - mu) / scl
sift_query = (sift_query - mu) / scl

# Index build times, filled in by the benchmark cell.
sift_kdtree_index_timing = []
sift_balltree_index_timing = []
sift_sn_index_timing = []

# Query times, one entry per tested radius, filled in by the benchmark cell.
sift_bf_run_timing1 = []
sift_bf_run_timing2 = []
sift_kdtree_run_timing = []
sift_balltree_run_timing = []
sift_sn_run_timing = []

In [8]:
# Query radii to benchmark: 10, 12, 14, 16, 18.
radius = list(range(10, 19, 2))

In [9]:
# siftsmall benchmark: for every radius in `radius`, time fixed-radius queries
# issued one query point at a time with each search method.
# Intervals are measured with time.perf_counter(), a monotonic high-resolution
# clock; this matters here because the measured intervals are around a second.

# Brute force 1: scikit-learn NearestNeighbors with algorithm='brute'.
# fit() is inside the timed region, so it is part of each recorded run time.
for R in radius:
    st = time.perf_counter()
    neigh = NearestNeighbors(radius=R, algorithm='brute')
    neigh.fit(sift_train)
    for j in range(sift_query.shape[0]):
        ind = neigh.radius_neighbors(
            sift_query[j:j+1], radius=R, return_distance=False
        )
    et = time.perf_counter() - st
    sift_bf_run_timing1.append(et)
    print("brute force 1 query time:", et)
print()


# Brute force 2: bf_radius_fairness (presumably supplied by the bf_search
# star import -- confirm), called directly on the training array.
for R in radius:
    st = time.perf_counter()
    for j in range(sift_query.shape[0]):
        ind = bf_radius_fairness(sift_query[j], sift_train, R, return_distance=False)
    et = time.perf_counter() - st
    sift_bf_run_timing2.append(et)
    print("brute force 2 query time:", et)
print()


# KDtree: the tree is built once (timed separately) and reused for all radii.
st = time.perf_counter()
tree = KDTree(sift_train)
et = time.perf_counter() - st
sift_kdtree_index_timing.append(et)
print("kdtree build time:", et)
for R in radius:
    st = time.perf_counter()
    for j in range(sift_query.shape[0]):
        ind = tree.query_radius(sift_query[j:j+1], r=R, return_distance=False)
    et = time.perf_counter() - st
    sift_kdtree_run_timing.append(et)
    print("kdtree query time:", et)
print()


# Balltree: same protocol as the KD-tree; note that `tree` is rebound here.
st = time.perf_counter()
tree = BallTree(sift_train)
et = time.perf_counter() - st
sift_balltree_index_timing.append(et)
print("balltree build time:", et)

for R in radius:
    st = time.perf_counter()
    for j in range(sift_query.shape[0]):
        ind = tree.query_radius(sift_query[j:j+1], r=R, return_distance=False)
    et = time.perf_counter() - st
    sift_balltree_run_timing.append(et)
    print("balltree query time:", et)

print()


# SNN: build_snn_model (presumably supplied by the snnpy star import --
# confirm) is timed once, then queried point by point for every radius.
st = time.perf_counter()
snn = build_snn_model(sift_train)
et = time.perf_counter() - st
sift_sn_index_timing.append(et)
print("snn build time:", et)


for R in radius:
    st = time.perf_counter()
    for j in range(sift_query.shape[0]):
        ind = snn.radius_single_query(sift_query[j], R)
    et = time.perf_counter() - st
    sift_sn_run_timing.append(et)
    print("snn query time:", et)

brute force 1 query time: 1.0328214168548584
brute force 1 query time: 1.0347516536712646
brute force 1 query time: 1.046635627746582
brute force 1 query time: 1.0552589893341064
brute force 1 query time: 1.060554027557373

brute force 2 query time: 0.2132854461669922
brute force 2 query time: 0.21696233749389648
brute force 2 query time: 0.22576475143432617
brute force 2 query time: 0.24015021324157715
brute force 2 query time: 0.2154543399810791

kdtree build time: 0.46120142936706543
kdtree query time: 1.3674664497375488
kdtree query time: 1.3628807067871094
kdtree query time: 1.366441249847412
kdtree query time: 1.3714039325714111
kdtree query time: 1.3713347911834717

balltree build time: 0.3671071529388428
balltree query time: 0.9311270713806152
balltree query time: 0.9326043128967285
balltree query time: 0.9357585906982422
balltree query time: 0.9386839866638184
balltree query time: 0.9414076805114746

snn build time: 0.03509688377380371
snn query time: 0.07893776893615723
snn q

In [10]:
# Persist every siftsmall timing list as its own .npy file.
siftsmall_outputs = [
    ('result/real_world/siftsmall_kdtree_index_timing_norm.npy', sift_kdtree_index_timing),
    ('result/real_world/siftsmall_balltree_index_timing_norm.npy', sift_balltree_index_timing),
    ('result/real_world/siftsmall_sn_index_timing_norm.npy', sift_sn_index_timing),
    ('result/real_world/siftsmall_bf_run_timing1_norm.npy', sift_bf_run_timing1),
    ('result/real_world/siftsmall_bf_run_timing2_norm.npy', sift_bf_run_timing2),
    ('result/real_world/siftsmall_kdtree_run_timing_norm.npy', sift_kdtree_run_timing),
    ('result/real_world/siftsmall_balltree_run_timing_norm.npy', sift_balltree_run_timing),
    ('result/real_world/siftsmall_sn_run_timing_norm.npy', sift_sn_run_timing),
]

# Create the output folder up front so the benchmark results are not lost to
# a FileNotFoundError on a fresh checkout.
os.makedirs('result/real_world', exist_ok=True)

for path, timings in siftsmall_outputs:
    with open(path, 'wb') as f:
        np.save(f, np.array(timings))
    

### sift

In [11]:
# Load the sift learn set (used as base points) and the query set.
# NOTE: this section rebinds the same sift_* names that the siftsmall section
# uses, so the siftsmall data and timing lists are replaced from here on.
sift_train = fvecs_read2('sift/sift_learn.fvecs')
sift_query = fvecs_read2('sift/sift_query.fvecs')

# Standardise each feature with the training-set mean and standard deviation;
# the queries are scaled with the same training statistics.
mu = np.mean(sift_train, axis=0)
scl = np.std(sift_train, axis=0)
sift_train = (sift_train - mu) / scl
sift_query = (sift_query - mu) / scl

# Index build times, filled in by the benchmark cell.
sift_kdtree_index_timing = []
sift_balltree_index_timing = []
sift_sn_index_timing = []

# Query times, one entry per tested radius, filled in by the benchmark cell.
sift_bf_run_timing1 = []
sift_bf_run_timing2 = []
sift_kdtree_run_timing = []
sift_balltree_run_timing = []
sift_sn_run_timing = []

In [12]:
# Query radii to benchmark: 10, 12, 14, 16, 18.
radius = list(range(10, 19, 2))

In [13]:
# sift benchmark: for every radius in `radius`, time fixed-radius queries
# issued one query point at a time with each search method.
# Intervals are measured with time.perf_counter(), a monotonic high-resolution
# clock, so long runs are not distorted by wall-clock adjustments.

# Brute force 1: scikit-learn NearestNeighbors with algorithm='brute'.
# fit() is inside the timed region, so it is part of each recorded run time.
# return_distance=False matches every other method in this cell, so all
# methods are timed on the same task (indices only).
for R in radius:
    st = time.perf_counter()
    neigh = NearestNeighbors(radius=R, algorithm='brute')
    neigh.fit(sift_train)
    for j in range(sift_query.shape[0]):
        ind = neigh.radius_neighbors(
            sift_query[j:j+1], radius=R, return_distance=False
        )
    et = time.perf_counter() - st
    sift_bf_run_timing1.append(et)
    print("brute force 1 query time:", et)
print()


# Brute force 2: bf_radius_fairness (presumably supplied by the bf_search
# star import -- confirm), called directly on the training array.
for R in radius:
    st = time.perf_counter()
    for j in range(sift_query.shape[0]):
        ind = bf_radius_fairness(sift_query[j], sift_train, R, return_distance=False)
    et = time.perf_counter() - st
    sift_bf_run_timing2.append(et)
    print("brute force 2 query time:", et)
print()


# KDtree: the tree is built once (timed separately) and reused for all radii.
st = time.perf_counter()
tree = KDTree(sift_train)
et = time.perf_counter() - st
sift_kdtree_index_timing.append(et)
print("kdtree build time:", et)
for R in radius:
    st = time.perf_counter()
    for j in range(sift_query.shape[0]):
        ind = tree.query_radius(sift_query[j:j+1], r=R, return_distance=False)
    et = time.perf_counter() - st
    sift_kdtree_run_timing.append(et)
    print("kdtree query time:", et)
print()

# Balltree: same protocol as the KD-tree; note that `tree` is rebound here.
st = time.perf_counter()
tree = BallTree(sift_train)
et = time.perf_counter() - st
sift_balltree_index_timing.append(et)
print("balltree build time:", et)

for R in radius:
    st = time.perf_counter()
    for j in range(sift_query.shape[0]):
        ind = tree.query_radius(sift_query[j:j+1], r=R, return_distance=False)
    et = time.perf_counter() - st
    sift_balltree_run_timing.append(et)
    print("balltree query time:", et)

print()


# SNN: build_snn_model (presumably supplied by the snnpy star import --
# confirm) is timed once, then queried point by point for every radius.
st = time.perf_counter()
snn = build_snn_model(sift_train)
et = time.perf_counter() - st
sift_sn_index_timing.append(et)
print("snn build time:", et)


for R in radius:
    st = time.perf_counter()
    for j in range(sift_query.shape[0]):
        ind = snn.radius_single_query(sift_query[j], R)
    et = time.perf_counter() - st
    sift_sn_run_timing.append(et)
    print("snn query time:", et)

brute force 1 query time: 456.90143728256226
brute force 1 query time: 458.4933137893677
brute force 1 query time: 461.2143409252167
brute force 1 query time: 466.2012288570404
brute force 1 query time: 466.70664620399475

brute force 2 query time: 153.15934443473816
brute force 2 query time: 154.18523979187012
brute force 2 query time: 156.49867725372314
brute force 2 query time: 158.1924228668213
brute force 2 query time: 155.48067259788513

kdtree build time: 2.7932474613189697
kdtree query time: 583.7876822948456
kdtree query time: 585.2774837017059
kdtree query time: 587.1097865104675
kdtree query time: 588.4481356143951
kdtree query time: 589.4487428665161

balltree build time: 2.2919700145721436
balltree query time: 401.4681761264801
balltree query time: 402.1432297229767
balltree query time: 404.55952882766724
balltree query time: 406.3749725818634
balltree query time: 407.92165541648865

snn build time: 0.157975435256958
snn query time: 52.88389468193054
snn query time: 70.722

In [14]:
# Persist every sift timing list as its own .npy file.
# The sift_* lists share their names with the siftsmall section, so this cell
# saves whatever those names are bound to when it runs.
sift_outputs = [
    ('result/real_world/sift_kdtree_index_timing_norm.npy', sift_kdtree_index_timing),
    ('result/real_world/sift_balltree_index_timing_norm.npy', sift_balltree_index_timing),
    ('result/real_world/sift_sn_index_timing_norm.npy', sift_sn_index_timing),
    ('result/real_world/sift_bf_run_timing1_norm.npy', sift_bf_run_timing1),
    ('result/real_world/sift_bf_run_timing2_norm.npy', sift_bf_run_timing2),
    ('result/real_world/sift_kdtree_run_timing_norm.npy', sift_kdtree_run_timing),
    ('result/real_world/sift_balltree_run_timing_norm.npy', sift_balltree_run_timing),
    ('result/real_world/sift_sn_run_timing_norm.npy', sift_sn_run_timing),
]

# Create the output folder up front so a long benchmark run is not followed
# by a FileNotFoundError on a fresh checkout.
os.makedirs('result/real_world', exist_ok=True)

for path, timings in sift_outputs:
    with open(path, 'wb') as f:
        np.save(f, np.array(timings))
    

### gist

In [15]:
# Load the gist base points and the query points.
gist_train = np.load("gist/train.npy")
gist_query = np.load("gist/queries.npy")

# Standardise each feature with the training-set mean and standard deviation;
# the queries are scaled with the same training statistics.
mu = np.mean(gist_train, axis=0)
scl = np.std(gist_train, axis=0)
gist_train = (gist_train - mu) / scl
gist_query = (gist_query - mu) / scl

# Index build times, filled in by the benchmark cell.
gist_kdtree_index_timing = []
gist_balltree_index_timing = []
gist_sn_index_timing = []

# Query times, one entry per tested radius, filled in by the benchmark cell.
gist_bf_run_timing1 = []
gist_bf_run_timing2 = []
gist_kdtree_run_timing = []
gist_balltree_run_timing = []
gist_sn_run_timing = []

In [16]:
# Query radii to benchmark: 10, 12, 14, 16, 18.
radius = list(range(10, 19, 2))

In [17]:
# gist benchmark: for every radius in `radius`, time fixed-radius queries
# issued one query point at a time with each search method.
# Intervals are measured with time.perf_counter(), a monotonic high-resolution
# clock, so long runs are not distorted by wall-clock adjustments.

# Brute force 1: scikit-learn NearestNeighbors with algorithm='brute'.
# fit() is inside the timed region, so it is part of each recorded run time.
# return_distance=False matches every other method in this cell, so all
# methods are timed on the same task (indices only).
for R in radius:
    st = time.perf_counter()
    neigh = NearestNeighbors(radius=R, algorithm='brute')
    neigh.fit(gist_train)
    for j in range(gist_query.shape[0]):
        ind = neigh.radius_neighbors(
            gist_query[j:j+1], radius=R, return_distance=False
        )
    et = time.perf_counter() - st
    gist_bf_run_timing1.append(et)
    print("brute force 1 query time:", et)
print()


# Brute force 2: bf_radius_fairness (presumably supplied by the bf_search
# star import -- confirm), called directly on the training array.
for R in radius:
    st = time.perf_counter()
    for j in range(gist_query.shape[0]):
        ind = bf_radius_fairness(gist_query[j], gist_train, R, return_distance=False)
    et = time.perf_counter() - st
    gist_bf_run_timing2.append(et)
    print("brute force 2 query time:", et)
print()


# KDtree: the tree is built once (timed separately) and reused for all radii.
st = time.perf_counter()
tree = KDTree(gist_train)
et = time.perf_counter() - st
gist_kdtree_index_timing.append(et)
print("kdtree build time:", et)
for R in radius:
    st = time.perf_counter()
    for j in range(gist_query.shape[0]):
        ind = tree.query_radius(gist_query[j:j+1], r=R, return_distance=False)
    et = time.perf_counter() - st
    gist_kdtree_run_timing.append(et)
    print("kdtree query time:", et)
print()


# Balltree: same protocol as the KD-tree; note that `tree` is rebound here.
st = time.perf_counter()
tree = BallTree(gist_train)
et = time.perf_counter() - st
gist_balltree_index_timing.append(et)
print("balltree build time:", et)

for R in radius:
    st = time.perf_counter()
    for j in range(gist_query.shape[0]):
        ind = tree.query_radius(gist_query[j:j+1], r=R, return_distance=False)
    et = time.perf_counter() - st
    gist_balltree_run_timing.append(et)
    print("balltree query time:", et)

print()


# SNN: build_snn_model (presumably supplied by the snnpy star import --
# confirm) is timed once, then queried point by point for every radius.
st = time.perf_counter()
snn = build_snn_model(gist_train)
et = time.perf_counter() - st
gist_sn_index_timing.append(et)
print("snn build time:", et)


for R in radius:
    st = time.perf_counter()
    for j in range(gist_query.shape[0]):
        ind = snn.radius_single_query(gist_query[j], R)
    et = time.perf_counter() - st
    gist_sn_run_timing.append(et)
    print("snn query time:", et)

brute force 1 query time: 6132.811296701431
brute force 1 query time: 6124.0621790885925
brute force 1 query time: 6121.540109872818
brute force 1 query time: 6123.257114648819
brute force 1 query time: 6130.417793273926

brute force 2 query time: 1052.6667928695679
brute force 2 query time: 1050.1799709796906
brute force 2 query time: 1047.2780179977417
brute force 2 query time: 1047.074282169342
brute force 2 query time: 1048.5210845470428

kdtree build time: 313.92735743522644
kdtree query time: 2484.5537118911743
kdtree query time: 2641.9785969257355
kdtree query time: 2746.655764579773
kdtree query time: 2819.3269991874695
kdtree query time: 2870.023309469223

balltree build time: 267.6973166465759
balltree query time: 1879.7257385253906
balltree query time: 1905.5452105998993
balltree query time: 1924.0615212917328
balltree query time: 1940.7102136611938
balltree query time: 1951.3301141262054

snn build time: 25.207237482070923
snn query time: 165.57272839546204
snn query time: 

In [18]:
# Persist every gist timing list as its own .npy file.
gist_outputs = [
    ('result/real_world/gist_kdtree_index_timing_norm.npy', gist_kdtree_index_timing),
    ('result/real_world/gist_balltree_index_timing_norm.npy', gist_balltree_index_timing),
    ('result/real_world/gist_sn_index_timing_norm.npy', gist_sn_index_timing),
    ('result/real_world/gist_bf_run_timing1_norm.npy', gist_bf_run_timing1),
    ('result/real_world/gist_bf_run_timing2_norm.npy', gist_bf_run_timing2),
    ('result/real_world/gist_kdtree_run_timing_norm.npy', gist_kdtree_run_timing),
    ('result/real_world/gist_balltree_run_timing_norm.npy', gist_balltree_run_timing),
    ('result/real_world/gist_sn_run_timing_norm.npy', gist_sn_run_timing),
]

# Create the output folder up front so a long benchmark run is not followed
# by a FileNotFoundError on a fresh checkout.
os.makedirs('result/real_world', exist_ok=True)

for path, timings in gist_outputs:
    with open(path, 'wb') as f:
        np.save(f, np.array(timings))
    
    