In [1]:
import ast

import faiss
import numpy as np
from sklearn.neighbors import NearestNeighbors

In [2]:
# Load the precomputed embedding matrix from disk; each row is one embedding vector.
embeddings = np.load('output/embeddings.npy')
embeddings.shape

(30400, 2048)

In [3]:
# Use row 40 as the query. Slicing (instead of plain indexing) keeps the result
# two-dimensional, i.e. a batch containing a single query vector.
query_vector = embeddings[40:41, :]
query_vector.shape

(1, 2048)

### Linear search using numpy

In [18]:
%%time
k=20
# compute distances
distances = np.linalg.norm(embeddings - query_vector, axis = 1)
# select indices of vectors having the lowest distances from the query vector (sorted!)
neighbors = np.argpartition(distances, range(0, k))[:k]

CPU times: user 53.2 ms, sys: 96.7 ms, total: 150 ms
Wall time: 149 ms


In [19]:
neighbors

array([   40,    30, 22512, 23212, 22446, 24105, 22527, 22575, 22525,
          34, 22515, 22555, 17436, 22504, 16864, 29565, 14535, 14549,
       17016, 22518])

### Search with scikit-learn

In [7]:
# Number of neighbours to retrieve for each query
k = 20
# Create the estimator with default settings and fit it on all embeddings
neigh = NearestNeighbors(n_neighbors=k)
neigh.fit(embeddings)

NearestNeighbors(n_neighbors=20)

In [8]:
%%time
k_neighbors = neigh.kneighbors(query_vector,20, return_distance = False)

CPU times: user 1.5 s, sys: 5.47 s, total: 6.97 s
Wall time: 151 ms


In [9]:
# For every neighbour found by the linear search, check whether scikit-learn
# returned it too. np.isin replaces np.in1d, which is deprecated in NumPy 2.0.
np.isin(neighbors, k_neighbors[0])

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True])

### FAISS - Flat index (exact search)

In [10]:
%%time
dim = 2048
index = faiss.index_factory(dim, "Flat")
index.train(embeddings)
index.add(embeddings)
factory_distances, factory_neighbors = index.search(query_vector.reshape(1,-1).astype(np.float32), k)

CPU times: user 96.3 ms, sys: 82.1 ms, total: 178 ms
Wall time: 178 ms


In [11]:
# For every neighbour found by the linear search, check whether the FAISS Flat
# index returned it too. np.isin replaces np.in1d, which is deprecated in NumPy 2.0.
np.isin(neighbors, factory_neighbors)

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True])

### FAISS - HNSW

In [12]:
conn_vertex = 32  # number of connections per vertex in the HNSW graph
# Dimensionality of the embedding vectors, read from the data so it cannot
# get out of sync with the matrix loaded above.
dim = embeddings.shape[1]
ef_search = 32  # depth of exploration while answering a query
ef_construction = 64  # depth of exploration while building the graph

In [13]:
index = faiss.IndexHNSWFlat(dim, conn_vertex)

# Depth of search during build (used while vectors are inserted into the graph,
# so it has to be set before add() is called)
index.hnsw.efConstruction = ef_construction

# Depth of search during search (used when the index is queried)
index.hnsw.efSearch = ef_search

index.add(embeddings)

In [14]:
%%time
#Number of nearest neighbors to return
n = 20
hnsw_distance, hnsw_neighbors = index.search(query_vector,n)

CPU times: user 840 ms, sys: 6.75 ms, total: 847 ms
Wall time: 9.89 ms


In [15]:
# For every neighbour found by the linear search, check whether the HNSW index
# returned it too. np.isin replaces np.in1d, which is deprecated in NumPy 2.0.
np.isin(neighbors, hnsw_neighbors)

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True])

### Check filenames

In [16]:
# Only the first line of the file is read, and it is parsed as a Python literal.
# ast.literal_eval is used instead of eval so that nothing in the file can be
# executed as code.
# NOTE(review): presumably the line is a list of file names in the same order
# as the rows of the embedding matrix - confirm against the code that writes it.
with open('output/filenames.txt', 'r') as f:
    filenames = ast.literal_eval(f.readline())

In [17]:
# Show the file name behind each neighbour found by the linear search
for neighbor_idx in neighbors:
    print(filenames[neighbor_idx])

airplane041.jpg
airplane031.jpg
runway113.jpg
runwaymarking013.jpg
runway047.jpg
shippingyard106.jpg
runway128.jpg
runway176.jpg
runway126.jpg
airplane035.jpg
runway116.jpg
runway156.jpg
oilgasfield637.jpg
runway105.jpg
oilgasfield065.jpg
transformerstation766.jpg
intersection136.jpg
intersection150.jpg
oilgasfield217.jpg
runway119.jpg
