In [1]:
import numpy as np
import faiss

In [2]:
dimension = 64          # length of every vector
num_db = 100000         # how many vectors are stored in the database
num_queries = 10000     # how many query vectors are searched


def _random_vectors(count):
    """Return `count` uniform-random float32 rows of width `dimension`.

    A ramp of i / 1000 is added to the first component of row i, so that
    coordinate grows with the row number.
    """
    vecs = np.random.random((count, dimension)).astype(np.float32)
    vecs[:, 0] += np.arange(count) / 1000.
    return vecs


np.random.seed(1234)    # fixed seed keeps the generated data reproducible
# Order matters: the database is drawn first and the queries second, both
# from the same seeded global stream.
db_vecs = _random_vectors(num_db)
query_vecs = _random_vectors(num_queries)

In [3]:
# Create an L2-distance flat index sized for `dimension`-component vectors.
# Alternative index type: faiss.IndexFlatIP(dimension) (the inner-product
# variant, per the Faiss documentation).
index = faiss.IndexFlatL2(dimension)

# Prints True for this index before any vectors have been added.
print(index.is_trained)

# Load every database vector into the index, then report how many it holds.
index.add(db_vecs)
print(index.ntotal)

True
100000


In [4]:
k = 4  # number of nearest neighbours returned for each query

# Sanity check: look up the first five database vectors themselves. Each row
# should list its own position first, with a distance of 0.
distances, indices = index.search(db_vecs[:5], k)
print(indices, distances, sep="\n")

# Actual search over the full query set; this rebinds `distances` and
# `indices` to the query results.
distances, indices = index.search(query_vecs, k)
# Show the neighbour ids for the first five and the last five queries.
print(indices[:5], indices[-5:], sep="\n")

[[  0 393 363  78]
 [  1 555 277 364]
 [  2 304 101  13]
 [  3 173  18 182]
 [  4 288 370 531]]
[[0.        7.1751733 7.207629  7.2511625]
 [0.        6.3235645 6.684581  6.7999454]
 [0.        5.7964087 6.391736  7.2815123]
 [0.        7.2779055 7.5279865 7.6628466]
 [0.        6.7638035 7.2951202 7.3688145]]
[[ 381  207  210  477]
 [ 526  911  142   72]
 [ 838  527 1290  425]
 [ 196  184  164  359]
 [ 526  377  120  425]]
[[ 9900 10500  9309  9831]
 [11055 10895 10812 11321]
 [11353 11103 10164  9787]
 [10571 10664 10632  9638]
 [ 9628  9554 10036  9582]]
