In [2]:
import numpy as np
# noinspection PyPackageRequirements
import faiss

d = 64                           # dimension
nb = 100000                      # database size
nq = 10000                       # nb of queries
np.random.seed(1234)             # make reproducible
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.

In [4]:
# Query vectors: drawn from the same global RNG stream and given the same
# row_index / 1000 drift on the first component as the database vectors.
xq = np.random.random(size=(nq, d)).astype(np.float32)
xq[:, 0] += np.arange(nq) / 1000.0

In [3]:
# Flat L2 index over d-dimensional vectors.
index = faiss.IndexFlatL2(d)
print(index.is_trained)        # report whether the index still needs training

# Add every database vector, then report how many the index now holds.
index.add(xb)
print(index.ntotal)

True
100000


In [5]:
k = 4                          # number of nearest neighbours returned per query

# Search all queries at once: D receives the distances and I the ids of the
# matching database vectors, one row per query and k columns each.
D, I = index.search(xq, k)
print(I[:5])                   # neighbour ids for the first 5 queries
print(D[-5:])                  # distances (not ids) for the last 5 queries

[[1053   78 1287 1077]
 [ 186  196  227  425]
 [ 630  541  153  238]
 [ 474 1099  457  772]
 [ 795  937  609  473]]
[[6.380005  6.694702  6.7855988 6.8912964]
 [6.757477  6.8468323 6.853668  6.8719177]
 [6.1107635 6.1976013 6.2461853 6.4429474]
 [5.5250244 5.718109  6.0959473 6.203949 ]
 [5.9106903 6.3909607 6.5147552 6.569931 ]]


In [None]:
nlist = 100                       # number of cluster centres (inverted lists)
k = 4                             # neighbours returned per query

# A flat L2 index acts as the coarse quantizer that assigns vectors to lists.
quantizer = faiss.IndexFlatL2(d)
# METRIC_L2 is passed explicitly so the search metric does not depend on
# whatever default the installed faiss version uses.
index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)

# The IVF index has to be trained on sample vectors before it accepts data.
assert not index.is_trained
index.train(xb)
assert index.is_trained

# Adding may be somewhat slower than for the flat index.
index.add(xb)

# First pass: search with the default nprobe (1 according to the faiss docs).
D, I = index.search(xq, k)
print(I[-5:])                     # neighbour ids for the last 5 queries

# Second pass: probe 10 lists per query instead, then search again.
index.nprobe = 10
D, I = index.search(xq, k)
print(I[-5:])                     # neighbour ids for the last 5 queries