In [1]:
import numpy as np

# 1000 vectors of 128 dimensions
data = np.random.random((1000, 128)).astype('float32')

# Query vector
query = np.random.random((1, 128)).astype('float32')

In [2]:
data.shape, query.shape

((1000, 128), (1, 128))

# Build a simple FAISS index (exact)

In [3]:
import faiss

index = faiss.IndexFlatL2(128)  # L2 = Euclidean distance
index.add(data)  # Add vectors to the index

# Search
k = 5  # top-5
D, I = index.search(query, k)

print("Indices:", I)
print("Distances:", D)

Indices: [[983  11 661 400 542]]
Distances: [[13.7595625 14.354604  14.505953  14.839863  15.3623295]]


# Use FAISS with approximate search

In [5]:
nlist = 10 # Number of clusters
quantizer = faiss.IndexFlatL2(128)
index = faiss.IndexIVFFlat(quantizer, 128, nlist)

index.train(data)      # Train the clustering
index.add(data)        # Add vectors
index.nprobe = 10      # Number of clusters to search

D, I = index.search(query, k)

In [6]:
print("Indices:", I)
print("Distances:", D)

Indices: [[983  11 661 400 542]]
Distances: [[13.7595625 14.354604  14.505953  14.839863  15.3623295]]
