In [1]:
pip install faiss-cpu

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import faiss

# Synthetic workload: a database of random vectors plus a handful of queries.
np.random.seed(42)  # fixed seed -> the same vectors on every fresh run

d = 128     # dimensionality of every vector
nb = 10000  # size of the database (vectors that will be indexed)
nq = 5      # how many query vectors to search with

# Uniform samples in [0, 1) come back as float64; they are cast to float32
# before being used. The database is drawn first, then the queries, so the
# draw order (and therefore the values) stays tied to the seed above.
data_vectors = np.random.random_sample((nb, d)).astype(np.float32)
query_vectors = np.random.random_sample((nq, d)).astype(np.float32)


In [5]:
# IVF (Inverted File) index: the dataset is partitioned into clusters
# ("inverted lists") and a query only scans the lists it is routed to.
nlist = 100  # number of clusters / inverted lists
k = 4        # nearest neighbors returned for each query

# A flat (exhaustive) L2 index serves as the coarse quantizer that assigns
# vectors to clusters; the IVF index wraps it and uses the L2 metric as well.
quantizer = faiss.IndexFlatL2(d)
index_ivf = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)

# Train on the dataset first, then insert the very same vectors.
index_ivf.train(data_vectors)
index_ivf.add(data_vectors)

# Visit 10 of the nlist clusters per query. Because only part of the index is
# scanned, the search is approximate and may miss a true nearest neighbor.
index_ivf.nprobe = 10
distances, indices = index_ivf.search(query_vectors, k)

# One row per query: ids of the k hits, then the matching distances
# (for METRIC_L2, FAISS reports squared Euclidean distances).
print("IVF Index Results:")
print("Indices of Nearest Neighbors:\n", indices)
print("Distances to Nearest Neighbors:\n", distances)


IVF Index Results:
Indices of Nearest Neighbors:
 [[8769 9571 3948 4436]
 [3314 7078 8916  957]
 [6304 7784 1416 2755]
 [8190 3455 7757 7333]
 [  59 6033 9316 6865]]
Distances to Nearest Neighbors:
 [[13.346962  14.837142  15.330732  15.379183 ]
 [12.774042  13.365513  13.785868  13.9384775]
 [15.7264385 15.7710285 15.820051  15.835105 ]
 [13.868846  14.173682  14.198276  14.473603 ]
 [13.91506   14.19504   14.624182  14.83807  ]]


In [6]:
# HNSW (Hierarchical Navigable Small World): a graph-based approximate
# nearest-neighbor search structure.
k = 4  # nearest neighbors returned for each query

# Second argument is HNSW's M parameter (graph connectivity), here 32.
index_hnsw = faiss.IndexHNSWFlat(d, 32)

# No train() call is made for this index; the vectors are inserted directly.
index_hnsw.add(data_vectors)

# Query with the same vectors used for the other index so the results can be
# compared side by side. The search is approximate, so the hits may differ.
distances_hnsw, indices_hnsw = index_hnsw.search(query_vectors, k)

# One row per query: ids of the k hits, then the matching distances.
print("\nHNSW Index Results:")
print("Indices of Nearest Neighbors:\n", indices_hnsw)
print("Distances to Nearest Neighbors:\n", distances_hnsw)



HNSW Index Results:
Indices of Nearest Neighbors:
 [[9385 5125 9571 3491]
 [2779 8916  957 8861]
 [3014 4097 5784 2755]
 [8808 9253 7506 8190]
 [7630   59 6994 6033]]
Distances to Nearest Neighbors:
 [[14.548448  14.756077  14.837141  14.912471 ]
 [13.566153  13.785869  13.9384775 14.088596 ]
 [15.4164095 15.693481  15.824047  15.835105 ]
 [12.672464  13.022816  13.64159   13.868846 ]
 [13.786429  13.91506   14.109114  14.19504  ]]
