<a href="https://colab.research.google.com/github/prakash-bisht/GAI-LLM/blob/main/faiss_indexing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install faiss-cpu

In [2]:
import numpy as np
import faiss

In [3]:
# 1. FLAT (Exact Search)
# L2
d = 128
xb = np.random.random((10000, d)).astype('float32')
xq = np.random.random((100, d)).astype('float32')

index = faiss.IndexFlatL2(d)   # exact L2
index.add(xb)

k = 5
D, I = index.search(xq, k)
print("Flat L2:", I[:5])


Flat L2: [[2541 6019 1699 8645 9424]
 [5819 8403 6767 2004  394]
 [7405 4114  272 5636 9960]
 [4164 8417 7270 7016 1864]
 [1216  349 8477 4845 6630]]


In [4]:
# 1. FLAT (Exact Search)
# Inner Product (for cosine)
index_ip = faiss.IndexFlatIP(d)
faiss.normalize_L2(xb)
faiss.normalize_L2(xq)

index_ip.add(xb)
D, I = index_ip.search(xq, 5)
print("Flat IP:", I[:5])


Flat IP: [[2541 6019 3020 9639  330]
 [8403 5819 2004 1775  479]
 [7405  272 4114 4363 7198]
 [4164 5098 2881  807 5978]
 [ 349 4845 1216 3556 5860]]


In [5]:
# IVF + Flat
nlist = 100
quantizer = faiss.IndexFlatL2(d) #Used to assign vectors to clusters

index_ivf = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)

# Above line creates IVF index
# Structure:
# cluster 0 → vectors
# cluster 1 → vectors


index_ivf.train(xb) #Runs k-means
index_ivf.add(xb)

index_ivf.nprobe = 10
# 10 ~ Number of clusters searched at query time
# Higher = better recall, slower search

D,I = index_ivf.search(xq, 5)
print("IVF Flat:", I[:5])


IVF Flat: [[6019 9639  330 8479  129]
 [8403 5819 9864 8911 6401]
 [ 272 4114 7583 9490 9673]
 [3082 8044 7590 2715 8417]
 [4845 1216 5860 7605 6630]]


In [6]:
# IVF + PQ
m = 16   #number of sub-vectors
nbits = 8  #bits per sub-vector (256 values),256 centroids, 2 ** 8
# Each sub-vector can be represented by one of 256 possible values

index_ivfpq = faiss.IndexIVFPQ(quantizer, d, nlist, m, nbits)

index_ivfpq.train(xb)
index_ivfpq.add(xb)
index_ivfpq.nprobe = 10

D,I = index_ivfpq.search(xq, 5)
print("IVF PQ:", I[:5])


IVF PQ: [[8479 5154 6019 9849 5191]
 [ 318 6323 1523 5903 3515]
 [4114 6400 3416  699  272]
 [4418 8044 8417 8482 2431]
 [2922 6630 1216 8498 2440]]


In [None]:
# PQ (Product Quantization Only)
m = 16

index_pq = faiss.IndexPQ(d, m, 8)  # 8 bits per code
index_pq.train(xb)
index_pq.add(xb)

D,I = index_pq.search(xq, 5)
print("PQ only:", I[:5])


In [7]:
# SQ (Scalar Quantization)
index_sq = faiss.IndexScalarQuantizer(d, faiss.ScalarQuantizer.QT_8bit)
index_sq.train(xb)
index_sq.add(xb)

D,I = index_sq.search(xq, 5)
print("SQ:", I[:5])


SQ: [[2541 6019 3020 9639  330]
 [8403 5819 2004 1775  479]
 [7405  272 4114 4363 7198]
 [4164 5098 2881  807 5978]
 [ 349 4845 1216 3556 5860]]


In [8]:
# HNSW (Graph Based)
index_hnsw = faiss.IndexHNSWFlat(d, 32)  # 32 neighbors
# 32 = neighbors per node

index_hnsw.hnsw.efConstruction = 40

# 40 ~ Controls graph quality
# Higher = better graph, slower build

index_hnsw.add(xb)

index_hnsw.hnsw.efSearch = 16
# 16 ~ Controls search depth
# Higher = better recall

D,I = index_hnsw.search(xq, 5)
print("HNSW:", I[:5])


HNSW: [[3020  330 8479 6885 6619]
 [8403 5819 1775 9864 7380]
 [ 272 4363 4321 3489 7583]
 [5098 2881  807 8044 7590]
 [ 349 3556 7605 8785 8498]]


In [None]:
# 7. GPU Index (Example)
import faiss

res = faiss.StandardGpuResources()
index_cpu = faiss.IndexFlatL2(d)
index_gpu = faiss.index_cpu_to_gpu(res, 0, index_cpu)

index_gpu.add(xb)
D,I = index_gpu.search(xq, 5)
print("GPU Index:", I[:5])


In [11]:
# 8. Combined Index (IVF + HNSW + PQ)
quantizer = faiss.IndexHNSWFlat(d, 32)
index_combined = faiss.IndexIVFPQ(quantizer, d, 100, 16, 8)

index_combined.train(xb)
index_combined.add(xb)

index_combined.nprobe = 15

D,I = index_combined.search(xq, 5)
print("IVF-HNSW-PQ:", I[:5])

IVF-HNSW-PQ: [[1911 8479 5191 2950 7092]
 [6401 1523  899 6767  394]
 [4114  272 2993 1849 8834]
 [8044 4059 8417  826 4458]
 [2440 2922  618 4845  892]]
