In [1]:
import numpy as np
import faiss
import time

# Create sample data - 10,000 vectors with 128 dimensions.
# A seeded generator makes the dataset (and therefore every timing/overlap
# result derived from it) reproducible under Restart Kernel -> Run All.
SEED = 42
rng = np.random.default_rng(SEED)

dimension = 128
num_vectors = 10000

# Values are uniform in [0, 1) and generated directly as float32,
# avoiding a temporary float64 array followed by a cast.
vectors = rng.random((num_vectors, dimension), dtype=np.float32)
query = rng.random((1, dimension), dtype=np.float32)

In [2]:
# Inspect the generated dataset: the bare expression renders the array repr as the cell output.
vectors

array([[0.27412277, 0.90362036, 0.8425896 , ..., 0.82546043, 0.6318611 ,
        0.954885  ],
       [0.64607143, 0.7073742 , 0.50662154, ..., 0.3929032 , 0.5758515 ,
        0.8119512 ],
       [0.5326433 , 0.49472833, 0.35727006, ..., 0.43423846, 0.90118873,
        0.7214608 ],
       ...,
       [0.34490615, 0.7350706 , 0.71708006, ..., 0.61812687, 0.41543674,
        0.9836821 ],
       [0.65040445, 0.9967052 , 0.7845443 , ..., 0.9100172 , 0.3200968 ,
        0.45370603],
       [0.7680963 , 0.8477374 , 0.37442106, ..., 0.22362429, 0.9601763 ,
        0.16690987]], shape=(10000, 128), dtype=float32)

In [3]:
# Inspect the single query vector that is searched against both indexes below.
query

array([[0.40241605, 0.7977795 , 0.7318906 , 0.19723697, 0.8442542 ,
        0.5694432 , 0.6142105 , 0.2964006 , 0.06672271, 0.12829708,
        0.19659263, 0.61766833, 0.0104667 , 0.24531247, 0.41956827,
        0.85851705, 0.02280342, 0.48527583, 0.460888  , 0.8214168 ,
        0.793887  , 0.40248948, 0.65828955, 0.26644078, 0.8146191 ,
        0.27896154, 0.8142557 , 0.21979773, 0.9498974 , 0.93251455,
        0.96303296, 0.8493269 , 0.23321691, 0.8957781 , 0.77955616,
        0.11244316, 0.29904488, 0.25221083, 0.49953863, 0.51933664,
        0.6384863 , 0.10062018, 0.7954636 , 0.6610519 , 0.89992446,
        0.83744216, 0.05588955, 0.40948442, 0.57395744, 0.87467414,
        0.9391812 , 0.6945718 , 0.62753415, 0.6057913 , 0.56724197,
        0.61328846, 0.23782551, 0.5584207 , 0.8863147 , 0.6250477 ,
        0.57485646, 0.5019436 , 0.29136333, 0.66030115, 0.9806604 ,
        0.03936557, 0.67538804, 0.9730308 , 0.6934819 , 0.20538616,
        0.8701518 , 0.750155  , 0.5783978 , 0.71

In [4]:
# Exact search index: a flat L2 index sized for `dimension`-long vectors.
# It is loaded with the full dataset and serves as the reference whose results
# the approximate HNSW index is compared against later in the notebook.
exact_index = faiss.IndexFlatL2(dimension)
exact_index.add(vectors)

In [5]:
# HNSW index (approximate but faster).
# The second constructor argument is HNSW's M parameter: the number of graph
# neighbors linked per vector. Larger values generally improve recall at the
# cost of more memory and a slower build.
HNSW_M = 32
hnsw_index = faiss.IndexHNSWFlat(dimension, HNSW_M)
hnsw_index.add(vectors)

In [6]:
# Compare search times of the exact and HNSW indexes for the same query.
# time.perf_counter() is used rather than time.time(): it is monotonic and uses
# the highest-resolution clock available, which matters for sub-millisecond spans.
# NOTE: each search is timed once, so the figures are indicative, not a rigorous benchmark.
k = 10  # number of nearest neighbors requested from each index

start_time = time.perf_counter()
exact_D, exact_I = exact_index.search(query, k=k)
exact_time = time.perf_counter() - start_time

start_time = time.perf_counter()
hnsw_D, hnsw_I = hnsw_index.search(query, k=k)
hnsw_time = time.perf_counter() - start_time

In [7]:
# Calculate overlap (how many of the exact neighbors the HNSW search also found).
# faiss pads missing results with the id -1, so that sentinel is removed before
# comparing; otherwise two padded slots would be counted as a match.
exact_ids = set(exact_I[0].tolist()) - {-1}
hnsw_ids = set(hnsw_I[0].tolist()) - {-1}
overlap = len(exact_ids & hnsw_ids)

# Derive the denominator from the result shape instead of hard-coding 10, so the
# percentage stays correct if the number of requested neighbors changes.
num_neighbors = exact_I.shape[1]
overlap_percentage = overlap * 100 / num_neighbors

print(f"Exact search time: {exact_time:.6f} seconds")
print(f"HNSW search time: {hnsw_time:.6f} seconds")
if hnsw_time > 0:
    print(f"Speed improvement: {exact_time/hnsw_time:.2f}x faster")
else:
    # A coarse timer can report a zero elapsed time; avoid ZeroDivisionError.
    print("Speed improvement: n/a (HNSW time below timer resolution)")
print(f"Result overlap: {overlap_percentage:.1f}%")

Exact search time: 0.001920 seconds
HNSW search time: 0.001346 seconds
Speed improvement: 1.43x faster
Result overlap: 80.0%
