In [None]:
import os

import faiss

# Create a regular in-RAM index: every added vector is held in process memory.
index = faiss.IndexFlatIP(768)  # 768-dimensional vectors

# For large indices that don't fit in RAM, use memory mapping.
# "large_index.faiss" is only written by a later cell, so the read is guarded:
# on a fresh kernel the in-RAM index above is kept instead of raising because
# the file does not exist yet.
if os.path.exists("large_index.faiss"):
    index = faiss.read_index("large_index.faiss", faiss.IO_FLAG_MMAP)

In [None]:
# Options when RAM is limited.
# Illustrative parameters, defined here so this overview cell runs on a fresh
# kernel instead of raising NameError.
d = 768      # vector dimension
nlist = 100  # number of IVF clusters
M = 8        # number of PQ sub-quantizers (d must be divisible by M)
shards = 4   # number of shards (example value)
quantizer = faiss.IndexFlatIP(d)

# 1. Use compressed indices (M sub-quantizers x 8 bits each = M bytes per vector).
#    The metric is passed explicitly so it matches the inner-product quantizer;
#    IndexIVFPQ uses L2 when the argument is omitted.
index = faiss.IndexIVFPQ(
    quantizer, d, nlist, M, 8, faiss.METRIC_INNER_PRODUCT
)

# 2. Sharding across multiple machines (one flat index per shard)
indices = [faiss.IndexFlatIP(d) for _ in range(shards)]

# 3. Memory-mapped files for large indices.
#    Guarded because the file is only produced by a later cell.
if os.path.exists("large_index.faiss"):
    index = faiss.read_index("large_index.faiss", faiss.IO_FLAG_MMAP)

In [None]:
# Basic FAISS usage: build a flat index, add vectors, and search
import faiss
import numpy as np

# Generate sample data
d = 768  # dimension
n_data = 100000
n_query = 1000

# Random vectors (in RAM).
# A seeded generator makes the data identical on every run, and asking for
# float32 directly avoids first materialising a float64 array of the same
# shape (twice the size) only to convert it.
rng = np.random.default_rng(42)
data_vectors = rng.random((n_data, d), dtype=np.float32)
query_vectors = rng.random((n_query, d), dtype=np.float32)

# Create index in RAM
index = faiss.IndexFlatIP(d)
print(f"Index size: {index.ntotal}")  # 0 initially

# Add vectors to RAM
index.add(data_vectors)
print(f"Index size after add: {index.ntotal}")

# Search (all in RAM): for each query, the k best matches by inner product
k = 10  # number of nearest neighbors
distances, indices = index.search(query_vectors, k)

In [None]:
# With quantization: a more memory-efficient index
nlist = 100  # number of clusters
quantizer = faiss.IndexFlatIP(d)
# 8 sub-quantizers x 8 bits each = 8 bytes per vector.
# The metric is passed explicitly so the IVF index agrees with its
# inner-product coarse quantizer; when omitted, IndexIVFPQ uses L2, which
# would mix two different metrics between cluster assignment and search.
index = faiss.IndexIVFPQ(
    quantizer, d, nlist, 8, 8, faiss.METRIC_INNER_PRODUCT
)

# Train (learns the cluster centroids and PQ codebooks), then add
index.train(data_vectors)
index.add(data_vectors)

# Search with compression.
# Only `index.nprobe` clusters are scanned per query (FAISS default: 1);
# raise it to trade speed for recall.
distances, indices = index.search(query_vectors, k)

In [None]:
# Memory mapping: persist the current index, then reopen it memory-mapped
index_file = "large_index.faiss"

# Write the index held in `index` to disk
faiss.write_index(index, index_file)

# Reopen the saved file with the memory-mapping I/O flag
mmap_index = faiss.read_index(index_file, faiss.IO_FLAG_MMAP)

In [None]:
# Monitor memory usage
import psutil
import faiss

def check_memory_usage(index):
    """Print the process's resident memory and a size estimate for ``index``.

    Args:
        index: Object exposing ``ntotal`` (number of stored vectors) and,
            optionally, ``code_size`` (bytes stored per vector) and/or ``d``
            (vector dimension), e.g. a FAISS index.

    Returns:
        None. The three figures are printed to stdout.

    The estimate is ``ntotal * bytes_per_vector``. ``code_size`` is used when
    the index exposes it, so a compressed index is not reported at its
    uncompressed size; otherwise raw float32 storage (``d * 4`` bytes) is
    assumed. Per-vector ids and other bookkeeping are not counted, so treat
    the figure as a lower bound.
    """
    bytes_per_mb = 1024 * 1024

    process = psutil.Process()
    memory_mb = process.memory_info().rss / bytes_per_mb
    print(f"Process memory: {memory_mb:.2f} MB")
    print(f"Index vectors: {index.ntotal}")

    # Estimate index memory: prefer the stored code size over the raw
    # float32 assumption, which is only correct for uncompressed indexes.
    bytes_per_vector = getattr(index, 'code_size', None)
    if bytes_per_vector is None and hasattr(index, 'd'):
        bytes_per_vector = index.d * 4  # 4 bytes per float32 component
    if bytes_per_vector is not None:
        est_memory = index.ntotal * bytes_per_vector / bytes_per_mb  # MB
        print(f"Estimated index memory: {est_memory:.2f} MB")