# Performance Tuning Guide

This notebook covers advanced performance tuning techniques for VectorDB.

## Topics Covered

1. HNSW index parameter tuning
2. Batch operations
3. Memory optimization
4. Query optimization
5. Persistence and I/O
6. Best practices summary

In [None]:
import numpy as np
import time
import psutil
import os
from typing import List, Dict, Any

from vectordb import VectorDatabase

# Seed NumPy's global RNG so the randomly generated vectors are reproducible across runs.
np.random.seed(42)

def get_memory_usage():
    """Return the resident set size (RSS) of this Python process in MB.

    The value is the process RSS in bytes divided by 1024 twice.
    """
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / 1024 / 1024

# Report the baseline RSS before any database or test data has been created.
print(f"Initial memory usage: {get_memory_usage():.1f} MB")

## 1. HNSW Parameter Tuning

HNSW has three main parameters:

- **M**: Number of connections per node (affects memory and recall)
- **ef_construction**: Search depth during build (affects build time and recall)
- **ef_search**: Search depth during query (affects query time and recall)

In [None]:
# Synthetic benchmark inputs: standard-normal float32 vectors, 100 query
# vectors of the same dimension, and one string ID per stored vector.
N_VECTORS = 50000
DIMENSION = 128

print(f"Generating {N_VECTORS} vectors...")

# Draw order matters for reproducibility: `data` first, then `queries`.
data = np.asarray(np.random.randn(N_VECTORS, DIMENSION), dtype=np.float32)
queries = np.asarray(np.random.randn(100, DIMENSION), dtype=np.float32)

ids = ["vec_" + str(index) for index in range(N_VECTORS)]

In [None]:
# Benchmark how the HNSW `M` parameter affects build time, memory growth and
# per-query latency. One fresh database is built per M value.
m_values = [8, 16, 32, 48]
m_results = []

# Queries used for the latency column; the average is taken over exactly this
# slice so the divisor can never drift out of sync with the loop.
timed_queries = queries[:10]

print("\nTesting different M values:")
print(f"{'M':>6} {'Build (s)':>12} {'Memory (MB)':>14} {'Search (ms)':>12}")
print("-" * 50)

for m in m_values:
    # NOTE: the RSS delta is only an approximation of index size; memory
    # released by a previous iteration is not necessarily returned to the OS.
    mem_before = get_memory_usage()

    db = VectorDatabase()
    try:
        collection = db.create_collection(
            name="test",
            dimension=DIMENSION,
            index_type="hnsw",
            index_params={"M": m, "ef_construction": 100}
        )

        # Build. perf_counter() is monotonic and high-resolution, unlike
        # time.time(), which can jump with wall-clock adjustments.
        start = time.perf_counter()
        collection.add(data, ids=ids)
        build_time = time.perf_counter() - start

        mem_delta = get_memory_usage() - mem_before

        # Search
        start = time.perf_counter()
        for q in timed_queries:
            collection.search(q, k=10)
        search_time = (time.perf_counter() - start) / len(timed_queries) * 1000  # ms per query

        m_results.append({
            'M': m,
            'build_time': build_time,
            'memory': mem_delta,
            'search_time': search_time
        })

        print(f"{m:>6} {build_time:>12.2f} {mem_delta:>14.1f} {search_time:>12.2f}")
    finally:
        # Always release the database, even if a build or search step fails.
        db.close()

In [None]:
# Benchmark how `ef_construction` affects HNSW build time (M fixed at 16).
ef_construction_values = [50, 100, 200, 400]

print("\nTesting different ef_construction values (M=16):")
print(f"{'ef_c':>8} {'Build (s)':>12} {'Recall':>10}")
print("-" * 35)

for ef_c in ef_construction_values:
    db = VectorDatabase()
    try:
        collection = db.create_collection(
            name="test",
            dimension=DIMENSION,
            index_type="hnsw",
            index_params={"M": 16, "ef_construction": ef_c}
        )

        # perf_counter() is the monotonic, high-resolution clock intended for
        # measuring elapsed time.
        start = time.perf_counter()
        collection.add(data, ids=ids)
        build_time = time.perf_counter() - start

        # Note: In production, you'd compute actual recall against ground truth.
        # The recall column is a placeholder; it is not measured here.
        print(f"{ef_c:>8} {build_time:>12.2f} {'~varies':>10}")
    finally:
        # Always release the database, even if the build fails.
        db.close()

### HNSW Tuning Guidelines

| Use Case | M | ef_construction | ef_search |
|----------|---|-----------------|------------|
| Low memory | 8-12 | 50-100 | 20-50 |
| Balanced | 16-24 | 100-200 | 50-100 |
| High recall | 32-48 | 200-500 | 100-500 |

## 2. Batch Operations

Batching operations significantly improves throughput.

In [None]:
# Compare single vs batch insertions.
# The same N_INSERT vectors are inserted one at a time, then again in batches
# of increasing size, each time into a freshly created collection.
N_INSERT = 1000

db = VectorDatabase()
try:
    collection = db.create_collection(name="batch_test", dimension=DIMENSION)

    # Pool of candidate vectors; only the first N_INSERT are actually inserted.
    test_vectors = data[:5000]
    test_ids = ids[:5000]

    # Single insertions (baseline). perf_counter() is monotonic and
    # high-resolution, which matters for the short batch timings below.
    start = time.perf_counter()
    for i in range(N_INSERT):
        collection.add(test_vectors[i:i+1], ids=[test_ids[i]])
    single_time = time.perf_counter() - start
    single_rate = N_INSERT / single_time

    # Batch insertions
    batch_sizes = [10, 100, 500, 1000]
    print(f"{'Batch Size':>12} {'Time (s)':>12} {'Rate (vec/s)':>14} {'Speedup':>10}")
    print("-" * 52)
    print(f"{'1 (single)':>12} {single_time:>12.3f} {single_rate:>14.1f} {'1.0x':>10}")

    for batch_size in batch_sizes:
        # Start every run from an empty collection so runs are comparable.
        db.delete_collection("batch_test")
        collection = db.create_collection(name="batch_test", dimension=DIMENSION)

        start = time.perf_counter()
        for i in range(0, N_INSERT, batch_size):
            end = min(i + batch_size, N_INSERT)
            collection.add(test_vectors[i:end], ids=test_ids[i:end])
        batch_time = time.perf_counter() - start
        batch_rate = N_INSERT / batch_time
        speedup = batch_rate / single_rate

        print(f"{batch_size:>12} {batch_time:>12.3f} {batch_rate:>14.1f} {speedup:>9.1f}x")
finally:
    # Always release the database, even if an insertion fails.
    db.close()

In [None]:
# Batch search performance: time the same queries issued one by one and
# through a single `search_batch` call.
db = VectorDatabase()
try:
    collection = db.create_collection(name="search_test", dimension=DIMENSION, index_type="hnsw")
    collection.add(data[:10000], ids=ids[:10000])

    n_queries = 100
    test_queries = queries[:n_queries]

    # Individual searches. perf_counter() is used because time.time() can
    # report a zero elapsed time on coarse clocks, which would make the QPS
    # and speedup divisions below raise ZeroDivisionError.
    start = time.perf_counter()
    for q in test_queries:
        collection.search(q, k=10)
    individual_time = time.perf_counter() - start

    # Batch search
    start = time.perf_counter()
    collection.search_batch(test_queries, k=10)
    batch_time = time.perf_counter() - start

    print(f"\nSearch Performance ({n_queries} queries):")
    print(f"  Individual: {individual_time:.3f}s ({n_queries/individual_time:.0f} QPS)")
    print(f"  Batch:      {batch_time:.3f}s ({n_queries/batch_time:.0f} QPS)")
    print(f"  Speedup:    {individual_time/batch_time:.2f}x")
finally:
    # Always release the database, even if a search fails.
    db.close()

## 3. Memory Optimization

In [None]:
# Compare memory usage of different index configurations.
# Each configuration is built in its own database over the full data set and
# the growth in process RSS is compared with the raw float32 vector size.
configs = [
    {"name": "Flat", "index_type": "flat", "index_params": {}},
    {"name": "HNSW (M=16)", "index_type": "hnsw", "index_params": {"M": 16}},
    {"name": "HNSW (M=32)", "index_type": "hnsw", "index_params": {"M": 32}},
    {"name": "IVF (100 clusters)", "index_type": "ivf", "index_params": {"n_clusters": 100}},
]

# Size of the raw vectors in MB: 4 bytes per float32 component.
raw_size = N_VECTORS * DIMENSION * 4 / 1024 / 1024

print(f"Memory usage for {N_VECTORS} vectors ({DIMENSION}D):")
print(f"Raw vector size: {raw_size:.1f} MB")
print()
print(f"{'Configuration':<25} {'Total (MB)':>12} {'Overhead':>10}")
print("-" * 50)

for config in configs:
    # NOTE: the RSS delta is only an approximation; memory released by a
    # previous iteration is not necessarily returned to the OS.
    mem_before = get_memory_usage()

    db = VectorDatabase()
    try:
        collection = db.create_collection(
            name="mem_test",
            dimension=DIMENSION,
            index_type=config["index_type"],
            index_params=config["index_params"]
        )
        collection.add(data, ids=ids)

        mem_used = get_memory_usage() - mem_before
        # Overhead is the percentage by which RSS growth exceeds the raw size.
        overhead = (mem_used / raw_size - 1) * 100

        print(f"{config['name']:<25} {mem_used:>12.1f} {overhead:>9.0f}%")
    finally:
        # Always release the database, even if the build fails.
        db.close()

### Memory Reduction Strategies

1. **Use Product Quantization**: Compresses vectors to 1/4 - 1/16 of original size
2. **Lower M for HNSW**: Reduces graph storage overhead
3. **Use disk-based storage**: For datasets larger than RAM
4. **Reduce vector dimension**: Use dimensionality reduction (PCA, etc.)

## 4. Query Optimization

In [None]:
# Setup test collection with metadata.
# `db` and `collection` are intentionally left open here: the following query
# benchmark cells reuse them, and a later cell calls db.close().
db = VectorDatabase()
collection = db.create_collection(
    name="query_opt",
    dimension=DIMENSION,
    index_type="hnsw"
)

# Add vectors with metadata. Categories are assigned round-robin, so each one
# covers an equal share of the data; indexing by len(categories) keeps the
# assignment valid if the list is edited.
categories = ["A", "B", "C", "D", "E"]
metadata = [
    {
        "category": categories[i % len(categories)],
        "value": i % 100,
        "active": i % 2 == 0,
    }
    for i in range(N_VECTORS)
]
collection.add(data, ids=ids, metadata=metadata)

print(f"Added {collection.count()} vectors with metadata")

In [None]:
# Compare filtered vs unfiltered search
def avg_search_ms(collection, query, n_iterations, **search_kwargs):
    """Return the mean latency in milliseconds of repeated searches.

    Runs ``collection.search(query, **search_kwargs)`` ``n_iterations`` times
    and divides the total elapsed time by the iteration count.

    Args:
        collection: Object exposing a ``search(query, ...)`` method.
        query: Query vector passed unchanged to every search call.
        n_iterations: Number of repetitions to average over.
        **search_kwargs: Extra keyword arguments for ``search`` (e.g. ``k``,
            ``filter``).

    Returns:
        Average time per search call, in milliseconds.
    """
    # perf_counter() is monotonic and high-resolution, unlike time.time().
    start = time.perf_counter()
    for _ in range(n_iterations):
        collection.search(query, **search_kwargs)
    return (time.perf_counter() - start) / n_iterations * 1000


query = queries[0]
n_iterations = 50

# Unfiltered
unfiltered_time = avg_search_ms(collection, query, n_iterations, k=10)

# Simple filter (matches 20% of data)
simple_filter_time = avg_search_ms(
    collection, query, n_iterations, k=10, filter={"category": "A"}
)

# Complex filter
complex_filter_time = avg_search_ms(
    collection, query, n_iterations, k=10, filter={
        "$and": [
            {"category": {"$in": ["A", "B"]}},
            {"value": {"$gte": 50}},
            {"active": True}
        ]
    }
)

print("Query Performance Comparison:")
print(f"  Unfiltered:     {unfiltered_time:.2f} ms")
print(f"  Simple filter:  {simple_filter_time:.2f} ms ({simple_filter_time/unfiltered_time:.1f}x)")
print(f"  Complex filter: {complex_filter_time:.2f} ms ({complex_filter_time/unfiltered_time:.1f}x)")

In [None]:
# Impact of k on query time.
# Reuses `collection`, `query` and `n_iterations` from the previous cells and
# closes the shared database when finished.
k_values = [1, 10, 50, 100, 500]

print("\nQuery time vs k:")
print(f"{'k':>8} {'Time (ms)':>12}")
print("-" * 22)

try:
    for k in k_values:
        # perf_counter() is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        for _ in range(n_iterations):
            collection.search(query, k=k)
        query_time = (time.perf_counter() - start) / n_iterations * 1000
        print(f"{k:>8} {query_time:>12.2f}")
finally:
    # Release the shared database even if one of the searches fails.
    db.close()

### Query Optimization Tips

1. **Use appropriate k**: Only retrieve as many results as needed
2. **Optimize filters**: More selective filters are generally faster, but confirm this with the filtered-vs-unfiltered benchmark above
3. **Batch queries**: Use `search_batch` for multiple queries
4. **Tune ef_search**: Lower values for speed, higher for recall

## 5. Persistence and I/O

In [None]:
import tempfile
import shutil

# Measure how long it takes to write a collection to disk and load it back.
# The database lives in a temporary directory that is removed afterwards,
# even if one of the steps fails.
n_persist = 10000

# Create persistent database
temp_dir = tempfile.mkdtemp()
db_path = os.path.join(temp_dir, "vectordb")

try:
    # Write test
    print("Testing persistence performance...")
    db = VectorDatabase(storage_path=db_path)
    try:
        collection = db.create_collection(
            name="persist_test",
            dimension=DIMENSION,
            index_type="hnsw"
        )

        # perf_counter() is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        collection.add(data[:n_persist], ids=ids[:n_persist])
    finally:
        db.close()  # Triggers save; deliberately inside the timed section
    write_time = time.perf_counter() - start

    # Read test
    start = time.perf_counter()
    db = VectorDatabase(storage_path=db_path)
    try:
        collection = db.get_collection("persist_test")
        load_time = time.perf_counter() - start

        # Verify: smoke-test that the reloaded collection can serve a search.
        results = collection.search(queries[0], k=5)

        print(f"\nPersistence Performance ({n_persist:,} vectors):")
        print(f"  Write time: {write_time:.2f}s")
        print(f"  Load time:  {load_time:.2f}s")
        print(f"  Loaded vectors: {collection.count()}")
    finally:
        db.close()
finally:
    # Remove the on-disk database regardless of success or failure.
    shutil.rmtree(temp_dir)

## 6. Best Practices Summary

### Index Selection
- **< 10k vectors**: Use Flat
- **10k - 1M vectors**: Use HNSW with M=16-24
- **> 1M vectors**: Consider IVF-PQ hybrid

### HNSW Tuning
- Start with M=16, ef_construction=100
- Increase M for higher recall (at memory cost)
- Tune ef_search at query time for speed/recall tradeoff

### Operations
- Batch insertions: 100-1000 vectors per batch
- Batch queries: Use search_batch for multiple queries
- Pre-filter when possible to reduce search space

### Memory
- Monitor memory usage with large datasets
- Use PQ for memory-constrained environments
- Consider disk-based storage for very large datasets