# Scaling Vector Databases

As we think about going to production, there are multiple considerations we need to keep top of mind:
1. Speed vs. Accuracy
2. Resource Limitations
3. Horizontal Scaling

In [None]:
import chromadb
from chromadb.utils import embedding_functions
import time

client = chromadb.Client()
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
  model_name="all-MiniLM-L6-v2"
)

# Create collections with different HNSW configurations
collections = {}

print("\n=== ANN IMPLEMENTATION ===")

# 1. Default settings
collections["default"] = client.create_collection(
    name="default_index",
    embedding_function=embedding_function
)

# 2. High accuracy configuration
collections["high_accuracy"] = client.create_collection(
    name="high_accuracy_index",
    embedding_function=embedding_function,
    metadata={"hnsw:space": "cosine", "hnsw:construction_ef": 500, "hnsw:search_ef": 250, "hnsw:M": 36}
)

# 3. Fast search configuration
collections["fast_search"] = client.create_collection(
    name="fast_search_index",
    embedding_function=embedding_function,
    metadata={"hnsw:space": "cosine", "hnsw:construction_ef": 80, "hnsw:search_ef": 40, "hnsw:M": 12}
)

# Generate sample data
num_docs = 5000
print(f"Generating {num_docs} sample documents...")

# Create documents with some patterns for testing
categories = ["technology", "science", "health", "business", "entertainment"]
documents = []
ids = []

for i in range(num_docs):
    category = categories[i % len(categories)]
    document = f"This is document {i} about {category} with some additional text to make it more unique."
    documents.append(document)
    ids.append(f"doc_{i}")

# Add documents to all collections
print("Adding documents to collections with different index configurations...")
for name, collection in collections.items():
    collection.add(
        documents=documents,
        ids=ids
    )
    print(f"  Added {num_docs} documents to {name} collection")

# Benchmark query performance
print("\nBenchmarking query performance across different configurations...")

# Prepare queries
query_texts = [
    "Latest technology trends in artificial intelligence",
    "Scientific research on climate change",
    "Health benefits of regular exercise",
    "Business strategies for startups",
    "Entertainment news about recent movie releases"
]

# Run benchmark
results = {}
num_trials = 5

for name, collection in collections.items():
    print(f"\nTesting {name} configuration:")
    times = []
    
    for query in query_texts:
        query_times = []
        
        for _ in range(num_trials):
            start_time = time.time()
            collection.query(
                query_texts=[query],
                n_results=10
            )
            query_time = time.time() - start_time
            query_times.append(query_time)
        
        avg_time = sum(query_times) / len(query_times)
        times.append(avg_time)
        print(f"  Query: '{query[:30]}...': {avg_time:.4f} seconds")
    
    results[name] = {
        "mean": sum(times) / len(times),
        "min": min(times),
        "max": max(times),
        "times": times
    }

# Print summary
print("\nPerformance Summary:")
for name, metrics in results.items():
    print(f"  {name}: Mean={metrics['mean']:.4f}s, Min={metrics['min']:.4f}s, Max={metrics['max']:.4f}s")




=== ANN IMPLEMENTATION ===
Generating 5000 sample documents...
Adding documents to collections with different index configurations...
  Added 5000 documents to default collection
  Added 5000 documents to high_accuracy collection
  Added 5000 documents to fast_search collection

Benchmarking query performance across different configurations...

Testing default configuration:
  Query: 'Latest technology trends in ar...': 0.0122 seconds
  Query: 'Scientific research on climate...': 0.0117 seconds
  Query: 'Health benefits of regular exe...': 0.0115 seconds
  Query: 'Business strategies for startu...': 0.0121 seconds
  Query: 'Entertainment news about recen...': 0.0116 seconds

Testing high_accuracy configuration:
  Query: 'Latest technology trends in ar...': 0.0116 seconds
  Query: 'Scientific research on climate...': 0.0115 seconds
  Query: 'Health benefits of regular exe...': 0.0118 seconds
  Query: 'Business strategies for startu...': 0.0118 seconds
  Query: 'Entertainment news about

# Caching

In [2]:
import chromadb
from chromadb.utils import embedding_functions
import time
import random

print("\n=== CACHING IMPLEMENTATION ===")

# Implement a simple LRU cache
class LRUCache:
    def __init__(self, capacity=100):
        self.capacity = capacity
        self.cache = {}
        self.usage_order = []
    
    def get(self, key):
        if key in self.cache:
            # Update usage order
            self.usage_order.remove(key)
            self.usage_order.append(key)
            return self.cache[key]
        return None
    
    def put(self, key, value):
        if key in self.cache:
            # Update existing entry
            self.cache[key] = value
            self.usage_order.remove(key)
            self.usage_order.append(key)
        else:
            # Add new entry
            if len(self.cache) >= self.capacity:
                # Evict least recently used
                lru_key = self.usage_order.pop(0)
                del self.cache[lru_key]
            
            self.cache[key] = value
            self.usage_order.append(key)
    
    def clear(self):
        self.cache = {}
        self.usage_order = []
    
    def __len__(self):
        return len(self.cache)


# Initialize Chroma
client = chromadb.Client()
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2"
)

# Create a collection
collection = client.create_collection(
    name="cache_test",
    embedding_function=embedding_function
)

# Add sample documents
num_docs = 1000
documents = [f"This is a sample document {i} with various content for testing caching" for i in range(num_docs)]
ids = [f"cache_doc_{i}" for i in range(num_docs)]

for i in range(0, num_docs, 100):
    end_idx = min(i + 100, num_docs)
    
    collection.add(
        documents=documents[i:end_idx],
        ids=ids[i:end_idx]
    )

print(f"Added {num_docs} documents to the collection")

# Initialize cache
query_cache = LRUCache(capacity=50)

# Function to query with caching
def cached_query(query_text, n_results=10, use_cache=True):
    cache_key = f"{query_text}:{n_results}"
    
    if use_cache:
        # Check cache first
        cached_result = query_cache.get(cache_key)
        if cached_result is not None:
            return cached_result, True  # Cache hit
    
    # Cache miss or cache disabled, perform actual query
    result = collection.query(
        query_texts=[query_text],
        n_results=n_results
    )
    
    if use_cache:
        # Update cache
        query_cache.put(cache_key, result)
    
    return result, False  # Cache miss

# Test queries with varying cache hit rates
print("\nTesting query performance with caching:")

# Prepare query mix (some repeated, some unique)
common_queries = [
    "document with content",
    "sample document",
    "testing caching",
    "various content"
]

unique_queries = [f"unique query {i}" for i in range(50)]

# Mix queries with different distributions to test cache performance
mixed_queries = []
for _ in range(20):
    # Add common queries (higher probability)
    mixed_queries.extend(common_queries)
    
    # Add some unique queries
    mixed_queries.extend(random.sample(unique_queries, 5))

random.shuffle(mixed_queries)

# Run without cache
print("Running queries without cache...")
start_time = time.time()

for query in mixed_queries:
    _, _ = cached_query(query, use_cache=False)

no_cache_time = time.time() - start_time

# Run with cache
print("Running queries with cache...")
query_cache.clear()  # Clear the cache

start_time = time.time()
hits = 0

for query in mixed_queries:
    _, is_hit = cached_query(query, use_cache=True)
    if is_hit:
        hits += 1

with_cache_time = time.time() - start_time
hit_rate = hits / len(mixed_queries)

# Report results
print("\nCache Performance Results:")
print(f"  Without cache: {no_cache_time:.4f} seconds")
print(f"  With cache: {with_cache_time:.4f} seconds")
print(f"  Time saved: {no_cache_time - with_cache_time:.4f} seconds ({(1 - with_cache_time/no_cache_time) * 100:.1f}%)")
print(f"  Cache hit rate: {hit_rate:.1%}")
print(f"  Cache size: {len(query_cache)}")


=== CACHING IMPLEMENTATION ===
Added 1000 documents to the collection

Testing query performance with caching:
Running queries without cache...
Running queries with cache...

Cache Performance Results:
  Without cache: 2.1211 seconds
  With cache: 0.6190 seconds
  Time saved: 1.5022 seconds (70.8%)
  Cache hit rate: 71.7%
  Cache size: 50
