In [1]:
"""
Example demonstrating different similarity metrics
using Hugging Face embeddings.
"""

from fastvecdb import FastVecDB, SimilarityMetric
from sentence_transformers import SentenceTransformer


# Load embedding model once
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


def embed_text(text: str) -> list:
    """Generate a normalized embedding vector using Hugging Face."""
    embedding = model.encode(text, normalize_embeddings=True)
    return embedding.tolist()


def main():
    # Sample texts
    texts = [
        "I love machine learning",
        "Artificial intelligence is fascinating",
        "Fast vector databases are useful",
        "I enjoy reading about deep learning",
        "Python is my favorite programming language",
        "Databases store information efficiently",
        "Neural networks learn patterns",
        "I like pizza and burgers",
        "Natural language processing is fun",
        "I write code every day",
    ]

    # Create vectors
    print("Creating text embeddings...")
    vectors = [(embed_text(text), f"text_{i}") for i, text in enumerate(texts)]

    dimension = len(vectors[0][0])
    print(f"Embedding dimension: {dimension}")

    # Test each similarity metric
    metrics = [
        SimilarityMetric.COSINE,
        SimilarityMetric.DOT_PRODUCT,
        SimilarityMetric.EUCLIDEAN,
    ]

    for metric in metrics:
        print(f"\n{'='*60}")
        print(f"Testing {metric.value.upper()} similarity")
        print('='*60)

        # Create database
        db = FastVecDB(
            storage_path=f"./example_data_{metric.value}",
            dimension=dimension,
            similarity_metric=metric,
        )

        # Insert vectors
        for vector, vec_id in vectors:
            db.insert(vec_id, vector, metadata={"metric": metric.value})

        # Query
        query_text = "I like learning about AI"
        query_vector = embed_text(query_text)

        results = db.search(query_vector, top_k=5)

        print(f"\nQuery: \"{query_text}\"")
        print("Top 5 results:")

        for i, result in enumerate(results, 1):
            score = result["score"]
            if metric == SimilarityMetric.EUCLIDEAN:
                distance = -score
                print(f"{i}. {result['id']}: distance={distance:.4f}")
            else:
                print(f"{i}. {result['id']}: similarity={score:.4f}")

        db.close()

    print("\n" + "=" * 60)
    print("Comparison complete!")
    print("=" * 60)
    print("\nNote:")
    print("- Cosine similarity: Range [-1, 1], higher = more similar")
    print("- Dot product: Higher = more similar (depends on norms)")
    print("- Euclidean distance: Lower = more similar")


if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm
Loading weights: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 103/103 [00:01<00:00, 89.61it/s, Materializing param=pooler.dense.weight]                              
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


Creating text embeddings...
Embedding dimension: 384

Testing COSINE similarity

Query: "I like learning about AI"
Top 5 results:
1. text_1: similarity=0.6881
2. text_3: similarity=0.6638
3. text_0: similarity=0.6467
4. text_8: similarity=0.4706
5. text_6: similarity=0.4290

Testing DOT_PRODUCT similarity

Query: "I like learning about AI"
Top 5 results:
1. text_1: similarity=0.6881
2. text_3: similarity=0.6638
3. text_0: similarity=0.6467
4. text_8: similarity=0.4706
5. text_6: similarity=0.4290

Testing EUCLIDEAN similarity

Query: "I like learning about AI"
Top 5 results:
1. text_1: distance=0.7898
2. text_3: distance=0.8200
3. text_0: distance=0.8406
4. text_8: distance=1.0290
5. text_6: distance=1.0686

Comparison complete!

Note:
- Cosine similarity: Range [-1, 1], higher = more similar
- Dot product: Higher = more similar (depends on norms)
- Euclidean distance: Lower = more similar


## Cache Use 

In [2]:
"""
FastVecDB Cache Quick Demo - Hugging Face Embeddings

Demonstrates how FastVecDB caching speeds up repeated
semantic searches using real text embeddings.
"""

import time
from fastvecdb import FastVecDB, SimilarityMetric
from sentence_transformers import SentenceTransformer


# Load embedding model once (important for performance)
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


def embed_text(text: str) -> list:
    """Generate a normalized embedding vector from text."""
    embedding = model.encode(text, normalize_embeddings=True)
    return embedding.tolist()


def main():
    print("=" * 60)
    print("FastVecDB Cache Quick Demo (Hugging Face)")
    print("=" * 60)

    # Sample documents
    documents = [
        ("FastVecDB is a fast vector database", "doc1"),
        ("Vector databases enable semantic search", "doc2"),
        ("Caching makes repeated queries faster", "doc3"),
    ]

    print("\n1. Creating embeddings...")
    vectors = []
    for text, doc_id in documents:
        vectors.append((embed_text(text), doc_id, {"text": text}))
        print(f"   âœ“ Embedded {doc_id}")

    dimension = len(vectors[0][0])
    print(f"   Embedding dimension: {dimension}")

    # Create database with caching enabled
    print("\n2. Creating database with intelligent caching...")
    db = FastVecDB(
        storage_path="./quick_cache_demo_hf",
        dimension=dimension,
        similarity_metric=SimilarityMetric.COSINE,
        enable_cache=True,
    )

    # Insert vectors
    print("\n3. Inserting documents...")
    for vector, vec_id, metadata in vectors:
        db.insert(vec_id, vector, metadata)
        print(f"   âœ“ Inserted {vec_id}")

    # Query text
    query_text = "How does caching improve vector search?"
    query_vector = embed_text(query_text)

    # First search (COLD)
    print("\n4. First search (COLD - no cache):")
    start = time.time()
    results1 = db.search(query_vector, top_k=2)
    cold_time = time.time() - start
    print(f"   Time: {cold_time*1000:.2f}ms")

    for r in results1:
        print(f"   â†’ {r['id']} | score={r['score']:.4f}")

    # Second search (WARM)
    print("\n5. Second search (WARM - cache hit!):")
    start = time.time()
    results2 = db.search(query_vector, top_k=2)  # Same query
    warm_time = time.time() - start
    print(f"   Time: {warm_time*1000:.2f}ms")

    for r in results2:
        print(f"   â†’ {r['id']} | score={r['score']:.4f}")

    # Speedup
    if warm_time > 0:
        speedup = cold_time / warm_time
        print(f"\n   ðŸš€ Cache made it {speedup:.1f}x faster!")

    # Cache stats
    print("\n6. Cache Statistics:")
    stats = db.get_stats()
    cache_stats = stats.get("cache_stats", {})
    print(f"   Query cache size: {cache_stats.get('query_cache', {}).get('size', 0)}")
    print(f"   Hot vector cache size: {cache_stats.get('hot_vector_cache', {}).get('size', 0)}")

    print("\n" + "=" * 60)
    print("Key Benefit: Semantic search + caching = instant results")
    print("=" * 60)

    db.close()


if __name__ == "__main__":
    main()


Loading weights: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 103/103 [00:00<00:00, 210.20it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


FastVecDB Cache Quick Demo (Hugging Face)

1. Creating embeddings...
   âœ“ Embedded doc1
   âœ“ Embedded doc2
   âœ“ Embedded doc3
   Embedding dimension: 384

2. Creating database with intelligent caching...

3. Inserting documents...
   âœ“ Inserted doc1
   âœ“ Inserted doc2
   âœ“ Inserted doc3

4. First search (COLD - no cache):
   Time: 2.97ms
   â†’ doc3 | score=0.6246
   â†’ doc2 | score=0.5251

5. Second search (WARM - cache hit!):
   Time: 0.00ms
   â†’ doc3 | score=0.6246
   â†’ doc2 | score=0.5251

6. Cache Statistics:
   Query cache size: 1
   Hot vector cache size: 3

Key Benefit: Semantic search + caching = instant results


# RAG Demo

In [None]:
"""
Example RAG (Retrieval-Augmented Generation) pipeline
using FastVecDB + Hugging Face embeddings.
"""

import time
from fastvecdb import FastVecDB, SimilarityMetric
from sentence_transformers import SentenceTransformer


# Load embedding model once
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


def embed_text(text: str) -> list:
    """Generate a normalized embedding for text."""
    embedding = model.encode(text, normalize_embeddings=True)
    return embedding.tolist()


def main():
    # Knowledge base documents
    documents = [
        {
            "id": "doc1",
            "text": "Python is a high-level programming language known for its simplicity and readability.",
            "category": "programming",
        },
        {
            "id": "doc2",
            "text": "Machine learning is a subset of artificial intelligence that enables systems to learn from data.",
            "category": "ai",
        },
        {
            "id": "doc3",
            "text": "Vector databases are specialized databases designed to store and query high-dimensional vectors.",
            "category": "databases",
        },
        {
            "id": "doc4",
            "text": "FastVecDB is a pure-Python vector search framework with intelligent caching.",
            "category": "databases",
        },
        {
            "id": "doc5",
            "text": "RAG combines retrieval of relevant documents with language model generation.",
            "category": "ai",
        },
    ]

    print("Initializing FastVecDB for RAG pipeline...")

    # Create embeddings first to get dimension
    print("\nCreating document embeddings...")
    vectors = []
    for doc in documents:
        vector = embed_text(doc["text"])
        vectors.append((doc, vector))
        print(f"  Embedded: {doc['id']}")

    dimension = len(vectors[0][1])
    print(f"Embedding dimension: {dimension}")

    db = FastVecDB(
        storage_path="./rag_example_data_hf",
        dimension=dimension,
        similarity_metric=SimilarityMetric.COSINE,
        enable_cache=True,
    )

    # Index documents
    print("\nIndexing documents...")
    for doc, vector in vectors:
        db.insert(
            vector_id=doc["id"],
            vector=vector,
            metadata={
                "text": doc["text"],
                "category": doc["category"],
            },
            bucket_id="knowledge_base",
        )
        print(f"  Indexed: {doc['id']}")

    print("\n" + "=" * 60)
    print("RAG Query Pipeline")
    print("=" * 60)

    queries = [
        "What is Python?",
        "Tell me about vector databases",
        "How does machine learning work?",
    ]

    for query in queries:
        print(f"\nQuery: {query}")
        print("-" * 60)

        query_vector = embed_text(query)

        # Retrieve documents
        results = db.search(
            query_vector=query_vector,
            top_k=3,
            bucket_ids=["knowledge_base"],
        )

        print(f"Retrieved {len(results)} relevant documents:")
        for i, result in enumerate(results, 1):
            print(f"\n{i}. {result['id']} (score: {result['score']:.4f})")
            print(f"   Category: {result['metadata']['category']}")
            print(f"   Text: {result['metadata']['text']}")

        # In a real RAG pipeline:
        # - Combine retrieved texts into context
        # - Send (context + query) to an LLM
        # - Return generated answer

    print("\n" + "=" * 60)
    print("Cache Performance Test")
    print("=" * 60)

    query_vector = embed_text("Python programming")

    # Cold query
    start = time.time()
    _ = db.search(query_vector, top_k=5)
    time1 = time.time() - start

    # Warm query
    start = time.time()
    _ = db.search(query_vector, top_k=5)
    time2 = time.time() - start

    print(f"First query (cold): {time1 * 1000:.2f}ms")
    print(f"Second query (warm): {time2 * 1000:.2f}ms")
    if time2 > 0:
        print(f"Speedup: {time1 / time2:.1f}x")

    stats = db.get_stats()
    print("\nDatabase stats:")
    print(f"  Total documents: {stats['total_vectors']}")
    if stats.get("cache_stats"):
        print(f"  Cache stats: {stats['cache_stats']}")

    db.close()
    print("\nRAG pipeline example complete!")


if __name__ == "__main__":
    main()


In [None]:
"""
Fully functional RAG (Retrieval-Augmented Generation) pipeline
using FastVecDB + Hugging Face embeddings + Hugging Face LLM (FLAN-T5-small).
"""

import time
from fastvecdb import FastVecDB, SimilarityMetric
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# -----------------------------
# 1. LOAD MODELS
# -----------------------------

# Load Hugging Face embedding model (SentenceTransformers)
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Load Hugging Face instruction-tuned LLM (FLAN-T5-small)
llm = pipeline(
    task="text-generation",      # Must use text-generation
    model="google/flan-t5-small",
    max_new_tokens=200
)

# -----------------------------
# 2. EMBEDDING FUNCTION
# -----------------------------

def embed_text(text: str) -> list:
    """
    Convert text into a normalized vector embedding compatible with FastVecDB.
    """
    embedding = embedding_model.encode(text, normalize_embeddings=True)
    return embedding.tolist()

# -----------------------------
# 3. MAIN RAG PIPELINE
# -----------------------------

def main():
    # -----------------------------
    # KNOWLEDGE BASE DOCUMENTS
    # -----------------------------
    documents = [
        {"id": "doc1", "text": "Python is a high-level programming language known for its simplicity and readability.", "category": "programming"},
        {"id": "doc2", "text": "Machine learning is a subset of artificial intelligence that enables systems to learn from data.", "category": "ai"},
        {"id": "doc3", "text": "Vector databases are specialized databases designed to store and query high-dimensional vectors.", "category": "databases"},
        {"id": "doc4", "text": "FastVecDB is a pure-Python vector search framework with intelligent caching.", "category": "databases"},
        {"id": "doc5", "text": "RAG combines retrieval of relevant documents with language model generation.", "category": "ai"},
    ]

    print("Initializing RAG pipeline...")

    # -----------------------------
    # GENERATE DOCUMENT EMBEDDINGS
    # -----------------------------
    vectors = []
    for doc in documents:
        vector = embed_text(doc["text"])
        vectors.append((doc, vector))

    dimension = len(vectors[0][1])

    # -----------------------------
    # INITIALIZE FASTVECDB
    # -----------------------------
    db = FastVecDB(
        storage_path="./rag_example_data_hf",
        dimension=dimension,
        similarity_metric=SimilarityMetric.COSINE,
        enable_cache=True
    )

    # -----------------------------
    # INDEX DOCUMENTS
    # -----------------------------
    for doc, vector in vectors:
        db.insert(
            vector_id=doc["id"],
            vector=vector,
            metadata={"text": doc["text"], "category": doc["category"]},
            bucket_id="knowledge_base"
        )

    # -----------------------------
    # RAG QUERY LOOP
    # -----------------------------
    queries = [
        "What is Python?",
        "Explain vector databases",
        "How does machine learning work?"
    ]

    for query in queries:
        print("\n" + "="*60)
        print(f"User Query: {query}")
        print("="*60)

        # -----------------------------
        # QUERY EMBEDDING
        # -----------------------------
        query_vector = embed_text(query)

        # -----------------------------
        # VECTOR RETRIEVAL
        # -----------------------------
        results = db.search(query_vector, top_k=5, bucket_ids=["knowledge_base"])

        # -----------------------------
        # METADATA FILTERING (POST-SEARCH)
        # -----------------------------
        filtered_results = [
            r for r in results
            if r["metadata"]["category"] in {"ai", "programming"}
        ]

        # -----------------------------
        # CONTEXT CONSTRUCTION
        # -----------------------------
        context = "\n".join(f"- {r['metadata']['text']}" for r in filtered_results)

        # -----------------------------
        # PROMPT CONSTRUCTION
        # -----------------------------
        prompt = f"""
Answer the question using only the context below.

Context:
{context}

Question:
{query}
"""

        # -----------------------------
        # LLM GENERATION (FLAN-T5)
        # -----------------------------
        # response = llm(
        #     prompt,
        #     max_new_tokens=200,
        #     do_sample=True,
        #     return_full_text=False
        # )[0]["generated_text"]
        response = llm(
            prompt,
            max_new_tokens=200,
            do_sample=True,
            return_full_text=False
        )
        # print(response)

        # -----------------------------
        # OUTPUT
        # -----------------------------
        print("\nRetrieved Documents:")
        for r in filtered_results:
            print(f"- {r['id']} | {r['metadata']['category']}")

        print("\nLLM Answer:")
        print(response)

    # -----------------------------
    # CACHE PERFORMANCE DEMO
    # -----------------------------
    print("\n" + "="*60)
    print("Cache Performance Test")
    print("="*60)

    query_vector = embed_text("Python programming")

    start = time.perf_counter()
    db.search(query_vector, top_k=3)
    cold_time = time.perf_counter() - start

    start = time.perf_counter()
    db.search(query_vector, top_k=3)
    warm_time = time.perf_counter() - start

    print(f"Cold query: {cold_time*1000:.2f} ms")
    print(f"Warm query: {warm_time*1000:.2f} ms")
    if warm_time > 0:
        print(f"Speedup: {cold_time / warm_time:.1f}x")
    else:
        print("Speedup: warm query was instantaneous (too fast to measure) âœ…")

    # -----------------------------
    # CLEANUP
    # -----------------------------
    db.close()
    print("\nRAG pipeline complete!")

# -----------------------------
# RUN
# -----------------------------
if __name__ == "__main__":
    main()


Loading weights: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 103/103 [00:00<00:00, 166.73it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m
Loading weights: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 190/190 [00:01<00:00, 171.03it/s, Materializing param=shared.weight]                                                       
The model 'T5ForConditionalGeneration' is not supported for text-generation. Supported models are ['PeftModelForCausalLM', 'AfmoeForCausalLM', 'ApertusForCausalLM', 'ArceeForCausalLM', 'AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasu

Initializing RAG pipeline...


Both `max_new_tokens` (=200) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



User Query: What is Python?


Both `max_new_tokens` (=200) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[{'generated_text': ''}]

Retrieved Documents:
- doc1 | programming
- doc2 | ai
- doc5 | ai

LLM Answer:

User Query: Explain vector databases
[{'generated_text': ''}]

Retrieved Documents:
- doc2 | ai
- doc1 | programming
- doc5 | ai

LLM Answer:

User Query: How does machine learning work?


Both `max_new_tokens` (=200) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[{'generated_text': ''}]

Retrieved Documents:
- doc2 | ai
- doc5 | ai
- doc1 | programming

LLM Answer:

Cache Performance Test
Cold query: 2.71 ms
Warm query: 0.79 ms
Speedup: 3.4x

RAG pipeline complete!


## Cache Test 

In [9]:
"""
FastVecDB Cache Quick Demo with Hugging Face Embeddings

This demo illustrates how enabling caching in FastVecDB
can dramatically speed up repeated similarity searches using
real embeddings from a Hugging Face SentenceTransformer model.
"""

import time  # Used to measure execution time
from fastvecdb import FastVecDB, SimilarityMetric  # Vector database and similarity metric
from sentence_transformers import SentenceTransformer  # Pre-trained NLP embeddings

def main():
    # ------------------------------------------------------
    # 0. Header
    # ------------------------------------------------------
    print("=" * 60)
    print("FastVecDB Cache Demo with Hugging Face Embeddings")
    print("=" * 60)

    # ------------------------------------------------------
    # 1. Load the Hugging Face embedding model
    # ------------------------------------------------------
    print("\n1. Loading Hugging Face embedding model...")
    # all-MiniLM-L6-v2 is a lightweight model with 384-dimensional embeddings
    # Small enough to be fast, but still produces meaningful semantic vectors
    model = SentenceTransformer("all-MiniLM-L6-v2")  

    # ------------------------------------------------------
    # 2. Prepare example documents
    # ------------------------------------------------------
    # Each document has a unique ID and some text content
    documents = [
        {"id": "doc1", "text": "FastVecDB is a high-performance vector database."},
        {"id": "doc2", "text": "Caching can make repeated queries much faster."},
        {"id": "doc3", "text": "Hugging Face provides state-of-the-art NLP models."},
        {"id": "doc4", "text": "Vector search allows finding similar items quickly."},
    ]

    # ------------------------------------------------------
    # 3. Initialize FastVecDB with caching enabled
    # ------------------------------------------------------
    print("\n2. Creating database with intelligent caching...")
    db = FastVecDB(
        storage_path="./huggingface_cache_demo",   # Where the database is stored on disk
        dimension=384,                              # Must match the embedding size from the model
        similarity_metric=SimilarityMetric.COSINE, # Use cosine similarity to compare vectors
        enable_cache=True                           # Enable caching for faster repeated queries
    )

    # ------------------------------------------------------
    # 4. Insert document embeddings into the database
    # ------------------------------------------------------
    print("\n3. Inserting embeddings into FastVecDB...")
    for doc in documents:
        # Convert the text into a numerical vector (embedding)
        embedding = model.encode(doc["text"]).tolist()
        # Insert the embedding and metadata into the database
        db.insert(doc["id"], embedding, metadata={"text": doc["text"]})
        print(f"   âœ“ Inserted {doc['id']}")

    # ------------------------------------------------------
    # 5. Prepare a query embedding
    # ------------------------------------------------------
    # The query text we want to find similar documents for
    query_text = "How can I search vectors quickly?"
    # Encode the query into an embedding vector
    query_embedding = model.encode(query_text).tolist()

    # ------------------------------------------------------
    # 6. First search (COLD cache)
    # ------------------------------------------------------
    # Since this is the first time searching, the cache is empty
    print("\n4. First search (COLD - no cache):")
    start = time.time()
    results1 = db.search(query_embedding, top_k=2)  # Retrieve top 2 most similar documents
    cold_time = time.time() - start  # Measure time taken
    print(f"   Time: {cold_time*1000:.2f}ms")
    # Display results
    for res in results1:
        print(f"   Found: {res['id']} (score: {res['score']:.4f})")
    
    # ------------------------------------------------------
    # 7. Second search (WARM cache)
    # ------------------------------------------------------
    # Now the cache has stored information from the first search
    # This makes retrieval much faster
    print("\n5. Second search (WARM - cache hit!):")
    start = time.time()
    results2 = db.search(query_embedding, top_k=2)
    warm_time = time.time() - start
    print(f"   Time: {warm_time*1000:.2f}ms")
    for res in results2:
        print(f"   Found: {res['id']} (score: {res['score']:.4f})")

    # Calculate and display speedup due to caching
    if warm_time > 0:
        speedup = cold_time / warm_time
        print(f"\n   ðŸš€ Cache made it {speedup:.1f}x faster!")

    # ------------------------------------------------------
    # 8. Show cache statistics
    # ------------------------------------------------------
    print("\n6. Cache Statistics:")
    stats = db.get_stats()
    if stats.get("cache_stats"):
        cache_stats = stats["cache_stats"]
        # Number of query results stored in the cache
        print(f"   Query cache: {cache_stats.get('query_cache', {}).get('size', 0)} queries cached")
        # Number of vectors kept in the hot vector cache
        print(f"   Hot vectors: {cache_stats.get('hot_vector_cache', {}).get('size', 0)} vectors cached")

    # ------------------------------------------------------
    # 9. Footer
    # ------------------------------------------------------
    print("\n" + "=" * 60)
    print("Key Benefit: Repeated queries with real embeddings are instant!")
    print("=" * 60)

    # Close database to release resources properly
    db.close()

if __name__ == "__main__":
    main()


FastVecDB Cache Demo with Hugging Face Embeddings

1. Loading Hugging Face embedding model...


Loading weights: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 103/103 [00:00<00:00, 135.35it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m



2. Creating database with intelligent caching...

3. Inserting embeddings into FastVecDB...
   âœ“ Inserted doc1
   âœ“ Inserted doc2
   âœ“ Inserted doc3
   âœ“ Inserted doc4

4. First search (COLD - no cache):
   Time: 65.07ms
   Found: doc4 (score: 0.7179)
   Found: doc1 (score: 0.5489)

5. Second search (WARM - cache hit!):
   Time: 0.00ms
   Found: doc4 (score: 0.7179)
   Found: doc1 (score: 0.5489)

6. Cache Statistics:
   Query cache: 1 queries cached
   Hot vectors: 4 vectors cached

Key Benefit: Repeated queries with real embeddings are instant!
