In [None]:
# Install necessary dependencies
%pip install chromadb sentence-transformers

In [1]:
import chromadb
from chromadb.utils import embedding_functions

In [2]:
# Basic client initialization: a basic in-memory Chroma client
print("Initializing Chroma client...")
client = chromadb.Client()

# Sentence-transformers model used to embed documents and query texts
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Create embedding function (passed to every collection created below)
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name=EMBEDDING_MODEL_NAME
)

Initializing Chroma client...


In [3]:
# Create the collection, or fetch it if it already exists.
# create_collection raises when a collection named "documents" is already
# present on the client, which makes this cell fail when it is re-run in the
# same kernel; get_or_create_collection avoids that.
collection = client.get_or_create_collection(
    name="documents",
    embedding_function=embedding_function,
)

In [4]:
# Display results helper
def display_results(results):
    """Print the hits of the first query in a Chroma-style query result.

    Args:
        results: Mapping with an 'ids' entry and, optionally, 'documents',
            'metadatas' and 'distances' entries. Each present entry is a list
            holding one inner list per query; only the first query (index 0)
            is printed. An optional entry that is absent or None is printed
            as ``None`` for every hit instead of raising a TypeError.

    Returns:
        None. The formatted hits are written to standard output.
    """
    hit_ids = (results.get('ids') or [[]])[0]

    def first_query(field):
        # Pad a missing field with None so the zip below stays aligned with
        # the IDs rather than failing on results[field][0].
        values = results.get(field)
        return values[0] if values else [None] * len(hit_ids)

    print("\nResults:")
    rows = zip(
        first_query('documents'),
        hit_ids,
        first_query('metadatas'),
        first_query('distances'),
    )
    for rank, (doc, doc_id, metadata, distance) in enumerate(rows, start=1):
        print(f"{rank}. Document: {doc}")
        print(f"   ID: {doc_id}")
        print(f"   Metadata: {metadata}")
        print(f"   Distance: {distance}")
        print()

In [5]:
# Demo 1: Basic Vector Operations
print("\n=== BASIC VECTOR OPERATIONS ===")

# Sample corpus reused by the later metadata-filtering cells
documents = [
    "The quick brown fox jumps over the lazy dog",
    "A man is walking his dog in the park",
    "The weather is sunny and warm today",
    "Artificial intelligence is transforming the technology landscape",
    "Vector databases are essential for semantic search applications",
    "Deep learning models require substantial computational resources",
    "The city skyline looks beautiful at sunset",
    "Machine learning algorithms find patterns in data"
]
# One sequential ID per document: "doc1" through "doc8"
ids = [f"doc{number}" for number in range(1, len(documents) + 1)]

# Store the corpus in the collection
print("Adding documents to collection...")
collection.add(documents=documents, ids=ids)

# Report how many records the collection holds
count = collection.count()
print(f"Collection now contains {count} documents")

# Run a similarity search and keep the three nearest documents
query_text = "AI and technology trends"
print(f"\nPerforming similarity search for: '{query_text}'")
results = collection.query(query_texts=[query_text], n_results=3)

# Show the hits
display_results(results)


=== BASIC VECTOR OPERATIONS ===
Adding documents to collection...
Collection now contains 8 documents

Performing similarity search for: 'AI and technology trends'

Results:
1. Document: Artificial intelligence is transforming the technology landscape
   ID: doc4
   Metadata: None
   Distance: 0.6472851634025574

2. Document: Machine learning algorithms find patterns in data
   ID: doc8
   Metadata: None
   Distance: 1.364165186882019

3. Document: Deep learning models require substantial computational resources
   ID: doc6
   Metadata: None
   Distance: 1.4002103805541992



In [6]:
# Demo 2: Metadata and Filtering
print("\n=== METADATA AND FILTERING ===")

# get_or_create_collection rather than create_collection: the latter raises
# when "filtered_documents" already exists on the client, which breaks this
# cell when it is re-run in the same kernel.
filtered_docs_collection = client.get_or_create_collection(
    name="filtered_documents",
    embedding_function=embedding_function,
)

# One metadata dict per entry of `documents`, in the same order
metadatas = [
    {"category": "animal", "length": "short", "year": 2021},
    {"category": "lifestyle", "length": "short", "year": 2022},
    {"category": "weather", "length": "short", "year": 2023},
    {"category": "technology", "length": "medium", "year": 2023},
    {"category": "technology", "length": "medium", "year": 2024},
    {"category": "technology", "length": "long", "year": 2024},
    {"category": "travel", "length": "short", "year": 2023},
    {"category": "technology", "length": "medium", "year": 2024}
]
# The three lists are matched by position, so they must be the same length
assert len(metadatas) == len(documents) == len(ids), "documents/ids/metadatas are misaligned"

# Add documents with metadata
print("Adding documents with metadata...")
filtered_docs_collection.add(
    documents=documents,
    ids=ids,
    metadatas=metadatas,
)

# Get collection count
count = filtered_docs_collection.count()
print(f"Filtered Docs Collection now contains {count} documents.")


=== METADATA AND FILTERING ===
Adding documents with metadata...
Filtered Docs Collection now contains 8 documents.


In [7]:
# Simple metadata filtering: match on a single metadata field
print("\nFiltering by category 'technology':")
category_filter = {"category": "technology"}
results = filtered_docs_collection.query(
    query_texts=["AI advancements"], n_results=3, where=category_filter
)

# Show the hits that passed the filter
display_results(results)


Filtering by category 'technology':

Results:
1. Document: Artificial intelligence is transforming the technology landscape
   ID: doc4
   Metadata: {'category': 'technology', 'length': 'medium', 'year': 2023}
   Distance: 0.8660715818405151

2. Document: Machine learning algorithms find patterns in data
   ID: doc8
   Metadata: {'category': 'technology', 'length': 'medium', 'year': 2024}
   Distance: 1.3540159463882446

3. Document: Deep learning models require substantial computational resources
   ID: doc6
   Metadata: {'category': 'technology', 'length': 'long', 'year': 2024}
   Distance: 1.3604629039764404



In [8]:
# Complex filtering: combine two $eq clauses under $and
print("\nComplex filtering (technology documents from 2024):")
is_technology = {"category": {"$eq": "technology"}}
from_2024 = {"year": {"$eq": 2024}}
results = filtered_docs_collection.query(
    query_texts=["AI advancements"],
    n_results=3,
    where={"$and": [is_technology, from_2024]},
)

# Show the hits that passed the filter
display_results(results)


Complex filtering (technology documents from 2024):

Results:
1. Document: Machine learning algorithms find patterns in data
   ID: doc8
   Metadata: {'category': 'technology', 'length': 'medium', 'year': 2024}
   Distance: 1.3540159463882446

2. Document: Deep learning models require substantial computational resources
   ID: doc6
   Metadata: {'category': 'technology', 'length': 'long', 'year': 2024}
   Distance: 1.3604629039764404

3. Document: Vector databases are essential for semantic search applications
   ID: doc5
   Metadata: {'category': 'technology', 'length': 'medium', 'year': 2024}
   Distance: 1.5392829179763794



In [9]:
# Using where_document: filter on the document text itself via $contains.
# The phrase is defined once so the printed message and the filter cannot
# drift apart (the message previously quoted it with a stray trailing space).
required_phrase = "Artificial intelligence"
print(f"\nFiltering documents containing '{required_phrase}':")
results = filtered_docs_collection.query(
    query_texts=["AI advancements"],
    n_results=3,
    where_document={"$contains": required_phrase},
)
display_results(results)


Filtering documents containing 'Artificial intelligence ':

Results:
1. Document: Artificial intelligence is transforming the technology landscape
   ID: doc4
   Metadata: {'category': 'technology', 'length': 'medium', 'year': 2023}
   Distance: 0.8660715818405151

