# What is ChromaDB
For background on ChromaDB, refer to [00_ChromaDB.ipynb](00_ChromaDB.ipynb).



In [2]:
# Virtual Env.
# cd /Users/tripathimachine/Desktop/Apps/GitHub_Repo/Python-Playground/VectorDB
# python3 -m venv .vectordbenv

# Activate the virtual environment
# source .vectordbenv/bin/activate

# Restart VS Code if the virtual environment is not accessible when changing the kernel.

In [3]:
# !pip3 install chromadb pandas

In [4]:
# Check whether chromadb is installed and, if so, which version
import importlib.metadata
def check_chromadb_version():
    """Report on stdout whether the chromadb distribution is installed.

    Prints ``chromadb version: <version>`` when the package metadata can be
    found, otherwise prints ``chromadb is not installed.``. Returns nothing.
    """
    try:
        version = importlib.metadata.version("chromadb")
    except importlib.metadata.PackageNotFoundError:
        # No installed distribution metadata was found for chromadb.
        print("chromadb is not installed.")
    else:
        print(f"chromadb version: {version}")

# Run the check so the version (or the "not installed" notice) is printed below.
check_chromadb_version()

chromadb version: 1.0.11


In [6]:
import chromadb
from datetime import datetime
# Create a Chroma client with default settings (no arguments are passed).
# NOTE(review): presumably this is an in-memory client whose data does not
# survive a kernel restart — confirm against the Chroma documentation.
chroma_client = chromadb.Client()

In [7]:
# Create the collection, or reuse it when it already exists.
# get_or_create_collection keeps this cell safe to re-run in the same kernel:
# create_collection fails once a collection named "test_collection" is
# already present on the client.
collection = chroma_client.get_or_create_collection(
    name="test_collection",
    metadata={
        "description": "A collection for testing purposes",
        # ISO 8601 timestamp taken at the moment this cell runs.
        "created_at": datetime.now().isoformat(),
    },
)

In [8]:
# Seed the collection. Each entry pairs a document with its "source" tag so the
# text and its metadata cannot drift out of alignment.
seed_entries = [
    ("Hello, world!", "greeting"),
    ("ChromaDB is great!", "statement"),
    ("Python is awesome!", "opinion"),
    ("Messi and Ronaldo are two of the greatest footballers of all time.", "sports"),
    ("India is a famous country and rich in culture.", "geography"),
    ("Weather is getting hotter every year.", "climate"),
    ("The stock market is unpredictable.", "finance"),
    ("Apples are a great source of vitamins.", "nutrition"),
    ("Bananas are a good source of potassium.", "nutrition"),
    ("Oranges are rich in vitamin C.", "nutrition"),
    ("The Eiffel Tower is in Paris.", "landmark"),
]

collection.add(
    documents=[text for text, _ in seed_entries],
    metadatas=[{"source": tag} for _, tag in seed_entries],
    # IDs are "doc1" .. "doc11", numbered in entry order.
    ids=[f"doc{position}" for position in range(1, len(seed_entries) + 1)],
)


/Users/tripathimachine/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:07<00:00, 10.4MiB/s]


In [9]:
# Retrieve every record in the collection (no ids or filters are given) and display it.
collection.get()

{'ids': ['doc1',
  'doc2',
  'doc3',
  'doc4',
  'doc5',
  'doc6',
  'doc7',
  'doc8',
  'doc9',
  'doc10',
  'doc11'],
 'embeddings': None,
 'documents': ['Hello, world!',
  'ChromaDB is great!',
  'Python is awesome!',
  'Messi and Ronaldo are two of the greatest footballers of all time.',
  'India is a famous country and rich in culture.',
  'Weather is getting hotter every year.',
  'The stock market is unpredictable.',
  'Apples are a great source of vitamins.',
  'Bananas are a good source of potassium.',
  'Oranges are rich in vitamin C.',
  'The Eiffel Tower is in Paris.'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [{'source': 'greeting'},
  {'source': 'statement'},
  {'source': 'opinion'},
  {'source': 'sports'},
  {'source': 'geography'},
  {'source': 'climate'},
  {'source': 'finance'},
  {'source': 'nutrition'},
  {'source': 'nutrition'},
  {'source': 'nutrition'},
  {'source': 'landmark'}]}

In [11]:
# Search the collection with two natural-language questions and ask for the
# two closest documents per question.
questions = [
    "What is the capital of France?",
    "What is the weather like?",
]
results = collection.query(query_texts=questions, n_results=2)

# One print call with a newline separator: header first, then the raw result dict.
print("Query Results:", results, sep="\n")

Query Results:
{'ids': [['doc11', 'doc5'], ['doc6', 'doc1']], 'embeddings': None, 'documents': [['The Eiffel Tower is in Paris.', 'India is a famous country and rich in culture.'], ['Weather is getting hotter every year.', 'Hello, world!']], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[{'source': 'landmark'}, {'source': 'geography'}], [{'source': 'climate'}, {'source': 'greeting'}]], 'distances': [[1.1760705709457397, 1.6459715366363525], [1.0603718757629395, 1.5871087312698364]]}


In [12]:
# Upsert a single record under the id "doc12" (added here, since that id is not yet in the collection).
collection.upsert(
    ids=["doc12"],
    documents=["The capital of France is Paris."],
    metadatas=[{"source": "geography"}],
)

In [13]:
# Fetch all records again to confirm that "doc12" is now part of the collection.
collection.get()

{'ids': ['doc1',
  'doc2',
  'doc3',
  'doc4',
  'doc5',
  'doc6',
  'doc7',
  'doc8',
  'doc9',
  'doc10',
  'doc11',
  'doc12'],
 'embeddings': None,
 'documents': ['Hello, world!',
  'ChromaDB is great!',
  'Python is awesome!',
  'Messi and Ronaldo are two of the greatest footballers of all time.',
  'India is a famous country and rich in culture.',
  'Weather is getting hotter every year.',
  'The stock market is unpredictable.',
  'Apples are a great source of vitamins.',
  'Bananas are a good source of potassium.',
  'Oranges are rich in vitamin C.',
  'The Eiffel Tower is in Paris.',
  'The capital of France is Paris.'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [{'source': 'greeting'},
  {'source': 'statement'},
  {'source': 'opinion'},
  {'source': 'sports'},
  {'source': 'geography'},
  {'source': 'climate'},
  {'source': 'finance'},
  {'source': 'nutrition'},
  {'source': 'nutrition'},
  {'source': 'nutrition'},
  {'source': 'landm