## Basic Usage - ChromaDb

In [6]:
import chromadb

# No data is persisted
chroma_client = chromadb.Client()
collection = chroma_client.create_collection(name="new_collection")

In [8]:
collection.add(
    documents=[
        "This is a document about football",
        "This is a document about rugby"
    ],
    ids=["id1", "id2"]
)


Add of existing embedding ID: id1
Add of existing embedding ID: id2
Insert of existing embedding ID: id1
Insert of existing embedding ID: id2


In [9]:
import json

results = collection.query(
    query_texts=["This is a query document about cricket"], 
    n_results=2 
)

print(json.dumps(results, indent=4))


{
    "ids": [
        [
            "id1",
            "id2"
        ]
    ],
    "embeddings": null,
    "documents": [
        [
            "This is a document about pineapple",
            "This is a document about oranges"
        ]
    ],
    "uris": null,
    "data": null,
    "metadatas": [
        [
            null,
            null
        ]
    ],
    "distances": [
        [
            1.4212119579315186,
            1.6473313570022583
        ]
    ],
    "included": [
        "distances",
        "documents",
        "metadatas"
    ]
}


## Using Embedding Pipeline from Hugging Face Transformers

In [75]:
import chromadb
from chromadb.utils import embedding_functions

client = chromadb.Client()


In [78]:
embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")


# Create or load a collection
collection_name = "text_embeddings"
collection = client.get_or_create_collection(name=collection_name, embedding_function=embedding_func, metadata={"hnsw:space": "cosine"},)

In [72]:
# Example data
documents = [
    "Machine learning models are widely used for predictions.",
    "Neural networks are inspired by the human brain.",
    "ChromaDB is a vector database for embeddings.",
    "Python is a versatile programming language.",
    "Artificial intelligence is transforming industries."
]


Device set to use mps:0


In [79]:
# Add documents to the ChromaDB collection

ids = [f"doc_{i}" for i in range(len(documents))]

collection.add(
    ids=ids,
    documents=documents
)

print(f"Added {len(documents)} documents to the ChromaDB collection.")



Added 5 documents to the ChromaDB collection.


In [87]:
# Query ChromaDB
query = "Explain neural networks"


# Perform similarity search
results = collection.query(
    query_texts=[query],
    n_results=3
)

# Display results
print("\nQuery Results:")
for doc_id, doc, distance in zip(results['ids'][0], results['documents'][0], results['distances'][0]):
    print(f"Document ID: {doc_id}, Distance: {distance:.4f}")
    print(f"Content: {doc}\n")

# Optional: Persist database
#client.persist()``


Query Results:
Document ID: doc_1, Distance: 0.6707
Content: Neural networks are inspired by the human brain.

Document ID: doc_0, Distance: 1.2591
Content: Machine learning models are widely used for predictions.

Document ID: doc_4, Distance: 1.4476
Content: Artificial intelligence is transforming industries.

