In [None]:
# 📦 Install required packages
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# NOTE(review): the version is unpinned; consider pinning it for reproducible runs.
%pip install chromadb

# 💾 Embeddings with Persistent Storage in ChromaDB
This notebook demonstrates how to use ChromaDB with persistent storage to save your collection across runs.

In [None]:
import chromadb
from chromadb.utils import embedding_functions

# Configuration: on-disk location of the Chroma store and the collection to use.
# The path is relative, so it resolves against the kernel's current working directory.
CHROMA_PATH = "./db/chroma_persist"
COLLECTION_NAME = "my_story"

# Set up default embedding function
default_ef = embedding_functions.DefaultEmbeddingFunction()

# Use PersistentClient to store Chroma data on disk so the collection
# is still available after the kernel restarts.
chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
# Backward-compatible alias: earlier revisions of this notebook used the
# misspelled name `croma_client`.
croma_client = chroma_client

# Reuse the collection if it already exists on disk, otherwise create it.
collection = chroma_client.get_or_create_collection(
    name=COLLECTION_NAME,
    embedding_function=default_ef,
)

In [None]:
# Sample corpus: each entry pairs a unique ID with the text to store.
documents = [
    {"id": "doc1", "text": "Hello, world!"},
    {"id": "doc2", "text": "How are you today?"},
    {"id": "doc3", "text": "Goodbye, see you later!"},
    {
        "id": "doc4",
        "text": "Microsoft is a technology company that develops software. It was founded by Bill Gates and Paul Allen in 1975."
    },
]

# Upsert the whole corpus in one batched call instead of one call per
# document. `ids` and `documents` are parallel lists (same order, same length).
# Because this is an upsert keyed on ID, re-running the cell against the
# persisted store overwrites the existing entries rather than duplicating them.
collection.upsert(
    ids=[doc["id"] for doc in documents],
    documents=[doc["text"] for doc in documents],
)

In [None]:
# Natural-language query to search the collection with
query_text = "find document related to technology company"

# Ask for the two closest matches to the single query
results = collection.query(query_texts=[query_text], n_results=2)

# Only one query was submitted, so index 0 selects its result lists;
# documents, ids and distances are parallel and can be walked together.
matches = zip(results["documents"][0], results["ids"][0], results["distances"][0])
for document, doc_id, distance in matches:
    print(f" For the query: {query_text},\n Found similar document: {document} (ID: {doc_id}, Distance: {distance})")