In [None]:
import chromadb
import torch
from chromadb.utils import embedding_functions

In [None]:
# Directory handed to the persistent Chroma client as its storage path.
CHROMA_DATA_PATH = "./.chroma_data/"
# Model name passed to the sentence-transformers embedding function.
EMBED_MODEL = "all-MiniLM-L6-v2"
# Name of the collection used throughout this notebook.
COLLECTION_NAME = "demo_docs"

In [None]:
# Chroma client whose storage path is CHROMA_DATA_PATH.
client = chromadb.PersistentClient(path=CHROMA_DATA_PATH)

In [None]:
# Embed documents and queries with the sentence-transformers model named by
# EMBED_MODEL. Prefer the first CUDA GPU, but fall back to the CPU so the
# notebook still runs on machines without one (a hard-coded "cuda:0" fails
# there).
embed_device = "cuda:0" if torch.cuda.is_available() else "cpu"
embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name=EMBED_MODEL, device=embed_device
)

In [None]:
# Open the collection, creating it only if it does not exist yet. The client
# is persistent, so a plain create_collection() raises on every run after the
# first because the collection is already on disk.
collection = client.get_or_create_collection(
    name=COLLECTION_NAME,
    embedding_function=embedding_func,
    metadata={"hnsw:space": "cosine"},  # request cosine distance for the index
)

In [None]:
# Sample corpus: ten one-sentence documents. The trailing comments give the id
# each entry receives when the records are added (ids are positional: "id0",
# "id1", ...).
documents = [
    "The latest iPhone model comes with impressive features and a powerful camera.",  # id0
    "Exploring the beautiful beaches and vibrant culture of Bali is a dream for many travelers.",  # id1
    "Einstein's theory of relativity revolutionized our understanding of space and time.",  # id2
    "Traditional Italian pizza is famous for its thin crust, fresh ingredients, and wood-fired ovens.",  # id3
    "The American Revolution had a profound impact on the birth of the United States as a nation.",  # id4
    "Regular exercise and a balanced diet are essential for maintaining good physical health.",  # id5
    "Leonardo da Vinci's Mona Lisa is considered one of the most iconic paintings in art history.",  # id6
    "Climate change poses a significant threat to the planet's ecosystems and biodiversity.",  # id7
    "Startup companies often face challenges in securing funding and scaling their operations.",  # id8
    "Beethoven's Symphony No. 9 is celebrated for its powerful choral finale, 'Ode to Joy.'",  # id9
]

In [None]:
# One genre label per entry of `documents`, in the same order; the two lists
# are paired by position when the records are added to the collection.
genres = [
    "technology", "travel", "science", "food", "history",
    "fitness", "art", "climate change", "business", "music",
]

In [None]:
# Store every document under a positional id ("id0", "id1", ...) together with
# its genre, kept under the "gen" metadata key.
collection.add(
    documents=documents,
    metadatas=[{"gen": genre} for genre in genres],
    ids=[f"id{position}" for position, _ in enumerate(documents)],
)

In [None]:
# Run two free-text queries in one call, asking for two results per query.
query_results = collection.query(
    query_texts=[
        "Find me some delicious food!",
        "Beethoven",
    ],
    n_results=2,
)

In [None]:
# Show the "documents" entry of the query results.
query_results["documents"]

In [None]:
# Show the "ids" entry of the query results (ids were assigned as "id<position>").
query_results["ids"]

In [None]:
# Show the "distances" entry of the query results. The collection was set up
# with "hnsw:space": "cosine", so these are presumably cosine distances.
query_results["distances"]

In [None]:
# Show the "metadatas" entry of the query results; records were stored with a
# single "gen" key.
query_results["metadatas"]

In [None]:
# Same kind of text query, but with a metadata filter: `$in` limits the
# candidates to records whose "gen" value is one of the listed genres.
collection.query(
    query_texts=["Teach me about music history"],
    n_results=2,
    where={
        "gen": {
            "$in": ["music", "history"],
        },
    },
)

In [None]:
collection.update(
    ids=["id1", "id2"],
    documents=["The new iPhone is awesome!", "Bali has beautiful beaches"],
    metadatas=[{"gen": "tech"}, {"gen": "beaches"}],
)

In [None]:
collection.get(ids=["id1", "id2"])

In [None]:
collection.delete(ids=["id1", "id2"])

collection.count()