In [None]:
import os

import chromadb
from chromadb.utils import embedding_functions
import sentence_transformers

In [None]:
# Directory where Chroma persists its data. The CHROMA_DATA_PATH environment
# variable overrides the location so the notebook can run on other machines;
# when it is unset or empty, the original local directory is used.
CHROMA_DATA_PATH = (
    os.environ.get("CHROMA_DATA_PATH")
    or "/Users/davidkolb/Documents/Code/kolbeuk-data/vectordb/chroma_data/"
)
# Model name handed to the sentence-transformers embedding function.
EMBED_MODEL = "all-MiniLM-L6-v2"
# Name of the collection this notebook creates and queries.
COLLECTION_NAME = "demo_docs"

# Persistent client backed by the directory at CHROMA_DATA_PATH.
client = chromadb.PersistentClient(path=CHROMA_DATA_PATH)

In [None]:
# Embedding function built from the sentence-transformers model named by EMBED_MODEL.
embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name=EMBED_MODEL
)

# get_or_create_collection is used instead of create_collection because the
# client is persistent: on a second run the collection already exists on disk
# and create_collection would raise, breaking "Restart & Run All".
collection = client.get_or_create_collection(
    name=COLLECTION_NAME,
    embedding_function=embedding_func,
    metadata={"hnsw:space": "cosine"},  # configure the index to use cosine distance
)

In [None]:
# Sample corpus. Every sentence is written next to its genre label so the two
# parallel lists derived below (`documents` and `genres`) cannot drift out of
# alignment when an entry is added, removed, or reordered.
labelled_documents = [
    (
        "technology",
        "The latest iPhone model comes with impressive features and a powerful camera.",
    ),
    (
        "travel",
        "Exploring the beautiful beaches and vibrant culture of Bali is a dream for many travelers.",
    ),
    (
        "science",
        "Einstein's theory of relativity revolutionized our understanding of space and time.",
    ),
    (
        "food",
        "Traditional Italian pizza is famous for its thin crust, fresh ingredients, and wood-fired ovens.",
    ),
    (
        "history",
        "The American Revolution had a profound impact on the birth of the United States as a nation.",
    ),
    (
        "fitness",
        "Regular exercise and a balanced diet are essential for maintaining good physical health.",
    ),
    (
        "art",
        "Leonardo da Vinci's Mona Lisa is considered one of the most iconic paintings in art history.",
    ),
    (
        "climate change",
        "Climate change poses a significant threat to the planet's ecosystems and biodiversity.",
    ),
    (
        "business",
        "Startup companies often face challenges in securing funding and scaling their operations.",
    ),
    (
        "music",
        "Beethoven's Symphony No. 9 is celebrated for its powerful choral finale, 'Ode to Joy.'",
    ),
]

# Texts to embed, in corpus order.
documents = [sentence for _label, sentence in labelled_documents]

# Genre label for each text, in the same order as `documents`.
genres = [label for label, _sentence in labelled_documents]

# Store every document with a positional id ("id0", "id1", ...) and its genre
# as metadata. upsert is used instead of add so that re-running this cell
# against the persistent store rewrites the records to these values rather
# than skipping (or warning about) ids that already exist.
collection.upsert(
    documents=documents,
    ids=[f"id{i}" for i in range(len(documents))],
    metadatas=[{"genre": g} for g in genres],
)

In [None]:
# Ask for the single closest match to one natural-language prompt.
query_results = collection.query(
    query_texts=["Find me some delicious food!"],
    n_results=1,
)

# A notebook cell only renders its final expression, so the intermediate
# fields are printed explicitly; otherwise their values are silently dropped.
print(query_results.keys())
print(query_results["documents"])
print(query_results["ids"])
print(query_results["distances"])
query_results["metadatas"]

In [None]:
# Run two prompts in one call, requesting two matches each and only the
# "documents" and "distances" fields in the response.
query_results = collection.query(
    query_texts=["Teach me about history",
                 "What's going on in the world?"],
    include=["documents", "distances"],
    n_results=2
)

# Index 0 / 1 select the results for the first / second prompt. Only the last
# expression of a cell is rendered, so the earlier values are printed.
print(query_results["documents"][0])
print(query_results["distances"][0])
print(query_results["documents"][1])
query_results["distances"][1]

In [None]:
# Print the complete result object currently held in query_results so that
# every returned field can be seen at once.
print(query_results)

In [None]:
# Baseline lookup: closest single match for the prompt, with no metadata filter.
collection.query(n_results=1, query_texts=["Teach me about music history"])


In [None]:
# Same prompt as the unfiltered lookup, but with a `where` clause that
# restricts candidates to records whose "genre" metadata equals "music".
collection.query(
    n_results=1,
    where={"genre": {"$eq": "music"}},
    query_texts=["Teach me about music history"],
)

In [None]:
# Overwrite the text and genre metadata of two existing records.
# NOTE(review): ids are generated starting at "id0", so "id1" and "id2" are
# the Bali and Einstein sentences, not the iPhone and Bali ones the new text
# suggests -- confirm these are the intended targets.
collection.update(
    ids=["id1", "id2"],
    documents=["The new iPhone is awesome!",
               "Bali has beautiful beaches"],
    metadatas=[{"genre": "tech"}, {"genre": "beaches"}]
)

# Read the two records back to confirm the change. Only the final expression
# of a cell is rendered, so the documents are printed explicitly.
query_results = collection.get(ids=["id1", "id2"])
print(query_results["documents"])
query_results["metadatas"]

In [None]:
# Remove the two records, then show the remaining record count and what a
# lookup of the deleted ids returns.
collection.delete(ids=["id1", "id2"])

# Printed because a cell only renders its last expression; a bare
# collection.count() here would produce no visible output.
print(collection.count())
collection.get(ids=["id1", "id2"])