## Test chromadb

pip install chromadb


In [1]:
import chromadb
from chromadb.utils import embedding_functions
from chromadb.config import Settings

fake data

In [2]:
documents = [
    "Mars, often called the 'Red Planet', has captured the imagination of scientists and space enthusiasts alike.",
    "The Hubble Space Telescope has provided us with breathtaking images of distant galaxies and nebulae.",
    "The concept of a black hole, where gravity is so strong that nothing can escape it, was first theorized by Albert Einstein's theory of general relativity.",
    "The Renaissance was a pivotal period in history that saw a flourishing of art, science, and culture in Europe.",
    "The Industrial Revolution marked a significant shift in human society, leading to urbanization and technological advancements.",
    "The ancient city of Rome was once the center of a powerful empire that spanned across three continents.",
    "Dolphins are known for their high intelligence and social behavior, often displaying playful interactions with humans.",
    "The chameleon is a remarkable creature that can change its skin color to blend into its surroundings or communicate with other chameleons.",
    "The migration of monarch butterflies spans thousands of miles and involves multiple generations to complete.",
    "Christopher Nolan's 'Inception' is a mind-bending movie that explores the boundaries of reality and dreams.",
    "The 'Lord of the Rings' trilogy, directed by Peter Jackson, brought J.R.R. Tolkien's epic fantasy world to life on the big screen.",
    "Pixar's 'Toy Story' was the first feature-length film entirely animated using computer-generated imagery (CGI).",
    "Superman, known for his incredible strength and ability to fly, is one of the most iconic superheroes in comic book history.",
    "Black Widow, portrayed by Scarlett Johansson, is a skilled spy and assassin in the Marvel Cinematic Universe.",
    "The character of Iron Man, played by Robert Downey Jr., kickstarted the immensely successful Marvel movie franchise in 2008."
]

COnnect without auth

In [3]:
import requests

response = requests.get("http://localhost:5000/api/v1/heartbeat")
print(f"Status code: {response.status_code}")
print(f"Response: {response.json()}")

Status code: 200
Response: {'nanosecond heartbeat': 1741433003468940316}


In [4]:
!pip install -q langchain-chroma langchain-huggingface

In [5]:
from langchain_huggingface import HuggingFaceEmbeddings

embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
text = "This is a test document."
embeddings = embedding_function.embed_query(text)

embeddings[:3]

[-0.04895172640681267, -0.039861924946308136, -0.021562771871685982]

In [7]:
# persist directory
persist_directory = "./database"
collection_name = "test_collection"

In [8]:
from langchain_chroma import Chroma

vector_store = Chroma(
    collection_name=collection_name,
    embedding_function=embedding_function,
    persist_directory=persist_directory,  # Where to save data locally, remove if not necessary
)

Add documents

In [10]:
from uuid import uuid4

from langchain_core.documents import Document

docs = []
for i, doc in enumerate(documents):
    doc_id = i+1
    document = Document(
        page_content=doc,
        id=doc_id,
        metadata={"title": f"None"}
        )
    docs.append(document)

#print(docs)

uuids = [str(uuid4()) for _ in range(len(docs))]

vector_store.add_documents(documents=docs, ids=uuids)

['b610c145-662c-4ab8-8a32-ee1ad6b4654c',
 '69fe1ccb-9370-4170-80c0-8261cbfb803b',
 '3b539818-3677-4757-94e4-1dec22fdb9f6',
 '470514b5-4e32-4a5b-a125-3442fe7889a8',
 '96fde4eb-db52-47a8-9d2a-4a6eba5a8371',
 'b3aa7b30-8cf4-4537-b956-dd6115c67f72',
 '4029e0ab-cfc5-4d54-81a9-5c372c902f32',
 'd067e6ad-868c-43d7-bd20-0584984a7827',
 'eb7e5d49-7940-4dc9-a215-8a45a2a77de8',
 'c0c797f9-495a-452f-b1d5-67147ecd11d9',
 'ae59dd12-7b6f-4db1-b8bb-43f9beed55eb',
 '1e0f6ca7-be0e-4891-93db-a84b4246d260',
 'b13f1f37-3b87-47b2-a69d-9dbbe4ab9f0b',
 'c8ae857a-5678-41db-844c-5d1fc09dfc8f',
 '751a616a-528a-43f8-a09a-d32505787d0a']

Test 

In [11]:
results = vector_store.similarity_search(
    "Christopher Nolan's 'Inception'",
    k=2,
    #filter={"source": "tweet"},
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* Christopher Nolan's 'Inception' is a mind-bending movie that explores the boundaries of reality and dreams. [{'title': 'None'}]
* The 'Lord of the Rings' trilogy, directed by Peter Jackson, brought J.R.R. Tolkien's epic fantasy world to life on the big screen. [{'title': 'None'}]


## Test with chromadb client

In [12]:
# Connect with no authentication
#client = chromadb.HttpClient(host="localhost", port=5000)
client = chromadb.PersistentClient(path="./database")

# List available collections
print(client.list_collections())
# Create a Chroma instance
collection = client.get_collection(collection_name)


['test_collection']


In [13]:
query_text = "Christopher Nolan's 'Inception'"
# Embed the query text
query_embeddings = embedding_function.embed_query(query_text)

# Query the collection
results = collection.query(
    query_embeddings=query_embeddings,
    n_results=2,
)

# Check the results
#print(results)

# Print the results
for res in results["documents"][0]:
    print(res)
    #print(f"* {res.page_content} [{res.metadata}]")

Christopher Nolan's 'Inception' is a mind-bending movie that explores the boundaries of reality and dreams.
The 'Lord of the Rings' trilogy, directed by Peter Jackson, brought J.R.R. Tolkien's epic fantasy world to life on the big screen.


In [None]:
# delete collection
client.delete_collection(collection_name)

# List available collections
print(client.list_collections())

ValueError: Collection sample_collection does not exist.