In [2]:
from sentence_transformers import SentenceTransformer

# Load model
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Some text to embed
texts = [
    "Cats love sleeping.",
    "Dogs are playful animals.",
    "I enjoy programming in Python.",
    "Redis can store vectors too!"
]

# Get embeddings: a list of lists of numbers
embeddings = model.encode(texts)

print("Embeddings shape:", embeddings.shape)
print("Example vector:", embeddings[0])

Embeddings shape: (4, 384)
Example vector: [ 7.87396207e-02  1.93886999e-02  2.76507586e-02  8.64654109e-02
 -1.48537653e-02  1.60661265e-02 -1.60285213e-03 -3.76146659e-02
  1.26939770e-02  6.24378286e-02 -1.05130471e-01 -1.56330224e-02
  5.26266731e-02  5.99600859e-02 -1.59648228e-02  3.70125659e-02
 -4.81112301e-02  1.78382788e-02  1.17884483e-02  3.16210203e-02
 -7.95206148e-03  4.22672741e-02  1.44481838e-01  6.89757580e-04
  5.40208910e-03  2.98592951e-02 -6.59526139e-03 -2.89774537e-02
 -2.43121497e-02  2.26081605e-03 -1.11427456e-01  4.93873581e-02
 -4.31133546e-02 -2.09824666e-02  3.36226784e-02 -1.79526005e-02
  1.24976179e-02  1.35822315e-02  6.13042433e-03  6.93586171e-02
  4.46974672e-02 -1.30910855e-02  2.06832960e-02 -3.53311598e-02
 -1.30466491e-01  1.02433870e-02  1.59708143e-03 -5.77524342e-02
  5.72035611e-02  3.62285562e-02 -1.71939284e-03  3.27521153e-02
 -8.71543884e-02 -2.84104291e-02  1.83426198e-02  2.30656378e-02
  5.38111664e-02 -1.74625050e-02  2.92159021e-0

In [3]:
import chromadb
from chromadb.config import Settings

# Create Chroma client
client = chromadb.Client(Settings(
    chroma_db_impl="duckdb+parquet",
    persist_directory="./chroma_db"
))

# Create or get collection
collection = client.get_or_create_collection("my_collection")

# Add embeddings
collection.add(
    documents=texts,
    embeddings=embeddings.tolist(),  # Make sure it's a list of lists
    ids=[f"id_{i}" for i in range(len(texts))]
)

# Search for similar text
query = "Which animal sleeps a lot?"
query_embedding = model.encode([query])

results = collection.query(
    query_embeddings=query_embedding.tolist(),
    n_results=2
)

print("Results:", results)


ValueError: [91mYou are using a deprecated configuration of Chroma.

[94mIf you do not have data you wish to migrate, you only need to change how you construct
your Chroma client. Please see the "New Clients" section of https://docs.trychroma.com/deployment/migration.
________________________________________________________________________________________________

If you do have data you wish to migrate, we have a migration tool you can use in order to
migrate your data to the new Chroma architecture.
Please `pip install chroma-migrate` and run `chroma-migrate` to migrate your data and then
change how you construct your Chroma client.

See https://docs.trychroma.com/deployment/migration for more information or join our discord at https://discord.gg/MMeYNTmh3x for help![0m

In [1]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [None]:
from langchain_chroma import Chroma

vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",
)

: 

In [None]:
from uuid import uuid4

from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
    id=1,
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
    id=2,
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
    id=3,
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
    id=4,
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
    id=5,
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
    id=6,
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
    id=7,
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
    id=8,
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
    id=9,
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
    id=10,
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]
uuids = [str(uuid4()) for _ in range(len(documents))]

vector_store.add_documents(documents=documents, ids=uuids)

In [None]:
updated_document_1 = Document(
    page_content="I had chocolate chip pancakes and fried eggs for breakfast this morning.",
    metadata={"source": "tweet"},
    id=1,
)

updated_document_2 = Document(
    page_content="The weather forecast for tomorrow is sunny and warm, with a high of 82 degrees.",
    metadata={"source": "news"},
    id=2,
)

vector_store.update_document(document_id=uuids[0], document=updated_document_1)
# You can also update multiple documents at once
vector_store.update_documents(
    ids=uuids[:2], documents=[updated_document_1, updated_document_2]
)

In [None]:
results = vector_store.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=2,
    filter={"source": "tweet"},
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

In [None]:
results = vector_store.similarity_search_with_score(
    "Will it be hot tomorrow?", k=1, filter={"source": "news"}
)
for res, score in results:
    print(f"* [SIM={score:3f}] {res.page_content} [{res.metadata}]")

In [None]:
results = vector_store.similarity_search_by_vector(
    embedding=embeddings.embed_query("I love green eggs and ham!"), k=1
)
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")

In [None]:
retriever = vector_store.as_retriever(
    search_type="mmr", search_kwargs={"k": 1, "fetch_k": 5}
)
retriever.invoke("Stealing from the bank is a crime", filter={"source": "news"})