In [7]:
# pip install chromadb

Collecting chromadb
  Downloading chromadb-1.0.3-cp39-abi3-win_amd64.whl.metadata (7.0 kB)
Collecting build>=1.0.3 (from chromadb)
  Using cached build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Using cached chroma_hnswlib-0.7.6-cp310-cp310-win_amd64.whl.metadata (262 bytes)
Collecting fastapi==0.115.9 (from chromadb)
  Downloading fastapi-0.115.9-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Using cached uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.23.0-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.21.0-cp310-cp310-win_amd64.whl.metadata (4.9 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.31.1-py3-none-any.whl.metadata (1.6 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chr

In [4]:
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma


In [5]:
# Build one LangChain Document per IPL player: the player's name is the
# page content, and team / role are attached as metadata.
ipl_roster = [
    ("Virat Kohli", "RCB", "Batsman"),
    ("MS Dhoni", "CSK", "Wicketkeeper"),
    ("Rohit Sharma", "MI", "Batsman"),
    ("Jasprit Bumrah", "MI", "Bowler"),
    ("Ravindra Jadeja", "CSK", "All-rounder"),
    ("AB de Villiers", "RCB", "Batsman"),
    ("David Warner", "SRH", "Batsman"),
    ("Kane Williamson", "SRH", "Batsman"),
    ("Ben Stokes", "RR", "All-rounder"),
    ("Jos Buttler", "RR", "Wicketkeeper"),
]

# Unpack into the individual names that later cells refer to.
doc1, doc2, doc3, doc4, doc5, doc6, doc7, doc8, doc9, doc10 = [
    Document(page_content=player, metadata={"team": team, "role": role})
    for player, team, role in ipl_roster
]

In [8]:
# Chroma vector store for the "ipl_players" collection, embedding text with
# OpenAIEmbeddings and persisting under the given directory.
# NOTE(review): "cheoma_db" looks like a typo for "chroma_db"; left unchanged
# because renaming it changes where the collection is persisted.
vector_store = Chroma(
    collection_name="ipl_players",
    persist_directory="data/cheoma_db",
    embedding_function=OpenAIEmbeddings(),
)

In [9]:
# Insert all ten player documents into the store; the cell output is the
# list of ids returned for the new records.
ipl_docs = [doc1, doc2, doc3, doc4, doc5, doc6, doc7, doc8, doc9, doc10]
vector_store.add_documents(ipl_docs)

['276d26b6-4604-48fc-bdde-fba38fb07970',
 'f2e154d8-5b92-4381-9141-51b2ba6361f8',
 '0b7d9b48-82b5-4fcc-b307-ec43ff2a03d8',
 'dbb0e882-ce85-4489-999e-cb2e1e416fb8',
 '95b3b8cc-010b-4e45-a276-6989290d2903',
 '20d90905-a1e9-49b9-80d2-b189c39410ba',
 'fa02260c-b111-40df-a613-a3822dc40423',
 '70f64bd0-a763-43a4-ba4a-e591aed70aeb',
 '4eb0e6b0-1690-4896-86ff-cdda95969989',
 'e722182c-a5fd-4543-b719-80acc46e444f']

In [10]:
# Inspect everything currently stored: ids plus the requested metadata,
# document text and embedding vectors.
records_initial = vector_store.get(include=["metadatas", "documents", "embeddings"])
records_initial

{'ids': ['276d26b6-4604-48fc-bdde-fba38fb07970',
  'f2e154d8-5b92-4381-9141-51b2ba6361f8',
  '0b7d9b48-82b5-4fcc-b307-ec43ff2a03d8',
  'dbb0e882-ce85-4489-999e-cb2e1e416fb8',
  '95b3b8cc-010b-4e45-a276-6989290d2903',
  '20d90905-a1e9-49b9-80d2-b189c39410ba',
  'fa02260c-b111-40df-a613-a3822dc40423',
  '70f64bd0-a763-43a4-ba4a-e591aed70aeb',
  '4eb0e6b0-1690-4896-86ff-cdda95969989',
  'e722182c-a5fd-4543-b719-80acc46e444f'],
 'embeddings': array([[-0.01456909, -0.01055943,  0.02247441, ..., -0.00992599,
          0.00923554, -0.00790532],
        [-0.01855437,  0.00476378, -0.009264  , ..., -0.02157209,
          0.00710284,  0.00119671],
        [-0.01377398,  0.00108009,  0.00200977, ..., -0.01492608,
          0.01358196, -0.01201383],
        ...,
        [-0.01407709, -0.00359821, -0.0012005 , ..., -0.0081305 ,
          0.01522167,  0.00804499],
        [-0.01041185,  0.00083084, -0.00820288, ..., -0.01259445,
         -0.00714784, -0.00400912],
        [-0.03533101, -0.00546049, 

In [14]:
# Retrieve the three stored documents closest to a natural-language query.
bowler_query = "who is a bowler"
vector_store.similarity_search(bowler_query, k=3)

[Document(metadata={'role': 'All-rounder', 'team': 'RR'}, page_content='Ben Stokes'),
 Document(metadata={'team': 'MI', 'role': 'Bowler'}, page_content='Jasprit Bumrah'),
 Document(metadata={'role': 'Batsman', 'team': 'SRH'}, page_content='David Warner')]

In [19]:
# Metadata-filtered search: restrict candidates to documents whose "team"
# is "MI" and return (document, score) pairs. The query text is left empty,
# so the result is driven by the filter rather than by the query wording.
mi_only = {"team": "MI"}
vector_store.similarity_search_with_score(query="", filter=mi_only)

[(Document(metadata={'role': 'Bowler', 'team': 'MI'}, page_content='Jasprit Bumrah'),
  0.6036392450332642),
 (Document(metadata={'team': 'MI', 'role': 'Batsman'}, page_content='Rohit Sharma'),
  0.6043736934661865)]

In [24]:
# Replacement record for Virat Kohli: longer text plus an extra "age" field.
# NOTE(review): "thw" in the page content is probably a typo for "the"; it is
# left unchanged here because this text is what gets stored and embedded.
kohli_metadata = {"team": "RCB", "role": "Batsman", "age": 34}
updated_doc1 = Document(
    page_content="Virat Kohli is the best batsman in thw world",
    metadata=kohli_metadata,
)

In [25]:
# Resolve the record id at run time instead of pasting a UUID from an earlier
# cell's output: ids are generated when the documents are added, so a pasted
# id only matches the session that produced it and breaks Restart & Run All.
kohli_ids = vector_store.get(where_document={"$contains": "Virat Kohli"})["ids"]
assert kohli_ids, "no stored document contains 'Virat Kohli'"

# Overwrite the first matching record with the revised document.
vector_store.update_document(document_id=kohli_ids[0], document=updated_doc1)

In [26]:
# Re-inspect the store after the update: ids plus metadata, document text
# and embedding vectors.
records_after_update = vector_store.get(include=["metadatas", "documents", "embeddings"])
records_after_update

{'ids': ['276d26b6-4604-48fc-bdde-fba38fb07970',
  'f2e154d8-5b92-4381-9141-51b2ba6361f8',
  '0b7d9b48-82b5-4fcc-b307-ec43ff2a03d8',
  'dbb0e882-ce85-4489-999e-cb2e1e416fb8',
  '95b3b8cc-010b-4e45-a276-6989290d2903',
  '20d90905-a1e9-49b9-80d2-b189c39410ba',
  'fa02260c-b111-40df-a613-a3822dc40423',
  '70f64bd0-a763-43a4-ba4a-e591aed70aeb',
  '4eb0e6b0-1690-4896-86ff-cdda95969989',
  'e722182c-a5fd-4543-b719-80acc46e444f'],
 'embeddings': array([[-0.00079955,  0.00703137,  0.02337117, ..., -0.00937724,
          0.00113149, -0.01079102],
        [-0.01855437,  0.00476378, -0.009264  , ..., -0.02157209,
          0.00710284,  0.00119671],
        [-0.01377398,  0.00108009,  0.00200977, ..., -0.01492608,
          0.01358196, -0.01201383],
        ...,
        [-0.01407709, -0.00359821, -0.0012005 , ..., -0.0081305 ,
          0.01522167,  0.00804499],
        [-0.01041185,  0.00083084, -0.00820288, ..., -0.01259445,
         -0.00714784, -0.00400912],
        [-0.03533101, -0.00546049, 

In [27]:
# Resolve the id of the Virat Kohli record at run time rather than relying on
# a UUID copied from an earlier cell's output; such an id is only valid in the
# session that generated it, so the hard-coded form breaks Restart & Run All.
ids_to_delete = vector_store.get(where_document={"$contains": "Virat Kohli"})["ids"]
assert ids_to_delete, "no stored document contains 'Virat Kohli'"

# Remove a single record, as the original cell did.
vector_store.delete(ids=ids_to_delete[:1])

In [28]:
# Re-inspect the store after the deletion: ids plus metadata, document text
# and embedding vectors.
records_after_delete = vector_store.get(include=["metadatas", "documents", "embeddings"])
records_after_delete

{'ids': ['f2e154d8-5b92-4381-9141-51b2ba6361f8',
  '0b7d9b48-82b5-4fcc-b307-ec43ff2a03d8',
  'dbb0e882-ce85-4489-999e-cb2e1e416fb8',
  '95b3b8cc-010b-4e45-a276-6989290d2903',
  '20d90905-a1e9-49b9-80d2-b189c39410ba',
  'fa02260c-b111-40df-a613-a3822dc40423',
  '70f64bd0-a763-43a4-ba4a-e591aed70aeb',
  '4eb0e6b0-1690-4896-86ff-cdda95969989',
  'e722182c-a5fd-4543-b719-80acc46e444f'],
 'embeddings': array([[-0.01855437,  0.00476378, -0.009264  , ..., -0.02157209,
          0.00710284,  0.00119671],
        [-0.01377398,  0.00108009,  0.00200977, ..., -0.01492608,
          0.01358196, -0.01201383],
        [-0.03059122,  0.01082909,  0.02247258, ..., -0.02781714,
          0.00993197, -0.01889682],
        ...,
        [-0.01407709, -0.00359821, -0.0012005 , ..., -0.0081305 ,
          0.01522167,  0.00804499],
        [-0.01041185,  0.00083084, -0.00820288, ..., -0.01259445,
         -0.00714784, -0.00400912],
        [-0.03533101, -0.00546049, -0.00637959, ...,  0.00366961,
          0