In [1]:
#!/home/tst_imperial/langchain/venv/bin/python

In [2]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

In [3]:
from langchain.schema import Document

docs = [
    Document(
        page_content="India chased 280 with 2 wickets in hand against Australia.",
        metadata={
            "id": "doc_001",
            "title": "India vs Australia Match Report",
            "document_type": "match_report"
        }
    ),
    Document(
        page_content="Right-handed opener with strong off-side play and weaknesses against spin.",
        metadata={
            "id": "doc_002",
            "title": "Batter Profile",
            "document_type": "player_profile"
        }
    ),
    Document(
        page_content="Left-arm pacer specializing in death overs with yorkers and slower balls.",
        metadata={
            "id": "doc_003",
            "title": "Bowler Profile",
            "document_type": "player_profile"
        }
    ),
    Document(
        page_content="ICC announces the 2025 Champions Trophy schedule in India and Sri Lanka.",
        metadata={
            "id": "doc_004",
            "title": "Champions Trophy 2025 Schedule",
            "document_type": "news"
        }
    ),
    Document(
        page_content="Pakistan vs England — focus on England's top order and Pakistan’s pace attack.",
        metadata={
            "id": "doc_005",
            "title": "Match Preview: Pakistan vs England",
            "document_type": "preview"
        }
    )
]


In [4]:
embedding = HuggingFaceEmbeddings(model="sentence-transformers/all-MiniLM-L6-v2")

In [5]:
vector_store = Chroma(
    embedding_function=embedding,
    persist_directory="1Vector_db",
    collection_name="test"
)

  vector_store = Chroma(


In [6]:
vector_store.add_documents(docs)

['5b907601-09a3-4919-ac97-d63e538081db',
 '08b1b4ca-26f7-4a56-8995-7d11e9e6c36f',
 '509adcdf-8ade-49dc-be08-360b8447cea2',
 '2fa9dcbe-1600-4990-84b2-f4f330a49beb',
 '0ff61a80-2ac3-4fb6-b7bb-aa9b307afcee']

In [7]:
vector_store.get(include=["embeddings","documents","metadatas"])

{'ids': ['56170061-61cb-498d-8ca6-8d0eb1de11ea',
  '66821d9c-6e20-4590-9076-803df98fe87d',
  '5b907601-09a3-4919-ac97-d63e538081db',
  '08b1b4ca-26f7-4a56-8995-7d11e9e6c36f',
  '509adcdf-8ade-49dc-be08-360b8447cea2',
  '2fa9dcbe-1600-4990-84b2-f4f330a49beb',
  '0ff61a80-2ac3-4fb6-b7bb-aa9b307afcee'],
 'embeddings': array([[-0.00649914,  0.09898605, -0.06050067, ...,  0.0725149 ,
          0.04145559, -0.01841052],
        [ 0.06280218,  0.14011365, -0.12169641, ...,  0.00817483,
          0.03673142, -0.02810368],
        [-0.00088962,  0.04321384, -0.10279673, ..., -0.05970604,
         -0.03915942, -0.019388  ],
        ...,
        [-0.01425033,  0.12296186, -0.03429231, ..., -0.03929055,
          0.0305845 ,  0.09356885],
        [-0.02922237,  0.02129599,  0.0040547 , ...,  0.0049027 ,
          0.01112   , -0.06170088],
        [-0.03655049,  0.08281964, -0.01548659, ...,  0.000214  ,
          0.09311094,  0.01251   ]], shape=(7, 384)),
 'documents': ['India defeated Australia 

In [10]:
vector_store.similarity_search(
    query="Schedule of the 2025 Champions Trophy?",
    k=1
)

[Document(metadata={'id': 'doc_004', 'title': 'Champions Trophy 2025 Schedule', 'document_type': 'news'}, page_content='ICC announces the 2025 Champions Trophy schedule in India and Sri Lanka.')]

In [11]:
vector_store.similarity_search_with_score(
    query="Schedule of the 2025 Champions Trophy?",
    k=3
)

[(Document(metadata={'id': 'doc_004', 'document_type': 'news', 'title': 'Champions Trophy 2025 Schedule'}, page_content='ICC announces the 2025 Champions Trophy schedule in India and Sri Lanka.'),
  0.6108624935150146),
 (Document(metadata={'document_type': 'preview', 'title': 'Match Preview: Pakistan vs England', 'id': 'doc_005'}, page_content="Pakistan vs England — focus on England's top order and Pakistan’s pace attack."),
  1.5457468032836914),
 (Document(metadata={'year': 2023, 'tags': 'player-performance, India'}, page_content='Virat Kohli scored a century.'),
  1.5836598873138428)]