In [20]:
# Chroma is imported from langchain_community: the `langchain.vectorstores`
# path is a deprecated re-export of this same class.
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.documents.base import Document

# Embedding model that is handed to the Chroma store as its embedding_function.
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

In [3]:
# Show the kernel's current working directory; relative paths used in this notebook resolve against it.
%pwd

'c:\\Users\\ROSHAN\\OneDrive\\Desktop\\Vs Code\\Python\\lang\\VectorStore'

In [None]:
from langchain_community.document_loaders.pdf import PyPDFLoader
from dotenv import load_dotenv

# Load settings from a local .env file, if one is present.
load_dotenv()

# Loader for the practice-problems PDF (path is relative to the working directory).
loader = PyPDFLoader('../Data/Time_and_Work_Formulas_Practice.pdf')

# lazy_load() returns a generator, so unpack it into a list to allow
# indexing and reuse in later cells.
docs = [*loader.lazy_load()]

print(docs)

[Document(metadata={'producer': 'PyFPDF 1.7.2 http://pyfpdf.googlecode.com/', 'creator': 'PyPDF', 'creationdate': 'D:20250719052708', 'source': '../Data/Time_and_Work_Formulas_Practice.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content="Time & Work Formulas and Practice Problems\nFormulas:\n1. Individual Work Rate:\nIf A can do the work in X days, A's rate = 1/X work per day.\n2. Combined Work (Two People):\nIf A can do the work in X days and B in Y days,\nTime taken together = (X × Y) / (X + Y)\n3. Combined Work (Three People):\nIf A, B, and C can do the work in X, Y, Z days,\nCombined rate = 1/X + 1/Y + 1/Z\nTotal time = 1 / (Combined rate)\n4. Work Done in 'n' Days:\nWork done = n × (Individual or combined rate)\n5. Remaining Work Formula:\nIf partial work is done, Remaining work = 1 - (Work completed)\nPractice Problems:\n1. A can do a work in 12 days, B can do it in 16 days. How many days will they take together?\n2. A can finish a work in 10 days, B in 15 days, a

In [14]:
# Print the first loaded Document (page text followed by its metadata).
first_doc = docs[0]
print(first_doc)

page_content='Time & Work Formulas and Practice Problems
Formulas:
1. Individual Work Rate:
If A can do the work in X days, A's rate = 1/X work per day.
2. Combined Work (Two People):
If A can do the work in X days and B in Y days,
Time taken together = (X × Y) / (X + Y)
3. Combined Work (Three People):
If A, B, and C can do the work in X, Y, Z days,
Combined rate = 1/X + 1/Y + 1/Z
Total time = 1 / (Combined rate)
4. Work Done in 'n' Days:
Work done = n × (Individual or combined rate)
5. Remaining Work Formula:
If partial work is done, Remaining work = 1 - (Work completed)
Practice Problems:
1. A can do a work in 12 days, B can do it in 16 days. How many days will they take together?
2. A can finish a work in 10 days, B in 15 days, and C in 30 days. How long will it take them to
complete the work together?
3. A and B can do a work together in 8 days. A alone can do it in 12 days. How many days will B
take to finish the work alone?' metadata={'producer': 'PyFPDF 1.7.2 http://pyfpdf.goog

In [15]:
# Confirm which class the loader produces for each entry.
doc_type = type(docs[0])
print(doc_type)

<class 'langchain_core.documents.base.Document'>


In [11]:
# Chroma store for the 'sample' collection, using the 'chroma_db' directory
# for persistence and the embedding model defined earlier.
vectorstore = Chroma(
    collection_name='sample',
    persist_directory='chroma_db',
    embedding_function=embeddings,
)

In [12]:
# Embed and store the loaded pages; the call returns the ids assigned to them.
vectorstore.add_documents(documents=docs)

['405eaefc-f79c-476b-8bd6-a47057ff4403',
 '1fa2a150-0dfb-4ae8-8e9e-a6211a10db0a']

In [13]:
# View everything in the collection: ids plus the requested fields.
fields = ['embeddings', 'documents', 'metadatas']
vectorstore.get(include=fields)

{'ids': ['405eaefc-f79c-476b-8bd6-a47057ff4403',
  '1fa2a150-0dfb-4ae8-8e9e-a6211a10db0a'],
 'embeddings': array([[-3.03139985e-02,  4.33268994e-02,  2.98312050e-03,
         -3.91602702e-02, -5.31806350e-02, -2.68528741e-02,
         -4.08702940e-02, -3.63071114e-02, -4.40132013e-03,
          6.57053571e-03, -4.35499847e-03, -7.00094388e-04,
         -2.17926055e-02,  1.22700617e-01,  2.95118950e-02,
          3.05550694e-02, -6.37955591e-02, -1.81769431e-02,
         -1.11464515e-01, -1.35134179e-02,  2.65375841e-02,
         -1.22418568e-01, -2.33099423e-02,  6.74569700e-03,
          8.21573138e-02, -6.75434060e-03,  2.12259404e-02,
         -1.64648462e-02,  3.31578702e-02, -1.17377499e-02,
         -3.01582180e-03,  1.88926514e-02,  4.87720817e-02,
         -4.80002463e-02, -3.17066424e-02, -1.44992424e-02,
         -7.72012994e-02,  1.70090757e-02,  5.00453189e-02,
          1.45264287e-02, -7.25584850e-02,  1.24608204e-02,
          3.47472578e-02,  2.21436694e-02, -4.12524454

In [16]:
# Search the store and return only the single best-matching document (k=1).
question = 'which question is for work done in n days?'
vectorstore.similarity_search(query=question, k=1)

[Document(metadata={'page_label': '1', 'creator': 'PyPDF', 'producer': 'PyFPDF 1.7.2 http://pyfpdf.googlecode.com/', 'creationdate': 'D:20250719052708', 'total_pages': 2, 'page': 0, 'source': '../Data/Time_and_Work_Formulas_Practice.pdf'}, page_content="Time & Work Formulas and Practice Problems\nFormulas:\n1. Individual Work Rate:\nIf A can do the work in X days, A's rate = 1/X work per day.\n2. Combined Work (Two People):\nIf A can do the work in X days and B in Y days,\nTime taken together = (X × Y) / (X + Y)\n3. Combined Work (Three People):\nIf A, B, and C can do the work in X, Y, Z days,\nCombined rate = 1/X + 1/Y + 1/Z\nTotal time = 1 / (Combined rate)\n4. Work Done in 'n' Days:\nWork done = n × (Individual or combined rate)\n5. Remaining Work Formula:\nIf partial work is done, Remaining work = 1 - (Work completed)\nPractice Problems:\n1. A can do a work in 12 days, B can do it in 16 days. How many days will they take together?\n2. A can finish a work in 10 days, B in 15 days, a

In [17]:
# Same search, but each hit comes back as a (Document, score) pair.
vectorstore.similarity_search_with_score(
    k=1,
    query='which question is for work done in n days?',
)

[(Document(metadata={'page_label': '1', 'total_pages': 2, 'page': 0, 'creationdate': 'D:20250719052708', 'producer': 'PyFPDF 1.7.2 http://pyfpdf.googlecode.com/', 'source': '../Data/Time_and_Work_Formulas_Practice.pdf', 'creator': 'PyPDF'}, page_content="Time & Work Formulas and Practice Problems\nFormulas:\n1. Individual Work Rate:\nIf A can do the work in X days, A's rate = 1/X work per day.\n2. Combined Work (Two People):\nIf A can do the work in X days and B in Y days,\nTime taken together = (X × Y) / (X + Y)\n3. Combined Work (Three People):\nIf A, B, and C can do the work in X, Y, Z days,\nCombined rate = 1/X + 1/Y + 1/Z\nTotal time = 1 / (Combined rate)\n4. Work Done in 'n' Days:\nWork done = n × (Individual or combined rate)\n5. Remaining Work Formula:\nIf partial work is done, Remaining work = 1 - (Work completed)\nPractice Problems:\n1. A can do a work in 12 days, B can do it in 16 days. How many days will they take together?\n2. A can finish a work in 10 days, B in 15 days, 

In [25]:
# update document

# Replacement text and metadata for the stored entry.
update_doc1 = Document(
    page_content='hello there i dont know what to write',
    metadata={'producer': 'Roshan'},
)

# Resolve the target id at run time instead of pasting a UUID: the documents
# were added without explicit ids, so the ids differ from run to run and a
# copied value only matches the run it was copied from.
stored_ids = vectorstore.get(limit=1)['ids']
assert stored_ids, 'vector store is empty - add documents before updating'

# Overwrite the first stored entry with the replacement document.
vectorstore.update_document(document_id=stored_ids[0], document=update_doc1)

In [26]:
# Re-inspect the collection to see the effect of the update.
vectorstore.get(
    include=['embeddings', 'documents', 'metadatas'],
)

{'ids': ['405eaefc-f79c-476b-8bd6-a47057ff4403',
  '1fa2a150-0dfb-4ae8-8e9e-a6211a10db0a'],
 'embeddings': array([[-8.12359452e-02,  1.00782536e-01,  6.52246028e-02,
          3.46465483e-02, -2.71190964e-02, -5.96097019e-03,
          1.32117614e-01,  8.25791899e-03,  7.27630313e-03,
          2.57399231e-02, -1.96887739e-02, -3.98907065e-02,
         -6.15069456e-03, -5.27728572e-02,  5.89777902e-02,
          1.70831289e-02,  1.39872842e-02, -2.25598011e-02,
         -7.10151270e-02,  1.29074547e-02,  5.76988049e-02,
          5.15709445e-02, -5.95272668e-02,  1.10537810e-02,
         -1.39265522e-01,  4.12025526e-02,  2.20978223e-02,
          3.55228819e-02, -3.60905156e-02,  4.47143279e-02,
          2.80625623e-04,  1.89272910e-02,  4.15527374e-02,
         -1.43538564e-02,  5.78945838e-02,  6.11963794e-02,
         -8.47810321e-03, -6.46269843e-02, -6.03344990e-03,
         -3.34590301e-02,  4.17687669e-02, -6.92855716e-02,
          4.05624434e-02, -1.56532079e-02,  2.13690400

In [28]:
# delete document

# Resolve the id at run time instead of pasting a UUID copied from an earlier
# run's output; a stale id would not match anything in a freshly built store.
stored_ids = vectorstore.get(limit=1)['ids']
assert stored_ids, 'vector store is empty - nothing to delete'

# Remove the first stored entry.
vectorstore.delete(ids=stored_ids[:1])

In [29]:
# Re-inspect the collection to confirm what remains after the deletion.
remaining_fields = ['embeddings', 'documents', 'metadatas']
vectorstore.get(include=remaining_fields)

{'ids': ['1fa2a150-0dfb-4ae8-8e9e-a6211a10db0a'],
 'embeddings': array([[ 3.78106534e-02,  8.29224810e-02,  4.25691493e-02,
         -4.38981503e-03, -9.68769789e-02,  1.20759793e-02,
         -5.29643968e-02, -2.17280313e-02, -4.66065966e-02,
         -3.87680787e-03, -5.99372983e-02, -1.47981504e-02,
         -1.18080014e-02,  2.82115173e-02, -3.90375289e-03,
          3.64256427e-02, -8.12722817e-02, -2.68795397e-02,
         -1.59341484e-01, -5.45063987e-02,  2.07175184e-02,
         -1.32470727e-01, -3.59599367e-02,  1.96158532e-02,
          7.48308599e-02, -4.89939116e-02,  5.08938432e-02,
         -1.30740609e-02,  2.79074423e-02, -3.38515043e-02,
         -1.08565819e-02,  8.93222168e-03,  3.60554978e-02,
         -1.29768131e-02,  1.45618189e-02, -6.98679406e-03,
         -4.17214781e-02,  4.65083532e-02,  3.62482667e-02,
          4.35273051e-02, -3.93354110e-02,  1.24900835e-02,
         -2.69303098e-02,  5.30465730e-02, -5.88556379e-02,
          4.54862379e-02, -5.4137567