In [7]:
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain_ollama import OllamaEmbeddings
from langchain.vectorstores import Chroma

persist_directory = "../../data/chroma1/"

# Use Ollama based Llama model from local machine
embeddings = OllamaEmbeddings(
    model="llama3.2",
    temperature=0
)

# Load the docuemnts 
loader = PyPDFDirectoryLoader("../../data/pdf/")
docs = loader.load()

# Generate embeddings and persist in the vector store. Uses LangChain's Recursive Text chunking by default.
#TODO: Customise chunk size, overlap and other params
#TODO: Add additional metadata to the chunk Document objects for use during retrieval.  
chroma_db = Chroma.from_documents(
    documents=docs,
    embedding = embeddings,
    persist_directory=persist_directory,
    collection_name="doc_search_demo"
)
chroma_db.persist()

  chroma_db.persist()


In [None]:
# 
from langchain_ollama.llms import OllamaLLM
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


llm = OllamaLLM(
    model="llama3.2",
    temperature=0
)

retriever = chroma_db.as_retriever(search_kwargs={'k': 3})

system_prompt = (
    "Use the given context to answer the question. "
    "If you don't know the answer, say you don't know. "
    "Use three sentence maximum and keep the answer concise. "
    "Context: {context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

question_answer_chain = create_stuff_documents_chain(llm, prompt)

chain = create_retrieval_chain(retriever, question_answer_chain)
chain.invoke({"input": "What is Taj Mahal famous for?"})


{'input': 'What is Taj Mahal famous for?',
 'context': [Document(metadata={'creationdate': '2025-03-27T12:33:00+00:00', 'creator': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/128.0.0.0 Safari/537.36', 'moddate': '2025-03-27T12:33:00+00:00', 'page': 0, 'page_label': '1', 'producer': 'Skia/PDF m128', 'source': '../../data/pdf/Chichen_Itza.pdf', 'title': 'Chichen Itza - Wikipedia', 'total_pages': 28}, page_content="Chichén Itzá\nThe Temple of Kukulcán (El Castillo) is the\nmost famous of the buildings in Chichen Itza.\nLocation within Mesoamerica\nLocation Yucatán, Mexico\nRegion Yucatán\nCoordinates 20°40′59″N 88°34′7″W\nHistory\nPeriods Late Classic to Early\nPostclassic\nCultures Maya civilization\nUNESCO World Heritage Site\nOfficial name Pre-Hispanic City of\nChichen-Itza\nType Cultural\nCriteria i, ii, iii\nDesignated 1988 (12th session)\nReference no. 483 (https://whc.unesco.org/\nen/list/483)\nRegion Latin America and the\nCaribbean\nChic