In [None]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.llms import Ollama
import streamlit as st

In [75]:
def replace_t_with_space(documents):
    """
    Replace every tab character in each document's text with a single space.

    The documents are modified in place (their ``page_content`` attribute is
    reassigned); no copies are made.

    Args:
        documents: iterable of objects exposing a string ``page_content`` attribute.

    Return:
        A new list holding the same document objects, in their original order.
    """
    def _untab(document):
        # Overwrite the text on the document itself, then hand the same object back.
        document.page_content = document.page_content.replace('\t', ' ')
        return document

    return [_untab(document) for document in documents]

In [76]:
def encode_pdf(path, chunk_size=2000, chunk_overlap=200, embedding_model="nomic-embed-text:latest"):
    """
    Chunk a PDF, embed the chunks and store them in a FAISS vector store.

    Args:
        path: path to the pdf file
        chunk_size: maximum length of each chunk, measured with ``len``
        chunk_overlap: maximum amount of overlap between consecutive chunks
        embedding_model: name of the Ollama embedding model used to encode the
            chunks; defaults to the model this function previously hard-coded

    Return:
        A FAISS vector store containing the encoded pdf content
    """

    # Load the pdf (loader is created with mode='page')
    loader = PyPDFLoader(path, mode='page')
    pages = loader.load()

    # Split the loaded documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    chunks = text_splitter.split_documents(pages)

    # Normalise tabs to spaces after splitting, so chunk boundaries are unaffected
    chunks = replace_t_with_space(chunks)

    # Embed the chunks and index them in FAISS
    embeddings = OllamaEmbeddings(model=embedding_model)
    return FAISS.from_documents(chunks, embeddings)





In [77]:
# Build the vector store for the book PDF using encode_pdf's default chunking.
# The path has no directory component, so it is resolved against the current working directory.
doc_path = "harry_potter_1.pdf"
vectorstore = encode_pdf(doc_path)

In [78]:
# Expose the vector store as a retriever; search_kwargs asks for k=2 results per query.
# NOTE(review): the variable is spelled "retriver" and other cells reference that exact
# spelling, so renaming it requires updating every use together.
retriver = vectorstore.as_retriever(search_kwargs={"k":2})

In [79]:
# Sample question (also reused by a later cell) and the documents the retriever returns for it.
query = "who are the father and mother of harry potter?"
docs=retriver.invoke(query)

In [84]:
# Show every retrieved document under a "doc no <index>" heading.
for idx, doc in enumerate(docs):
    print(f"doc no {idx}", doc, sep="\n")

doc no 0
page_content='a Black boy even taller than Ron, joined Harry at the Gryffindor table.
"Turpin, Lisa," became a Ravenclaw and then it was Ron's turn. He was
pale green by now. Harry crossed his fingers under the table and a
second later the hat had shouted, "GRYFFINDOR!"
Harry clapped loudly with the rest as Ron collapsed into the chair next
to him.' metadata={'producer': 'Acrobat Distiller 4.0 for Windows', 'creator': 'Microsoft Word 8.0', 'creationdate': '2001-02-13T16:47:14+00:00', 'subject': 'Harry Potter', 'author': 'J.K. Rowling', 'moddate': '2005-11-26T18:01:39+02:00', 'title': "Harry Potter, Book 1; The Sorcerer's Stone", 'source': 'harry_potter_1.pdf', 'total_pages': 250, 'page': 97, 'page_label': '98'}
doc no 1
page_content='1
Harry Potter and the Sorcerer's Stone
CHAPTER ONE
THE BOY WHO LIVED
Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say
that they were perfectly normal, thank you very much. They were the last
people you'd expect to be involved

In [89]:
# Prompt text for the QA chain; {context} and {question} are its two input variables.
prompt_template = """Use the following context to answer the question. If you don’t know, say so.
Context: {context}
Question: {question}
Answer: """

# Ollama model that generates the answer.
llm = Ollama(model="qwen2.5:3b")
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
# Retrieval-QA chain combining the LLM, the retriever and the custom prompt.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriver,  # `retriver` (sic) is the retriever variable created in an earlier cell
    chain_type_kwargs={"prompt": prompt}  # hand the custom prompt defined above to the chain
)

In [94]:
def chat(query):
    """
    Answer a question with the retrieval-QA chain.

    Args:
        query: the question to ask, as a string

    Return:
        The value stored under the "result" key of the chain's output
    """
    # Use .invoke() instead of calling the chain object directly: the direct
    # call form is deprecated in LangChain, and .invoke() matches how the
    # retriever is queried elsewhere in this notebook.
    result = qa_chain.invoke({"query": query})
    return result["result"]

In [95]:
# Run the sample question through the QA chain and print the answer.
# NOTE(review): the answer recorded for this cell names the Dursleys as Harry's parents,
# and the two retrieved chunks printed earlier do not mention his parents either —
# retrieval (k=2) looks like the weak spot; verify after re-running.
output=chat(query)
print(output)

Based on the context provided in "Harry Potter and the Sorcerer's Stone" (also known as Harry Potter and the Philosopher's Stone), Mr. and Mrs. Dursley are the parents of Harry Potter, not his biological parents. They live at number four, Privet Drive, and they have a small son called Dudley.

The passage states:

"The Dursleys had everything they wanted, but they also had a secret, and their greatest fear was that somebody would discover it. They didn't think they could bear it if anyone found out about the Potters. Mrs. Potter was Mrs. Dursley's sister, but they hadn't met for several years; in fact, Mrs. Dursely pretended she didn't have a sister, because her sister and her good-for-nothing husband were as unDursleyish as it was possible to be."

So, Mr. Dursley is the father of Harry Potter (by his marriage to the mother, who is not named in this context), and Mrs. Dursely is his mother. They are part of a fictitious household known as the Dursleys' that plays a significant role in

In [114]:
def main():
    """
    Render a minimal Streamlit question-answering page backed by ``chat``.

    Shows a title, a text box and an "Ask" button. When the button is pressed
    with a non-blank question, the answer returned by ``chat`` is displayed;
    when the question is empty or only whitespace, a warning is shown instead.
    """
    st.title("QA Chatbot with Ollama")
    st.write("Ask any question based on the retrieved context.")

    # User input
    query = st.text_input("Enter your question:")

    # Nothing more to render unless the button was pressed on this run.
    if not st.button("Ask"):
        return

    # Treat whitespace-only input as empty so it never triggers an LLM call.
    question = (query or "").strip()
    if not question:
        st.warning("Please enter a question.")
        return

    response = chat(question)
    st.write("### Answer:")
    st.write(response)

    

# Entry point: build the Streamlit page when this code runs as the main module.
# NOTE(review): Streamlit apps are normally launched with `streamlit run <script>.py`;
# calling main() from a notebook kernel presumably will not render a UI — confirm intended usage.
if __name__ == "__main__":
    main()