# PDF Chatbot

In [3]:
pip install -r "./requirements.txt" -q # type: ignore

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 25.2
[notice] To update, run: c:\Users\rafro\.pyenv\pyenv-win\versions\3.10.11\python.exe -m pip install --upgrade pip


## Imports

In [14]:
import PyPDF2
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain


## get_pdf_text(pdf_docs)

In [5]:
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of the given PDF files.

    Args:
        pdf_docs: Iterable of file paths to PDF documents. Each path is
            opened in binary mode and parsed with ``PyPDF2.PdfReader``.

    Returns:
        str: The text of all pages, in document order then page order, with
        a single space inserted before each page's text. Returns ``""`` when
        ``pdf_docs`` is empty.
    """
    page_texts = []
    for doc in pdf_docs:
        with open(doc, 'rb') as pdf_file:
            reader = PyPDF2.PdfReader(pdf_file)
            for page in reader.pages:
                # Defensive: treat a page that yields no text (None) as empty
                # so a single unreadable page cannot abort the whole run.
                page_texts.append(page.extract_text() or "")

    # Build the result once instead of repeatedly growing a string; the
    # leading space per page keeps the output format unchanged.
    return "".join(" " + text for text in page_texts)

## get_text_chunks(text)

In [7]:
def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split a long string into overlapping chunks suitable for embedding.

    Args:
        text: The full text to split.
        chunk_size: Maximum size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 10000.
        chunk_overlap: Amount of overlap between consecutive chunks,
            forwarded to ``RecursiveCharacterTextSplitter``. Defaults to 1000.

    Returns:
        list[str]: The chunks produced by the splitter's ``split_text``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)

## get_vector_store(chunks)

In [10]:
def get_vector_store(chunks):
    """Embed text chunks, build a FAISS index from them, and save it to disk.

    Args:
        chunks: Non-empty list of text chunks to embed.

    Returns:
        The FAISS vector store that was built. It is also written to the
        local ``"faiss_index"`` directory via ``save_local``.

    Raises:
        ValueError: If ``chunks`` is empty, since there is nothing to index.
    """
    # Fail fast with a clear message before any embedding client is created.
    if not chunks:
        raise ValueError("chunks is empty; nothing to embed into the vector store")

    embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
    vector_store = FAISS.from_texts(chunks, embeddings)
    vector_store.save_local("faiss_index")
    # Hand the store back so callers can query it without reloading from disk.
    return vector_store

## Build the Question-Answering Chain

In [15]:
def get_conversational_chain():
    """Build the question-answering chain used to answer from retrieved text.

    The chain is created with ``load_qa_chain(chain_type="stuff")`` around a
    Gemini chat model and a prompt that takes two variables:

    * ``context``  - the retrieved document text, filled in by the chain.
    * ``question`` - the user's question.

    Returns:
        The chain object returned by ``load_qa_chain``.
    """
    # The "stuff" chain inserts the documents under the name `context`, so the
    # template must expose a {context} placeholder or the chain cannot be built.
    prompt_template = """
    Context : {context}
    Question : {question}
    Answer as detailed as possible based on the given context and answer “answer is not available in the context” if the answer is not in the context.

    Answer:
    """

    model = ChatGoogleGenerativeAI(model="gemini-2.5-flash",
                                   client=None,
                                   temperature=0.3,
                                   )
    # input_variables must be a list naming every placeholder in the template.
    prompt = PromptTemplate(template=prompt_template,
                            input_variables=["context", "question"])
    chain = load_qa_chain(llm=model, chain_type="stuff", prompt=prompt)
    return chain

## User Input

In [16]:
def user_input(user_question):
    """Answer a question using the FAISS index saved in ``"faiss_index"``.

    Loads the saved index, retrieves the chunks most similar to the question,
    and passes them with the question to the chain from
    ``get_conversational_chain``.

    Args:
        user_question: The question to answer.

    Returns:
        The ``"output_text"`` entry of the chain's response.
    """
    # Must be the same embedding model that was used to build the index;
    # otherwise query vectors are not comparable with the stored vectors.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/gemini-embedding-001")

    # SECURITY: load_local unpickles data from disk, which can execute
    # arbitrary code. Only load an index that this notebook created itself.
    new_db = FAISS.load_local(
        "faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()

    # The chain builds the prompt's context from `input_documents` itself, so
    # no hand-joined context string is passed. `invoke` replaces the
    # deprecated direct call `chain(...)`.
    response = chain.invoke(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )

    return response['output_text']