# PDF Chatbot

In [3]:
pip install -r "./requirements.txt" -q # type: ignore

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 25.2
[notice] To update, run: c:\Users\rafro\.pyenv\pyenv-win\versions\3.10.11\python.exe -m pip install --upgrade pip


## Imports

In [20]:
import PyPDF2, os, csv
import streamlit as st
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain


## get_pdf_text(pdf_docs)

In [5]:
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of the given PDFs.

    Args:
        pdf_docs: Iterable of PDF sources. Each item is either a filesystem
            path or an already-open binary file-like object (anything with a
            ``read`` method, such as an uploaded file). ``None`` or an empty
            iterable is accepted.

    Returns:
        str: The text of all pages in order, each page preceded by a single
        space. An empty string when there is nothing to read.
    """

    def _pages_text(stream):
        # Read every page eagerly so the stream may be closed afterwards.
        reader = PyPDF2.PdfReader(stream)
        # Defensive: treat a missing/empty extraction result as empty text so
        # the concatenation below never fails on a page without text.
        return [page.extract_text() or "" for page in reader.pages]

    page_texts = []
    for doc in pdf_docs or []:
        if hasattr(doc, "read"):
            # Already a binary stream: pass it straight to the reader, since
            # calling open() on a file object would raise TypeError.
            page_texts.extend(_pages_text(doc))
        else:
            with open(doc, 'rb') as pdf_file:
                page_texts.extend(_pages_text(pdf_file))

    # Same layout as before (a single space before each page's text), built
    # with one join instead of repeated string concatenation.
    return "".join(" " + text for text in page_texts)

## get_text_chunks(text)

In [7]:
def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split ``text`` into overlapping chunks for embedding.

    Args:
        text: The full text to split.
        chunk_size: Maximum size of each chunk, as measured by the splitter's
            length function (characters by default). Defaults to the
            previously hard-coded 10000.
        chunk_overlap: Amount of text shared between consecutive chunks, in
            the same unit. Defaults to the previously hard-coded 1000.

    Returns:
        list[str]: The chunks produced by ``RecursiveCharacterTextSplitter``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)

## get_vector_store(chunks)

In [10]:
def get_vector_store(chunks):
    """Embed the text chunks, build a FAISS index and save it to ``faiss_index``.

    Args:
        chunks: Non-empty list of text strings to embed and index.

    Returns:
        The FAISS vector store that was built and saved. Callers that ignore
        the return value keep working as before.

    Raises:
        ValueError: If ``chunks`` is empty, i.e. there is no text to index
            (for example when no text could be extracted from the PDFs).
    """
    # Fail early with a clear message instead of letting an empty list reach
    # the embedding/indexing calls.
    if not chunks:
        raise ValueError("No text chunks to index; the documents contained no extractable text.")

    embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
    vector_store = FAISS.from_texts(chunks, embeddings)
    # Persist the index locally so it can be reloaded at question time.
    vector_store.save_local("faiss_index")
    return vector_store

## Build the Question-Answering Chain (get_conversational_chain)

In [15]:
def get_conversational_chain():
    """Build the "stuff" question-answering chain used to answer from documents.

    The chain is created with ``load_qa_chain(chain_type="stuff")`` around a
    Gemini chat model and a prompt that instructs the model to answer only
    from the supplied context.

    Returns:
        The chain object returned by ``load_qa_chain``. It expects the inputs
        ``input_documents`` and ``question``.
    """
    # The "stuff" chain inserts the text of the input documents into the
    # prompt under the variable name `context`, so the template must contain
    # a {context} placeholder in addition to {question}.
    prompt_template = """
    Answer as detailed as possible based on the given context and answer “answer is not available in the context” if the answer is not in the context.

    Context:
    {context}

    Question : {question}

    Answer:
    """

    model = ChatGoogleGenerativeAI(model="gemini-2.5-flash",
                                   client=None,  # kept from the original call; no explicit client is supplied
                                   temperature=0.3,
                                   )
    # input_variables must be a list of names, not a single string.
    prompt = PromptTemplate(template=prompt_template,
                            input_variables=["context", "question"])
    chain = load_qa_chain(llm=model, chain_type="stuff", prompt=prompt)
    return chain

## User Input

In [16]:
def user_input(user_question):
    """Answer a question using the FAISS index saved in ``faiss_index``.

    Loads the saved index, retrieves the chunks most similar to the question
    and runs them through the question-answering chain.

    Args:
        user_question: The question text to answer.

    Returns:
        The chain's ``output_text`` value (the generated answer).
    """
    # The query must be embedded with the same model that was used when the
    # index was built; otherwise the similarity search compares vectors from
    # different embedding spaces.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/gemini-embedding-001")  # type: ignore

    # NOTE(security): allow_dangerous_deserialization=True permits pickle
    # loading. Only load an index that this application created itself.
    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()

    # The chain builds the prompt's context from `input_documents` itself, so
    # no separately joined "context" string is passed. `invoke` replaces the
    # deprecated direct call on the chain object.
    response = chain.invoke(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )

    return response['output_text']

## save_user_info(name, phone, email)

In [19]:
def save_user_info(name, phone, email, csv_path='user_info.csv'):
    """Append one contact record to a CSV file, writing the header when needed.

    Args:
        name: Value for the ``Name`` column.
        phone: Value for the ``Phone`` column.
        email: Value for the ``Email`` column.
        csv_path: Destination CSV file. Defaults to ``user_info.csv``, the
            path that was previously hard-coded.

    Returns:
        None. The record is appended to ``csv_path``.
    """
    fieldnames = ['Name', 'Phone', 'Email']
    # A header row is needed when the file is new or exists but is empty.
    # This must be decided before opening, because append mode creates the file.
    needs_header = not os.path.isfile(csv_path) or os.path.getsize(csv_path) == 0

    # newline='' lets the csv module control line endings; UTF-8 keeps
    # non-ASCII names writable regardless of the platform default encoding.
    with open(csv_path, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        writer.writerow({'Name': name, 'Phone': phone, 'Email': email})

## Streamlit App

In [None]:
def main():
    """Render the Streamlit page and the sidebar controls for PDF processing.

    Configures the page, shows a sidebar with a multi-file uploader and a
    "Submit & Process" button, and, when the button is clicked with files
    uploaded, extracts the text, splits it into chunks and builds the vector
    store. ``st.session_state.submitBtn_state`` records whether processing
    has completed during the current session.
    """
    st.set_page_config(  # Configure the Streamlit page settings
        page_title="PDF Chatbot",  # Set the browser tab title
        # page_icon="TODO",  # Set the favicon/icon for the page
        layout="wide"  # Use a wide layout for more horizontal space
    )

    # Sidebar for uploading PDF files
    with st.sidebar:  # Begin sidebar container
        st.sidebar.title("Menu")  # Display the title "Menu" in the sidebar
        pdf_docs = st.file_uploader(  # file uploader widget
            "Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)  # Allow multiple PDF uploads
        submitBtn = st.button("Submit & Process")  # Button that triggers processing

        # Initialise the flag under the same key that is later read and written.
        if "submitBtn_state" not in st.session_state:
            st.session_state.submitBtn_state = False

        if submitBtn:  # Only process in the run where the button was clicked
            if not pdf_docs:
                # Nothing uploaded yet: tell the user instead of failing downstream.
                st.warning("Please upload at least one PDF file before processing.")
            else:
                with st.spinner("Processing..."):  # Show a spinner while processing
                    raw_text = get_pdf_text(pdf_docs)  # Extract raw text from uploaded PDFs
                    text_chunks = get_text_chunks(raw_text)  # Split the text into smaller chunks
                    get_vector_store(text_chunks)  # Build or update vector store for retrieval
                    st.session_state.submitBtn_state = True  # Remember that processing finished
                    st.success("Done")  # Show a success message once processing finishes


    