<a href="https://colab.research.google.com/github/sanaa-04/Generative_AI_Projects/blob/main/PDF_ChatBot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain-google-genai
!pip install langchain
!pip install PyPDF2
!pip install langchain_community
import itertools
import os

import google.generativeai as genai
import langchain_google_genai
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from PyPDF2 import PdfReader



In [None]:
# Gemini API keys.
# Prefer supplying real keys through the GOOGLE_API_KEY environment variable
# (comma-separated for several keys) so that secrets are never saved in the
# notebook. The list below is only the fallback used when that variable is
# unset or empty.
DEFAULT_API_KEYS = [
    'Gemini API key',
]


def load_api_keys(env=None):
    """Return the list of Gemini API keys to rotate through.

    Args:
        env: Mapping to read ``GOOGLE_API_KEY`` from. Defaults to
            ``os.environ``; exposed so the lookup can be exercised without
            touching the real environment.

    Returns:
        The non-blank, whitespace-stripped entries of the comma-separated
        ``GOOGLE_API_KEY`` value, or a copy of ``DEFAULT_API_KEYS`` when the
        variable yields no keys. The result is never empty as long as
        ``DEFAULT_API_KEYS`` is non-empty.
    """
    env = os.environ if env is None else env
    raw_value = env.get("GOOGLE_API_KEY", "")
    keys = [key.strip() for key in raw_value.split(",") if key.strip()]
    return keys or list(DEFAULT_API_KEYS)


API_KEYS = load_api_keys()

# Create a cycle iterator for the API keys (round-robin, never exhausted).
api_key_cycle = itertools.cycle(API_KEYS)

In [None]:
def configure_api():
    """Point the Gemini SDK at the next API key in the rotation.

    Each call advances ``api_key_cycle`` by one position and passes that key
    to ``genai.configure``.

    Returns:
        The API key that was just configured.
    """
    current_key = next(api_key_cycle)
    genai.configure(api_key=current_key)
    return current_key

In [None]:
def get_pdf_text(pdf_path):
    """Extract the text of every page of a PDF as one string.

    Args:
        pdf_path: Whatever ``PdfReader`` accepts as its first argument; the
            callers in this notebook pass a file path or an uploaded file.

    Returns:
        The text of all pages concatenated in page order with no separator.
        Pages that yield no text contribute nothing.
    """
    pdf_reader = PdfReader(pdf_path)
    # `or ""` keeps a page with no extractable text (None / empty result)
    # from aborting the whole document; join() also avoids rebuilding the
    # accumulated string on every page.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)

In [None]:
def get_text_chunks(text, chunk_size=7500, chunk_overlap=100):
    """Split text into overlapping chunks for embedding.

    Args:
        text: The full document text.
        chunk_size: Value forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 7500, the value this notebook has
            always used.
        chunk_overlap: Value forwarded as ``chunk_overlap``. Defaults to 100.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for
        ``text`` (the chunks passed on to ``get_vector_store``).
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)

In [None]:
# Function to generate vector store for text chunks
!pip install faiss-cpu
def get_vector_store(chunks, index_path="faiss_index"):
    """Embed text chunks with Gemini and save them as a local FAISS index.

    Args:
        chunks: Text chunks to embed and index.
        index_path: Directory the index is written to. Defaults to
            "faiss_index", which is the location ``user_input`` loads from.

    Raises:
        ValueError: If ``chunks`` is empty, e.g. because no text could be
            extracted from the PDF. Raised before any API key is consumed.
    """
    if not chunks:
        # Fail early with a readable message instead of an opaque error from
        # inside the vector-store construction.
        raise ValueError(
            "No text chunks to index; the PDF may contain no extractable text."
        )
    api_key = configure_api()
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    vector_store.save_local(index_path)



In [None]:
def get_conversational_chain():
    """Build the question-answering chain backed by a Gemini chat model.

    Rotates to the next API key via ``configure_api``, then wires a prompt
    with ``context`` and ``question`` slots and a ``gemini-1.5-pro`` model
    (temperature 0.3) into a "stuff"-type QA chain.

    Returns:
        The chain object produced by ``load_qa_chain``.
    """
    gemini_key = configure_api()

    qa_template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details.
    If the answer is not in the provided context, don't provide the wrong answer.\n\n
    Context:\n {context}?\n
    Question: \n{question}\n
    Answer:
    """
    qa_prompt = PromptTemplate(
        template=qa_template,
        input_variables=["context", "question"],
    )

    gemini_llm = ChatGoogleGenerativeAI(
        model="gemini-1.5-pro",
        google_api_key=gemini_key,
        temperature=0.3,
    )

    return load_qa_chain(llm=gemini_llm, chain_type="stuff", prompt=qa_prompt)

In [None]:
def user_input(user_question):
    """Answer a question from the FAISS index saved under "faiss_index".

    Loads the saved index, retrieves the chunks most similar to the question,
    and runs them through the chain from ``get_conversational_chain``.

    Args:
        user_question: The question text to search for and to answer.

    Returns:
        The chain's output mapping; callers read the answer text from its
        "output_text" key.
    """
    api_key = configure_api()
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
    # NOTE(security): loading the index unpickles data from disk, which is why
    # the explicit opt-in flag is required. Only load an index this notebook
    # wrote itself; never point this at a "faiss_index" from an untrusted source.
    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)
    chain = get_conversational_chain()
    # invoke() is the supported replacement for the deprecated `chain(...)`
    # call style and takes the same inputs and options.
    response = chain.invoke(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )
    return response

In [None]:
def ask_from_pdf(pdf_path):
    """Index a PDF, then answer questions about it in an interactive loop.

    Extracts the PDF text, splits it into chunks, builds and saves the vector
    index, and then repeatedly prompts on stdin. Each non-blank question is
    answered via ``user_input`` and printed. Typing ``exit`` (any case,
    surrounding whitespace ignored) ends the loop.

    Args:
        pdf_path: PDF to process; forwarded unchanged to ``get_pdf_text``.
    """
    raw_text = get_pdf_text(pdf_path)
    text_chunks = get_text_chunks(raw_text)
    get_vector_store(text_chunks)

    print("PDF processed. You can now ask questions.")
    while True:
        # Strip so that inputs such as "exit " or " exit" still quit.
        user_question = input("Enter your question (or type 'exit' to quit): ").strip()
        if user_question.lower() == 'exit':
            break
        if not user_question:
            # Blank line: re-prompt instead of spending a model call on it.
            continue
        response = user_input(user_question)
        print(f"Answer: {response['output_text']}")


In [None]:
# Example usage
# NOTE(review): this path looks like a mounted Google Drive location in Colab;
# presumably Drive must be mounted (or the path changed) before running. Confirm.
pdf_path = "/content/drive/MyDrive/the_nestle_hr_policy_pdf_2012.pdf"
# Builds the index, then blocks in an input() loop until 'exit' is typed.
ask_from_pdf(pdf_path)

PDF processed. You can now ask questions.
Enter your question (or type 'exit' to quit): please give the summary of the pdf in about 30 words
Answer: This document outlines Nestlé's Human Resources policies, emphasizing their commitment to employee growth, development, and well-being as key to the company's success. 

Enter your question (or type 'exit' to quit): What is this pdf about
Answer:     This PDF document is about the Nestlé Human Resources Policy. 

Enter your question (or type 'exit' to quit): exit


In [None]:
import gradio as gr

def process_pdf_and_answer(pdf_path, user_question):
    """Index an uploaded PDF and answer one question about it.

    This is the callback wired into the Gradio interface. The PDF is
    re-extracted and re-indexed on every call before the question is answered.

    Args:
        pdf_path: The uploaded file as delivered by the ``gr.File`` input, or
            ``None`` when nothing was uploaded. Forwarded unchanged to
            ``get_pdf_text``.
        user_question: The question text from the ``gr.Textbox`` input.

    Returns:
        The answer text, or a short instruction message when the upload or
        the question is missing.
    """
    # Validate up front so the UI shows a readable message rather than an
    # error raised from deep inside PDF parsing or the model call.
    if pdf_path is None:
        return "Please upload a PDF file first."
    question = (user_question or "").strip()
    if not question:
        return "Please enter a question."

    raw_text = get_pdf_text(pdf_path)
    text_chunks = get_text_chunks(raw_text)
    get_vector_store(text_chunks)
    response = user_input(question)
    return response['output_text']

# Web UI around process_pdf_and_answer: the two inputs (uploaded file, question
# text) are passed to the function in this order, and the string it returns is
# shown in the "Answer" box.
iface = gr.Interface(
    fn=process_pdf_and_answer,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Textbox(label="Enter your question")
    ],
    outputs=gr.Textbox(label="Answer"),
    title="PDF Question Answering",
    description="Upload a PDF and ask questions about its content."
)

# Start serving the interface. Per the recorded output below, Colab enables
# share=True automatically and exposes a temporary public URL.
iface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e10e9625860da7a80a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


