In [2]:
# Dependencies — uncomment to install into the active kernel's environment
# (`%pip` targets the running kernel, unlike `!pip`):
# %pip install -q streamlit google-generativeai python-dotenv langchain PyPDF2 chromadb faiss-cpu langchain_google_genai langchain-community

In [3]:
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
import google.generativeai as genai
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv



  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Load variables from a local .env file into os.environ (if such a file exists).
load_dotenv()

# Read the key once. os.getenv reads os.environ, so there is no need to write
# the value back; doing so with a missing key raised an opaque
# "TypeError: str expected, not NoneType".
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Add it to a .env file next to this notebook "
        "or export it in the environment before running this cell."
    )

# Configure the google.generativeai client with the key.
genai.configure(api_key=GOOGLE_API_KEY)

In [5]:
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every PDF.

    Args:
        pdf_docs: Iterable of PDF sources; each item is passed unchanged to
            ``PdfReader``.

    Returns:
        str: A single leading space followed by the text of each page, in
        document order and then page order, joined with no separator.
    """
    # Lazily walk documents, then pages, so each reader is only opened when
    # its pages are about to be consumed.
    page_texts = (
        page.extract_text()
        for source in pdf_docs
        for page in PdfReader(source).pages
    )
    return " " + "".join(page_texts)

In [6]:
def get_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split ``text`` into overlapping chunks for embedding.

    Args:
        text: The text to split.
        chunk_size: Maximum size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 10000.
        chunk_overlap: Overlap between consecutive chunks, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 1000.

    Returns:
        The list of chunks produced by the splitter's ``split_text``.
    """
    # NOTE(review): 10000-character chunks may exceed the input limit of the
    # embedding model used downstream — confirm against the model's documented
    # token limit and pass a smaller chunk_size if so.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return text_splitter.split_text(text)

In [7]:
def get_vector_store(text_chunks, index_path="faiss_index", embedding_model="models/embedding-001"):
    """Embed ``text_chunks`` and persist them as a local FAISS index.

    Args:
        text_chunks: Non-empty list of text chunks to embed.
        index_path: Folder the index is saved to. Defaults to "faiss_index".
        embedding_model: Google Generative AI embedding model name. Defaults
            to "models/embedding-001".

    Raises:
        ValueError: If ``text_chunks`` is empty.
    """
    # An empty chunk list (e.g. a scanned PDF with no extractable text) cannot
    # be indexed; fail here with an actionable message instead of an obscure
    # error from inside the vector-store construction.
    if not text_chunks:
        raise ValueError(
            "No text chunks to index; the source documents yielded no extractable text."
        )

    # Create embeddings using a Google Generative AI model
    embeddings = GoogleGenerativeAIEmbeddings(model=embedding_model)

    # Create a vector store using FAISS from the provided text chunks and embeddings
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)

    # Save the vector store locally under index_path
    vector_store.save_local(index_path)

In [27]:
def get_conversational_chain(model_name="gemini-pro", temperature=0.3):
    """Build a "stuff" question-answering chain around a Gemini chat model.

    Args:
        model_name: Chat model identifier passed to ``ChatGoogleGenerativeAI``.
            Defaults to "gemini-pro".
        temperature: Sampling temperature passed to the model. Defaults to 0.3.

    Returns:
        The chain produced by ``load_qa_chain`` with ``chain_type="stuff"`` and
        a prompt that takes ``context`` and ``question`` variables.
    """
    # NOTE(review): the prompt asks for "ten sentences minimum" and a
    # "concise" answer at the same time — confirm which is intended. The text
    # is left unchanged here so model behavior does not shift silently.
    prompt_template = """
    You are an assistant for question-answering tasks.
    Use the following context to answer the question.
    If you don't know the answer, just say that you don't know.
    Use ten sentences minimum and show the answer in bullet points and keep the answer concise.\n
    Question: {question} \n
    Context: {context} \n
    
    Answer:
    """

    # NOTE(review): confirm the default model name is still served by the API;
    # callers can override it through model_name.
    model = ChatGoogleGenerativeAI(model=model_name, temperature=temperature)

    # Prompt with the two variables the "stuff" chain fills in: the retrieved
    # documents ("context") and the user's "question".
    prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )

    return load_qa_chain(model, chain_type="stuff", prompt=prompt)

In [29]:
def user_input(user_question, index_path="faiss_index"):
    """Answer ``user_question`` from the locally saved FAISS index.

    Loads the index at ``index_path``, retrieves the documents most similar to
    the question, runs them through the question-answering chain, prints the
    answer and returns it.

    Args:
        user_question: The question to answer.
        index_path: Folder containing the saved FAISS index. Defaults to
            "faiss_index".

    Returns:
        str: The chain's ``output_text`` (also printed to stdout).
    """
    # The same embedding model must be used for querying as for indexing.
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    # SECURITY: allow_dangerous_deserialization=True permits pickle loading,
    # which can execute arbitrary code. Only load an index that was created
    # locally by a trusted process; never point this at an untrusted file.
    new_db = FAISS.load_local(
        index_path, embeddings, allow_dangerous_deserialization=True
    )

    # Retrieve the stored chunks most similar to the question.
    similar_docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()

    # Use invoke(): calling the chain object directly is deprecated in LangChain.
    response = chain.invoke(
        {"input_documents": similar_docs, "question": user_question},
        return_only_outputs=True,
    )
    answer = response["output_text"]

    # Print the response to the console
    print("response - ", answer)

    # Returned so a UI layer (e.g. Streamlit's st.write) can display it.
    return answer

In [28]:

# Build the index: extract the PDF text, split it into chunks, then embed the
# chunks and persist them via get_vector_store.
pdf_docs = [os.path.join("data", "Gen AI.pdf")]
raw_text = get_pdf_text(pdf_docs)
text_chunks = get_chunks(raw_text)
get_vector_store(text_chunks)

In [30]:
# Ask a sample question; user_input prints the answer itself, and the trailing
# semicolon keeps the cell from echoing any return value.
user_input("What are Simple explainers?");

response -      - Simple explainers are short, easy-to-understand descriptions that break down complex concepts into digestible chunks.
    - Generative AI can help by quickly generating clear and concise explanations, making complicated topics more accessible to a wider audience.
