In [1]:
import tqdm as notebook_tqdm
from pypdf import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os

from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI # for load the google gemini model

from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
load_dotenv()
genai.configure(api_key=os.getenv('GOOGLE_AIP_KEY'))

In [3]:
def get_pdf_text():
    text = ''
    for pdf in os.listdir('pdfs'):
        reader = PdfReader(os.path.join('pdfs',pdf))
        for page in reader.pages:
            text = text + page.extract_text()

    return text


def get_text_chunks(text):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size = 500,
        chunk_overlap = 50
    )
    chunks = text_splitter.split_text(text)

    return chunks

def get_vector(text_chunks):
    embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vectors = FAISS.from_texts(text_chunks,embedding=embedding)
    vectors.save_local('faiss_index')
    return vectors

def get_conversational_chain():
    prompt_template = '''
    Answer the question from provided context, make sure to give proper correct answer, don't give wrong answer.
    if answer is not available for given question just give the output "Answer is not available for given input \n\n"
    Context : \n {context}? \n
    Question : \n {question} \n

    Answer : 
    '''

    model = ChatGoogleGenerativeAI(model='gemini-1.5-flash',temperature=0.3)
    prompt = PromptTemplate(template=prompt_template,input_variables=['context','question'])

    chain = load_qa_chain(model,chain_type = "stuff",prompt=prompt)

    return chain


def user_input(new_question,vector):
    embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    docs = vector.similarity_search(new_question)
    chain = get_conversational_chain()

    response = chain(
        {'input_documents':docs,'question':new_question},
        return_only_outputs=True
        )
    return response



In [4]:
text = get_pdf_text()

In [5]:
chunks = get_text_chunks(text)

In [6]:
vectors = get_vector(chunks)

In [7]:
conversation = get_conversational_chain()

stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  chain = load_qa_chain(model,chain_type = "stuff",prompt=prompt)


In [8]:
ans = user_input('who is PM of india?',vector = vectors)

  response = chain(


In [9]:
print(ans)

{'output_text': 'Answer is not available for given input\n'}


In [10]:
(user_input('which model have been used in rice dicease detector application?',vector = vectors))

{'output_text': 'MobileNetV2 model was used in the Rice Disease Detector application.\n'}

In [11]:
(user_input('which model analized in rice dicease detector application?',vector = vectors))

{'output_text': 'ResNet50 and MobileNetV2 models were analyzed in the rice disease detector application.  MobileNetV2 was ultimately chosen for the Android application due to its faster prediction time.\n'}