In [3]:
import tqdm as notebook_tqdm
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
from pypdf import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os

from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI # for load the google gemini model

from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv


In [16]:
# SECURITY: the Pinecone API key must never be written into the notebook source.
# Any key that was previously committed here should be treated as leaked and
# rotated in the Pinecone console.
# Load it from a local .env file (or the shell environment) instead.
load_dotenv()
if not os.environ.get('PINECONE_API_KEY'):
    raise RuntimeError(
        "PINECONE_API_KEY is not set; add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [17]:
# Pull variables from a local .env file into the process environment.
load_dotenv()

# The variable is conventionally named GOOGLE_API_KEY; the previous lookup used
# the misspelt 'GOOGLE_AIP_KEY'. The misspelt name is still honoured as a
# fallback so an existing .env file keeps working.
genai.configure(api_key=os.getenv('GOOGLE_API_KEY') or os.getenv('GOOGLE_AIP_KEY'))

In [18]:
def get_pdf_text(pdf_dir='pdfs'):
    """Concatenate the extracted text of every PDF file in ``pdf_dir``.

    Args:
        pdf_dir: Directory to scan (non-recursively). Defaults to ``'pdfs'``,
            the folder this notebook has always read from.

    Returns:
        One string containing the text of all pages, with files taken in
        sorted filename order and pages in document order. An empty string
        is returned when the directory holds no PDF files.
    """
    page_texts = []
    # os.listdir() returns entries in arbitrary order; sort so that repeated
    # runs produce the same text (and therefore the same chunks).
    for name in sorted(os.listdir(pdf_dir)):
        path = os.path.join(pdf_dir, name)
        # Skip sub-directories and stray files (e.g. .ipynb_checkpoints,
        # .DS_Store) that would make PdfReader fail.
        if not (os.path.isfile(path) and name.lower().endswith('.pdf')):
            continue
        reader = PdfReader(path)
        for page in reader.pages:
            # Guard against a falsy result so one page without extractable
            # text cannot break the concatenation.
            page_texts.append(page.extract_text() or '')

    # A single join avoids the quadratic cost of repeated string concatenation.
    return ''.join(page_texts)

In [19]:
def get_text_chunks(text, chunk_size=500, chunk_overlap=50):
    """Split ``text`` into overlapping chunks for embedding.

    Args:
        text: The full text to split.
        chunk_size: Maximum size of each chunk, passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 500.
        chunk_overlap: Overlap between consecutive chunks, passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 50.

    Returns:
        The list of chunks produced by ``split_text``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)

In [28]:
# SECURITY: read the key from the environment instead of embedding it in the
# notebook. A key that has appeared in source should be rotated.
pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])

# One-time index setup, kept for reference. Uncomment and run once if the
# index does not exist yet.
# index_name = "mychatbot"

# pc.create_index(
#     name=index_name,
#     dimension=768, # Replace with your model dimensions
#     metric="cosine", # Replace with your model metric
#     spec=ServerlessSpec(
#         cloud="aws",
#         region="us-east-1"
#     ) 
# )

In [29]:
# Extract the raw text from the PDFs, then split it into the chunks (`docs`)
# that a later cell embeds and uploads to the vector index.
text = get_pdf_text()
docs = get_text_chunks(text)

In [30]:
index_name = 'mychatbot'

# Fail early with an actionable message instead of discarding the index list
# and hitting a less obvious error further down the notebook.
if index_name not in pc.list_indexes().names():
    raise RuntimeError(
        f"Pinecone index {index_name!r} does not exist; create it first "
        "(dimension=768, metric='cosine') before running the rest of the notebook."
    )

index = pc.Index(index_name)
# Last expression of the cell: displays dimension, fullness and vector count.
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [45]:
index_name = "mychatbot"
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Upload the chunks only when the index is still empty. from_texts() stores
# every chunk under a fresh ID, so calling it on each run would keep adding
# duplicate vectors to the index.
if pc.Index(index_name).describe_index_stats().total_vector_count == 0:
    # `docs` is a list of plain strings, hence from_texts (not from_documents).
    vectorstore = PineconeVectorStore.from_texts(docs, index_name=index_name, embedding=embeddings)
else:
    # Vectors already live on the Pinecone server: just attach to the index.
    # The constructor takes no texts; its first positional argument is the
    # index object, so everything is passed by keyword.
    vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)

In [32]:
# Spot-check retrieval: display the stored chunks returned for a sample question.
vectorstore.similarity_search('which model analized in rice dicease detector application?')

[Document(id='36b2e833-f922-4870-a8a9-f9f1dba56ff1', metadata={}, page_content='[34] S. Phadikar, J. Sil, A.K. Das, Rice diseases classification using feature selection and \nrule generation techniques, Comput. Electron. Agric. 90 (2013) 76 – 85 . \n[35] Y. Wang, H. Wang, Z. Peng, Rice diseases detection and classification using \nattention based neural network and bayesian optimization, Expert Syst. Appl. 178 \n(2021), 114770, https://doi.org/10.1016/j.eswa.2021.114770 . \n[36] G. Kathiresan, M. Anirudh, M. Nagharjun, R. Karthik, Disease detection in rice'),
 Document(id='559e48d3-c7f9-462f-89b0-ac92fa2335df', metadata={}, page_content='sifying rice diseases and deficiency symptoms via smartphone-captured \nimages. The ResNet50 model performed best for cloud architectures, \nwhile MobileNetV2 was the best model for the smartphone. Finally, the \nandroid application “ Rice Disease Detector ” , compiled with the Mobi -\nleNetV2 model, performed satisfactorily in identifying multiple dis

In [33]:
def get_conversational_chain(model_name='gemini-1.5-flash', temperature=0.3):
    """Build the "stuff" question-answering chain used to answer from context.

    Args:
        model_name: Gemini chat model passed to ``ChatGoogleGenerativeAI``.
            Defaults to ``'gemini-1.5-flash'``.
        temperature: Sampling temperature for the model. Defaults to 0.3.

    Returns:
        The chain created by ``load_qa_chain``. It is invoked with the keys
        ``input_documents`` and ``question``.
    """
    # The prompt tells the model to answer only from the supplied context and
    # to return a fixed fallback sentence otherwise. A stray '?' that used to
    # follow the {context} placeholder has been removed so it no longer gets
    # appended to the retrieved text.
    prompt_template = '''
    Answer the question from provided context, make sure to give proper correct answer, don't give wrong answer.
    if answer is not available for given question just give the output "Answer is not available for given input \n\n"
    Context : \n {context} \n
    Question : \n {question} \n

    Answer : 
    '''

    model = ChatGoogleGenerativeAI(model=model_name, temperature=temperature)
    prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])

    # NOTE(review): load_qa_chain emits a deprecation warning in recent
    # LangChain releases. It is kept because callers rely on the
    # 'input_documents' / 'output_text' interface of the chain it returns.
    return load_qa_chain(model, chain_type="stuff", prompt=prompt)

In [34]:
def user_input(new_question, vector):
    """Answer ``new_question`` from the documents stored in ``vector``.

    Args:
        new_question: The user's question as a string.
        vector: Vector store to search; it must provide
            ``similarity_search(query)``.

    Returns:
        The chain's output dict; the answer is under the ``'output_text'`` key.
    """
    # Search the store that was passed in. Previously the `vector` argument
    # was ignored and the notebook-global `vectorstore` was used instead.
    docs = vector.similarity_search(new_question)
    chain = get_conversational_chain()

    # invoke() replaces the deprecated direct call `chain(...)`.
    return chain.invoke(
        {'input_documents': docs, 'question': new_question},
        return_only_outputs=True,
    )


In [35]:
# Re-runs the PDF extraction; `text` is also produced by the same call in an earlier cell.
text = get_pdf_text()

In [36]:
# Splits `text` again. NOTE(review): `chunks` is not referenced by any other
# cell visible here (the indexing cell uses `docs`) — confirm it is still needed.
chunks = get_text_chunks(text)

In [38]:
# Builds a QA chain. NOTE(review): `conversation` is not used by the later
# visible cells; user_input() builds its own chain on every call.
conversation = get_conversational_chain()

stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  chain = load_qa_chain(model,chain_type = "stuff",prompt=prompt)


In [40]:
# Question unrelated to the PDFs: the prompt instructs the model to reply with
# the fixed "Answer is not available" sentence in this case.
ans = user_input('who is PM of india?',vector = vectorstore)

  response = chain(


In [41]:
# Show the response dict returned by user_input().
print(ans)

{'output_text': 'Answer is not available for given input\n'}


In [43]:
# Ask a question that the indexed PDF content is expected to answer.
(user_input('which model have been used in rice dicease detector application?',vector = vectorstore))

{'output_text': 'MobileNetV2 model was used in the Rice Disease Detector application.\n'}

In [44]:
# Same query text as the earlier similarity_search spot-check, now answered through the QA chain.
(user_input('which model analized in rice dicease detector application?',vector = vectorstore))

{'output_text': 'The Rice Disease Detector application analyzed the ResNet50 and MobileNetV2 models.  MobileNetV2 was ultimately chosen for the application due to its faster prediction time.\n'}