In [1]:
# Load variables from the local .env file into the process environment so that
# GOOGLE_API_KEY can be read from os.environ in a later cell.
import os
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from langchain.embeddings import GooglePalmEmbeddings
from langchain.llms import GooglePalm
from langchain.document_loaders.pdf import PyPDFLoader
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

In [3]:
# Keep the key in one variable so the embedding clients created below can share it.
# Indexing os.environ directly raises KeyError right here if the variable is missing.
google_api_key = os.environ["GOOGLE_API_KEY"]

### Loading PDF file

In [4]:
# Read the budget speech PDF and split it into Document objects for indexing.
pdf_path = 'budget_speech.pdf'
loader = PyPDFLoader(file_path=pdf_path)
doc = loader.load_and_split()

In [5]:
# Spot-check one entry of the split output (index 10) to confirm the PDF text
# was extracted in readable form.

doc[10]

Document(page_content='9 \n (4) Goods and Services Tax has enabled ‘One Nation, \nOne Market, One Tax’ . Tax reforms have led to \ndeepening  and widening of tax base.  \n(5) Strengthening of the financial sector has helped in \nmaking savings, credit and investments more \nefficient.   \n(6) GIFT IFSC and the unified regulatory authority, IFSCA \nare creating a robust gateway for global capital and \nfinancial services for the economy.  \n(7) Proactive inflation management has helped keep  \ninflation within  the policy band.   \nGlobal Context  \n29. Geopolitically , global affairs are becoming more complex \nand challenging with wars and conflicts. Globalization is being \nredefined with reshoring and friend -shoring, disruption and \nfragmentation of supply chains, and competition for critical \nminerals and technologies. A  new world order is emerging after \nthe Covid pandemic.  \n30. India assumed G20  Presidency during very difficult times \nfor the world. The global economy wa

# Creating a Vector Database using FAISS DB

In [6]:
# Embed every Document with Google PaLM embeddings and build a FAISS index over them.
palm_embeddings = GooglePalmEmbeddings(google_api_key=google_api_key)
faiss_index = FAISS.from_documents(documents=doc, embedding=palm_embeddings)

In [7]:
# Show which vector-store class FAISS.from_documents returned.
type(faiss_index)

langchain_community.vectorstores.faiss.FAISS

In [8]:
# Persist the index to a folder next to the notebook so it can be reloaded later.
# The folder is given as a plain relative name: the earlier ".\VectorDB_file"
# contained the invalid escape sequence "\V" (a Deprecation/SyntaxWarning), and
# outside Windows the backslash is not a path separator, so the index would be
# written to a folder the later load_local("VectorDB_file") call cannot find.
faiss_index.save_local("VectorDB_file")

# Load from saved VectorDB and create Retriever from it

In [9]:
# Reopen the index persisted on disk, pairing it with a Google PaLM embedding
# client built from the same API key as the one used when the index was created.
# NOTE(review): recent langchain_community releases reportedly refuse to load
# unless allow_dangerous_deserialization=True is passed (the stored docstore is
# unpickled) — confirm against the installed version before upgrading.

saved_index_embeddings = GooglePalmEmbeddings(google_api_key=google_api_key)
vectorDB_saved = FAISS.load_local("VectorDB_file", embeddings=saved_index_embeddings)

In [10]:
# Only return chunks whose relevance score reaches 0.7.
# The threshold is honoured only by the "similarity_score_threshold" search type
# and has to be supplied through search_kwargs; given as a bare score_threshold=
# keyword to as_retriever it is not a retriever setting, so the retriever would
# run a plain top-k similarity search with no filtering at all.
# NOTE(review): the filter is now really applied — re-check that 0.7 is not too
# strict for these embeddings (an empty retrieval leads to the fallback answer).
retriever = vectorDB_saved.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.7},
)

# Prompt Engineering and Creating RetrievalQA chain for Similarity Search

In [11]:
# Instructions that tie the model's answer to the supplied context and give it an
# explicit fallback sentence for questions the context does not cover.
# NOTE(review): the wording mentions a "response" section; the PDF chunk displayed
# earlier in this notebook does not appear to contain one — confirm the phrasing
# is intended for this document.
prompt_template = """Given the following context and a question, generate an answer based on this context only.
In the answer try to provide as much text as possible from "response" section in the source document context without making much changes.
If the answer is not found in the context, kindly state 'I don't know the answer.' Don't try to make up an answer.

CONTEXT: {context}

QUESTION: {question}"""

# The two placeholders in the template are declared explicitly as input variables.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
prompt

PromptTemplate(input_variables=['context', 'question'], template='Given the following context and a question, generate an answer based on this context only.\nIn the answer try to provide as much text as possible from "response" section in the source document context without making much changes.\nIf the answer is not found in the context, kindly state \'I don\'t know the answer.\' Don\'t try to make up an answer.\n\nCONTEXT: {context}\n\nQUESTION: {question}')

In [22]:
# PaLM text model used to write the final answer; temperature is set low (0.1).
llm = GooglePalm(
    temperature=0.1,
    google_api_key=os.environ["GOOGLE_API_KEY"],
)

# Retrieval QA chain: the retriever supplies the chunks, the custom prompt frames
# them, and the question is expected under the "query" input key. The source
# documents are returned alongside the answer.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    input_key="query",
    return_source_documents=True,
)

In [23]:
def response(user_query):
    """Answer a question with the retrieval QA chain.

    Args:
        user_query: The question text. It is passed to the chain under its
            "query" input key.

    Returns:
        The value stored under "result" in the chain output, i.e. the generated
        answer. Any other keys in the output (such as source documents) are
        not returned.
    """
    # Use the Runnable-style invoke() entry point; calling the chain object
    # directly (chain(...)) is deprecated in current LangChain releases.
    res = chain.invoke({"query": user_query})
    return res["result"]

# Sample Response

In [24]:
# A factual question asked against the indexed speech.
response('Who is the Finance minister?')

'Nirmala Sitharaman'

In [25]:
# An unrelated question, to check that the prompt's fallback reply is used.
response('What is the famous place in US?')

"I don't know the answer."

In [26]:
# A question whose answer is a list of items drawn from the speech.
response('Mention schemes for Agriculture Sector?')

'Nano DAP, Atmanirbhar Oil Seeds Abhiyan, Dairy Development, Matsya Sampada, Pradhan Mantri Kisan Sampada Yojana, Pradhan Mantri Formalisation of Micro Food Processing Enterprises Yojana'