In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
loader = PyPDFDirectoryLoader("documents/")
pages = loader.load()

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
chunks = text_splitter.split_documents(pages)

In [5]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from langchain_community.vectorstores import FAISS
vector_store=FAISS.from_documents(chunks,embeddings)

In [7]:
vector_store.save_local("faiss_index")

In [8]:
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model="gemini-pro",
                            temperature=0.3)


In [9]:
from langchain_core.prompts import PromptTemplate

template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.
Also tell me the model name and variant and add that as a signature at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""
custom_rag_prompt = PromptTemplate.from_template(template)

In [10]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [11]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 6})


In [14]:
retrieved_docs = retriever.invoke("Amrit kaal?")
print(retrieved_docs[0].page_content)

GOVERNMENT OF INDIA
BUDGET 2023-2024
SPEECH
OF
NIRMALA SITHARAMAN
MINISTER OF FINANCE
February 1,  2023


In [15]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

In [16]:
for chunk in rag_chain.stream("Who presented budget 2024?"):
    print(chunk, end="", flush=True)

I cannot answer this question as the provided context does not mention anything about the budget of 2024.
Thanks for asking!
GPT-3, Gemini