In [57]:
import os
from dotenv import load_dotenv
from langchain.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Load variables from a .env file into the process environment. With
# override=True, values from the file replace variables that are already set.
# The call's return value is left as the cell's last expression, so it is displayed.
load_dotenv(override=True)

True

In [34]:
# Fail fast with an actionable message when the key is absent.
# os.getenv() returns None for a missing variable, and assigning None into
# os.environ raises an opaque "TypeError: str expected, not NoneType".
google_api_key = os.getenv('GOOGLE_API_KEY')
if not google_api_key:
    raise EnvironmentError(
        "GOOGLE_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )
# Re-export explicitly so libraries that read the key from the environment see it.
os.environ['GOOGLE_API_KEY'] = google_api_key

In [41]:
# Model identifiers are named once here so they are easy to find and swap.
CHAT_MODEL_NAME = 'models/gemini-2.5-flash'
EMBEDDING_MODEL_NAME = 'gemini-embedding-001'

# Chat model that produces the final answer.
llm = ChatGoogleGenerativeAI(model=CHAT_MODEL_NAME)
# Embedding model handed to the vector store.
embedding = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL_NAME)

In [42]:
# Source documents live in ./data relative to the current working directory.
file_path = os.path.join(os.getcwd(), "data")
loader = DirectoryLoader(
    path=file_path,
    # Match PDF files only (any letter case). A catch-all "**/*" pattern would
    # hand every visible file in the folder to PyPDFLoader, so a stray
    # .txt/.csv/image next to the PDFs would abort the whole load.
    glob="**/*.[pP][dD][fF]",
    loader_cls=PyPDFLoader
)
docs = loader.load()

# Split the loaded documents into overlapping chunks: at most 500 characters
# each, with 100 characters shared between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100
)
documents = text_splitter.split_documents(docs)

# Stop early with a clear message; an empty chunk list would otherwise surface
# as an obscure error while the index is being built.
if not documents:
    raise ValueError(
        f"No text chunks were produced from PDF files under {file_path!r}. "
        "Check that the directory contains readable PDF documents."
    )

# Embed every chunk and build the in-memory FAISS index used for retrieval.
vector_store = FAISS.from_documents(documents, embedding)

In [43]:
# Expose the vector store as a retriever. No search_type or search_kwargs are
# passed, so the store's default retrieval settings apply.
retriever = vector_store.as_retriever()

In [51]:
# The template is spelled out one literal per line so that its exact
# whitespace (leading indentation and trailing spaces) is explicit.
# It has two placeholders: {context} and {question}.
prompt_template = (
    "\n"
    "        Answer the question based on the context provided below. \n"
    "        If the context does not contain sufficient information, respond with: \n"
    '        "I do not have enough information about this."\n'
    "\n"
    "        Context: {context}\n"
    "\n"
    "        Question: {question}\n"
    "\n"
    "        Answer:"
)

# Wrap the raw text in a PromptTemplate that expects both placeholders.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [55]:
def format_docs(docs):
    """Merge the text of several documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line ("\\n\\n"). An empty input yields an empty string.
    """
    page_texts = (document.page_content for document in docs)
    return "\n\n".join(page_texts)

In [73]:
# Stage 1: retrieve documents for the incoming question and flatten them to text.
retrieve_context = retriever | format_docs

# Stage 2: build the prompt inputs. "context" comes from retrieval, while
# "question" is the chain's input passed through unchanged.
chain_inputs = {"context": retrieve_context, "question": RunnablePassthrough()}

# Stage 3: fill the prompt, call the chat model, and parse the reply to a plain string.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

In [74]:
# Question sent through the RAG chain; the answer text is kept in `response`.
user_question = "What is the craze behind the research paper attention is all you need why it is so special?"
response = rag_chain.invoke(user_question)