In [None]:

import os
from dotenv import load_dotenv
import google.generativeai as genai

# Pull variables defined in a local .env file into the process environment
load_dotenv()

# Read the Gemini API key and fail fast with a clear message when it is absent,
# rather than letting the first API call fail later with an opaque auth error
api_key = os.getenv('GOOGLE_API_KEY')
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; add it to your .env file or environment."
    )
genai.configure(api_key=api_key)

In [None]:
#importing the necessary libraries
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os

In [None]:
# Chat model that produces the answers
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0.4,
)
# Embedding model; it is handed to the vector store when the chunks are indexed
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

In [None]:

from langchain_community.document_loaders import PDFPlumberLoader
 #loading the document
loader = PDFPlumberLoader("sample.pdf")

# Split the document into overlapping chunks.
# `separators` must be a list of strings: a bare string such as "\n\n" is
# iterated character by character, so it behaves like ["\n", "\n"] and never
# splits on paragraph breaks. The list below tries paragraphs first, then
# lines, then words, then single characters, so no chunk exceeds `chunk_size`.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    chunk_size=2500,
    chunk_overlap=500,
    length_function=len,
    is_separator_regex=False,
)

# Load the PDF and split it in one step
chunks = loader.load_and_split(text_splitter)
# Show how many chunks were produced
len(chunks)



In [None]:
# Preview the first few chunks only; displaying every chunk floods the cell
# output and bloats the saved notebook
chunks[:3]

In [None]:
# Embed every chunk and index the resulting vectors in a Chroma vector store
vectordb = Chroma.from_documents(documents=chunks, embedding=embeddings)
# Expose the store as a retriever; k=1 asks for a single result per query
retriever = vectordb.as_retriever(
    search_kwargs={"k": 1},
)


In [None]:
# Prompt text with two placeholders: {context} and {input}
template = """
You are a helpful AI assistant.
Answer based on the context provided.
context: {context}
input: {input}
answer:
"""
# Wrap the raw text in a PromptTemplate
prompt = PromptTemplate.from_template(template)
# Chain that combines the LLM with the prompt
combine_docs_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
# Full pipeline: the retriever combined with the chain above
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=combine_docs_chain,
)

In [None]:
# Run a sample question through the retrieval chain
response = retrieval_chain.invoke(
    {"input": "What are water ices?"}
)
# Display the value stored under the "answer" key
response["answer"]