# RAG

In [4]:
# Location of the PDF that serves as the retrieval corpus.
# The path starts with `~` (the user's home directory).
from langchain_community.document_loaders import PyPDFLoader

pdf_file = "~/Desktop/langchain_concepts/hdfc_policy_doc.pdf"

In [5]:
pdf_loader = PyPDFLoader(pdf_file)
# will load each page as a document, with lazy loading
document_pages = []
async for page in pdf_loader.alazy_load():
    document_pages.append(page)

In [6]:
len(document_pages)

48

In [7]:
document_pages[0]

Document(metadata={'producer': 'Microsoft® Word LTSC', 'creator': 'Microsoft® Word LTSC', 'creationdate': '2025-03-10T17:06:03+05:30', 'author': 'harinis', 'moddate': '2025-03-10T17:06:03+05:30', 'source': '/Users/sarmehta/Desktop/langchain_concepts/hdfc_policy_doc.pdf', 'total_pages': 48, 'page': 0, 'page_label': '1'}, page_content='Page 1 of 48 \n \n \nPart A \n(Welcome Letter) \n<<Date>> \n<<Policyholder’s Name>>  \n<<Policyholder’s Address>> \n<<Policyholder’s Contact Number>> \n \nSub: Your Policy no. <<>>-HDFC Life Sampoorn Nivesh Plus \n \nDear <<Policyholder’s Name>>,  \n \nWe are glad to inform you that your proposal has been accepted and the HDFC Life Sampoorn  Nivesh Plus \nPolicy (“Policy”) being this Policy, has been issued. We have made every effort to design your Policy in a \nsimple format. We have highlighted items of importance so that you may recognize them easily. \n \nPolicy document: \nAs evidence of the insurance contract between HDFC Life Insurance Company Limit

# Chat Model/ LLM Model
If we have the resources, we can run a local LLM chat model;
otherwise, we can call a hosted LLM through its API.

In [8]:
# chat model for making the retrieval process conversational also
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline

In [1]:
# Optional: load the model locally when the machine has enough memory;
# otherwise use a hosted LLM through its API instead.
# NOTE(review): the snippet below is disabled by wrapping it in a
# triple-quoted string. Because that string is the last expression in the
# cell, the notebook echoes it back as the cell output.
"""
llm_model = HuggingFacePipeline.from_model_id(
    model_id="HuggingFaceH4/zephyr-7b-beta",
    task="text-generation",
    pipeline_kwargs=dict(
        max_new_tokens=512,
        do_sample=False,
        repetition_penalty=1.03,
    ),
)
"""

'\nllm_model = HuggingFacePipeline.from_model_id(\n    model_id="HuggingFaceH4/zephyr-7b-beta",\n    task="text-generation",\n    pipeline_kwargs=dict(\n        max_new_tokens=512,\n        do_sample=False,\n        repetition_penalty=1.03,\n    ),\n)\n'

In [2]:
#chat_model = ChatHuggingFace(llm=llm_model)

In [9]:
# Hosted alternative to a locally loaded model: talk to the LLM through the
# Groq API.
import os
from api_keys import GROQ_API_KEY
from langchain.chat_models import init_chat_model

# Expose the key through the process environment before creating the model.
os.environ.update({"GROQ_API_KEY": GROQ_API_KEY})

llm_model = init_chat_model(
    model="llama3-8b-8192",
    model_provider="groq",
)

# Text Splitter
Create a text splitter to break our document into chunks.
We can use either a recursive character splitter or a semantic splitter;
the latter is the better choice when the document's content is tied together more by semantic references.


In [18]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters: maximum size of a chunk and the overlap shared by
# neighbouring chunks.
CHUNK_SIZE = 300
OVERLAP = 30

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=OVERLAP,
)

# Embedding Model

In [11]:
# Embedding model that turns text into vectors for the index.
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# NOTE(review): presumably the vector width produced by gte-base; confirm
# against the model card, since the index is sized from this constant.
EMBED_DIMS = 768

embed_model = HuggingFaceEmbeddings(model_name="thenlper/gte-base")

# Vector index

In [12]:
# Vector store backed by a FAISS index; it starts out empty.
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain.storage import InMemoryStore

# Flat L2 index sized to the embedding dimensionality.
faiss_index = faiss.IndexFlatL2(EMBED_DIMS)

# Wire the index to the embedding model, an in-memory docstore, and an
# (initially empty) index-position -> docstore-id mapping.
vector_store = FAISS(
    embedding_function=embed_model,
    index=faiss_index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id=dict(),
)

To provide context to the LLM for retrieval, we have two options:

1. Chunk context -- If we store only the split (chunked) documents in the vector store, the retriever returns just the matching chunks, and the LLM receives those chunks as its context. This works when the chunk size is big enough and we only need to generate an answer to the user's query.

2. Document context -- However, if we need more context, and also need to give the user the related document from which the answer was generated, we must keep a mapping from each retrieved chunk back to its original document. This can be done with `ParentDocumentRetriever` in LangChain.

In [13]:
# Split the loaded pages into chunks with the configured text splitter.
# This cell only produces the chunks; they are added to the vector store
# in a separate step.
all_splits = text_splitter.split_documents(document_pages)

In [16]:
all_splits[10]

Document(metadata={'producer': 'Microsoft® Word LTSC', 'creator': 'Microsoft® Word LTSC', 'creationdate': '2025-03-10T17:06:03+05:30', 'author': 'harinis', 'moddate': '2025-03-10T17:06:03+05:30', 'source': '/Users/sarmehta/Desktop/langchain_concepts/hdfc_policy_doc.pdf', 'total_pages': 48, 'page': 0, 'page_label': '1'}, page_content='Details: XXXXXXX>> \n \nTo contact us in case of any grievance, please Click here,you may also refer to Part G. \n \nYours sincerely, \n \n \nAuthorised Signatory')

In [17]:
# Add every chunk to the vector store and keep the value it returns
# (presumably the ids assigned to the stored chunks).
# NOTE(review): re-running this cell likely adds the same chunks a second
# time; confirm, or rebuild the vector store before re-running.
document_ids = vector_store.add_documents(documents=all_splits)

# Conversational Graph
We will build the RAG application in LangGraph so that it follows a conversational flow.

START -> retrieve_node(node1) --> generate_node(node2) --> END

In [24]:
# State schema shared by the nodes of the graph

from typing_extensions import List, TypedDict
from langchain_core.documents import Document


class State(TypedDict):
    """Data carried through the retrieve -> generate flow."""

    # Question that the retrieval step searches the vector store with.
    question: str
    # Documents returned by the retrieval step.
    context: List[Document]
    # Answer text; presumably filled in by the generation step.
    answer: str

In [22]:
from langgraph.graph import StateGraph, START, END

In [23]:
# Builder for the conversational graph, parameterised with the `State` schema.
graph_builder = StateGraph(State)

In [25]:
# Node 1: retrieve the context related to the question
def retrieval_node(state: State):
    """Look up the documents most similar to the question.

    Reads ``state["question"]``, runs a similarity search against the
    module-level ``vector_store``, and returns a partial state update
    holding the matches under the ``"context"`` key.
    """
    query = state["question"]
    matches = vector_store.similarity_search(query)
    return {"context": matches}

In [26]:
# Node 2: run the LLM with the retrieved context.
# For this we first need a prompt that contains the instruction,
# the question, and the retrieved context.

In [27]:
from langchain import hub

In [28]:

# Pull the `rlm/rag-prompt` prompt from the LangChain Hub
# (presumably requires network access).
prompt = hub.pull("rlm/rag-prompt")



In [29]:
# Build the RAG prompt by hand; the template is the same text as the
# `rlm/rag-prompt` hub prompt.
# The prompt classes are imported here because this cell uses them before any
# other cell has brought them into scope.
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    PromptTemplate,
)

# A single human message whose template expects two variables:
#   {question} - the user's question
#   {context}  - the retrieved text the answer must be based on
prompt = ChatPromptTemplate(
    input_variables=['context', 'question'],
    messages=[
        HumanMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=['context', 'question'],
                input_types={},
                partial_variables={},
                template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:",
            )
        )
    ],
)

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [31]:
from langchain_core.prompts import ChatPromptTemplate