In [1]:
#importing modules which is used in simple RAG Project.
#below classes we used so user can interact with LLM Models.
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI
from langchain_huggingface import ChatHuggingFace,HuggingFaceEndpoint #HFE class we used to hit user query and take response from it.

#below classes we used for embedding Models
from langchain_openai import OpenAIEmbeddings  #close source model
from langchain_huggingface import HuggingFaceEmbeddings #open source model

import pdfplumber

#want to load document into workingspace using document loaders
from langchain_community.document_loaders import PDFPlumberLoader,TextLoader

#now splitting the document into chunks need splitter class
from langchain.text_splitter import RecursiveCharacterTextSplitter #this split the text document through herirachy way.

#need to store the chunks embedded document to vector store we r using Pinecone.
from pinecone import Pinecone,ServerlessSpec

#integrating pinecode with langchain.
from langchain_pinecone import PineconeVectorStore


#load the env files
from dotenv import load_dotenv

from pathlib import Path
from typing import Annotated,Optional,List,Literal,TypedDict
from dataclasses import dataclass
from loggers import logger
from Exception import CustomException
import os,sys

from langgraph.graph import StateGraph,START,END #using this class we can create Graph start or end of workflow

#if i want to add tool support to my workflow.
from langchain.tools import tool,Tool,StructuredTool

#if i want to add toolnode in my workflow 
#(toolnode means that node have list of tool here they will decide based on user query which tool need to execute)
from langgraph.prebuilt import ToolNode,tools_condition

#if i need to add memory or persistence to my workflow so that it can save the state value at every checkpoint
from langgraph.checkpoint.memory import InMemorySaver #it will save in state value to Ram memory.
from langgraph.checkpoint.sqlite import SqliteSaver

#tools_condition will  decide if tool message is present on AI response then they redirect to toolnode or end it workflow
from pydantic import BaseModel,Field,computed_field

#this class we used to change retriever object to become tool
from langchain_core.tools.retriever import create_retriever_tool

#fetching the RAG prompt from Hub.
from langchain import hub

import warnings as w
w.filterwarnings('ignore')

from langchain_core.messages import AIMessage,HumanMessage,AnyMessage,ToolMessage
load_dotenv()


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from langchain_pinecone.vectorstores import Pinecone, PineconeVectorStore


True

## step:1) model objects

In [2]:
# Chat-model and embedding-model clients used by the rest of the notebook.
# NOTE(review): no API keys are passed explicitly; presumably every client picks
# its provider key up from the environment populated by load_dotenv() - confirm.

#groq model
model1 = ChatGroq(
    model="groq/compound-mini",
    temperature=0.1
)

#openai model (this is the model later bound to the tools and used in grade_document)
model2 = ChatOpenAI(
    model="gpt-3.5-turbo",temperature=0.1
)

#hugging face model: the endpoint client is wrapped so it exposes a chat interface.
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.1-8B-Instruct",  
    task="text-generation",  
    
)
model3 = ChatHuggingFace(llm=llm)


# Embedding model handed to the Pinecone vector store further down.
# NOTE(review): the Pinecone index is created with dimension=1536, so the output
# size of this model has to match that value - re-check if the model is changed.
emb_model = OpenAIEmbeddings(model="text-embedding-3-small")

## Setting up the Pinecone database to store the embedding vectors in an index

In [3]:
#Pinecone Basic Configuration.
# The API key is read from the PINECONE_API_KEY environment variable;
# os.getenv yields None when the variable is not set.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Name of the index that stores the embedded document chunks.
index_name = "medical-book-index"

# 🔹 Ensure index exists
# Collect the name of every index this client can list; the next cell uses the
# list to decide whether the index still has to be created.
existing_indexes = [idx["name"] for idx in pc.list_indexes()]
existing_indexes

['medical-book-index']

In [4]:
import time

# Create the index only when it is missing, so re-running this cell is harmless.
if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=1536,  # NOTE(review): must equal the embedding model's vector size - confirm
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        tags={"environment": "RAGdevelopment"}
    )
    logger.info(f"🆕 Created Pinecone index: {index_name}")

    # Poll the index status instead of sleeping for a fixed time: a fixed delay
    # is either wasted time or too short, and later cells need a ready index.
    ready_deadline = time.monotonic() + 120  # give up after two minutes
    while not pc.describe_index(index_name).status["ready"]:
        if time.monotonic() > ready_deadline:
            raise TimeoutError(f"Pinecone index {index_name} was not ready within 120 seconds")
        time.sleep(1)
else:
    logger.info(f"ℹ️ Index {index_name} already exists. Skipping creation.")

[2025-09-23 01:06:27,446]-INFO-13-ℹ️ Index medical-book-index already exists. Skipping creation.


In [5]:
# Open a handle to the index and wrap it in a LangChain vector store that is
# given emb_model as its embedding function.
index = pc.Index(index_name)
vector_store = PineconeVectorStore(index=index, embedding=emb_model)
vector_store

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x10b91e745e0>

### step:2) Turning the vector store into a retriever

In [6]:
# Expose the vector store as a retriever that searches with MMR
# (maximal marginal relevance) and is configured with k=3.
base_retriever = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 3, "lambda_mult": 0.25} # a low lambda_mult asks MMR for more diverse results
)
base_retriever # a retriever is a runnable, so it can be invoked directly.

VectorStoreRetriever(tags=['PineconeVectorStore', 'OpenAIEmbeddings'], vectorstore=<langchain_pinecone.vectorstores.PineconeVectorStore object at 0x0000010B91E745E0>, search_type='mmr', search_kwargs={'k': 3, 'lambda_mult': 0.25})

### step:3) Wrapping the vector store retriever as a tool

In [7]:
# Wrap the retriever as a tool so a tool-calling model can request a lookup.
# The name and description below are part of what the model is shown when the
# tool is bound, so they are worded for the model rather than for the reader.
retriever_tool = create_retriever_tool(
    retriever=base_retriever,
    name="medical_doc_retriever",
    description=(
        "Use this tool to retrieve the most relevant information "
        "from the ingested PDFs. Best suited for answering "
        "questions that require factual context or reference to "
        "the uploaded documents."
    )
)
retriever_tool

Tool(name='medical_doc_retriever', description='Use this tool to retrieve the most relevant information from the ingested PDFs. Best suited for answering questions that require factual context or reference to the uploaded documents.', args_schema=<class 'langchain_core.tools.retriever.RetrieverInput'>, func=functools.partial(<function _get_relevant_documents at 0x0000010BB5FD5A20>, retriever=VectorStoreRetriever(tags=['PineconeVectorStore', 'OpenAIEmbeddings'], vectorstore=<langchain_pinecone.vectorstores.PineconeVectorStore object at 0x0000010B91E745E0>, search_type='mmr', search_kwargs={'k': 3, 'lambda_mult': 0.25}), document_prompt=PromptTemplate(input_variables=['page_content'], input_types={}, partial_variables={}, template='{page_content}'), document_separator='\n\n', response_format='content'), coroutine=functools.partial(<function _aget_relevant_documents at 0x0000010BB5FD5CF0>, retriever=VectorStoreRetriever(tags=['PineconeVectorStore', 'OpenAIEmbeddings'], vectorstore=<langch

#### #Testing the tool.

In [8]:
#Testing the tool.
# Invoke the tool directly with a dict carrying its single `query` argument;
# the captured output is the retrieved text as one string.
retriever_tool.invoke({"query": "Tell me about why AIDS is happened"})

[2025-09-23 01:06:30,173]-INFO-1025-HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


'Antigen—Any substance that stimulates the body to sible retrovirus that causes AIDS in humans. Two\nproduce antibody. forms of HIV are now recognized: HIV-1, which caus-\nes most cases of AIDS in Europe, North and South\nAutoimmunity—A condition in which the body’s\nAmerica, and most parts of Africa; and HIV-2, which\nimmune system produces antibodies in response to\nis chiefly found in West African patients. HIV-2, dis-\nits own tissues or blood components instead of for-\ncovered in 1986, appears to be less virulent than HIV-\neign particles or microorganisms.\n1 and may also have a longer latency period.\nCCR5—A chemokine receptor; defects in its struc-\nImmunodeficient—A condition in which the body’s\nture caused by genetic mutation cause the progres-\nimmune response is damaged, weakened, or is not\nsion of AIDS to be prevented or slowed.\nfunctioning properly.\nCD4—A type of protein molecule in human blood,\nKaposi’s sarcoma—A cancer of the connective tis-\n\nfor Disease Control

#### Note: adding more tools for the model to choose from

In [9]:
# Calculator tool
def calculator(first_num: float, second_num: float, operation: str) -> dict:
    """Apply one basic arithmetic operation to two numbers.

    Args:
        first_num: Left-hand operand.
        second_num: Right-hand operand (the divisor for ``"div"``).
        operation: One of ``"add"``, ``"sub"``, ``"mul"`` or ``"div"``.

    Returns:
        On success, a dict echoing ``first_num``, ``second_num`` and
        ``operation`` together with the computed ``result``.
        On failure (unsupported operation, division by zero, or operands the
        operator cannot handle), a dict with a single ``"error"`` message.
        The function never raises, so a tool caller always gets a dict back.
    """
    try:
        if operation == "add":
            result = first_num + second_num
        elif operation == "sub":
            result = first_num - second_num
        elif operation == "mul":
            result = first_num * second_num
        elif operation == "div":
            # Report division by zero as an error, like every other failure,
            # instead of returning a "successful" payload whose result is a string.
            if second_num == 0:
                return {"error": "Division by zero"}
            result = first_num / second_num
        else:
            return {"error": f"Unsupported operation {operation}"}
    except Exception as e:
        # Deliberately broad: any failure is handed back as data, not raised.
        return {"error": str(e)}
    return {"first_num": first_num, "second_num": second_num, "operation": operation, "result": result}

# `calculator` takes three arguments, so it has to be exposed as a StructuredTool:
# the plain `Tool` class is single-input (one string argument) and cannot pass
# first_num / second_num / operation through to the function. from_function
# derives the argument schema from the function signature.
calc_tool = StructuredTool.from_function(
    func=calculator,
    name="Calculator",
    description="Perform basic arithmetic: add, sub, mul, div"
)

# Stock price tool
import requests
def get_stock_price(symbol: str) -> dict:
    """Fetch the Alpha Vantage GLOBAL_QUOTE payload for a ticker symbol.

    Args:
        symbol: Ticker symbol to look up (for example ``"AAPL"``).

    Returns:
        The decoded JSON body of the Alpha Vantage response, or a dict with a
        single ``"error"`` key when no API key is configured.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
    """
    # The key comes from the environment (e.g. the .env file read by load_dotenv())
    # and is never written into the notebook.
    # NOTE(review): the key that was previously embedded in this URL should be
    # treated as leaked and rotated.
    api_key = os.getenv("ALPHAVANTAGE_API_KEY")
    if not api_key:
        return {"error": "ALPHAVANTAGE_API_KEY environment variable is not set"}

    # `params` lets requests URL-encode the symbol; the timeout keeps a stalled
    # API call from hanging the whole workflow.
    response = requests.get(
        "https://www.alphavantage.co/query",
        params={"function": "GLOBAL_QUOTE", "symbol": symbol, "apikey": api_key},
        timeout=10,
    )
    response.raise_for_status()
    return response.json()

# get_stock_price takes a single string argument, so it is wrapped with `Tool`.
# The name and description are what the model sees when the tool is bound.
stock_tool = Tool(
    name="StockPrice",
    func=get_stock_price,
    description="Fetch latest stock price for a given symbol"
)

In [10]:
# Combine all tools into one list; the same list is bound to the model and
# handed to the ToolNode further down.
lst_tools = [retriever_tool,stock_tool,calc_tool]
lst_tools

[Tool(name='medical_doc_retriever', description='Use this tool to retrieve the most relevant information from the ingested PDFs. Best suited for answering questions that require factual context or reference to the uploaded documents.', args_schema=<class 'langchain_core.tools.retriever.RetrieverInput'>, func=functools.partial(<function _get_relevant_documents at 0x0000010BB5FD5A20>, retriever=VectorStoreRetriever(tags=['PineconeVectorStore', 'OpenAIEmbeddings'], vectorstore=<langchain_pinecone.vectorstores.PineconeVectorStore object at 0x0000010B91E745E0>, search_type='mmr', search_kwargs={'k': 3, 'lambda_mult': 0.25}), document_prompt=PromptTemplate(input_variables=['page_content'], input_types={}, partial_variables={}, template='{page_content}'), document_separator='\n\n', response_format='content'), coroutine=functools.partial(<function _aget_relevant_documents at 0x0000010BB5FD5CF0>, retriever=VectorStoreRetriever(tags=['PineconeVectorStore', 'OpenAIEmbeddings'], vectorstore=<langc

In [11]:
# Bind the tools to the OpenAI chat model so its replies can contain tool calls.
llm_with_tool  = model2.bind_tools(tools=lst_tools)
llm_with_tool  # the bound model is still a runnable

RunnableBinding(bound=ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000010BB623E7D0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x0000010BB623DF60>, root_client=<openai.OpenAI object at 0x0000010BB623E290>, root_async_client=<openai.AsyncOpenAI object at 0x0000010BB7617310>, temperature=0.1, model_kwargs={}, openai_api_key=SecretStr('**********')), kwargs={'tools': [{'type': 'function', 'function': {'name': 'medical_doc_retriever', 'description': 'Use this tool to retrieve the most relevant information from the ingested PDFs. Best suited for answering questions that require factual context or reference to the uploaded documents.', 'parameters': {'properties': {'query': {'description': 'query to look up in retriever', 'type': 'string'}}, 'required': ['query'], 'type': 'object'}}}, {'type': 'function', 'function': {'name': 'StockPrice', 'description': 'Fetch latest stock price for a given symbol', 'p

#### #testing tool bind with llm

In [12]:
# The bound model does not execute any tool itself: as the captured output shows,
# it returns an AIMessage with empty content and a `tool_calls` entry naming the
# tool it wants run and the arguments to pass.
initial_testing_msg = [HumanMessage(content="tell me about acne?",name="Human")]
llm_with_tool.invoke(input=initial_testing_msg)

[2025-09-23 01:06:34,086]-INFO-1025-HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_oSs6EGCsWXENc36pfzxpDP6t', 'function': {'arguments': '{"query":"acne"}', 'name': 'medical_doc_retriever'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 156, 'total_tokens': 175, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-CIgcrE9YVQTpVBPrrndCJlXcWOUWb', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--55dd23ba-26f2-41cd-a16f-19be0815e29b-0', tool_calls=[{'name': 'medical_doc_retriever', 'args': {'query': 'acne'}, 'id': 'call_oSs6EGCsWXENc36pfzxpDP6t', 'type': 'tool_call'}], usage_metadata={'input_tokens': 156, 'output_tokens': 19, 'total_tokens': 175, 'input_token_details':

## step:4) Defining the state (memory) schema that holds values throughout the workflow

In [13]:
from langgraph.graph.message import MessagesState #this message state is prebuilt class that store mesaage in lst 
from langgraph.graph.message import add_messages,BaseMessage

class StateSchema(TypedDict):
    """State carried through the LangGraph workflow: the conversation messages."""
    # All messages of the run (user query, model replies, tool results).
    # `add_messages` is attached as the reducer for this key; in the workflow
    # output each node's returned messages appear appended to the list rather
    # than replacing it.
    messages : Annotated[list[BaseMessage],add_messages]

### step:5) Defining the graph object and adding nodes and edges to it so the workflow is ready to run

In [14]:
# Create the StateGraph builder; nodes and edges are added to it in later cells
# and it is compiled into a runnable workflow afterwards.
graph = StateGraph(state_schema=StateSchema)
graph

<langgraph.graph.state.StateGraph at 0x10bb7f4ea40>

#### adding nodes and edges to my graph

In [15]:
# Node action: hand the conversation to the tool-bound model and record its reply.
def user_query_or_respond(state:StateSchema) ->StateSchema:
    """Run the tool-bound chat model on the messages held in ``state``.

    The model either answers directly or replies with tool calls naming the
    tool(s) it wants executed; this node does not execute any tool itself.

    Args:
        state: Workflow state whose ``'messages'`` list is sent to the model
            in full (not only the latest human message).

    Returns:
        A partial state update: a dict whose ``'messages'`` key holds a
        one-element list containing the model's reply.
    """
    return {'messages': [llm_with_tool.invoke(input=state['messages'])]}

### doing testing

In [16]:
# Call the node function directly with a one-message state; the result is the
# partial update it would hand back to the graph.
initial_schema = StateSchema(messages=[HumanMessage(content="tell me about acne?",name="Human")])
lst_msg = user_query_or_respond(initial_schema)
lst_msg

[2025-09-23 01:06:35,269]-INFO-1025-HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


{'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_d6ORs39l0PUtDLgKdQmPMKVg', 'function': {'arguments': '{"query":"acne"}', 'name': 'medical_doc_retriever'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 156, 'total_tokens': 175, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-CIgctMypZTIT7RnhQjztN42DvojGD', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--bd368948-61ae-43d6-b721-6221e551590f-0', tool_calls=[{'name': 'medical_doc_retriever', 'args': {'query': 'acne'}, 'id': 'call_d6ORs39l0PUtDLgKdQmPMKVg', 'type': 'tool_call'}], usage_metadata={'input_tokens': 156, 'output_tokens': 19, 'total_tokens': 175, 'input_t

In [17]:
# Same check as the previous cell (this makes another model call), but each
# returned message is rendered with pretty_print() for readability.
initial_schema = StateSchema(messages=[HumanMessage(content="tell me about acne?",name="Human")])
lst_msg = user_query_or_respond(initial_schema)
for msg in lst_msg["messages"]:
    msg.pretty_print()

[2025-09-23 01:06:36,447]-INFO-1025-HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Tool Calls:
  medical_doc_retriever (call_06LGqr7SB3wJiurZgT5gBHMw)
 Call ID: call_06LGqr7SB3wJiurZgT5gBHMw
  Args:
    query: acne


#### The tool node holds every tool the workflow supports

In [18]:
# When the AI response coming from the user_query_or_respond node contains tool
# calls, the workflow is redirected to this tool node.
# The node is built from every tool in lst_tools, not only the retriever.
tools = ToolNode(tools=lst_tools)
tools

tools(tags=None, recurse=True, explode_args=False, func_accepts={'config': ('N/A', <class 'inspect._empty'>), 'store': ('store', None)}, tools_by_name={'medical_doc_retriever': Tool(name='medical_doc_retriever', description='Use this tool to retrieve the most relevant information from the ingested PDFs. Best suited for answering questions that require factual context or reference to the uploaded documents.', args_schema=<class 'langchain_core.tools.retriever.RetrieverInput'>, func=functools.partial(<function _get_relevant_documents at 0x0000010BB5FD5A20>, retriever=VectorStoreRetriever(tags=['PineconeVectorStore', 'OpenAIEmbeddings'], vectorstore=<langchain_pinecone.vectorstores.PineconeVectorStore object at 0x0000010B91E745E0>, search_type='mmr', search_kwargs={'k': 3, 'lambda_mult': 0.25}), document_prompt=PromptTemplate(input_variables=['page_content'], input_types={}, partial_variables={}, template='{page_content}'), document_separator='\n\n', response_format='content'), coroutine=

```
## Very Important Meaning
👉 Grading documents means:
using an LLM (or a heuristic) to evaluate every retrieved document and decide:
1) Is the retrieved document relevant to the user query?
2) Does it contain the right context to answer the user?
3) If several documents came back, which one is the most useful?
```

In [19]:
# Define the GradeDocument schema with pydantic.
# NOTE(review): the class docstring and the Field description presumably end up
# in the JSON schema that the parser's format instructions embed in the prompt,
# so treat them as prompt text rather than as free-form documentation - confirm.
class GradeDocumentSchema(BaseModel):
    """Grade documents using a binary score for relevance check."""

    # Declared as a plain str, so nothing here restricts the value to 'yes'/'no'.
    binary_score: str = Field(
        description="Relevance score: 'yes' if relevant, or 'no' if not relevant"
    )
    
# Pydantic output parser for GradeDocumentSchema; it also supplies the format
# instructions that grade_document places into its prompt.
from langchain_core.output_parsers import PydanticOutputParser
pydantic_parser = PydanticOutputParser(pydantic_object=GradeDocumentSchema)
pydantic_parser

PydanticOutputParser(pydantic_object=<class '__main__.GradeDocumentSchema'>)

#### messages[0] → The original user question.
#### messages[1] → The assistant's reply to the user question: an empty-content message whose tool calls say which tool should be used to answer it.
#### messages[2] → The tool response (the document retrieved).

# Router function: redirects the workflow based on the document grade

In [None]:
# Router function: grades the retrieved document and names the next node.
def grade_document(state:StateSchema) ->Literal['generate_answer','rewrite_question']:
    """Decide whether the retrieved document is relevant to the user's question.

    The first message in ``state['messages']`` is read as the user question and
    the last message as the retrieved document text. Both are placed into a
    grading prompt, sent to ``model2``, and the reply is parsed by
    ``pydantic_parser`` into a ``GradeDocumentSchema``.

    Args:
        state: Workflow state; ``state['messages']`` must contain at least one
            message. NOTE(review): taking index 0 as the question assumes the
            state holds a single user turn - confirm before adding persistence.

    Returns:
        ``"generate_answer"`` when the parsed ``binary_score`` is "yes"
        (ignoring case and surrounding whitespace), otherwise
        ``"rewrite_question"``.
    """
    # Imported locally, as in the original cell, so the cell stays self-contained.
    from langchain_core.prompts import PromptTemplate

    #fetching the user question and tool retrieve document from state class
    question = state['messages'][0].content
    context  = state['messages'][-1].content

    #structured instruction prompt that asks whether the fetched document is relevant to the question
    prompt = PromptTemplate(
        template="""
        You are a relevance grader. 
        Your task is to decide whether a retrieved document is relevant to a given user question.

        Retrieved document:
        {context}

        User question:
        {question}

        Instructions:
        - Consider both exact keyword matches and semantic meaning.
        - If the document provides information that answers or is directly related to the question, grade it as 'yes'.
        - If the document does not provide relevant information, grade it as 'no'.
        Return the answer strictly in this format:
        {format_instructions}
        """,
        input_variables=["context","question"],
        partial_variables={'format_instructions':pydantic_parser.get_format_instructions()}
    )

    # The parser must be the last step of the chain: without it the chain returns
    # the raw chat message, which has no `binary_score` attribute.
    chain = prompt | model2 | pydantic_parser

    response = chain.invoke({"context":context,"question":question})

    # Normalise the grade so replies such as "Yes" or " yes " still count as relevant.
    score = response.binary_score.strip().lower()
    logger.info(f"Document relevance grade: {score}")

    if score == "yes":
        return "generate_answer"
    return "rewrite_question"

In [21]:
# Register the two nodes: the model node and the tool node.
# NOTE(review): this cell mutates `graph`; re-running it without re-creating the
# graph presumably fails because the node names are already taken - confirm.
graph.add_node(node="understand_user_query_or_respond",action=user_query_or_respond)
# Despite its name, "vectorretriever" runs `tools`, which holds every tool in lst_tools.
graph.add_node(node = "vectorretriever", action=tools)
#adding edges to graph.
graph.add_edge(START,"understand_user_query_or_respond")
graph.add_conditional_edges(
    "understand_user_query_or_respond"
    # Assess LLM decision (call `retriever_tool` tool or respond to the user)
    ,tools_condition,
    {
        # keys are the values the condition returns, values are the nodes to go to
        "tools":"vectorretriever",
        END:END
    }
)

<langgraph.graph.state.StateGraph at 0x10bb7f4ea40>

In [22]:

# Compile the graph builder into a runnable workflow.
workflow = graph.compile()

# The workflow is a runnable: pass the initial state (one human message) to it.
initial_schema = StateSchema(messages=[HumanMessage(content="tell me about acne?",name="Human")])
workflow.invoke(input=initial_schema)

[2025-09-23 01:06:37,572]-INFO-1025-HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[2025-09-23 01:06:38,691]-INFO-1025-HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'messages': [HumanMessage(content='tell me about acne?', additional_kwargs={}, response_metadata={}, name='Human', id='b17a84bd-bcfb-4f8a-af65-eef5849c7bfe'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_cZ6VPGEn0tamMUYmOR6bk0oG', 'function': {'arguments': '{"query":"acne"}', 'name': 'medical_doc_retriever'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 156, 'total_tokens': 175, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-CIgcvA0345WsRCP2uke9DcDy2yeS1', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--b7be1602-fadf-4e80-8be4-ca0d3504cec9-0', tool_calls=[{'name': 'medical_doc_retriever', 'args': {'query': 'acne'}, '

In [23]:
# Run the workflow again (this repeats the model and embedding calls) and
# pretty-print every message of the final state.
for msg in workflow.invoke(input=initial_schema)["messages"]:
    msg.pretty_print()

[2025-09-23 01:06:40,444]-INFO-1025-HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[2025-09-23 01:06:42,287]-INFO-1025-HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
Name: Human

tell me about acne?
Tool Calls:
  medical_doc_retriever (call_mG7YIa0UPb98T7OuytOQlI6S)
 Call ID: call_mG7YIa0UPb98T7OuytOQlI6S
  Args:
    query: acne
Name: medical_doc_retriever

disease specialist,or an endocrinologist,a specialist who
treats diseases of the body’s endocrine (hormones and
glands) system.
Acne has a characteristic appearance and is not diffi-
cult to diagnose. The doctor takes a complete medical
history,including questions about skin care,diet,factors
causing flare-ups,medication use,and prior treatment.
Physical examination includes the face, upper neck,
chest,shoulders,back,and other affected areas. Under
good lighting,the doctor determines what types and how
many blemishes are present,whether they are inflamed,
whether they are d