# Agentic RAG Part 1
This project demonstrates how to build a retrieval-augmented generation (RAG) pipeline and integrate it with LLM calls using LangGraph. The workflow is orchestrated by a Supervisor agent that dynamically decides when to call the RAG retriever, when to query the LLM directly, and how to route tasks across different sub-agents.

In [28]:

from langchain_groq import ChatGroq
from dotenv import load_dotenv
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import TextLoader,DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langgraph.graph import StateGraph,END
load_dotenv()

True

In [2]:
import warnings

# Export the credentials loaded by load_dotenv() under the variable names the
# client libraries read. os.environ only accepts str values, so assigning the
# result of os.getenv() for an unset variable raises
# "TypeError: str expected, not NoneType". Unset variables are therefore
# reported with a warning and skipped instead of crashing the cell.
_ENV_EXPORTS = {
    # target variable: source variable
    "GROQ_API_KEY": "GROQ_API_KEY",
    "LANGCHAIN_API_KEY": "LANGCHAIN_API_KEY",
    "LANGCHAIN_PROJECT": "LANGCHAIN_PROJECT",
    "HUGGINGFACEHUB_API_TOKEN": "HF_TOKEN",
}

for _target, _source in _ENV_EXPORTS.items():
    _value = os.getenv(_source)
    if _value is None:
        warnings.warn(
            f"Environment variable {_source!r} is not set; "
            f"{_target!r} was not exported."
        )
        continue
    os.environ[_target] = _value

# Tracing is switched on unconditionally, exactly as before.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [3]:
# Chat model shared by the topic classifier, the RAG chain and the direct
# LLM node defined later in this file.
llm = ChatGroq(model= "qwen/qwen3-32b")

# Config embedding model

In [4]:
# Embedding model; it is handed to the Chroma vector store further down.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")
# Sanity check: the trailing expression displays the embedding vector length
# (384 in the recorded output).
len(embeddings.embed_query("hi"))

  embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")


384

In [7]:
# Load text data
# The encoding is pinned to UTF-8: without it the file is decoded with the
# platform's default encoding, which turns the emoji, dashes and curly quotes
# in data.txt into mojibake (or a decode error) on non-UTF-8 locales.
loader = TextLoader("data.txt", encoding="utf-8")
docs = loader.load()

In [8]:
# Inspect the loaded documents (raw repr, including metadata).
print(docs)

[Document(metadata={'source': 'data.txt'}, page_content="🇺🇸 Overview of the U.S. Economy\nThe United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a high level of technological advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology, healthcare, finance, real estate, defense, and agriculture.\n\nU.S. GDP – Size, Composition, and Global Share\nAs of 2024, the United States’ nominal GDP is estimated to be around $28 trillion USD, accounting for approximately 25% of the global economy. It ranks #1 in the world by nominal GDP, far ahead of China (which ra

In [10]:
# Split the loaded documents into small overlapping chunks:
# at most 200 characters each, with 50 characters shared between neighbours.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50,
)
chunks = text_splitter.split_documents(docs)

In [11]:
# Number of chunks produced by the splitter (55 in the recorded output).
len(chunks) 

55

In [12]:
# Re-check the embedding vector length (same call as in the embedding cell).
len(embeddings.embed_query("hi"))

384

In [14]:
# Using chroma db to save data
# Every chunk gets a deterministic id so that re-running this cell overwrites
# the same records instead of appending another copy of each chunk. Without
# ids, each run adds new records, and duplicate hits crowd distinct context
# out of the top-k results (the recorded retrieval output in this notebook
# returns the same chunk twice).
# NOTE(review): ids are positional, so if the chunking parameters change the
# collection should be rebuilt from scratch to drop stale entries.
chunk_ids = [f"data-chunk-{position}" for position in range(len(chunks))]
chroma_db = Chroma.from_documents(chunks, embeddings, ids=chunk_ids)

In [15]:
# Create a retriever over the Chroma store that returns 3 chunks per query (k=3).
retriever = chroma_db.as_retriever(search_kwargs = {"k":3})

In [16]:
# Smoke-test the retriever with a sample query; the cell displays the hits.
retriever.invoke("industrial growth at usa?")

[Document(metadata={'source': 'data.txt'}, page_content='Looking forward, the U.S. economy is expected to grow at a moderate pace, powered by innovation in AI, green energy, robotics, biotech, and quantum computing. The Biden administration’s Inflation'),
 Document(metadata={'source': 'data.txt'}, page_content='Looking forward, the U.S. economy is expected to grow at a moderate pace, powered by innovation in AI, green energy, robotics, biotech, and quantum computing. The Biden administration’s Inflation'),
 Document(metadata={'source': 'data.txt'}, page_content='The U.S. maintains its GDP growth through strong innovation, entrepreneurship, and investment in R&D. With companies like Apple, Google, Amazon, Microsoft, and Tesla leading global markets, the U.S.')]

# Define Pydantic class

In [17]:
import operator
from typing import List
from pydantic import BaseModel , Field
from langchain.prompts import PromptTemplate
from typing import TypedDict, Annotated, Sequence
from langchain_core.messages import BaseMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import HumanMessage, AIMessage
from langgraph.graph import StateGraph,END
from langchain.output_parsers import PydanticOutputParser

In [18]:
# Structured-output schema for the topic classifier: the chat model is asked
# to fill in these two fields (see llm.with_structured_output below).
# Documented with comments rather than a docstring so the model's own
# metadata is left untouched.
class TopicSelectionParser(BaseModel):
    # Category label chosen for the user query; the router only checks
    # whether it contains "usa" (case-insensitive).
    Topic:str=Field(description="selected topic")
    # Free-text justification; not read anywhere else in this file.
    Reasoning:str=Field(description='Reasoning behind topic selection')

# Define agent state class

In [19]:
# Shared state passed to every node of the graph.
class AgentState(TypedDict):
    # Running message list. It is annotated with operator.add as the reducer,
    # and the recorded runs show each node's returned list appended to the
    # existing one (question, then topic label, then answer).
    # NOTE(review): the element type is declared as BaseMessage, but every
    # node in this file appends plain strings.
    messages: Annotated[Sequence[BaseMessage], operator.add]

In [20]:
# Variant of the chat model whose replies come back as TopicSelectionParser
# objects; the classifier node reads the .Topic attribute of the result.
llm_parser = llm.with_structured_output(TopicSelectionParser)

In [21]:
def Class_identifying_fun(state: AgentState):
    """Supervisor node: classify the latest message as "USA" or "Not Related".

    Reads the last entry of ``state["messages"]``, asks the structured-output
    model to pick a category for it, and returns the chosen topic as a
    one-item ``messages`` update.
    """
    latest_message = state["messages"][-1]

    instructions = """
    Your task is to classify the given user query into one of the following categories: ["USA", "Not Related"].

    User query: "{question}"
    """
    classification_prompt = PromptTemplate(
        input_variables=["question"],
        template=instructions,
    )

    # Prompt -> structured-output model; the result exposes Topic and Reasoning.
    classifier = classification_prompt | llm_parser
    verdict = classifier.invoke({"question": latest_message})

    # Only the topic label is stored; the reasoning is discarded.
    return {"messages": [verdict.Topic]}
    

In [22]:
# Define Pydantic class
# NOTE(review): this repeats the TopicSelectionParser definition that appears
# earlier in this file. llm_parser was already built from the earlier class,
# so this re-declaration does not change what the classifier uses; it looks
# redundant and could be removed.
class TopicSelectionParser(BaseModel):
    # Category label chosen for the user query.
    Topic:str=Field(description="selected topic")
    # Free-text justification for the chosen label.
    Reasoning:str=Field(description='Reasoning behind topic selection')

# Supervisor/Router

In [23]:
def router(state:AgentState):
    """Pick the next node from the topic label written by the supervisor.

    Args:
        state: Graph state; the last entry of ``state["messages"]`` is
            expected to be the topic label. It may be a plain string (what
            the nodes in this file append) or a message object exposing a
            ``content`` attribute (what ``AgentState`` declares).

    Returns:
        ``"RAG Call"`` when the label contains "usa" (case-insensitive),
        otherwise ``"LLM Call"``.
    """
    print("-> ROUTER ->")

    last_message = state["messages"][-1]

    # Calling .lower() directly only works for str; message objects carry
    # their text in .content, so normalise to text before matching.
    if isinstance(last_message, str):
        label = last_message
    else:
        label = str(getattr(last_message, "content", ""))

    if "usa" in label.lower():
        print("Rag")
        return "RAG Call"
    return "LLM Call"

# RAG Call

In [24]:
def format_docs(docs):
    """Merge retrieved documents into one context string.

    Takes an iterable of objects with a ``page_content`` attribute and returns
    their contents separated by a blank line (empty string for no documents).
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

In [25]:
# RAG node
def rag_call(state:AgentState):
    """Answer the original user question from retrieved context.

    Reads the first entry of ``state["messages"]`` as the question, retrieves
    supporting chunks for it, asks the chat model for a concise answer and
    returns that answer text as a one-item ``messages`` update.
    """
    print("-> RAG Call ->")

    # The first message is the user's question; later entries were appended
    # by earlier nodes (e.g. the topic label).
    user_question = state["messages"][0]

    qa_prompt = PromptTemplate(
        input_variables=['context', 'question'],
        template="""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:""",
    )

    # The same input string feeds both branches: one is retrieved against and
    # flattened into context text, the other is passed through unchanged.
    prompt_inputs = {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    pipeline = prompt_inputs | qa_prompt | llm | StrOutputParser()

    answer = pipeline.invoke(user_question)
    return {"messages": [answer]}

# LLM Call

In [26]:
# LLM node
def llm_call(state:AgentState):
    """Answer the original user question with the chat model alone.

    Reads the first entry of ``state["messages"]``, prefixes it with a fixed
    instruction and returns the model's reply text as a one-item ``messages``
    update. No retrieval is involved.
    """
    print("-> LLM Call ->")

    # The first message is the user's question; later entries were appended
    # by earlier nodes.
    user_question = state["messages"][0]
    instruction = "Anwer the follow question with you knowledge of the real world. Following is the user question: "

    # Plain concatenation, so the question must be a str.
    reply = llm.invoke(instruction + user_question)
    return {"messages": [reply.content]}

# Graph using LangGraph

In [29]:
# Define the graph; AgentState is the state schema shared by all nodes.
workflow=StateGraph(AgentState)

In [30]:
# Register the three nodes: the topic classifier ("Supervisor") and the two
# answer paths it can hand off to ("RAG" and "LLM").
workflow.add_node("Supervisor",Class_identifying_fun)
workflow.add_node("RAG",rag_call)
workflow.add_node("LLM",llm_call)

<langgraph.graph.state.StateGraph at 0x24107c002f0>

In [31]:
# Create the entry point: every run starts at the Supervisor node.
workflow.set_entry_point("Supervisor")


<langgraph.graph.state.StateGraph at 0x24107c002f0>

In [32]:
# Condition for routing: after the Supervisor runs, router() inspects the
# state and returns "RAG Call" or "LLM Call"; the mapping below translates
# that return value into the name of the node to run next.
workflow.add_conditional_edges(
    "Supervisor",
    router,
    {
        "RAG Call": "RAG",
        "LLM Call": "LLM",
    }
)

<langgraph.graph.state.StateGraph at 0x24107c002f0>

In [33]:
# Define edges: both answer nodes are terminal, so each one leads to END.
workflow.add_edge("RAG",END)
workflow.add_edge("LLM",END)

<langgraph.graph.state.StateGraph at 0x24107c002f0>

In [34]:
# Compile the graph definition into the runnable object invoked below.
app = workflow.compile()

In [35]:
# Output 1: a greeting unrelated to the USA; the recorded run classifies it
# as "Not Related" and answers through the plain LLM node.
state= {"messages":["hi"]}
app.invoke(state)

-> ROUTER ->
-> LLM Call ->


{'messages': ['hi',
  'Not Related',
  '<think>\nOkay, the user sent "hi". That\'s a greeting. I should respond in a friendly and welcoming way. Maybe say hello back and ask how I can help them today. Keep it open-ended so they can specify what they need. Make sure the tone is positive and approachable.\n</think>\n\nHello! How can I assist you today? 😊']}

In [36]:
# Output 2: a USA-related question; the recorded run classifies it as "USA"
# and answers through the RAG node.
state={"messages":["what is a gdp of usa?"]}
app.invoke(state)

-> ROUTER ->
Rag
-> RAG Call ->


{'messages': ['what is a gdp of usa?',
  'USA',
  '<think>\nOkay, the user is asking for the GDP of the USA. Let me check the context provided.\n\nThe context says that as of 2024, the U.S. nominal GDP is estimated to be around $28 trillion USD. It also mentions that this accounts for about 25% of the global economy and that the U.S. is ranked first by nominal GDP. \n\nI need to make sure I present this information clearly and concisely. The user probably wants the latest figure, so specifying 2024 is important. I should mention both the total GDP and its share of the global economy. Also, confirming the ranking will add context. \n\nWait, the context starts with "U.S. GDP – Size, Composition, and Global Share" but then repeats the same paragraph twice. I need to avoid redundancy. The key points are the $28 trillion figure, the 25% global share, and the #1 ranking. \n\nI should structure the answer to include all three points in three sentences max. Let me check the example answer fo