# Agentic RAG
[Manaranjan Pradhan](https://www.manaranjanp.com)

By the end of the tutorial we will have done the following:

- Fetch and preprocess documents that will be used for retrieval.
- Index those documents for semantic search and create a retriever tool for the agent.
- Build an agentic RAG system that can decide when to use the retriever tool.

Adapted from the [LangGraph agentic RAG tutorial](https://langchain-ai.github.io/langgraph/tutorials/rag/langgraph_agentic_rag/).

### Libraries used

  - LangChain
  - LangGraph

### Install Required Libraries



In [None]:
%%capture --no-stderr
%pip install -U --quiet langgraph langchain-groq langchain-community langchain-text-splitters unstructured langchain-huggingface langsmith

In [None]:
import nest_asyncio

# Apply nest_asyncio's patch up front.
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop and a later library call needs a nested one — confirm which call.
nest_asyncio.apply()

### Load the file and embed it

Store the file `converted_document.md` in a folder called `files` under the current directory.

In [None]:
from langchain_community.document_loaders import DirectoryLoader

# Load every Markdown file found (recursively) under ./files.
loader = DirectoryLoader("./files", glob="**/*.md")
docs = loader.load()

# Fail fast with an actionable message: an empty result would otherwise only
# surface later as an opaque IndexError when the first document is accessed.
if not docs:
    raise FileNotFoundError(
        "No Markdown files found under ./files. Place converted_document.md "
        "in a 'files' folder under the current directory and re-run this cell."
    )

In [None]:
# Preview the first 500 characters of the first loaded document.
print(docs[0].page_content[:500])

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Build the splitter through its tiktoken-encoder constructor; chunk_size and
# chunk_overlap are presumably measured in tiktoken tokens, not characters.
# NOTE(review): this constructor presumably requires the `tiktoken` package,
# which the install cell does not list explicitly — confirm it is available.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500, chunk_overlap=50
)
# Split the loaded documents into the chunks that will be indexed.
doc_splits = text_splitter.split_documents(docs)

In [None]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model used to vectorise the chunks (device pinned to CPU).
embedding_model = HuggingFaceEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",
    model_kwargs={"device": "cpu"},
)

# Index the chunks in an in-memory store, then expose it as a retriever.
vectorstore = InMemoryVectorStore.from_documents(
    documents=doc_splits,
    embedding=embedding_model,
)
retriever = vectorstore.as_retriever()

### Create a Retriever Tool

In [None]:
# Import the factory from langchain_core, which this notebook already depends
# on, instead of the `langchain` meta-package path: the install cell is
# unpinned, and the `langchain.tools.retriever` module is not guaranteed to
# exist across langchain releases.
from langchain_core.tools import create_retriever_tool

# Wrap the retriever as a tool. The name and description are what the chat
# model sees when deciding whether to call it.
retriever_tool = create_retriever_tool(
    retriever,
    "retrieve_quarterly_reports",
    "Search and return information about Infosys quarterly performance reports.",
)

In [None]:
# Smoke-test the tool directly with a sample query.
retriever_tool.invoke({"query": "What is the revenue growth for the quarter?"})

In [None]:
from IPython.display import Markdown, display

# Run the sample query through the tool and render the result as Markdown.
display(Markdown(retriever_tool.invoke({"query": "What is the revenue growth for the quarter?"})))

### Create the Generator Model

In [None]:
import os
from getpass import getpass
# Prompt for the Groq API key with getpass and export it through the
# environment rather than hard-coding it in the notebook.
os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ")

In [None]:
# Switch tracing on through both environment flags.
# NOTE(review): presumably LANGCHAIN_TRACING_V2 is the older variable name and
# LANGSMITH_TRACING the newer one — confirm whether both are still needed.
os.environ['LANGCHAIN_TRACING_V2'] = "true"
os.environ['LANGSMITH_TRACING'] = "true"

In [None]:
# Prompt for the LangSmith API key with getpass and export it through the
# environment rather than hard-coding it in the notebook.
os.environ["LANGSMITH_API_KEY"] = getpass("Enter your LANGSMITH API key: ")

In [None]:
def mask_secret(name, value):
    """Return *value* in a form that is safe to print.

    Args:
        name: Environment variable name the value belongs to.
        value: The variable's value, or ``None`` when it is unset.

    Returns:
        ``value`` unchanged when it is empty/unset or when ``name`` does not
        contain ``"KEY"``; otherwise ``"****"`` followed by the last four
        characters (only the stars when the value has 8 or fewer characters).
    """
    if not value or "KEY" not in name.upper():
        return value
    visible_tail = value[-4:] if len(value) > 8 else ""
    return "****" + visible_tail


# Show which tracing settings are in effect. Key-like values are masked so
# the secret does not end up in the saved notebook output.
for key in ["LANGCHAIN_TRACING_V2", "LANGSMITH_API_KEY", "LANGCHAIN_PROJECT"]:
    print(f"{key} =", mask_secret(key, os.getenv(key)))

In [None]:
from langgraph.graph import MessagesState
from langchain.chat_models import init_chat_model
from langchain_groq import ChatGroq

# Alternative construction through init_chat_model, kept for reference:
#response_model = init_chat_model("groq:llama-3.3-70b-versatile", temperature=0)

# Chat model used by generate_query_or_respond, rewrite_question and
# generate_answer. max_tokens=256 presumably caps the length of each
# completion — confirm it is large enough for the generated answers.
response_model = ChatGroq(model="llama-3.3-70b-versatile",
               temperature=0,
               max_tokens=256,
               max_retries=2)

In [None]:
def generate_query_or_respond(state: MessagesState):
    """Let the model either request a retrieval or answer the user directly.

    The retriever tool is bound to the response model, which is then invoked
    on the conversation held in ``state["messages"]``.

    Returns:
        A state update whose ``"messages"`` list holds the single model reply.
    """
    tool_aware_model = response_model.bind_tools([retriever_tool])
    reply = tool_aware_model.invoke(state["messages"])
    return {"messages": [reply]}

In [None]:
# Sample state for trying a node by hand: a single user message.
# NOTE(review): the name `input` shadows the Python builtin. Renaming it also
# requires updating the cell that passes it to generate_query_or_respond.
input = {
    "messages": [
        {
            "role": "user",
            "content": "What is the revenue growth for the quarter?",
        }
    ]
}

In [None]:
# Call the node function directly (outside the graph) with the sample state.
response = generate_query_or_respond(input)

In [None]:
# Inspect the keys of the state update returned by the node.
response.keys()

In [None]:
# Inspect the model reply carried in the state update.
response['messages']

### Grade the Retrieved Chunks as Relevant or Not Relevant

In [None]:
from pydantic import BaseModel, Field
from typing import Literal

# Template for the relevance grader; `{context}` and `{question}` are filled
# in with str.format by the caller.
GRADE_PROMPT = "".join(
    [
        "You are a grader assessing relevance of a retrieved document to a user question. \n ",
        "Here is the retrieved document: \n\n {context} \n\n",
        "Here is the user question: {question} \n",
        "If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n",
        "Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.",
    ]
)


# Structured-output schema for the grader: grade_documents passes this class
# to with_structured_output and reads `binary_score` from the result.
# NOTE(review): the docstring and the Field description are presumably sent
# to the model as part of the schema — treat them as prompt text when editing.
class GradeDocuments(BaseModel):
    """Grade documents using a binary score for relevance check."""

    # Declared as a free-form string, so the two allowed values are conveyed
    # only through the description below.
    binary_score: str = Field(
        description="Relevance score: 'yes' if relevant, or 'no' if not relevant"
    )

# Model used by grade_documents for relevance grading.
# NOTE(review): built with init_chat_model, whereas response_model is built
# with ChatGroq directly for the same Groq model — consider unifying the two.
grader_model = init_chat_model("groq:llama-3.3-70b-versatile", temperature=0)

def grade_documents(
    state: MessagesState,
) -> Literal["generate_answer", "rewrite_question"]:
    """Route to answer generation or question rewriting based on relevance.

    The first message in the state is used as the user question and the last
    message as the retrieved context. Both are inserted into GRADE_PROMPT and
    sent to the grader model, which replies using the GradeDocuments schema.

    Args:
        state: Graph state; ``state["messages"]`` must hold at least one
            message, each exposing a ``.content`` attribute.

    Returns:
        ``"generate_answer"`` when the grader's score is "yes" (ignoring case
        and surrounding whitespace), otherwise ``"rewrite_question"``.
    """
    question = state["messages"][0].content
    context = state["messages"][-1].content

    prompt = GRADE_PROMPT.format(question=question, context=context)
    structured_grader = grader_model.with_structured_output(GradeDocuments)
    response = structured_grader.invoke([{"role": "user", "content": prompt}])
    score = response.binary_score

    print("###############################################")
    print(f"Score from grade_documents: {score}")
    print("###############################################")

    # `binary_score` is a free-form string, so the model may answer "Yes" or
    # "yes " — normalise before comparing so a relevant document is not
    # silently routed to the rewrite branch.
    if score.strip().lower() == "yes":
        return "generate_answer"
    return "rewrite_question"

### Create a Prompt Rewriter

In [None]:
 REWRITE_PROMPT = (
    "Look at the input and try to reason about the underlying semantic intent / meaning.\n"
    "Here is the initial question:"
    "\n ------- \n"
    "{question}"
    "\n ------- \n"
    "Formulate an improved question:"
)


def rewrite_question(state: MessagesState):
    """Ask the model for an improved phrasing of the original user question.

    Args:
        state: Graph state; the first entry of ``state["messages"]`` is used
            as the question to rewrite.

    Returns:
        A state update whose ``"messages"`` list holds one human message
        containing the rewritten question.
    """
    # Function-scope import keeps this cell self-contained.
    from langchain_core.messages import HumanMessage

    original_question = state["messages"][0].content
    prompt = REWRITE_PROMPT.format(question=original_question)
    response = response_model.invoke([{"role": "user", "content": prompt}])
    # Return a message object rather than a plain dict so that consumers of
    # this node's output can call message methods such as pretty_print() on
    # it, exactly as they can on the output of the other nodes.
    return {"messages": [HumanMessage(content=response.content)]}

### Configure the Generator Node

In [None]:
# Template for the final answer; `{question}` and `{context}` are filled in
# with str.format by the caller.
GENERATE_PROMPT = "".join(
    [
        "You are an assistant for question-answering tasks. ",
        "Use the following pieces of retrieved context to answer the question. ",
        "If you don't know the answer, just say that you don't know. ",
        "Use three sentences maximum and keep the answer concise.\n",
        "Question: {question} \n",
        "Context: {context}",
    ]
)


def generate_answer(state: MessagesState):
    """Answer the question using the most recent message as context.

    The first message in the state supplies the question and the last one the
    context; both are inserted into GENERATE_PROMPT and sent to the response
    model as a single user message.

    Returns:
        A state update whose ``"messages"`` list holds the model's answer.
    """
    history = state["messages"]
    filled_prompt = GENERATE_PROMPT.format(
        question=history[0].content,
        context=history[-1].content,
    )
    answer = response_model.invoke([{"role": "user", "content": filled_prompt}])
    return {"messages": [answer]}

### Create the Agentic RAG Graph

In [None]:
from langgraph.graph import StateGraph, START, END
from langgraph.prebuilt import ToolNode
from langgraph.prebuilt import tools_condition

# Build the agentic RAG graph over MessagesState.
workflow = StateGraph(MessagesState)

# Define the nodes we will cycle between. Nodes added without an explicit
# name are referred to by their function name in the edges below.
workflow.add_node(generate_query_or_respond)
workflow.add_node("retrieve", ToolNode([retriever_tool]))
workflow.add_node(rewrite_question)
workflow.add_node(generate_answer)

# Entry point: every run starts at generate_query_or_respond.
workflow.add_edge(START, "generate_query_or_respond")

# Decide whether to retrieve
workflow.add_conditional_edges(
    "generate_query_or_respond",
    # Assess LLM decision (call `retriever_tool` tool or respond to the user)
    tools_condition,
    {
        # Translate the condition outputs to nodes in our graph
        "tools": "retrieve",
        END: END,
    },
)

# Edges taken after the `retrieve` node is called: grade_documents returns
# the name of the next node ("generate_answer" or "rewrite_question").
workflow.add_conditional_edges(
    "retrieve",
    # Assess relevance of the retrieved content
    grade_documents,
)
workflow.add_edge("generate_answer", END)
# A rewritten question goes back to the model, so the graph can loop.
# NOTE(review): no explicit cap on the number of rewrites is set here —
# confirm the framework's default recursion limit is an acceptable bound.
workflow.add_edge("rewrite_question", "generate_query_or_respond")

# Compile
graph = workflow.compile()

In [None]:
from IPython.display import Image, display

# Render the compiled graph's Mermaid diagram as a PNG and show it inline.
# NOTE(review): PNG rendering presumably relies on an external rendering
# service by default — confirm it works in offline environments.
display(Image(graph.get_graph().draw_mermaid_png()))

### Invoke the Graph

In [None]:
# Stream the run and print the last message of every node update.
for chunk in graph.stream(
    {
        "messages": [
            {
                "role": "user",
                "content": "What is the revenue growth for the quarter?",
            }
        ]
    }
):
    for node, update in chunk.items():
        print("Update from node", node)
        last_message = update["messages"][-1]
        # A node may return its message as a plain dict instead of a message
        # object; dicts have no pretty_print(), so fall back to print() rather
        # than crashing partway through the stream.
        if hasattr(last_message, "pretty_print"):
            last_message.pretty_print()
        else:
            print(last_message)
        print("\n\n")