# Adaptive RAG

Adaptive RAG is a strategy for RAG that unites query analysis with self-corrective RAG.

In the paper, the authors use query analysis to route each question to one of the following strategies:

 - No Retrieval
 - Single-shot RAG
 - Iterative RAG


In [1]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_huggingface import HuggingFaceEmbeddings

# Pages from lilianweng.github.io that form the retrieval corpus.
urls = [
    "https://lilianweng.github.io/posts/2024-11-28-reward-hacking/",
    "https://lilianweng.github.io/posts/2024-07-07-hallucination/",
    "https://lilianweng.github.io/posts/2024-04-12-diffusion-video/",
]

# `docs` keeps one list of loaded documents per URL; `docs_list` is the same
# content flattened into a single list for the splitter.
docs = [WebBaseLoader(page_url).load() for page_url in urls]
docs_list = []
for page_docs in docs:
    docs_list.extend(page_docs)

# Split into small overlapping chunks (sizes are counted by the tiktoken encoder).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=200,
    chunk_overlap=50,
)
doc_splits = text_splitter.split_documents(docs_list)

# Embed the chunks and index them in an in-memory vector store.
embd = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vectorstore = InMemoryVectorStore.from_documents(documents=doc_splits, embedding=embd)

# Retriever interface over the store, with default search settings.
retriever = vectorstore.as_retriever()

USER_AGENT environment variable not set, consider setting it to identify your requests.





In [None]:
from typing import Literal
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain.chat_models import init_chat_model

# Structured-output schema for the question router: this class is passed to
# `llm.with_structured_output(...)`, so the model's reply is parsed into it.
# NOTE(review): the class docstring and the `description` strings presumably
# reach the model as part of the schema -- treat them as prompt text and
# confirm before rewording them.
class GradeQuery(BaseModel):
    """Grade a user query for difficulty. The grade will range from 0-5. """

    # Difficulty grade, restricted to the string literals "0".."5".
    # These are strings, not ints, so downstream comparisons must use e.g. "0".
    grade: Literal["0", "1", "2", "3", "4", "5"] = Field(
        ...,
        description="Given a user question grade the difficulty for a RAG system.",
    )
    # Free-text justification for the grade; required (no default value).
    reasoning: str = Field(
        ...,
        description="Provide a reasoning for the grade you assigned to the user question.",
    )

# Chat model whose replies are parsed into the GradeQuery schema.
llm = init_chat_model(
    "gemini-2.0-flash-lite",
    temperature=0,
    model_provider="google_genai",
)
structured_llm_router = llm.with_structured_output(GradeQuery)

# System prompt explaining the 0-5 difficulty scale. Adjacent literals are
# concatenated into one single-line string.
system = (
    "You are an expert at grading the difficulty of a user question for a RAG system. "
    "Your grade will range from 0-5. "
    "Grade 0 means no retrieval is needed. "
    "Grade 1 is a question that can be answered with a single shot retrieval from a vectorstore, "
    "while grade 5 is a question that might require up to 5 retrievals querying a vectorstore "
    "with each retrieval being a follow-up to the previous one."
)
route_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Rate the difficulty of the following question: \n\n {question}"),
    ]
)

# Router chain: fill the prompt, then ask the structured-output model.
question_router = route_prompt | structured_llm_router

# Smoke-test the router on two sample questions.
for sample_question in (
    "Who will the Bears draft first in the NFL draft?",
    "What are the types of agent memory?",
):
    print(question_router.invoke({"question": sample_question}))

grade='1' reasoning="This question requires a single retrieval to find the Bears' draft pick."
grade='1' reasoning='The question asks for a list of types, which can be answered with a single retrieval.'
