# Indexing

- Load document
- Split document
- Store document

### Load Document

In [1]:
from langchain_community.document_loaders import PyPDFLoader
from rich import print as rprint


file_path = "../example_data/A_SURVEY_ON_AGENTIC_RAG.pdf"
loader = PyPDFLoader(file_path)

docs = loader.load()

len(docs)

39

In [2]:
rprint(docs[33])

In [3]:
docs[33].page_content = docs[33].page_content.split("\nReferences\n")[0]

### Split Document

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter


text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs[:34])

len(all_splits)

110

# Store Document

In [5]:
import os
from dotenv import load_dotenv
from langchain_community.embeddings import DashScopeEmbeddings
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams


load_dotenv()


embedding_model = DashScopeEmbeddings(
    model="text-embedding-v4",
    dashscope_api_key=os.environ.get("COMPATIBLE_API_KEY"),
)


client = QdrantClient(host="localhost", port=6333)

client.create_collection(
    collection_name="agentic_rag_survey",
    vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
)

vectorstore = QdrantVectorStore(
    client=client,
    collection_name="agentic_rag_survey",
    embedding=embedding_model,
)

vectorstore.add_documents(documents=all_splits)

['76729433a1434c43838efbbb477cd7e7',
 'e984b0d2381c436881eb9525475d40c6',
 '330f5862adf4454db1c4daf5abbe2a70',
 '1120a2d8ef484208931d6d41f2bcd4a6',
 '5201fba80bc84864b13476b7aa1f5f83',
 '090586af2fac4317bdf27664a04d2163',
 'afba64f3e9424f20a2c1613b911db486',
 '8efdf5d43306414a8faf20db65160bda',
 '1c5472456e074758ae79944e0300d3be',
 'f63c58bb6d52407fa31bd554e47699e3',
 '5ca19172202340cfbb7dd47b817b06e1',
 '83d9ad4effc14e33ac63c7b528e7ee28',
 '31abecc4ef21407baba3d8681e7a720b',
 '363306b5d32845389b0d931b721993aa',
 '649c7cdef59346919b71ef3637cebee4',
 'bbf0b4b02f254adfbeff9228b029b50f',
 'd4271129ec74495ba68331fe7d553d3e',
 '0b0aecf096d84e059a30c23c70965ee7',
 'ef93f613ce8e492cbe0dc7243aada0b1',
 'd4fe853634ec4180b6a7d48761d4f66d',
 '85198f82dc004e4393c722f318c949df',
 '6a159a4927264cecb5c95133222ea131',
 'b48ffd1b9d624a538167d1ae63fc7fb7',
 'a1ecf2f024224bec9b657d11c69f475c',
 'd099d25e0bc949a989437108b05cec22',
 '59e89991fd4a439ab6c0a6f96e1f0afd',
 '904cbe0078f047478158e5c77a01c192',
 

# Retrieval and Generation

- RAG agents
- RAG chains

In [6]:
from langchain_openai import ChatOpenAI


model = ChatOpenAI(
    model=os.environ.get("MODEL_NAME"),
    temperature=0,
    base_url=os.environ.get("COMPATIBLE_BASE_URL"),
    api_key=os.environ.get("COMPATIBLE_API_KEY"),
)

### RAG agents

In [7]:
from langchain.tools import tool


@tool(response_format="content_and_artifact")
def retrieve_context(query: str):
    """Retrieve information to help answer a query."""
    retrieved_docs = vectorstore.similarity_search(query, k=2)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\nContent: {doc.page_content}")
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs

In [8]:
from langchain.agents import create_agent


tools = [retrieve_context]
# If desired, specify custom instructions
prompt = (
    "You have access to a tool that retrieves context from a paper. "
    "Use the tool to help answer user queries."
)
agent = create_agent(model, tools, system_prompt=prompt)

In [12]:
from langchain.messages import HumanMessage


query = (
    "What is RAG?\n"
    "Once you get the answer, look up the core components of it?"
)

for event in agent.stream({"messages": [HumanMessage(query)]}, stream_mode="values"):
    event["messages"][-1].pretty_print()


What is RAG?
Once you get the answer, look up the core components of it?
Tool Calls:
  retrieve_context (call_8ee3389a93d6432abde680d2)
 Call ID: call_8ee3389a93d6432abde680d2
  Args:
    query: What is RAG?
Name: retrieve_context

Source: {'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2025-02-05T01:26:00+00:00', 'author': '', 'keywords': '', 'moddate': '2025-02-05T01:26:00+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '../example_data/A_SURVEY_ON_AGENTIC_RAG.pdf', 'total_pages': 39, 'page': 0, 'page_label': '1', 'start_index': 781, '_id': 'e984b0d2-381c-4368-81eb-9525475d40c6', '_collection_name': 'agentic_rag_survey'}
Content: outputs. Retrieval-Augmented Generation (RAG) has emerged as a solution, enhancing LLMs by
integrating real-time data retrieval to provide contextually relevant and up-to-date responses. Despite
it

### RAG chains

In [13]:
from langchain.agents.middleware import dynamic_prompt, ModelRequest


@dynamic_prompt
def prompt_with_context(request: ModelRequest) -> str:
    """Inject context into state messages."""
    last_query = request.state["messages"][-1].text
    retrieved_docs = vectorstore.similarity_search(last_query)

    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)

    system_message = (
        "You are a helpful assistant. Use the following context in your response:"
        f"\n\n{docs_content}"
    )

    return system_message


agent = create_agent(model, tools=[], middleware=[prompt_with_context])

In [14]:
query = "What is RAG?"
for step in agent.stream({"messages": [HumanMessage(query)]}, stream_mode="values"):
    step["messages"][-1].pretty_print()


What is RAG?

**Retrieval-Augmented Generation (RAG)** is a framework that enhances **Large Language Models (LLMs)** by integrating **real-time data retrieval** from external sources—such as knowledge bases, APIs, or the web—into the response generation process.

### Why RAG?
Traditional LLMs rely solely on their **static training data**, which can lead to:
- **Outdated information**
- **Hallucinated (factually incorrect) responses**
- **Inability to adapt to dynamic or real-world scenarios**

RAG addresses these limitations by **retrieving relevant, up-to-date information** at query time and using it to **ground the model’s output in factual, context-specific data**.

### How RAG Works (Simplified):
1. **Retrieve**: When a user asks a question, the system searches an external knowledge source for relevant documents or data.
2. **Augment**: The retrieved information is combined with the original query.
3. **Generate**: The LLM uses this enriched context to produce a more accurate, rel

In [15]:
from typing import Any
from langchain_core.documents import Document
from langchain.agents.middleware import AgentMiddleware, AgentState


class State(AgentState):
    context: list[Document]


class RetrieveDocumentsMiddleware(AgentMiddleware[State]):
    state_schema = State

    def before_model(self, state: AgentState) -> dict[str, Any] | None:
        last_message = state["messages"][-1]
        retrieved_docs = vectorstore.similarity_search(last_message.text)

        docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)

        augmented_message_content = (
            f"{last_message.text}\n\n"
            "Use the following context to answer the query:\n"
            f"{docs_content}"
        )
        return {
            "messages": [last_message.model_copy(update={"content": augmented_message_content})],
            "context": retrieved_docs,
        }


agent = create_agent(
    model,
    tools=[],
    middleware=[RetrieveDocumentsMiddleware()],
)

In [16]:
query = "What is RAG?"
for step in agent.stream({"messages": [HumanMessage(query)]}, stream_mode="values"):
    step["messages"][-1].pretty_print()


What is RAG?

What is RAG?

Use the following context to answer the query:
outputs. Retrieval-Augmented Generation (RAG) has emerged as a solution, enhancing LLMs by
integrating real-time data retrieval to provide contextually relevant and up-to-date responses. Despite
its promise, traditional RAG systems are constrained by static workflows and lack the adaptability
required for multi-step reasoning and complex task management.
Agentic Retrieval-Augmented Generation (Agentic RAG) transcends these limitations by embedding
autonomous AI agents into the RAG pipeline. These agents leverage agentic design patterns reflec-
tion, planning, tool use, and multi-agent collaboration to dynamically manage retrieval strategies,
iteratively refine contextual understanding, and adapt workflows through clearly defined operational
structures ranging from sequential steps to adaptive collaboration. This integration enables Agentic
RAG systems to deliver unparalleled flexibility, scalability, and contex