## Enhanced Agentic PageRAG with Elegant Schema Architecture

#### Real-World Applications:
1. **Financial Analysis**: SEC filings (10-K, 10-Q) with automatic company/year/section detection
2. **Legal Research**: Contracts and case law with jurisdiction/date/clause filtering  
3. **Medical Records**: Patient documents with date/provider/department filters
4. **Enterprise Docs**: Multi-project knowledge base with org/dept/category filters
5. **Research Papers**: Academic search with author/year/topic/section filters

In [None]:
# Prerequisite (run in a terminal, not in Python): pull the Ollama models first.
# ollama pull qwen3
# ollama pull qwen3:0.6b
# ollama pull nomic-embed-text


In [None]:
from typing_extensions import TypedDict, Annotated
import operator
import os

from langgraph.graph import StateGraph, START, END
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import ToolNode
from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.tools import tool
from IPython.display import display, Markdown

from scripts import utils

In [None]:
# =============================================================================
# Configuration
# =============================================================================

# Chat model name and the endpoint of the locally running Ollama server.
LLM_MODEL = "qwen3"
BASE_URL = "http://localhost:11434"

# Single shared chat-model client reused by every agent invocation below.
llm = ChatOllama(model=LLM_MODEL, base_url=BASE_URL)

In [None]:
# =============================================================================
# State Definition
# =============================================================================

class AgenticRAGState(TypedDict):
    """Graph state for the agentic RAG workflow.

    The state carries a single key, ``messages``, holding the running
    conversation (user input, model replies and tool results).
    """
    # The operator.add annotation acts as the merge function for this key:
    # lists returned by nodes are concatenated onto the existing list
    # (NOTE(review): relies on LangGraph's Annotated-reducer convention).
    messages: Annotated[list, operator.add]

In [None]:
# =============================================================================
# Retriever Tool
# =============================================================================

@tool
def retrieve_docs(query: str, k: int = 5) -> str:
    """
    Retrieve relevant financial documents from ChromaDB.
    Extracts filters from query and retrieves matching documents.

    Args:
        query: The search query (e.g., "What was Amazon's revenue in Q2 2025?")
        k: Number of documents to retrieve

    Returns:
        Retrieved documents with metadata as formatted string
    """
    # NOTE: the docstring above is intentionally left verbatim -- the @tool
    # decorator hands it to the model as the tool description, so editing it
    # changes runtime behavior. `k` is annotated as int so the generated tool
    # schema advertises (and validates) an integer instead of an untyped value.
    print("\n[TOOL] retrieve_docs called")
    print(f"[QUERY] {query}")

    # Derive metadata filters and ranking keywords from the raw query text.
    filters = utils.extract_filters(query)
    ranking_keywords = utils.generate_ranking_keywords(query)

    # Over-fetch candidates for the keyword re-ranker. The pool is at least 20
    # (the original fixed size) but never smaller than the number of documents
    # the caller asked for, otherwise k > 20 could never be honoured.
    candidate_count = max(20, k)
    results = utils.search_docs(query, filters, ranking_keywords, k=candidate_count)

    # Keep only the k best candidates according to the ranking keywords.
    docs = utils.rank_documents_by_keywords(results, ranking_keywords, k=k)

    print(f"[RETRIEVED] {len(docs)} documents")

    # Nothing matched: tell the model so it can retry with another query.
    if not docs:
        return "No documents found. Try rephrasing your query or using different filters."

    # Render every document as: header, one "key: value" line per metadata
    # entry, then the page content.
    sections = []
    for index, doc in enumerate(docs, 1):
        lines = [f"\n--- Document {index} ---"]
        lines.extend(f"{key}: {value}" for key, value in doc.metadata.items())
        lines.append(f"\nContent:\n{doc.page_content}")
        sections.append("\n".join(lines))

    retrieved_text = "\n".join(sections)

    # Persist the last retrieval result for debugging (overwritten on each call).
    os.makedirs("debug_logs", exist_ok=True)
    with open("debug_logs/retrieved_reranked_docs.md", "w", encoding="utf-8") as f:
        f.write(retrieved_text)

    return retrieved_text

In [None]:
# =============================================================================
# Agent Node
# =============================================================================

def agent_node(state: AgenticRAGState) -> dict:
    """Run one agent turn: prompt the tool-enabled LLM with the conversation so far.

    Args:
        state: Current graph state; only ``state["messages"]`` is read.

    Returns:
        A dict with a single ``"messages"`` key holding a one-element list
        containing the model's response.
    """
    history = state["messages"]

    # Fixed instructions sent ahead of the conversation on every turn.
    instructions = SystemMessage(
        content="""You are a financial document analysis assistant with access to a document retrieval tool.

                CRITICAL RULES:
                1. ALWAYS use the retrieve_docs tool first - NEVER answer from memory
                2. You MUST call the tool before providing any financial information
                3. Answer ONLY based on the retrieved documents
                4. If documents don't contain the answer, clearly state that

                WORKFLOW FOR SIMPLE QUESTIONS:
                Step 1: Call retrieve_docs tool with the user's question
                Step 2: Wait for the tool results
                Step 3: Analyze the retrieved documents
                Step 4: Provide answer with citations (company, year, quarter, page)

                WORKFLOW FOR COMPLEX/COMPARISON QUESTIONS:
                Step 1: Break down the question into sub-questions
                Example: "Compare Amazon and Google revenue" -> 
                - Sub-question 1: "Amazon revenue"
                - Sub-question 2: "Google revenue"

                Step 2: Call retrieve_docs for EACH sub-question separately
                - First call for Amazon
                - Wait for results
                - Second call for Google
                - Wait for results

                Step 3: Analyze all retrieved documents

                Step 4: Present comparison in TABLE format:
                | Metric | Company A | Company B |
                |--------|-----------|-----------|
                | Revenue | $X | $Y |

                ANSWER FORMATTING (Use Markdown):
                - Use **headings** (##, ###) for sections
                - Use paragraphs for detailed findings and reasonings
                - Use **bullet points** for lists
                - Use **tables** for comparisons and structured data
                - Use **bold** for emphasis on key metrics
                - Cite sources: (Company: X, Year: Y, Quarter: Z, Page: N)

                EXAMPLES:

                Example 1 - Simple Question:
                User: "What was Amazon's revenue in Q2 2025?"
                You: [Call tool] -> [Analyze docs] -> 
                "## Amazon Q2 2025 Revenue

                Amazon's revenue for Q2 2025 was **$XXX billion**

                **Source:** Amazon, 2025, Q2, Page 5"

                Example 2 - Comparison Question:
                User: "Compare Amazon and Google revenue"
                You: [Call tool for Amazon] -> [Call tool for Google] -> [Analyze both] ->
                "## Revenue Comparison

                | Company | Revenue | Year | Quarter |
                |---------|---------|------|---------|
                | Amazon  | $XXX B  | 2025 | Q2      |
                | Google  | $YYY B  | 2025 | Q2      |

                **Analysis:**
                - Amazon's revenue was higher by $ZZZ billion
                - Google showed XX% growth

                **Sources:**
                - Amazon: 2025, Q2, Page 5
                - Google: 2025, Q2, Page 8"

                Example 3 - Multi-part Question:
                User: "What are Amazon's revenue, profit, and cash flow?"
                You: [Call tool] -> [Analyze docs] ->
                "## Amazon Financial Metrics

                ### Revenue
                - Q2 2025: $XXX billion

                ### Profit  
                - Q2 2025: $YYY billion

                ### Cash Flow
                - Q2 2025: $ZZZ billion

                **Source:** Amazon, 2025, Q2, Pages 5-7"

                REMEMBER:
                - ALWAYS call the tool first
                - Break complex questions into sub-questions
                - Use tables for comparisons
                - Format answers in detailed Markdown
                - Always cite sources
                - If no relevant documents are found, try with different filters."""
    )

    # Expose the retriever to the model, then let it choose between
    # requesting a tool call and answering directly.
    tool_enabled_llm = llm.bind_tools([retrieve_docs])
    reply = tool_enabled_llm.invoke([instructions] + history)

    # Trace what the model decided to do on this turn.
    requested_calls = getattr(reply, "tool_calls", None)
    if requested_calls:
        for call in requested_calls:
            print(f"[AGENT] Calling tool: {call.get('name')}")
    else:
        print("[AGENT]")

    return {"messages": [reply]}


In [None]:
# =============================================================================
# Router
# =============================================================================

def should_continue(state: AgenticRAGState):
    """Choose the next hop after the agent node.

    Returns ``"tools"`` when the most recent message carries a non-empty
    ``tool_calls`` attribute, and ``END`` otherwise.
    """
    latest = state["messages"][-1]
    pending_calls = getattr(latest, "tool_calls", None)
    return "tools" if pending_calls else END

In [None]:
# =============================================================================
# Graph
# =============================================================================

def create_agentic_rag():
    """Assemble and compile the agent/tool loop.

    Wiring: START -> agent; agent -> tools or END (decided by
    ``should_continue``); tools -> agent. The graph is compiled with a new
    ``MemorySaver`` checkpointer on every call.

    Returns:
        The compiled graph.
    """
    workflow = StateGraph(AgenticRAGState)

    # Register the two nodes; ToolNode is what actually executes retrieve_docs.
    for node_name, node in (
        ("agent", agent_node),
        ("tools", ToolNode([retrieve_docs])),
    ):
        workflow.add_node(node_name, node)

    # Entry point, the conditional branch out of the agent, and the loop back
    # from the tools so the agent can produce the final response.
    workflow.add_edge(START, "agent")
    workflow.add_conditional_edges("agent", should_continue, ["tools", END])
    workflow.add_edge("tools", "agent")

    return workflow.compile(checkpointer=MemorySaver())


graph = create_agentic_rag()

In [None]:
# Bare expression as the cell's last line: the notebook displays the compiled graph.
graph

In [None]:
# =============================================================================
# Demo
# =============================================================================
# Run configuration shared by the demo cells; thread_id names the conversation
# for the graph's checkpointer.
config = {"configurable": {"thread_id": "demo"}}

# Example query: single-company lookup, printed as plain text.
result = graph.invoke({"messages": ["What was Amazon's revenue in 2024?"]}, config)
result["messages"][-1].pretty_print()

In [None]:
# Example query: three-company comparison, rendered as Markdown.
# Rebuild the graph so this run starts with a new checkpointer.
graph = create_agentic_rag()

result = graph.invoke({
    "messages": ["Compare the Google's, Apple's and Amazon's revenue in 2023.?"]
}, config)

# display/Markdown are already imported in the imports cell at the top of the
# notebook, so no re-import is needed here.
display(Markdown(result["messages"][-1].content))

In [None]:
# Example query: balance-sheet comparison, rendered as Markdown.
# Rebuild the graph so this run starts with a new checkpointer.
graph = create_agentic_rag()
result = graph.invoke(
    {"messages": ["Compare the Google's and Apple's balance sheets in 2023.?"]},
    config,
)
display(Markdown(result["messages"][-1].content))

In [None]:
# Example query: segment-level earnings, rendered as Markdown.
# Tip: try swapping in a different model (e.g. gpt-oss) for this question.
graph = create_agentic_rag()
result = graph.invoke(
    {"messages": ["Show segment wise earning for Iphones and Macbooks in 2024"]},
    config,
)
display(Markdown(result["messages"][-1].content))