[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/oviya-raja/ist-402/blob/main/learning-path/W09/W9_Building_Agentic_RAG_LlamaIndex_3_4.ipynb)

---



# Building Agentic RAG with LlamaIndex - Complete Notebook Content

This notebook contains all lessons from the course on building agentic RAG systems using LlamaIndex.

**Prerequisites:** an OpenAI API key, provided either as a Google Colab secret or through an `OPENAI_API_KEY` environment variable / local `.env` file.

# Setup and Installation
First, let's install the required packages.

In [None]:
# Upgrade pip itself before installing the notebook's dependencies.
%pip install --upgrade pip
# LlamaIndex plus its OpenAI LLM and embedding integrations (imported in later cells).
%pip install llama-index
%pip install llama-index-llms-openai
%pip install llama-index-embeddings-openai
# nest-asyncio is applied in the cell after the API-key setup.
%pip install nest-asyncio
%pip install openai
# python-dotenv provides load_dotenv(), used to read a local .env file.
%pip install python-dotenv

## Set up OpenAI API Key

In [None]:
# Set up OpenAI API Key
import os
from dotenv import load_dotenv

# Resolution order: Colab secret first, then the process environment (after
# loading a local .env file). The first non-empty value wins.
OPENAI_API_KEY = None
try:
    from google.colab import userdata
except ImportError:
    # Not running in Colab; the environment-variable fallback below applies.
    pass
else:
    try:
        OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
    except Exception as colab_error:
        # userdata.get() reports a missing secret or denied notebook access
        # with its own exception types rather than ValueError, so catch
        # broadly here and fall back instead of aborting the cell.
        print(
            f"Colab secret lookup failed ({type(colab_error).__name__}); "
            "falling back to environment variables."
        )
    if OPENAI_API_KEY:
        print("✅ OpenAI API Key loaded from Colab userdata!")

# If not found in Colab userdata, try environment variables
if not OPENAI_API_KEY:
    # Load environment variables from a .env file, if one exists
    load_dotenv()

    # Get OpenAI API Key from environment variable
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
    if OPENAI_API_KEY:
        print("✅ OpenAI API Key loaded from environment variables!")
    else:
        raise ValueError(
            "OPENAI_API_KEY not found. Please set it in one of the following ways:\n"
            "  - In Google Colab: add a secret named OPENAI_API_KEY in the Secrets panel "
            "(key icon) and grant this notebook access\n"
            "  - Locally: Create a .env file with OPENAI_API_KEY=your_key\n"
            "  - Or set environment variable: export OPENAI_API_KEY=your_key"
        )

# Ensure the API key is set in the environment for OpenAI libraries
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
print("OpenAI API Key configured successfully!")

In [None]:
# Patch asyncio so event loops can be nested; later cells call asyncio.run()
# from inside the notebook.
import nest_asyncio
nest_asyncio.apply()

# Lesson 1: Router Engine

### Load Data
Download the MetaGPT paper:

In [None]:
# Create data directory if it doesn't exist
import os
os.makedirs("data", exist_ok=True)

# Download the MetaGPT paper
# (-O saves the response as data/metagpt.pdf, the path the loader cells read.)
!wget "https://openreview.net/pdf?id=VtmBAGCN7o" -O data/metagpt.pdf

In [None]:
from llama_index.core import SimpleDirectoryReader

# Read the downloaded MetaGPT PDF into LlamaIndex documents.
pdf_reader = SimpleDirectoryReader(input_files=["data/metagpt.pdf"])
documents = pdf_reader.load_data()

## Define LLM and Embedding Model

In [None]:
from llama_index.core.node_parser import SentenceSplitter

# Split the loaded documents into nodes with chunk_size=1024; the indexes
# defined further down are built from `nodes`.
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

In [None]:
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# Global defaults: the LLM and embedding model registered on Settings are what
# the indexes and query engines below rely on (see the get_doc_tools docstring).
Settings.llm = OpenAI(model="gpt-3.5-turbo")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

## Define Summary Index and Vector Index

In [None]:
from llama_index.core import SummaryIndex, VectorStoreIndex

# Two indexes over the same nodes: one backs the summarization engine, the
# other backs the vector-retrieval engine defined in the next cell.
summary_index = SummaryIndex(nodes)
vector_index = VectorStoreIndex(nodes)

## Define Query Engines and Set Metadata

In [None]:
# Summarization engine: tree_summarize response mode with async enabled.
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)
# Retrieval engine with the index's default query settings.
vector_query_engine = vector_index.as_query_engine()

In [None]:
from llama_index.core.tools import QueryEngineTool

# Wrap each query engine as a tool. The description is the text the router's
# selector reads when deciding which engine should answer a query.
print("Creating summary tool...")
summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    description=(
        "Useful for summarization questions related to MetaGPT"
    ),
)
print("✓ Summary tool created successfully")
print(f"  Description: {summary_tool.metadata.description}")

print("\nCreating vector tool...")
vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description=(
        "Useful for retrieving specific context from the MetaGPT paper."
    ),
)
print("✓ Vector tool created successfully")
print(f"  Description: {vector_tool.metadata.description}")
print("\n✓ Both tools are ready to use!")

## Define Router Query Engine

In [None]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector

# Router over the two tools: an LLM-based single selector picks one of them
# per query; verbose=True prints the selection.
query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[
        summary_tool,
        vector_tool,
    ],
    verbose=True
)

In [None]:
# A summarization-style question; also report how many source nodes backed the answer.
summary_question = "What is the summary of the document?"
response = query_engine.query(summary_question)
print(str(response))
print(len(response.source_nodes))

In [None]:
# A specific, fact-seeking question sent through the same router.
specific_question = "How do agents share information with other agents?"
response = query_engine.query(specific_question)
print(str(response))

# Lesson 2: Tool Calling

### 1. Define a Simple Tool

In [None]:
from llama_index.core.tools import FunctionTool

def add(x: int, y: int) -> int:
    """Adds two integers together."""
    # Docstring kept verbatim: FunctionTool derives the tool description from it.
    total = x + y
    return total

def mystery(x: int, y: int) -> int:
    """Mystery function that operates on top of two numbers."""
    # Docstring kept verbatim (and deliberately vague): it is the only hint the
    # LLM gets about this tool. The function returns the square of the sum.
    combined = x + y
    return combined * combined

# Wrap the plain functions as tools; with no explicit name/description given,
# these are taken from each function's name, signature and docstring.
add_tool = FunctionTool.from_defaults(fn=add)
mystery_tool = FunctionTool.from_defaults(fn=mystery)

In [None]:
# ============================================================================
# EXPLANATION: Using LLM with Function/Tool Calling
# ============================================================================
# This demonstrates how an LLM can intelligently choose and call functions/tools
# based on a natural language query.

from llama_index.llms.openai import OpenAI

# Step 1: Initialize the OpenAI LLM
# This creates a client for OpenAI's GPT-3.5-turbo model
print("Step 1: Initializing OpenAI LLM (gpt-3.5-turbo)...")
llm = OpenAI(model="gpt-3.5-turbo")
print("✓ LLM initialized\n")

# Step 2: Use predict_and_call to let the LLM decide which tool to use
# The LLM will:
#   - Analyze the query: "Tell me the output of the mystery function on 2 and 9"
#   - Understand it needs to call a function with arguments 2 and 9
#   - Choose the appropriate tool (mystery_tool in this case)
#   - Call the function with the correct arguments
#   - Return the result

print("Step 2: LLM analyzing query and selecting appropriate tool...")
print("Query: 'Tell me the output of the mystery function on 2 and 9'")
print("Available tools: add_tool, mystery_tool")
print("\nLLM reasoning process (verbose=True shows this):")
print("-" * 60)

response = llm.predict_and_call(
    [add_tool, mystery_tool],  # List of available tools the LLM can choose from
    "Tell me the output of the mystery function on 2 and 9",  # User's query
    verbose=True  # Shows the LLM's decision-making process
)

print("-" * 60)
print("\nStep 3: Final response from LLM:")
print("=" * 60)
print(str(response))
print("=" * 60)

# Explanation of what happened.
# NOTE: the lines below describe the expected outcome; they are printed
# regardless of what the LLM actually did, so compare them with the output above.
print("\n" + "=" * 60)
print("WHAT HAPPENED:")
print("=" * 60)
print("1. The LLM received your query asking about 'mystery function'")
print("2. It analyzed the available tools and chose 'mystery_tool'")
print("3. It extracted the arguments: x=2, y=9")
print("4. It called mystery_tool(2, 9) which calculates: (2+9) * (2+9) = 121")
print("5. It returned the result: 121")
print("=" * 60)

### 2. Define an Auto-Retrieval Tool

In [None]:
from llama_index.core import SimpleDirectoryReader

# load documents
# (Same file as in Lesson 1; re-loaded here, presumably so this lesson can be
# followed on its own.)
documents = SimpleDirectoryReader(input_files=["data/metagpt.pdf"]).load_data()

In [None]:
from llama_index.core.node_parser import SentenceSplitter

# Re-split into nodes, then print the first node with all of its metadata so
# the available metadata keys (used for filtering below) are visible.
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)
print(nodes[0].get_content(metadata_mode="all"))

In [None]:
from llama_index.core import VectorStoreIndex

# Rebuild the vector index from the fresh nodes. Note that `query_engine` is
# rebound here: it no longer refers to the Lesson 1 router.
vector_index = VectorStoreIndex(nodes)
query_engine = vector_index.as_query_engine(similarity_top_k=2)

In [None]:
from llama_index.core.vector_stores import MetadataFilters

# Restrict retrieval to chunks whose page_label metadata equals "2".
page_two_only = MetadataFilters.from_dicts([{"key": "page_label", "value": "2"}])
query_engine = vector_index.as_query_engine(
    similarity_top_k=2,
    filters=page_two_only,
)

response = query_engine.query("What are some high-level results of MetaGPT?")
print(str(response))
# Show the metadata of every retrieved chunk to confirm the page filter applied.
for source in response.source_nodes:
    print(source.metadata)

### Define the Auto-Retrieval Tool

In [None]:
from typing import List, Optional
from llama_index.core.vector_stores import FilterCondition, MetadataFilters

def vector_query(
    query: str,
    page_numbers: Optional[List[str]] = None
) -> str:
    """Perform a vector search over an index.

    query (str): the string query to be embedded.
    page_numbers (Optional[List[str]]): Filter by set of pages. Leave as NONE if we want to perform a vector search
        over all pages. Otherwise, filter by the set of specified pages.

    """
    # The docstring above is sent to the LLM as part of the tool description,
    # so it is written for the model, not just for human readers.

    # The LLM may omit page_numbers entirely; treat that as "no page filter".
    page_numbers = page_numbers or []

    # page_label metadata is compared as a string (e.g. "2"), so coerce any
    # integers the LLM passes to strings before building the filters.
    metadata_dicts = [
        {"key": "page_label", "value": str(p)} for p in page_numbers
    ]

    # OR condition: a chunk matches if it is on any of the requested pages.
    query_engine = vector_index.as_query_engine(
        similarity_top_k=2,
        filters=MetadataFilters.from_dicts(
            metadata_dicts,
            condition=FilterCondition.OR
        )
    )
    # NOTE(review): annotated `-> str`, but the query engine's response object
    # is returned as-is; the calling cells read `.source_nodes` from the result
    # of predict_and_call.
    response = query_engine.query(query)
    return response


vector_query_tool = FunctionTool.from_defaults(
    name="vector_tool",
    fn=vector_query
)

In [None]:
# temperature=0 for this LLM instance; the query names a page so the model can
# fill in vector_query's page_numbers argument.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0)
response = llm.predict_and_call(
    [vector_query_tool],
    "What are the high-level results of MetaGPT as described on page 2?",
    verbose=True
)
# Print the metadata of the chunks the tool call retrieved.
for n in response.source_nodes:
    print(n.metadata)

### Add More Tools

In [None]:
from llama_index.core import SummaryIndex
from llama_index.core.tools import QueryEngineTool

# Summary index/engine over the Lesson 2 nodes, exposed as a tool with an
# explicit name. This rebinds `summary_index`, `summary_query_engine` and
# `summary_tool` from Lesson 1.
summary_index = SummaryIndex(nodes)
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)
summary_tool = QueryEngineTool.from_defaults(
    name="summary_tool",
    query_engine=summary_query_engine,
    description=(
        "Useful if you want to get a summary of MetaGPT"
    ),
)

In [None]:
# With both tools available, a page-specific question should be answerable via
# the vector tool; the printed metadata shows which chunks were used.
response = llm.predict_and_call(
    [vector_query_tool, summary_tool],
    "What are the MetaGPT comparisons with ChatDev described on page 8?",
    verbose=True
)
for n in response.source_nodes:
    print(n.metadata)

In [None]:
# Same tool set, but a whole-paper question; verbose=True shows which tool the
# LLM selects.
response = llm.predict_and_call(
    [vector_query_tool, summary_tool],
    "What is a summary of the paper?",
    verbose=True
)

# Lesson 3: Building an Agent Reasoning Loop

## Setup Function Calling Agent

In [None]:
from llama_index.llms.openai import OpenAI

# LLM used by the Lesson 3 agent (temperature=0).
llm = OpenAI(model="gpt-3.5-turbo", temperature=0)

In [None]:
from llama_index.core.agent.workflow import FunctionAgent

# NOTE(review): when the notebook is run top to bottom, `vector_tool` is the
# query-engine tool created in Lesson 1 and `summary_tool` is the one created
# in Lesson 2 ("Add More Tools"); `vector_query_tool` is not used here.
agent = FunctionAgent(
    tools=[vector_tool, summary_tool],
    llm=llm,
    verbose=True
)

In [None]:
# agent.run(...) is asynchronous; asyncio.run() is used here to drive it to
# completion and obtain the result.
# NOTE(review): inside Jupyter/Colab an event loop is already running, so this
# appears to rely on nest_asyncio.apply() from the setup cell;
# `response = await agent.run(...)` is the alternative — confirm.
import asyncio

response = asyncio.run(agent.run(
    "Tell me about the agent roles in MetaGPT, and how they communicate."
))
print(str(response))

In [None]:
response = asyncio.run(agent.run(
    "Tell me about the evaluation datasets used."
))
print(str(response))

In [None]:
response = asyncio.run(agent.run("Tell me the results over one of the above datasets."))

print(str(response))

# Lesson 4: Building a Multi-Document Agent

## 1. Setup an Agent Over 3 Papers

In [None]:
# Download URLs and target file names, paired by position (they are zipped
# together in the download cell).
urls = [
    "https://openreview.net/pdf?id=VtmBAGCN7o",
    "https://openreview.net/pdf?id=6PmJoRfdaK",
    "https://openreview.net/pdf?id=hSyW5go0v8",
]

papers = [
    "metagpt.pdf",
    "longlora.pdf",
    "selfrag.pdf",
]

In [None]:
# Download papers
# Each URL is saved under data/ using the file name at the same position in
# `papers`; {url} and {paper} are interpolated into the shell command.
for url, paper in zip(urls, papers):
    !wget "{url}" -O "data/{paper}"

In [None]:
# Helper function to create tools for each paper
# Works in both Google Colab and local environments
from pathlib import Path
from llama_index.core import SimpleDirectoryReader, SummaryIndex, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.tools import QueryEngineTool

def get_doc_tools(file_path: str, name: str):
    """
    Get vector and summary query engine tools from a document.

    This function works in both Google Colab and local environments.
    Make sure Settings.llm and Settings.embed_model are configured before calling this function.

    Each tool gets a name that is unique per document
    (``vector_tool_<name>`` / ``summary_tool_<name>``), so an agent holding
    tools for several documents can tell them apart.

    Args:
        file_path: Path to the document file (relative or absolute path)
        name: Name identifier for the document (used in tool names and descriptions)

    Returns:
        tuple: (vector_tool, summary_tool) - QueryEngineTool instances
    """
    import re

    # Tool names are sent to the LLM as function names; keep only letters,
    # digits, underscores and hyphens so arbitrary document names stay valid.
    tool_suffix = re.sub(r"[^a-zA-Z0-9_-]", "_", name)

    # Load documents and split them into nodes
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
    splitter = SentenceSplitter(chunk_size=1024)
    nodes = splitter.get_nodes_from_documents(documents)

    # Create indices
    vector_index = VectorStoreIndex(nodes)
    summary_index = SummaryIndex(nodes)

    # Create query engines
    vector_query_engine = vector_index.as_query_engine()
    summary_query_engine = summary_index.as_query_engine(
        response_mode="tree_summarize",
        use_async=True,
    )

    # Create tools. Without an explicit name every tool would share the same
    # default name, making the tools of different papers indistinguishable.
    vector_tool = QueryEngineTool.from_defaults(
        name=f"vector_tool_{tool_suffix}",
        query_engine=vector_query_engine,
        description=f"Useful for retrieving specific context from {name}.",
    )

    summary_tool = QueryEngineTool.from_defaults(
        name=f"summary_tool_{tool_suffix}",
        query_engine=summary_query_engine,
        description=f"Useful for summarization questions related to {name}.",
    )

    return vector_tool, summary_tool

In [None]:
import os
from pathlib import Path

# Build {paper file name: [vector_tool, summary_tool]} for every paper that is
# present on disk and can be processed; problematic papers are reported and skipped.
paper_to_tools_dict = {}
for paper in papers:
    print(f"Getting tools for paper: {paper}")

    # Check if file exists in data directory before processing
    file_path = f"data/{paper}"
    if not os.path.exists(file_path):
        print(f"  ⚠️  Warning: File '{file_path}' does not exist. Skipping...")
        print("  💡 Tip: Make sure you've downloaded all papers first.")
        continue

    try:
        # The file stem (e.g. "metagpt") is the document name used in tool descriptions.
        vector_tool, summary_tool = get_doc_tools(file_path, Path(paper).stem)
        paper_to_tools_dict[paper] = [vector_tool, summary_tool]
        print(f"  ✓ Successfully created tools for {paper}\n")
    except Exception as e:
        # Best effort: one unreadable paper should not stop the others.
        print(f"  ❌ Error processing {paper}: {e}")
        print("  Skipping this paper...\n")

print(f"\n✓ Successfully processed {len(paper_to_tools_dict)} papers")
print(f"Papers with tools: {list(paper_to_tools_dict.keys())}")

In [None]:
# Flatten the tools of the papers that were actually processed. Iterating the
# dict (instead of indexing it with every entry of `papers`) avoids a KeyError
# for papers the previous cell skipped; insertion order still follows `papers`.
initial_tools = [t for paper_tools in paper_to_tools_dict.values() for t in paper_tools]

In [None]:
from llama_index.llms.openai import OpenAI

# LLM for the multi-document agent; also report how many tools it will receive.
llm = OpenAI(model="gpt-3.5-turbo")
print(f"Number of tools: {len(initial_tools)}")

In [None]:
# LlamaIndex >= 0.14.6
from llama_index.core.agent.workflow import FunctionAgent
# If any items in `initial_tools` are plain Python functions, wrap them first:
# from llama_index.core.tools import FunctionTool
# initial_tools = [FunctionTool.from_defaults(fn) for fn in initial_tools]

# Agent that is handed every per-paper tool up front.
agent = FunctionAgent(
    tools=initial_tools,
    llm=llm,
    verbose=True,  # optional: shows workflow logs
)


In [None]:
import asyncio

# Two-part question about a single paper (dataset first, then results).
response = asyncio.run(agent.run(
    user_msg=(
        "Tell me about the evaluation dataset used in LongLoRA, "
        "and then tell me about the evaluation results"
    )
))
print(str(response))


In [None]:
import asyncio

# Question spanning two papers, so more than one tool is needed to answer it.
response = asyncio.run(
    agent.run(
        user_msg="Give me a summary of both Self-RAG and LongLoRA"
    )
)
print(str(response))


## 2. Setup an Agent Over 11 Papers

In [None]:
# Download URLs and target file names for the 11-paper setup, paired by
# position. This rebinds `urls` and `papers` from the 3-paper section.
urls = [
    "https://openreview.net/pdf?id=VtmBAGCN7o",
    "https://openreview.net/pdf?id=6PmJoRfdaK",
    "https://openreview.net/pdf?id=LzPWWPAdY4",
    "https://openreview.net/pdf?id=VTF8yNQM66",
    "https://openreview.net/pdf?id=hSyW5go0v8",
    "https://openreview.net/pdf?id=9WD9KwssyT",
    "https://openreview.net/pdf?id=yV6fD7LYkF",
    "https://openreview.net/pdf?id=hnrB5YHoYu",
    "https://openreview.net/pdf?id=WbWtOYIzIK",
    "https://openreview.net/pdf?id=c5pwL0Soay",
    "https://openreview.net/pdf?id=TpD2aG1h0D"
]

papers = [
    "metagpt.pdf",
    "longlora.pdf",
    "loftq.pdf",
    "swebench.pdf",
    "selfrag.pdf",
    "zipformer.pdf",
    "values.pdf",
    "finetune_fair_diffusion.pdf",
    "knowledge_card.pdf",
    "metra.pdf",
    "vr_mcl.pdf"
]

In [None]:
# Download all papers
# Each URL is saved under data/ using the file name at the same position in `papers`.
for url, paper in zip(urls, papers):
    !wget "{url}" -O "data/{paper}"

In [None]:
import os
from pathlib import Path

# Build {paper file name: [vector_tool, summary_tool]} for the 11-paper setup.
# Papers that are missing or cannot be processed are reported and skipped.
paper_to_tools_dict = {}

for paper in papers:
    print(f"Getting tools for paper: {paper}")

    # Check if file exists in data directory before processing
    file_path = f"data/{paper}"
    if not os.path.exists(file_path):
        print(f"  ⚠️  Warning: File '{file_path}' does not exist. Skipping...")
        print("  💡 Tip: Make sure you've downloaded all papers first.")
        continue

    try:
        vector_tool, summary_tool = get_doc_tools(file_path, Path(paper).stem)
        paper_to_tools_dict[paper] = [vector_tool, summary_tool]
        print(f"  ✓ Successfully created tools for {paper}")

    except UnicodeEncodeError as e:
        print(f" Unicode error while processing {paper}: {e}")
        try:
            # Recover by sanitising the *extracted* text. Decoding the raw PDF
            # bytes as UTF-8 would only yield binary garbage, so parse the PDF
            # with the regular reader first and clean the resulting text.
            from llama_index.core import SimpleDirectoryReader

            extracted_docs = SimpleDirectoryReader(input_files=[file_path]).load_data()
            raw_text = "\n\n".join(doc.get_content() for doc in extracted_docs)
            # Characters that cannot be encoded (e.g. lone surrogates) become "?".
            safe_text = raw_text.encode("utf-8", errors="replace").decode("utf-8")

            # Save the cleaned text next to the PDF so it can be inspected
            clean_path = Path(file_path).with_name(Path(file_path).stem + "_clean.txt")
            clean_path.write_text(safe_text, encoding="utf-8")
            print(f"Saved cleaned version: {clean_path}")

            # Retry tool creation on the cleaned text file
            vector_tool, summary_tool = get_doc_tools(str(clean_path), Path(paper).stem)
            paper_to_tools_dict[paper] = [vector_tool, summary_tool]
            print(f" Retried successfully for {paper}")

        except Exception as inner_e:
            print(f" Still failed on {paper}: {inner_e}")

    except Exception as e:
        # Best effort: one unreadable paper should not stop the others.
        print(f" Unexpected error for {paper}: {e}")

print(f"\n✓ Successfully processed {len(paper_to_tools_dict)} papers")
print(f"Papers with tools: {list(paper_to_tools_dict.keys())}")


## Extend the Agent with Tool Retrieval

In [None]:
# Flatten the tools of the papers that were actually processed. Iterating the
# dict (instead of indexing it with every entry of `papers`) avoids a KeyError
# for papers the previous cell skipped; insertion order still follows `papers`.
all_tools = [t for paper_tools in paper_to_tools_dict.values() for t in paper_tools]

In [None]:
# Define an "object" index and retriever over these tools
# (the indexed objects are the tools themselves, stored via a VectorStoreIndex).
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex

obj_index = ObjectIndex.from_objects(
    all_tools,
    index_cls=VectorStoreIndex,
)

In [None]:
# Retriever over the tool index; returns the 3 most similar tools per query.
obj_retriever = obj_index.as_retriever(similarity_top_k=3)

In [None]:
# Sanity check: retrieve tools for a sample query and display the metadata of
# the third result (index 2; the retriever is configured for 3 results).
tools = obj_retriever.retrieve(
    "Tell me about the eval dataset used in MetaGPT and SWE-Bench"
)
tools[2].metadata

In [None]:
from llama_index.core.agent.workflow import FunctionAgent

# Give the agent the object retriever directly. `obj_retriever` returns *tools*
# (not text chunks), so it must be passed as `tool_retriever`: for each query
# the agent retrieves the most relevant per-paper tools and may then call them.
# Wrapping it in a RetrieverTool would treat the retrieved tools as document
# nodes, which they are not.
agent = FunctionAgent(
    tool_retriever=obj_retriever,
    llm=llm,
    system_prompt=(
        "You are an agent designed to answer queries over a set of given papers. "
        "Always use the provided tools to answer a question. Do not rely on prior knowledge."
    ),
    verbose=True,
)


In [None]:
import asyncio

# Same two-paper question as in the 3-paper section, now asked of the
# retrieval-based agent.
response = asyncio.run(
    agent.run(
        user_msg="Give me a summary of both Self-RAG and LongLoRA"
    )
)
print(str(response))


In [None]:
import asyncio

# Cross-paper comparison question (MetaGPT vs. SWE-Bench evaluation data).
response = asyncio.run(
    agent.run(
        user_msg=(
            "Tell me about the evaluation dataset used "
            "in MetaGPT and compare it against SWE-Bench."
        )
    )
)
print(str(response))


In [None]:
import asyncio

# Multi-step request: analyse each LoRA paper first, then compare them.
response = asyncio.run(
    agent.run(
        user_msg=(
            "Compare and contrast the LoRA papers (LongLoRA, LoftQ). "
            "Analyze the approach in each paper first."
        )
    )
)
print(str(response))


**End of Notebook**

This complete notebook covers all 4 lessons for building agentic RAG systems with LlamaIndex:


*   Router Engine - Route queries to appropriate tools

*   Tool Calling - Create and use custom function tools
*   Agent Reasoning Loop - Build agents with multi-step reasoning
*   Multi-Document Agent - Scale to multiple documents with tool retrieval