# Query Rewriting RAG using Llama Agents

Let's set up two agents, a query rewriting service and a RAG service, and connect them in a simple constrained flow using our Pipeline Orchestrator.

In [1]:
import nest_asyncio

# Patch asyncio so event loops can be nested.
# NOTE(review): presumably required because the notebook kernel already runs an
# event loop and the launcher used later drives async code — confirm.
nest_asyncio.apply()

## Load Data

In [4]:
!mkdir -p 'data/'
!curl 'https://arxiv.org/pdf/2307.09288.pdf' -o 'data/llama2.pdf'

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   249  100   249    0     0   1970      0 --:--:-- --:--:-- --:--:--  2146


In [2]:
from llama_parse import LlamaParse

# Parse the PDF with LlamaParse, requesting plain-text output.
# NOTE(review): this reads 'data/10k/uber_2021.pdf', while the download cell
# above only fetches 'data/llama2.pdf' — the Uber 10-K must already exist
# locally for this cell to work on a fresh checkout.
parser = LlamaParse(result_type="text")
docs = parser.load_data("data/10k/uber_2021.pdf")

Started parsing the file under job_id cac11eca-3926-4a70-8b1a-91bdb0d3df50


In [3]:
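# Alternative (disabled): parse the Llama 2 paper fetched by the download cell
# instead of the Uber 10-K.
# from llama_parse import LlamaParse

# parser = LlamaParse(result_type="text")
# docs = parser.load_data("data/llama2.pdf")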

In [4]:
import os
from llama_index.core import (
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)

# Reuse the persisted vector index when a "storage" directory is present;
# otherwise build it from the parsed documents and persist it for later runs.
if os.path.exists("storage"):
    # restore the storage context from disk ...
    storage_context = StorageContext.from_defaults(persist_dir="storage")
    # ... and look the index up by the id it was saved under
    index = load_index_from_storage(storage_context, index_id="vector_index")
else:
    index = VectorStoreIndex.from_documents(docs)
    # tag the index with a fixed id so the load branch can find it again
    index.set_index_id("vector_index")
    index.storage_context.persist("./storage")

## Setup Agents

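We define two custom agents:
- a query rewriting agent that uses HyDE to turn the user's question into a hypothetical answer passage
- a RAG agent that retrieves from the vector index and synthesizes an answer for the query string it receives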

In [5]:
# define HyDE query rewriting agent

from llama_index.core.bridge.pydantic import PrivateAttr
from llama_index.core.agent import FnAgentWorker, Task, AgentChatResponse
from llama_index.core import PromptTemplate
from llama_index.core.bridge.pydantic import Field
from llama_index.core.query_pipeline import QueryPipeline
from llama_index.core.llms import LLM
from llama_index.llms.openai import OpenAI
from typing import Any, Dict, Optional, Tuple

# LLM instance placed in the initial state of both the HyDE agent and the RAG agent.
OPENAI_LLM = OpenAI(model="gpt-4o")

# use HyDE to hallucinate answer.
# The prompt asks the LLM to write a passage answering the question; the
# {query_str} placeholder is filled by run_hdye_fn through
# QueryPipeline.run(query_str=...).
HYDE_PROMPT_STR = (
    "Please write a passage to answer the question\n"
    "Try to include as many key details as possible.\n"
    "\n"
    "\n"
    "{query_str}\n"
    "\n"
    "\n"
    'Passage:"""\n'
)
# Template object used as the first stage of the HyDE query pipeline.
HYDE_PROMPT_TMPL = PromptTemplate(HYDE_PROMPT_STR)


def run_hdye_fn(state: Dict[str, Any]) -> Tuple[Dict[str, Any], bool]:
    """Rewrite the task input into a hypothetical answer passage (HyDE).

    Reads ``prompt_tmpl``, ``llm`` and ``__task__`` from ``state``, chains the
    prompt template into the LLM with a ``QueryPipeline``, runs it on the
    task's input, and stores the stringified result in ``state["__output__"]``.

    Args:
        state: Agent state dictionary holding the keys listed above.

    Returns:
        A ``(state, is_done)`` tuple; ``is_done`` is always ``True`` because the
        rewrite is a single step.
    """
    prompt_tmpl = state["prompt_tmpl"]
    llm = state["llm"]
    task_input = state["__task__"].input

    hyde_pipeline = QueryPipeline(chain=[prompt_tmpl, llm])
    passage = hyde_pipeline.run(query_str=task_input)
    state["__output__"] = str(passage)

    # second element tells the agent runner that this step finished the task
    is_done = True
    return state, is_done

In [6]:
# Wrap the HyDE step function in an agent; the initial state supplies the
# "prompt_tmpl" and "llm" entries that run_hdye_fn reads.
hyde_agent = FnAgentWorker(
    fn=run_hdye_fn,
    initial_state={
        "prompt_tmpl": HYDE_PROMPT_TMPL,
        "llm": OPENAI_LLM,
    },
).as_agent()

In [7]:
# define RAG agent
from llama_index.core.query_engine import RetrieverQueryEngine

def run_rag_fn(state: Dict[str, Any]) -> Tuple[Dict[str, Any], bool]:
    """Answer the task input with a retriever-backed query engine.

    Reads ``retriever``, ``llm`` and ``__task__`` from ``state``, builds a
    ``RetrieverQueryEngine`` from them, queries it with the task's input, and
    stores the stringified response in ``state["__output__"]``.

    Args:
        state: Agent state dictionary holding the keys listed above.

    Returns:
        A ``(state, is_done)`` tuple; ``is_done`` is always ``True``.
    """
    retriever = state["retriever"]
    llm = state["llm"]
    question = state["__task__"].input

    # a fresh engine is assembled on every call from the state's retriever and LLM
    engine = RetrieverQueryEngine.from_args(retriever, llm=llm)
    answer = engine.query(question)

    state["__output__"] = str(answer)
    return state, True

# Wrap the RAG step function in an agent; the initial state supplies the
# "retriever" (from the vector index) and "llm" entries that run_rag_fn reads.
rag_agent = FnAgentWorker(
    fn=run_rag_fn,
    initial_state={
        "retriever": index.as_retriever(),
        "llm": OPENAI_LLM,
    },
).as_agent()

## Setup Agent Services

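This includes an `AgentService` for each agent and a `QueryPipeline` that chains the two services, which the `PipelineOrchestrator` then executes.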

In [8]:
from llama_agents.services import AgentService
# from llama_agents.control_plane.fastapi import FastAPIControlPlane
from llama_agents import ControlPlaneServer
from llama_agents.message_queues.simple import SimpleMessageQueue
from llama_agents.orchestrators import PipelineOrchestrator
from llama_agents import ServiceComponent

from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI

# NOTE(review): `llm` is not referenced by the cells visible here (both agents
# are built with OPENAI_LLM); kept in case later cells rely on it.
llm = OpenAI(model="gpt-3.5-turbo")
# a single queue instance is shared by both services and the control plane
message_queue = SimpleMessageQueue()

## Define Agent Services
query_rewrite_server = AgentService(
    agent=hyde_agent,
    message_queue=message_queue,
    description="Used to rewrite queries",
    service_name="query_rewrite_agent",
)
query_rewrite_server_c = ServiceComponent.from_service_definition(query_rewrite_server)

# Give the RAG service an explicit name, like the rewrite service above.
# Previously only `description="rag_agent"` was set, so the service was
# registered under the default name (the launch log shows "default_agent").
rag_agent_server = AgentService(
    agent=rag_agent,
    message_queue=message_queue,
    description="Used to answer queries with retrieval-augmented generation",
    service_name="rag_agent",
)
rag_agent_server_c = ServiceComponent.from_service_definition(rag_agent_server)

# create our multi-agent framework components
# The pipeline fixes the flow: query rewrite service first, then the RAG service.
pipeline = QueryPipeline(chain=[query_rewrite_server_c, rag_agent_server_c])
pipeline_orchestrator = PipelineOrchestrator(pipeline)
control_plane = ControlPlaneServer(
    message_queue=message_queue,
    orchestrator=pipeline_orchestrator,
)

## Launch agent 

In [9]:
from llama_agents.launchers.local import LocalLauncher

## Define Launcher
# Bundle the two agent services with the control plane and the shared message
# queue so they can be launched together locally.
launcher = LocalLauncher(
    [query_rewrite_server, rag_agent_server], control_plane, message_queue
)

In [10]:
# Alternative query kept for reference:
# query_str = "What was Lyft's revenue growth in 2021?"
# gets stuck in a loop, should mostly be called once
# NOTE(review): it is not clear from this cell which query or agent the remark
# above refers to — confirm before relying on it.
query_str = "What are the risk factors for Uber?"
# Send the query through the pipeline (query rewrite service, then RAG service);
# as the last expression of the cell, the result is displayed as its output.
launcher.launch_single(query_str)

INFO:llama_agents.message_queues.simple:Consumer d0b8a231-a35a-4262-be9b-03146f56e417 has been registered.
INFO:llama_agents.message_queues.simple:Consumer f2a5028e-fab5-4b84-9b29-54e58fe6fb04 has been registered.
INFO:llama_agents.message_queues.simple:Consumer 985a242b-6808-4378-a57d-6a7daf43cbb9 has been registered.
INFO:llama_agents.message_queues.simple:Consumer 424c7f3a-48fd-45ff-9d88-d65fc8d8ecba has been registered.
INFO:llama_agents.message_queues.base:Publishing message: id_='90bdcae0-cf2f-4dfd-842d-92ee1ffaf787' publisher_id='LocalLauncher-da4f49eb-9f6c-41ec-9e83-529b73dbbb66' data={'input': 'What are the risk factors for Uber?', 'task_id': '7c893df7-6cd9-416d-bcf3-f5bdb3a0dff9', 'state': {}, 'agent_id': None} action=<ActionTypes.NEW_TASK: 'new_task'> stats=QueueMessageStats(publish_time=None, process_start_time=None, process_end_time=None) type='control_plane'
INFO:llama_agents.services.agent:query_rewrite_agent launch_local
INFO:llama_agents.services.agent:default_agent la

"Uber, as a global ride-sharing and transportation network company, faces a multitude of risk factors that could impact its operations, financial performance, and overall business sustainability. These risk factors can be broadly categorized into regulatory, competitive, operational, financial, and reputational risks.\n\n**Regulatory Risks:** Uber operates in a highly regulated industry, and changes in laws and regulations can significantly affect its business model. Different countries and cities have varying requirements for ride-sharing services, including licensing, insurance, and safety standards. Compliance with these regulations can be costly and complex. Additionally, there is always the risk of new regulations being introduced that could limit Uber's ability to operate or increase its operational costs. Legal battles over the classification of drivers as independent contractors versus employees also pose a significant risk, as reclassification could lead to increased labor cos