In [1]:
# Install necessary packages
# NOTE(review): versions are unpinned, so a fresh install may resolve to
# different releases than the ones this notebook was last run with.
%pip install kitchenai-whisk llama-index openai

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Import required libraries

from whisk.kitchenai_sdk.kitchenai import KitchenAIApp

from whisk.kitchenai_sdk.schema import (
    ChatInput, 
    ChatResponse,
)
# Application object on which the chat handlers in this notebook are registered
# (via @kitchen.chat.handler) and which is later passed to WhiskRouter.
kitchen = KitchenAIApp(namespace="react-agent-with-query-engine")

from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.tools import QueryEngineTool, ToolMetadata
import openai


In [3]:
import os


# Optional: uncomment the two lines below to type in the OpenAI API key and
# export it as OPENAI_API_KEY for this kernel.
# NOTE(review): input() shows what is typed; getpass.getpass() would be a safer
# way to read a secret in a notebook.
# api_key = input("Please enter your OpenAI API key: ")
# os.environ["OPENAI_API_KEY"] = api_key



# ReAct Agent with Query Engine (RAG) Tools

In this section, we show how to set up an agent powered by the ReAct loop for financial analysis.

The agent has access to two "tools": one to query the 2021 Lyft 10-K and the other to query the 2021 Uber 10-K.

We try two different LLMs:

- gpt-3.5-turbo
- gpt-3.5-turbo-instruct

Note that you can plug in any LLM that exposes a text completion endpoint.

## Build Query Engine Tools

In [4]:
# Presumably provides the `llama_index.llms.openai` module imported further
# down in this notebook (TODO confirm); version is unpinned.
%pip install llama-index-llms-openai

Note: you may need to restart the kernel to use updated packages.


In [5]:
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)

from llama_index.core.tools import QueryEngineTool, ToolMetadata

In [6]:
# Try to reuse the vector indexes persisted under ./storage by an earlier run.
# `index_loaded` records whether both loads succeeded.
try:
    storage_context = StorageContext.from_defaults(
        persist_dir="./storage/lyft"
    )
    lyft_index = load_index_from_storage(storage_context)

    storage_context = StorageContext.from_defaults(
        persist_dir="./storage/uber"
    )
    uber_index = load_index_from_storage(storage_context)

    index_loaded = True
except Exception:
    # Best-effort load: any ordinary failure (e.g. nothing persisted yet) only
    # means the indexes are unavailable. Catching Exception instead of using a
    # bare `except:` lets KeyboardInterrupt and SystemExit propagate.
    index_loaded = False

Download Data

In [7]:
# Create the data directory and download the Uber and Lyft 2021 10-K PDFs
# from the llama_index repository into it.
# NOTE(review): these commands run on every execution of the cell, even when
# the indexes were already loaded from ./storage.
!mkdir -p 'data/10k/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/uber_2021.pdf' -O 'data/10k/uber_2021.pdf'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/lyft_2021.pdf' -O 'data/10k/lyft_2021.pdf'

--2025-02-19 15:55:47--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/uber_2021.pdf
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8001::154, 2606:50c0:8000::154, 2606:50c0:8002::154, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8001::154|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1880483 (1.8M) [application/octet-stream]
Saving to: ‘data/10k/uber_2021.pdf’


2025-02-19 15:55:48 (22.1 MB/s) - ‘data/10k/uber_2021.pdf’ saved [1880483/1880483]

--2025-02-19 15:55:48--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/lyft_2021.pdf
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8000::154, 2606:50c0:8003::154, 2606:50c0:8002::154, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8000::154|:443... connected.
HTTP request sent, awaiting response... 200

In [8]:
if not index_loaded:
    # Read each company's 10-K PDF into documents.
    lyft_reader = SimpleDirectoryReader(input_files=["./data/10k/lyft_2021.pdf"])
    lyft_docs = lyft_reader.load_data()
    uber_reader = SimpleDirectoryReader(input_files=["./data/10k/uber_2021.pdf"])
    uber_docs = uber_reader.load_data()

    # One vector index per company.
    lyft_index = VectorStoreIndex.from_documents(lyft_docs)
    uber_index = VectorStoreIndex.from_documents(uber_docs)

    # Write both indexes to the directories the loading cell reads from.
    for company_index, target_dir in (
        (lyft_index, "./storage/lyft"),
        (uber_index, "./storage/uber"),
    ):
        company_index.storage_context.persist(persist_dir=target_dir)

In [9]:
# Wrap each index in a query engine (both configured with similarity_top_k=3);
# these engines back the two agent tools defined in the next cell.
lyft_engine = lyft_index.as_query_engine(similarity_top_k=3)
uber_engine = uber_index.as_query_engine(similarity_top_k=3)

In [10]:
# Tool metadata: the name and description given to each query engine tool.
lyft_tool_metadata = ToolMetadata(
    name="lyft_10k",
    description=(
        "Provides information about Lyft financials for year 2021. "
        "Use a detailed plain text question as input to the tool."
    ),
)
uber_tool_metadata = ToolMetadata(
    name="uber_10k",
    description=(
        "Provides information about Uber financials for year 2021. "
        "Use a detailed plain text question as input to the tool."
    ),
)

# The two tools handed to the agents: Lyft first, then Uber.
query_engine_tools = [
    QueryEngineTool(query_engine=lyft_engine, metadata=lyft_tool_metadata),
    QueryEngineTool(query_engine=uber_engine, metadata=uber_tool_metadata),
]

## Setup ReAct Agent

Here we set up two ReAct agents: one powered by standard gpt-3.5-turbo, and the other powered by gpt-3.5-turbo-instruct.

You can **optionally** specify context which will be added to the core ReAct system prompt.

In [11]:
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI

In [12]:
# [Optional] Add Context
# context = """\
# You are a stock market sorcerer who is an expert on the companies Lyft and Uber.\
#     You will answer questions about Uber and Lyft as in the persona of a sorcerer \
#     and veteran stock market investor.
# """
llm = OpenAI(model="gpt-3.5-turbo")

# Keyword arguments shared by both agents; uncomment "context" to pass the
# optional persona defined above.
react_agent_options = {
    "llm": llm,
    "verbose": True,
    # "context": context,
}

# Agent used directly from the notebook cells.
agent = ReActAgent.from_tools(query_engine_tools, **react_agent_options)

# Separate agent instance awaited by the "regular-agent" chat handler.
agent_whisk = ReActAgent.from_tools(query_engine_tools, **react_agent_options)

In [13]:
# Single-tool question for the gpt-3.5-turbo agent.
lyft_growth_question = "What was Lyft's revenue growth in 2021?"
response = agent.chat(lyft_growth_question)
print(response)

> Running step 677b59c5-38a7-4490-a2a7-c162b0be6bc2. Step input: What was Lyft's revenue growth in 2021?
[1;3;38;5;200mThought: The user is asking about Lyft's revenue growth in 2021. I should use a tool to find this information.
Action: lyft_10k
Action Input: {'input': "What was Lyft's revenue growth in 2021?"}
[0m[1;3;34mObservation: Lyft's revenue growth in 2021 was 36%.
[0m> Running step df630582-0749-41ab-9977-fe392ae3434f. Step input: None
[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer
Answer: Lyft's revenue growth in 2021 was 36%.
[0mLyft's revenue growth in 2021 was 36%.


In [14]:
# Chat handler registered as "regular-agent": forwards the latest message to the ReAct agent
@kitchen.chat.handler("regular-agent")
async def query_financial_documents(chat: ChatInput) -> ChatResponse:
    """Answer the most recent chat message with the gpt-3.5-turbo ReAct agent.

    Args:
        chat: Incoming chat payload; only the content of the last entry in
            ``chat.messages`` is used.

    Returns:
        A ChatResponse whose content is the agent's reply converted to a string.
    """
    latest_message = chat.messages[-1]
    agent_reply = await agent_whisk.achat(latest_message.content)
    return ChatResponse(content=str(agent_reply))

In [None]:
def is_not_hotdog():
    """Randomly decide whether something is NOT a hotdog.

    Draws an integer between 1 and 100 and reports whether it is even.

    Returns:
        bool: True (even draw) for "not a hotdog", False (odd draw) for "hotdog".
    """
    # Imported locally: the notebook only imports `random` in a later cell, so
    # relying on the module-level name raises NameError when cells run in order.
    import random

    num = random.randint(1, 100)  # Select a random number
    return num % 2 == 0  # Even numbers mean it's NOT a hotdog

In [17]:
import random
import time

# Define a joke chat handler that "classifies" the request as hotdog / not hotdog
@kitchen.chat.handler("not-a-hot-dog")
async def query_financial_documents(chat: ChatInput) -> ChatResponse:
    """Pretend to use an advanced AI model to determine if something is NOT a hotdog.

    The content of ``chat`` is not inspected: the verdict is a coin flip on a
    random integer (even -> "Not a hotdog", odd -> "Hotdog").

    NOTE(review): this function shares its name with the "regular-agent"
    handler defined earlier in the notebook, so the module-level name is
    rebound here. The name is kept unchanged to avoid altering the interface.

    Args:
        chat: Incoming chat payload (unused).

    Returns:
        A ChatResponse carrying the fake verdict and a random confidence figure.
    """
    # Imported locally so this cell does not rely on any other cell for it.
    import asyncio

    # Fake AI loading effect
    loading_messages = [
        "Initializing deep neural quantum probability matrix...",
        "Running GPT-∞ transformer over 10 billion hotdog embeddings...",
        "Applying Bayesian sausage theorem...",
        "Consulting the Ancient Scrolls of Hotdog Recognition...",
        "Summoning a convolutional bun classifier..."
    ]

    for msg in loading_messages:
        print(msg)
        # Simulate AI thinking without blocking the event loop: a plain
        # time.sleep() inside a coroutine would stall every other request the
        # server is handling for the duration of the pause.
        await asyncio.sleep(random.uniform(0.5, 1.5))

    # Make a completely dumb decision
    num = random.randint(1, 100)
    # Named `not_hotdog` so it does not shadow the is_not_hotdog() helper.
    not_hotdog = num % 2 == 0

    # Generate a fake AI response
    if not_hotdog:
        response = (
            f"🤖 Beep boop. After extensive analysis using a hyperdimensional sausage detection model, "
            f"my calculations indicate: **Not a hotdog!** (Confidence: {random.uniform(90, 99.9):.2f}%)"
        )
    else:
        response = (
            f"🌭 ALERT! My deep learning system has classified this as a **Hotdog!** "
            f"(Confidence: {random.uniform(50, 89.9):.2f}%)"
        )

    # Return response in chat format
    return ChatResponse(content=response)


In [None]:
# Launch Whisk server
from whisk.config import WhiskConfig, ServerConfig
from whisk.router import WhiskRouter

# Server configuration of type "fastapi"; the router is built around the
# `kitchen` app on which the chat handlers above were registered.
config = WhiskConfig(server=ServerConfig(type="fastapi"))
router = WhiskRouter(kitchen_app=kitchen, config=config)

# Run the Whisk server in the notebook
# NOTE(review): host "0.0.0.0" normally means "listen on all interfaces" —
# confirm that exposure is intended; "127.0.0.1" would keep it local.
router.run_in_notebook(host="0.0.0.0", port=8000)

Shutting down existing Whisk server...
Whisk server stopped.
Whisk server started on http://0.0.0.0:8000 (in background)


INFO:     Started server process [3805965]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:42026 - "OPTIONS /v1/models HTTP/1.1" 200 OK
INFO:     127.0.0.1:42038 - "GET /v1/models HTTP/1.1" 200 OK
INFO:     127.0.0.1:34026 - "OPTIONS /v1/chat/completions HTTP/1.1" 200 OK
INFO:     127.0.0.1:34028 - "POST /v1/chat/completions HTTP/1.1" 422 Unprocessable Entity


In [18]:
# Stop the Whisk server started by router.run_in_notebook() in the previous cell.
router.stop_in_notebook()

INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [3533840]


Shutting down existing Whisk server...
Whisk server stopped.


## Run Some Example Queries

We run some example queries using the agent, showcasing some of the agent's abilities to do chain-of-thought reasoning and tool use to synthesize the right answer.

We also show queries.

In [None]:
# Question covering both companies, sent to the gpt-3.5-turbo agent.
growth_comparison_question = (
    "Compare and contrast the revenue growth of Uber and Lyft in 2021, then"
    " give an analysis"
)
response = agent.chat(growth_comparison_question)
print(response)

**Async execution**: Here we try another query with async execution

In [None]:
# Try another query with async execution

import nest_asyncio

# Presumably needed so event-loop use can nest inside the loop the notebook
# kernel is already running (TODO confirm it is still required).
nest_asyncio.apply()

# Top-level `await`: relies on the notebook kernel allowing await in a cell.
response = await agent.achat(
    "Compare and contrast the risks of Uber and Lyft in 2021, then give an"
    " analysis"
)
print(str(response))

### Compare gpt-3.5-turbo vs. gpt-3.5-turbo-instruct 

We compare the performance of the two agents in being able to answer some complex queries.

#### Taking a look at a turbo-instruct agent

In [23]:
llm_instruct = OpenAI(model="gpt-3.5-turbo-instruct")

# Two independent agents over the same tools and LLM: `agent_instruct` is used
# from notebook cells, `agent_instruct_whisk` by the "instruct-agent" handler.
agent_instruct, agent_instruct_whisk = (
    ReActAgent.from_tools(query_engine_tools, llm=llm_instruct, verbose=True)
    for _ in range(2)
)

In [None]:
# Same single-tool question as before, now for the turbo-instruct agent.
lyft_growth_question = "What was Lyft's revenue growth in 2021?"
response = agent_instruct.chat(lyft_growth_question)
print(response)

#### Try more complex queries

We compare gpt-3.5-turbo with gpt-3.5-turbo-instruct agents on more complex queries.

In [None]:
# gpt-3.5-turbo agent: revenue-growth comparison across both companies.
growth_comparison_question = (
    "Compare and contrast the revenue growth of Uber and Lyft in 2021, then"
    " give an analysis"
)
response = agent.chat(growth_comparison_question)
print(response)

In [None]:
# turbo-instruct agent: the same revenue-growth comparison.
growth_comparison_question = (
    "Compare and contrast the revenue growth of Uber and Lyft in 2021, then"
    " give an analysis"
)
response = agent_instruct.chat(growth_comparison_question)
print(response)

In [None]:
# gpt-3.5-turbo agent: follow-up that refers back to the previous comparison.
risk_follow_up_question = (
    "Can you tell me about the risk factors of the company with the higher"
    " revenue?"
)
response = agent.chat(risk_follow_up_question)
print(response)

In [None]:
# turbo-instruct agent: the same follow-up question as the gpt-3.5-turbo cell.
# Uses .chat() like every other comparison cell: the question ("the company
# with the higher revenue") refers back to the previous turn, so it has to go
# through the conversational entry point for the two agents to be compared on
# equal terms.
# NOTE(review): .query() is understood to run without the prior chat history —
# confirm against the installed llama-index version.
response = agent_instruct.chat(
    "Can you tell me about the risk factors of the company with the higher"
    " revenue?"
)
print(str(response))

**Observation**: The turbo-instruct agent seems to do worse on agent reasoning compared to the regular turbo model. Of course, this is subject to further observation!

In [24]:
# Chat handler registered as "instruct-agent": forwards the latest message to the turbo-instruct ReAct agent
@kitchen.chat.handler("instruct-agent")
async def query_financial_documents(chat: ChatInput) -> ChatResponse:
    """Answer the most recent chat message with the gpt-3.5-turbo-instruct ReAct agent.

    Args:
        chat: Incoming chat payload; only the content of the last entry in
            ``chat.messages`` is used.

    Returns:
        A ChatResponse whose content is the agent's reply converted to a string.
    """
    latest_message = chat.messages[-1]
    agent_reply = await agent_instruct_whisk.achat(latest_message.content)
    return ChatResponse(content=str(agent_reply))


In [None]:
# Launch Whisk server
# This repeats the earlier launch cell and rebinds `config` and `router`; it
# comes after the "instruct-agent" handler has been registered on `kitchen`.
from whisk.config import WhiskConfig, ServerConfig
from whisk.router import WhiskRouter

config = WhiskConfig(server=ServerConfig(type="fastapi"))
router = WhiskRouter(kitchen_app=kitchen, config=config)

# Run the Whisk server in the notebook
# NOTE(review): host "0.0.0.0" normally means "listen on all interfaces" —
# confirm that exposure is intended; "127.0.0.1" would keep it local.
router.run_in_notebook(host="0.0.0.0", port=8000)


In [None]:
# Stop the Whisk server started by the launch cell directly above.
router.stop_in_notebook()