In [1]:
import os
from dotenv import load_dotenv
load_dotenv(".env")

True

In [2]:
# Azure OpenAI connection settings, read from the process environment
# (the .env file is loaded in the first cell). Each value is None when unset.
AZURE_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
AZURE_API_KEY = os.environ.get("AZURE_OPENAI_KEY")
AZURE_API_VERSION = os.environ.get("AZURE_OPENAI_VERSION")
AZURE_DEPLOYMENT_NAME = os.environ.get("AZURE_OPENAI_NAME")
AZURE_MODEL_NAME = os.environ.get("AZURE_OPENAI_MODEL")

# LLAMA-INDEX
- https://docs.llamaindex.ai/en/stable/module_guides/deploying/agents/

In [3]:
import tiktoken
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler, LlamaDebugHandler
from llama_index.core import Settings
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.core.llms import ChatMessage

In [4]:
# Debug handler; print_trace_on_end=True is what produces the "Trace: ..." blocks in the cell outputs.
llama_debug = LlamaDebugHandler(print_trace_on_end=True)

# Token accounting handler. It tokenises with the tiktoken encoding looked up
# for AZURE_MODEL_NAME, so that variable must be set before this cell runs.
token_counter = TokenCountingHandler(
    tokenizer=tiktoken.encoding_for_model(AZURE_MODEL_NAME).encode
)
# Alternative (disabled): register the counter globally instead of per-LLM.
# Settings.callback_manager = CallbackManager([token_counter])

def get_token_info(reset=False):
    """Print the token usage accumulated by the shared ``token_counter``.

    Args:
        reset: When truthy, call ``token_counter.reset_counts()`` after
            printing so later calls only report newer usage.
    """
    usage = (
        ("Embedding Tokens: ", token_counter.total_embedding_token_count),
        ("LLM Prompt Tokens: ", token_counter.prompt_llm_token_count),
        ("LLM Completion Tokens: ", token_counter.completion_llm_token_count),
        ("Total LLM Token Count: ", token_counter.total_llm_token_count),
    )
    pieces = []
    for label, count in usage:
        pieces.extend((label, count, "\n"))
    # print() joins its arguments with single spaces, which is why every
    # line after the first starts with a space in the rendered output.
    print(*pieces)
    if reset:
        token_counter.reset_counts()
# event_pairs = llama_debug.get_llm_inputs_outputs()
# print(event_pairs[0][0])
# print(event_pairs[0][1].payload.keys())
# print(event_pairs[0][1].payload["response"])

In [5]:
# Azure OpenAI client used by the agents defined further down.
llm = AzureOpenAI(
    # Where to connect and how to authenticate.
    azure_endpoint=AZURE_ENDPOINT,
    api_key=AZURE_API_KEY,
    api_version=AZURE_API_VERSION,
    # Which deployment / model to call.
    engine=AZURE_DEPLOYMENT_NAME,
    model=AZURE_MODEL_NAME,
    temperature=0.0,
    # Attach both handlers to this client so token counts and debug events are collected.
    callback_manager=CallbackManager([token_counter, llama_debug]),
)

## Simple LLM

In [7]:
# Plain completion call: send a text prefix and print whatever the model returns.
response = llm.complete("The sky is a beautiful blue and")
print(response)

the gentle breeze carries the scent of blooming flowers. Fluffy white clouds drift lazily across the expanse, painting soft shapes against the vibrant backdrop. Birds chirp melodiously, their songs harmonizing with the rustling leaves. It's one of those days that feels like a gift, where the world seems to pause and invite you to simply breathe it all in.


In [9]:
# Chat-style call: a system message sets the persona, followed by one user turn.
messages = [
    ChatMessage(
        role="system", content="You are a pirate with colorful personality."
    ),
    ChatMessage(role="user", content="Hello"),
]

response = llm.chat(messages)
print(response)

assistant: Ahoy there, matey! 🏴‍☠️ What brings ye to this fine corner o' the digital seas? Are ye here to plunder some knowledge or spin a yarn with ol' Captain Chatbeard? Speak up, or I'll have ye swabbing the virtual deck! Arrr!


In [10]:
# Show the attributes of the chat response object (the output shows 'message' and 'raw').
vars(response)

{'message': ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text="Ahoy there, matey! 🏴\u200d☠️ What brings ye to this fine corner o' the digital seas? Are ye here to plunder some knowledge or spin a yarn with ol' Captain Chatbeard? Speak up, or I'll have ye swabbing the virtual deck! Arrr!")]),
 'raw': ChatCompletion(id='chatcmpl-BbU3UptRkxFGgKeJ15QAysPKzXQzh', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Ahoy there, matey! 🏴\u200d☠️ What brings ye to this fine corner o' the digital seas? Are ye here to plunder some knowledge or spin a yarn with ol' Captain Chatbeard? Speak up, or I'll have ye swabbing the virtual deck! Arrr!", refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None), content_filter_results={'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filte

## Tools
- https://docs.llamaindex.ai/en/stable/module_guides/deploying/agents/tools/

In [6]:
import asyncio
from llama_index.core.agent.workflow import FunctionAgent, ReActAgent
from llama_index.core.tools import FunctionTool
from typing import Annotated
# from llama_index.llms.openai import OpenAI

In [7]:
# Functions
def multiply(a: float, b: float) -> float:
    """Useful for multiplying two numbers."""
    # Docstring kept verbatim: this function is handed to an agent as a bare tool below.
    product = a * b
    return product
def divide(a: float, b: float) -> float:
    """Useful for dividing two numbers."""
    # True division; a zero divisor propagates ZeroDivisionError to the caller.
    ratio = a / b
    return ratio
def divide_quotient(a: float, b: float) -> float:
    """Useful for dividing two numbers. Get only the quotient from division."""
    # Floor division: rounds toward negative infinity, so -7 // 2 is -4.
    floored = a // b
    return floored


def upper_case_each_word(text: str):
    """Capitalize every whitespace-separated word of ``text``.

    Each word is passed through ``str.capitalize`` (first character upper-cased,
    the rest lower-cased), and the words are re-joined with single spaces, so
    runs of whitespace in the input collapse.
    """
    return " ".join(token.capitalize() for token in text.split())

# Lookup table consulted by get_alias.
alias_dict = {"test": 1, "test2": 2}


def get_alias(key:str):
    """Return the value stored for ``key`` in ``alias_dict``.

    Falls back to the string "Cannot be found!" when the key is absent.
    """
    if key in alias_dict:
        return alias_dict[key]
    return "Cannot be found!"

In [None]:
# AGENT

async def agent_run(agent, query):
    """Await ``agent.run(query)`` and return whatever it resolves to."""
    outcome = await agent.run(query)
    return outcome

# Minimal function-calling agent whose only tool is `multiply`.
agent1 = FunctionAgent(
    llm=llm,
    system_prompt="You are a helpful assistant that can multiply two numbers.",
    tools=[multiply],
)

In [102]:
# agent1 only has the multiply tool, but this question is a division.
response = await agent1.run("hi? what is 2 / 3")

**********
Trace: chat
    |_CBEventType.LLM -> 0.0 seconds
**********


In [114]:
# Print the accumulated token usage, then reset the counters (reset=True).
get_token_info(True)

Embedding Tokens:  0 
 LLM Prompt Tokens:  0 
 LLM Completion Tokens:  0 
 Total LLM Token Count:  0 



In [103]:
# Show the attributes of the agent response (the output includes 'response', 'tool_calls' and 'raw').
vars(response)

{'response': ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='The result of \\( \\frac{2}{3} \\) is approximately **0.6667**.')]),
 'tool_calls': [],
 'raw': {'id': 'chatcmpl-BbmrUVZu1eMsmvJdXcBnKO7UgmrFM',
  'choices': [{'delta': {'content': None,
     'function_call': None,
     'refusal': None,
     'role': None,
     'tool_calls': None},
    'finish_reason': 'stop',
    'index': 0,
    'logprobs': None,
    'content_filter_results': {}}],
  'created': 1748345660,
  'model': 'gpt-4o-2024-11-20',
  'object': 'chat.completion.chunk',
  'service_tier': None,
  'system_fingerprint': 'fp_ee1d74bde0',
  'usage': None},
 'current_agent_name': 'Agent'}

## Debug

In [94]:
# Fetch the LLM events recorded by the debug handler; the next cell indexes each entry as [0] and [1].
event_pairs = llama_debug.get_llm_inputs_outputs()

In [95]:
# Dump the first two recorded LLM event pairs: for both events of a pair print
# the first two input messages, then the response carried by the second event.
for pair_index in (0, 1):
    if pair_index:
        print()  # blank separator between the two pairs
    for position in (0, 1):
        payload = event_pairs[pair_index][position].payload
        for message_index in (0, 1):
            message = payload["messages"][message_index]
            print(message.role, ":", message.blocks[0].text)
    reply = event_pairs[pair_index][1].payload["response"].message
    print(reply.role, ":", reply.blocks[0].text)
    # Tool calls requested by the model, if any, show up in additional_kwargs.
    print(reply.additional_kwargs)

# event_pairs

MessageRole.SYSTEM : You are a helpful assistant that can multiply two numbers.
MessageRole.USER : hi, What is 1 * 3?
MessageRole.SYSTEM : You are a helpful assistant that can multiply two numbers.
MessageRole.USER : hi, What is 1 * 3?
MessageRole.ASSISTANT : 
{'tool_calls': [ChoiceDeltaToolCall(index=0, id='call_3q98qz8PRe1I3Nj9eq5y6jpg', function=ChoiceDeltaToolCallFunction(arguments='{"a":1,"b":3}', name='multiply'), type='function')]}

MessageRole.SYSTEM : You are a helpful assistant that can multiply two numbers.
MessageRole.USER : hi, What is 1 * 3?
MessageRole.SYSTEM : You are a helpful assistant that can multiply two numbers.
MessageRole.USER : hi, What is 1 * 3?
MessageRole.ASSISTANT : The result of 1 * 3 is 3.
{}


In [31]:
# Print the accumulated token usage without resetting the counters.
get_token_info()

Embedding Tokens:  0 
 LLM Prompt Tokens:  78 
 LLM Completion Tokens:  17 
 Total LLM Token Count:  95 



In [11]:
# Show the attributes of the agent response; here 'tool_calls' lists the multiply call that was made.
vars(response)

{'response': ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='1 * 3 equals 3.')]),
 'tool_calls': [ToolCallResult(tool_name='multiply', tool_kwargs={'a': 1, 'b': 3}, tool_id='call_yZB4RElhc48iN6jE8urrK11s', tool_output=ToolOutput(content='3', tool_name='multiply', raw_input={'args': (), 'kwargs': {'a': 1, 'b': 3}}, raw_output=3, is_error=False), return_direct=False)],
 'raw': {'id': 'chatcmpl-BblrlpXc1wvRlXGno8H6gVdangLn8',
  'choices': [{'delta': {'content': None,
     'function_call': None,
     'refusal': None,
     'role': None,
     'tool_calls': None},
    'finish_reason': 'stop',
    'index': 0,
    'logprobs': None,
    'content_filter_results': {}}],
  'created': 1748341833,
  'model': 'gpt-4o-2024-11-20',
  'object': 'chat.completion.chunk',
  'service_tier': None,
  'system_fingerprint': 'fp_ee1d74bde0',
  'usage': None},
 'current_agent_name': 'Agent'}

## Handover Agent Example

In [7]:
from llama_index.core.agent.workflow import FunctionAgent, ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.core.agent.workflow import (
    AgentInput,
    AgentOutput,
    ToolCall,
    ToolCallResult,
    AgentStream,
    AgentWorkflow
)
from typing import Annotated
from llama_index.core.memory import Memory
from llama_index.core.workflow import (
    Context, 
    JsonPickleSerializer, 
    JsonSerializer, 
    InputRequiredEvent,
    HumanResponseEvent
)

In [19]:

async def record_historical_input(ctx: Context, historical_input: str, agent_name:str) -> str:
    """Append a prompt to the per-agent history kept in the workflow state.

    Args:
        ctx: Workflow context; the state dict is read from and written back
            under the "state" key.
        historical_input: Prompt text to record.
        agent_name: Name of the agent the prompt belongs to; used as the key
            of that agent's history list inside the state dict.

    Returns:
        The fixed confirmation string "history recorded.".
    """
    current_state = await ctx.get("state")
    # Key by the supplied agent name (not a fixed literal) so each agent
    # gets its own list; create the list on first use.
    current_state.setdefault(agent_name, []).append(historical_input)
    await ctx.set("state", current_state)
    return "history recorded."
# Expose record_historical_input to the agents under the tool name "record_input".
tool = FunctionTool.from_defaults(
    record_historical_input,
    name="record_input",
    description="Record inputs received by LLMs",
    return_direct=False,
)

# Worker agent: subtracts 2 and hands control back to startAgent.
minus_agent = FunctionAgent(
    name="minusAgent",
    description="Useful to do minus operation to given input",
    # Adjacent string literals are concatenated verbatim, so every sentence
    # except the last needs its own trailing space.
    system_prompt=(
        "You are the calculator Agent that minus the given number by 2. "
        "Record any prompt received from other agents using the tool provided. "
        "After calculation, handover it back the output calculation to the startAgent."
    ),
    llm=llm,
    tools=[tool],
    can_handoff_to=["startAgent"],
)

# Worker agent: multiplies by 2 and hands control back to startAgent.
# The "mutiplier" spelling is kept because the agent name is referenced
# verbatim by start_agent's can_handoff_to list and by the user query.
mutiplier_agent = FunctionAgent(
    name="mutiplierAgent",
    description="Useful to do mutiplier operation to given input",
    # Adjacent string literals are concatenated verbatim, so every sentence
    # except the last needs its own trailing space.
    system_prompt=(
        "You are the calculator Agent that mutiple the given number by 2. "
        "Record any prompt received from other agents using the tool provided. "
        "After calculation, handover it back to the output calculation startAgent."
    ),
    llm=llm,
    tools=[tool],
    can_handoff_to=["startAgent"],
)

# Orchestrator: a ReAct agent that routes the query to the worker agents.
start_agent = ReActAgent(
    name="startAgent",
    description="Given a query, identify which agent best fit the query and handover to the agent.",
    # Adjacent string literals are concatenated verbatim, so every sentence
    # except the last needs its own trailing separator.
    system_prompt=(
        "You are the Orchestrator Agent that identify which agent best fit the query and handover to the agent. If no agent is needed, return 'Unable to fulfil query with any given agent'. "
        "Your sole and only responsibility is to handover to agent which can satisfy the query (recontruct the prompt if needed). Handover to each agent one at a time and wait for the result before checking if the query is satisfied. "
        "You should continue handover to the other agent and pass the input until the query is satisfied. "
        "Record any prompt received from other agents using the tool provided. "
        "Only stop the agent once the query is satisfied. Return the handover stacks to show which agent has been used."
    ),
    llm=llm,
    tools=[tool],
    can_handoff_to=["minusAgent", "mutiplierAgent"],
)

# Wire the three agents into one workflow; startAgent is the entry point.
agent_workflow = AgentWorkflow(
    agents=[start_agent, mutiplier_agent, minus_agent],
    root_agent=start_agent.name,
    initial_state={},  # the record_input tool adds its keys at run time
)
# Context bound to the workflow (the run cell below currently leaves it commented out).
ctx = Context(agent_workflow)

In [20]:
# Start the multi-agent workflow; the returned handler is used below to stream events.
handler = agent_workflow.run(
    user_msg="""
    Given initial number: 5. Run mutiplierAgent 5 times, followed by minusAgent 3 times and then mutiplierAgent 2 times. What is the final output?
""", #ctx=ctx
)

# Name of the agent whose banner was printed last; None until the first event arrives.
current_agent = None
current_tool_calls = ""
async for event in handler.stream_events():
    # Branch order matters: an event that changes the active agent only prints
    # the banner and is not examined by the elif branches below.
    if (
        hasattr(event, "current_agent_name")
        and event.current_agent_name != current_agent
    ):
        current_agent = event.current_agent_name
        print(f"\n{'='*50}")
        print(f"🤖 Agent: {current_agent}")
        print(f"{'='*50}\n")
    elif isinstance(event, AgentOutput):
        # Text produced by the agent plus the tools it decided to call.
        if event.response.content:
            print("📤 Output:", event.response.content)
        if event.tool_calls:
            print(
                "🛠️  Planning to use tools:",
                [call.tool_name for call in event.tool_calls],
            )
    elif isinstance(event, ToolCallResult):
        # A tool finished: show its name, arguments and output.
        print(f"🔧 Tool Result ({event.tool_name}):")
        print(f"  Arguments: {event.tool_kwargs}")
        print(f"  Output: {event.tool_output}")
    elif isinstance(event, ToolCall):
        # A tool is about to be invoked.
        print(f"🔨 Calling Tool: {event.tool_name}")
        print(f"  With arguments: {event.tool_kwargs}")

**********
Trace: chat
    |_CBEventType.LLM -> 0.0 seconds
**********

🤖 Agent: startAgent

📤 Output: Thought: The current language of the user is English. I need to sequentially use the mutiplierAgent and minusAgent as per the instructions to calculate the final output. First, I will run the mutiplierAgent 5 times starting with the initial number 5.

Action: handoff
Action Input: {"to_agent": "mutiplierAgent", "reason": "Run mutiplierAgent for the first time with the initial number 5."}
🛠️  Planning to use tools: ['handoff']
🔨 Calling Tool: handoff
  With arguments: {'to_agent': 'mutiplierAgent', 'reason': 'Run mutiplierAgent for the first time with the initial number 5.'}
🔧 Tool Result (handoff):
  Arguments: {'to_agent': 'mutiplierAgent', 'reason': 'Run mutiplierAgent for the first time with the initial number 5.'}
  Output: Agent mutiplierAgent is now handling the request due to the following reason: Run mutiplierAgent for the first time with the initial number 5..
Please continue

## Agent Example

In [None]:
from llama_index.core.agent.workflow import FunctionAgent, ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.core.agent.workflow import (
    AgentInput,
    AgentOutput,
    ToolCall,
    ToolCallResult,
    AgentStream,
)
from typing import Annotated

def rtest(x: int):
    # Adds a fixed offset of 4 to the input.
    shifted = x + 4
    return shifted
# ReAct agent with a single tool whose description restricts when it may be used.
r_agent = ReActAgent(
    llm=llm,
    tools=[
        FunctionTool.from_defaults(
            rtest,
            name="test",
            description="Input is a number and output from the previous iteration. Condition to use: use this tool ONLY at the third iteration. You cannot use this for other iterations.",
            return_direct=False,
        )
    ],
)


In [None]:
from llama_index.core import PromptTemplate
# Customised ReAct header prompt. {tool_desc} and {tool_names} are filled in
# when the template is formatted; literal JSON braces are escaped as {{ }}.
# The text must not be wrapped in quote characters of its own, otherwise they
# are sent to the model as part of the system message.
prompt_template_sub = """
You are designed to help with a variety of tasks, from answering questions to providing summaries to other types of analyses.

## Tools

You have access to a wide variety of tools. You are responsible for using the tools in any sequence you deem appropriate to complete the task at hand.
This may require breaking the task into subtasks and using different tools to complete each subtask.

You have access to the following tools:
{tool_desc}


## Output Format

Please answer in the same language as the question and use the following format:

```
Thought: The current language of the user is: (user's language). I need to use a tool to help me answer the question.
Action: tool name (one of {tool_names}) if using a tool.
Action Input: the input to the tool, in a JSON format representing the kwargs (e.g. {{"input": "hello world", "num_beams": 5}})
```

Please ALWAYS start with a Thought.

NEVER surround your response with markdown code markers. You may use code markers within your response if you need to.

Please use a valid JSON format for the Action Input. Do NOT do this {{'input': 'hello world', 'num_beams': 5}}.

If this format is used, the tool will respond in the following format:

```
Observation: tool response
```

You should keep repeating the above format till you have enough information to answer the question without using any more tools. At that point, you MUST respond in one of the following two formats:

```
Thought: I can answer without using any more tools. I'll use the user's language to answer
Answer: [your answer here (In the same language as the user's question)]
```

```
Thought: I cannot answer the question with the provided tools.
Answer: [your answer here (In the same language as the user's question)]
```

## Additional Rules
- You MUST obey the function description of each tool. Only use the tool if the conditions are met.
- The answer MUST contain a sequence of bullet points that explain how you arrived at the answer. This can include aspects of the previous conversation history.
- You MUST obey the function signature of each tool. Do NOT pass in no arguments if the function expects arguments.

## Current Conversation

Below is the current conversation consisting of interleaving human and assistant messages.
"""
# Wrap the customised text in a PromptTemplate and install it under the agent's "react_header" prompt key.
react_system_prompt = PromptTemplate(prompt_template_sub)
r_agent.update_prompts({"react_header": react_system_prompt})



In [None]:
handler = r_agent.run("RUn this 5 times and calculate this recursively. inital number (X): 2. Calculate X+3 where X is the output of each run. ")


async for ev in handler.stream_events():
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)
    elif isinstance(ev, ToolCallResult):
        print(
            f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}"
        )
    elif isinstance(ev, AgentInput):
       print(ev.input)  # the current input messages
response = await handler

print("\n\n")
get_token_info(True)
print(response.model_dump_json())
print(str(response))

**********
Trace: chat
    |_CBEventType.LLM -> 0.0 seconds
**********
[ChatMessage(role=<MessageRole.SYSTEM: 'system'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='\n\'You are designed to help with a variety of tasks, from answering questions to providing summaries to other types of analyses.\n\n## Tools\n\nYou have access to a wide variety of tools. You are responsible for using the tools in any sequence you deem appropriate to complete the task at hand.\nThis may require breaking the task into subtasks and using different tools to complete each subtask.\n\nYou have access to the following tools:\n> Tool Name: test\nTool Description: Input is a number and output from the previous iteration. Condition to use: use this tool ONLY at the third iteration. You cannot use this for other iterations.\nTool Args: {"properties": {"x": {"title": "X", "type": "integer"}}, "required": ["x"], "type": "object"}\n\n\n\n## Output Format\n\nPlease answer in the same language as the

## Example: Single Agent
- Calculator example

In [32]:
from llama_index.core.agent.workflow import FunctionAgent, ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.core.agent.workflow import AgentStream, ToolCallResult
from typing import Annotated

In [95]:
# Functions
def multiply(a: float, b: float) -> float:
    """Useful for multiplying two numbers."""
    result = a * b
    return result
def divide(a: float, b: float) -> float:
    """Useful for dividing two numbers."""
    # Disabled debugging aid: uncomment to force a tool failure when b == 12.
    # if b == 12:
    #     raise ValueError("TEST")
    result = a / b
    return result
def divide_quotient(a: float, b: float) -> float:
    """Useful for dividing two numbers. Get only the quotient from division."""
    # Floor division (rounds toward negative infinity).
    result = a // b
    return result
def add(a: float, b: float) -> float:
    """Useful for adding two numbers."""
    total = a + b
    return total
def substract(a: float, b: float) -> float:
    """Useful for substracting two numbers."""
    # Function name and docstring spelling are kept; the tool below is registered under the same name.
    difference = a - b
    return difference
# Tools
# One flag shared by every tool below, so the return_direct setting can be flipped in one place.
return_direct = False
tool1 = FunctionTool.from_defaults(
    multiply,
    name="multiply",
    description="Use to multiply two numbers",
    return_direct=return_direct,
)
tool2 = FunctionTool.from_defaults(
    divide,
    name="divide",
    description="Use to divide two numbers",
    return_direct=return_direct,
)
tool3 = FunctionTool.from_defaults(
    divide_quotient,
    name="divide_quotient",
    description="Use to divide two numbers and get only the quotient",
    return_direct=return_direct,
)
tool4 = FunctionTool.from_defaults(
    add,
    name="add",
    description="Use to add two numbers",
    return_direct=return_direct,
)
tool5 = FunctionTool.from_defaults(
    substract,
    name="substract",
    description="Use to substract two numbers",
    return_direct=return_direct,
)

# Agent
# The same prompt and tool list are given to both agent flavours so their behaviour can be compared.
SYSTEM_PROMPT = "You are a helpful assistant that acts like a calculator. Check if the query requires the use of tools. For query that does not require the use of tools, return the 'Unable to process query' and the reason. Stop processing and display the full error message if error is found."
tools = [tool1, tool2, tool3, tool4, tool5]
f_agent = FunctionAgent(llm=llm, tools=tools, system_prompt=SYSTEM_PROMPT)
r_agent = ReActAgent(
    llm=llm,
    tools=tools,
    system_prompt=SYSTEM_PROMPT,
)

In [98]:
handler = f_agent.run("What is (5+2)+(1/12)")
async for ev in handler.stream_events():
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)
    elif isinstance(ev, ToolCallResult):
        print(
            f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}"
        )
response = await handler

print("\n\n")
get_token_info(True)
print(response.model_dump_json())
print(list(map(lambda x: [x.tool_name, x.tool_output.raw_output, x.tool_kwargs],response.tool_calls)))
print(str(response))

**********
Trace: chat
    |_CBEventType.LLM -> 0.0 seconds
**********

Call add with {'a': 5, 'b': 2}
Returned: 7

Call divide with {'a': 1, 'b': 12}
Returned: 0.08333333333333333
**********
Trace: chat
    |_CBEventType.LLM -> 0.0 seconds
**********

Call add with {'a': 7, 'b': 0.08333333333333333}
Returned: 7.083333333333333
**********
Trace: chat
    |_CBEventType.LLM -> 0.0 seconds
**********
The result of \((5+2) + (1/12)\) is approximately \(7.0833\).


Embedding Tokens:  0 
 LLM Prompt Tokens:  359 
 LLM Completion Tokens:  33 
 Total LLM Token Count:  392 

{"response":{"role":"assistant","additional_kwargs":{},"blocks":[{"block_type":"text","text":"The result of \\((5+2) + (1/12)\\) is approximately \\(7.0833\\)."}]},"tool_calls":[{"tool_name":"add","tool_kwargs":{"a":5,"b":2},"tool_id":"call_IDIg4y2OkENmMlxpDfWAtSyY"},{"tool_name":"divide","tool_kwargs":{"a":1,"b":12},"tool_id":"call_yQXiY6HovSBUxzU1mBmKN9VV"},{"tool_name":"add","tool_kwargs":{"a":7,"b":0.08333333333333333},

In [97]:
handler = r_agent.run("What is (5+2)+(1/12)")
async for ev in handler.stream_events():
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)
    elif isinstance(ev, ToolCallResult):
        print(
            f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}"
        )
response = await handler

print("\n\n")
get_token_info(True)
print(response.model_dump_json())
print(list(map(lambda x: [x.tool_name, x.tool_output.raw_output, x.tool_kwargs],response.tool_calls)))
print(str(response))

**********
Trace: chat
    |_CBEventType.LLM -> 0.0 seconds
**********
Thought: The current language of the user is English. I need to calculate the sum of (5+2) and (1/12). First, I'll calculate 5+2, then divide 1 by 12, and finally add the results.

Action: add
Action Input: {"a": 5, "b": 2}
Call add with {'a': 5, 'b': 2}
Returned: 7
**********
Trace: chat
    |_CBEventType.LLM -> 0.0 seconds
**********
Thought: I have the result of 5+2, which is 7. Now, I need to calculate 1/12.
Action: divide
Action Input: {"a": 1, "b": 12}
Call divide with {'a': 1, 'b': 12}
Returned: 0.08333333333333333
**********
Trace: chat
    |_CBEventType.LLM -> 0.0 seconds
**********
Thought: I have the result of 1/12, which is approximately 0.08333333333333333. Now, I need to add this to 7.
Action: add
Action Input: {'a': 7, 'b': 0.08333333333333333}
Call add with {'a': 7, 'b': 0.08333333333333333}
Returned: 7.083333333333333
**********
Trace: chat
    |_CBEventType.LLM -> 0.0 seconds
**********
Thought: I 

## Example: Agent with context and memory
- https://docs.llamaindex.ai/en/stable/module_guides/deploying/agents/memory/#memory-vs-workflow-context
- https://docs.llamaindex.ai/en/stable/examples/agent/agent_workflow_basic/#human-in-the-loop

In [45]:
from llama_index.core.agent.workflow import FunctionAgent, ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.core.memory import Memory
from llama_index.core.workflow import (
    Context, 
    JsonPickleSerializer, 
    JsonSerializer, 
    InputRequiredEvent,
    HumanResponseEvent
)
from llama_index.core.agent.workflow import (
    AgentInput,
    AgentOutput,
    ToolCall,
    ToolCallResult,
    AgentStream,
)
from llama_index.core.memory import (
    StaticMemoryBlock,
    FactExtractionMemoryBlock,
    VectorMemoryBlock,
)
from typing import Annotated

In [47]:

# Functions
def multiply(a: float, b: float) -> float:
    """Useful for multiplying two numbers."""
    answer = a * b
    return answer
def divide(a: float, b: float) -> float:
    """Useful for dividing two numbers."""
    # Disabled debugging aid: uncomment to force a tool failure when b == 12.
    # if b == 12:
    #     raise ValueError("TEST")
    answer = a / b
    return answer
def divide_quotient(a: float, b: float) -> float:
    """Useful for dividing two numbers. Get only the quotient from division."""
    # Floor division (rounds toward negative infinity).
    answer = a // b
    return answer
# Tools
# Unlike the single-agent calculator section, return_direct is True for every tool here.
return_direct = True
tool1 = FunctionTool.from_defaults(
    multiply,
    name="multiply",
    description="Use to multiply two numbers",
    return_direct=return_direct,
)
tool2 = FunctionTool.from_defaults(
    divide,
    name="divide",
    description="Use to divide two numbers",
    return_direct=return_direct,
)
tool3 = FunctionTool.from_defaults(
    divide_quotient,
    name="divide_quotient",
    description="Use to divide two numbers and get only the quotient",
    return_direct=return_direct,
)

# Agent
SYSTEM_PROMPT = "You are a helpful assistant that acts like a calculator. Use the tool. Strictly follow ths rule: if tool is not needed/used, return the message: `Unable to process query` + reason why"
tools = [tool1, tool2, tool3]

# A single static memory block holding fixed background facts.
blocks = [
    StaticMemoryBlock(
        name="core_info",
        priority=0,
        static_content="My name is Logan, and I live in Saskatoon. I work at LlamaIndex. Initial number is 100",
    )
]

# Memory object passed to the agent run; the blocks are inserted as a system message.
memory = Memory.from_defaults(
    session_id="my_session",
    memory_blocks=blocks,
    insert_method="system",
    token_limit=30000,
    chat_history_token_ratio=0.7,
    token_flush_size=3000,
)

f_agent = FunctionAgent(llm=llm, tools=tools, system_prompt=SYSTEM_PROMPT)

# Context bound to the agent; it is passed to run() and queried for the memory afterwards.
ctx = Context(f_agent)

# ctx_dict = ctx.to_dict(serializer=JsonSerializer())
# restored_ctx = Context.from_dict(agent, ctx_dict, serializer=JsonSerializer())

In [48]:
handler = f_agent.run(user_msg="what is my name?", ctx=ctx, memory=memory)

async for event in handler.stream_events():
    if isinstance(event, AgentStream):
        print(event.delta, end="", flush=True)
        # print(event.response)  # the current full response
        # print(event.raw)  # the raw llm api response
        # print(event.current_agent_name)  # the current agent name
    elif isinstance(event, AgentInput):
       print(event.input)  # the current input messages
    #    print(event.current_agent_name)  # the current agent name
    elif isinstance(event, AgentOutput):
    #    print(event.response)  # the current full response
    #    print(event.tool_calls)  # the selected tool calls, if any
       print(event.raw)  # the raw llm api response
    elif isinstance(event, ToolCallResult):
    #    print(event.tool_name)  # the tool name
    #    print(event.tool_kwargs)  # the tool kwargs
       print(event.tool_output)  # the tool output
    elif isinstance(event, ToolCall):
        print(event.tool_name)  # the tool name
    #     print(event.tool_kwargs)  # the tool kwargs
response = await handler
print(str(response))

**********
Trace: chat
    |_CBEventType.LLM -> 0.0 seconds
**********
[ChatMessage(role=<MessageRole.SYSTEM: 'system'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='You are a helpful assistant that acts like a calculator. Use the tool. Strictly follow ths rule: if tool is not needed/used, return the message: `Unable to process query` + reason why')]), ChatMessage(role=<MessageRole.SYSTEM: 'system'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='<memory>\n<core_info>\nMy name is Logan, and I live in Saskatoon. I work at LlamaIndex. Initial number is 100\n</core_info>\n</memory>')]), ChatMessage(role=<MessageRole.USER: 'user'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='what is my name?')])]
Unable to process query. I can only assist with calculations.{'id': 'chatcmpl-BbsvO1ngwpxUVIaKhO3mqlv0IbYYZ', 'choices': [{'delta': {'content': None, 'function_call': None, 'refusal': None, 'role': None, 'tool_calls': None}, 'finish_reason

In [49]:
# Pull the memory object stored in the workflow context under "memory" and display the messages it returns.
chat_memory = await ctx.get("memory")
chat_history = chat_memory.get()
chat_history

[ChatMessage(role=<MessageRole.SYSTEM: 'system'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='<memory>\n<core_info>\nMy name is Logan, and I live in Saskatoon. I work at LlamaIndex. Initial number is 100\n</core_info>\n</memory>')]),
 ChatMessage(role=<MessageRole.USER: 'user'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='what is my name?')]),
 ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='Unable to process query. I can only assist with calculations.')])]

## Example: Multi Agent 
- https://docs.llamaindex.ai/en/stable/understanding/agent/multi_agent/

In [52]:
from llama_index.core.agent.workflow import FunctionAgent, ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.core.memory import Memory
from llama_index.core.workflow import (
    Context, 
    JsonPickleSerializer, 
    JsonSerializer, 
    InputRequiredEvent,
    HumanResponseEvent
)
from llama_index.core.agent.workflow import (
    AgentInput,
    AgentOutput,
    ToolCall,
    ToolCallResult,
    AgentStream,
    AgentWorkflow
)
from llama_index.core.memory import (
    StaticMemoryBlock,
    FactExtractionMemoryBlock,
    VectorMemoryBlock,
)
from typing import Annotated

In [66]:
# Functions
from datetime import datetime
def get_datetime_string():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

async def record_datetime(ctx: Context, key: str) -> str:
    """Store the current timestamp under ``key`` in the workflow state.

    Timestamps live in the nested dict ``state["datetime"]``, which is
    created on first use.

    Args:
        ctx: Workflow context holding the state dict under "state".
        key: Label for this timestamp.

    Returns:
        The fixed confirmation string "Datetime recorded.".
    """
    state = await ctx.get("state")
    timestamps = state.setdefault("datetime", {})
    timestamps[key] = get_datetime_string()
    await ctx.set("state", state)
    return "Datetime recorded."

async def record_code(ctx: Context, code: str, language: str) -> str:
    """Save generated code in the workflow state, keyed by its language.

    Args:
        ctx: Workflow context holding the state dict under "state".
        code: Source text to store.
        language: State key to store the code under; an existing entry for
            the same key is overwritten.

    Returns:
        The fixed confirmation string "Code written.".
    """
    state = await ctx.get("state")
    state.update({language: code})
    await ctx.set("state", state)
    return "Code written."

async def review_code(ctx: Context, review: str) -> str:
    """Append a code review to the ``"review"`` list in the workflow state.

    Args:
        ctx: Workflow context; the shared state dict is read from and written
            back to its ``"state"`` entry.
        review: Review text to append.

    Returns:
        The fixed confirmation message ``"Code reviewed."``.
    """
    current_state = await ctx.get("state")
    # Create the list on first use so a state without a "review" entry does not
    # raise KeyError (record_datetime treats its "datetime" bucket the same way).
    current_state.setdefault("review", []).append(review)
    await ctx.set("state", current_state)
    return "Code reviewed."

# Tools
# One shared flag, forwarded to every tool's `return_direct` option.
return_direct = False
tool1 = FunctionTool.from_defaults(
    fn=record_datetime,
    name="record_datetime",
    description="Record datetime string",
    return_direct=return_direct,
)
tool2 = FunctionTool.from_defaults(
    fn=record_code,
    name="record_code",
    description="Record code generated by LLM",
    return_direct=return_direct,
)
tool3 = FunctionTool.from_defaults(
    fn=review_code,
    name="review_code",
    description="Review code generated by LLM",
    return_direct=return_direct,
)

# Agent
# Four cooperating agents:
#   PythonCodeAgent (root) -> RecordAgent <-> ReviewAgent
#                             RecordAgent  -> JavaCodeAgent -> RecordAgent
# NOTE: the system prompts are built by implicit string concatenation, so every
# fragment except the last must end with a space; otherwise sentences run
# together in the prompt sent to the model.
code_agent = FunctionAgent(
    name="PythonCodeAgent",
    description="Useful for generating code on a given query.",
    system_prompt=(
        "You are the Coding Agent that generate codes only and strictly in Python. "
        "You should generate python code to achieve what the query wants. Do not generate and stop the program if query ask to generate codes in other languages. "
        "Once code is generated, you should also record the datetime with the key: 'start' using the tool. "
        "If code is successfully generated, you should hand off control to the RecordAgent to record the code."
    ),
    llm=llm,
    tools=[tool1],
    can_handoff_to=["RecordAgent"],
)
code_agent2 = FunctionAgent(
    name="JavaCodeAgent",
    description="Useful to convert python code to java implementation",
    system_prompt=(
        "You are the Coding Agent that generate only Java codes. "
        "You should generate Java code from the python code given in the inputs. "
        "Once code is generated, you should also record the datetime with the key: 'start2' using the tool. "
        "If code is successfully generated, you should hand off control to the RecordAgent to record the code."
    ),
    llm=llm,
    tools=[tool1],
    can_handoff_to=["RecordAgent"],
)
record_agent = FunctionAgent(
    name="RecordAgent",
    description="Useful for record a report on given programming language.",
    system_prompt=(
        "You are the RecordAgent that can record a report on a given programming language. "
        "Identify the programming language used and store the content (code) with the key (programming language). "
        "Once the code is recorded, you should make sure to get review AT LEAST once from the ReviewAgent to identify potential bugs. "
        "If code has been reviewed and the code is written by Python, you MUST hand off control to the JavaCodeAgent to convert the code to Java. "
        "If code has been reviewed and the code is written by Java, you should record the datetime with the key: 'end' using the tool and stop the program. "
    ),
    llm=llm,
    tools=[tool2, tool1],
    can_handoff_to=["ReviewAgent", "JavaCodeAgent"],
)

review_agent = FunctionAgent(
    name="ReviewAgent",
    description="Useful for reviewing a code and providing feedback.",
    system_prompt=(
        "You are the ReviewAgent that can review a code, identify bugs and provide feedback. "
        "Indicate in the comments that the code has been reviewed. "
        "Once code is reviewed, you MUST hand off control back to the RecordAgent to record the changes and proceed with the updated codes."
    ),
    llm=llm,
    tools=[tool3],
    can_handoff_to=["RecordAgent"],
)
agent_workflow = AgentWorkflow(
    agents=[code_agent, code_agent2, record_agent, review_agent],
    root_agent=code_agent.name,
    # Seed the shared state with the containers the tools write into.
    initial_state={
        "datetime": {},
        "review": [],
    },
)
# Explicit context for this workflow; it only takes effect if passed to
# `agent_workflow.run(..., ctx=ctx)`.
ctx = Context(agent_workflow)

# Serialising / restoring a context (kept for reference):
# ctx_dict = ctx.to_dict(serializer=JsonSerializer())
# restored_ctx = Context.from_dict(agent_workflow, ctx_dict, serializer=JsonSerializer())

In [75]:
handler = agent_workflow.run(
    user_msg="""
    Write a program to check if send a mail using gmail.
""", #ctx=ctx
)

current_agent = None
current_tool_calls = ""
async for event in handler.stream_events():
    if (
        hasattr(event, "current_agent_name")
        and event.current_agent_name != current_agent
    ):
        current_agent = event.current_agent_name
        print(f"\n{'='*50}")
        print(f"🤖 Agent: {current_agent}")
        print(f"{'='*50}\n")
    elif isinstance(event, AgentOutput):
        if event.response.content:
            print("📤 Output:", event.response.content)
        if event.tool_calls:
            print(
                "🛠️  Planning to use tools:",
                [call.tool_name for call in event.tool_calls],
            )
    elif isinstance(event, ToolCallResult):
        print(f"🔧 Tool Result ({event.tool_name}):")
        print(f"  Arguments: {event.tool_kwargs}")
        print(f"  Output: {event.tool_output}")
    elif isinstance(event, ToolCall):
        print(f"🔨 Calling Tool: {event.tool_name}")
        print(f"  With arguments: {event.tool_kwargs}")

**********
Trace: chat
    |_CBEventType.LLM -> 0.0 seconds
**********

🤖 Agent: PythonCodeAgent

🛠️  Planning to use tools: ['record_datetime']
🔨 Calling Tool: record_datetime
  With arguments: {'key': 'start'}
🔧 Tool Result (record_datetime):
  Arguments: {'key': 'start'}
  Output: Datetime recorded.
**********
Trace: chat
    |_CBEventType.LLM -> 0.0 seconds
**********
🛠️  Planning to use tools: ['handoff']
🔨 Calling Tool: handoff
  With arguments: {'to_agent': 'RecordAgent', 'reason': "Record the Python code for sending an email using Gmail's SMTP server."}
🔧 Tool Result (handoff):
  Arguments: {'to_agent': 'RecordAgent', 'reason': "Record the Python code for sending an email using Gmail's SMTP server."}
  Output: Agent RecordAgent is now handling the request due to the following reason: Record the Python code for sending an email using Gmail's SMTP server..
Please continue with the current request.
**********
Trace: chat
    |_CBEventType.LLM -> 0.0 seconds
**********

🤖 Agent: Re

In [68]:
response = await handler
print(str(response))

The Java code has been successfully recorded, and the process is now complete.


In [77]:
import json
# state = await ctx.get("state")
state = await handler.ctx.get("state")
print(json.dumps(state, indent=2))

{
  "datetime": {
    "start": "2025-05-28 06:57:43",
    "start2": "2025-05-28 06:58:01",
    "end": "2025-05-28 06:58:16"
  },
  "review": [
    "The provided Python code is designed to check if the start of a string contains numbers using a regular expression. Here is the review:\n\n1. **Correctness**:\n   - The regex pattern `'^\\d'` correctly matches strings that start with a digit.\n   - The use of `re.match` is appropriate for checking the beginning of the string.\n   - The function returns a boolean value, which is suitable for the intended purpose.\n\n2. **Potential Issues**:\n   - The regex pattern only checks for a single digit at the start of the string. If the requirement is to check for multiple digits at the start, the pattern should be updated to `'^\\d+'`.\n   - The function does not handle edge cases like empty strings or `None` inputs. Adding input validation would make the function more robust.\n\n3. **Feedback**:\n   - Consider adding comments or docstrings to expl

In [71]:
# Show the top-level keys currently present in the workflow state.
state.keys()

dict_keys(['datetime', 'review', 'Python', 'Java'])

## Example: Workflow (Incomplete)
- https://docs.llamaindex.ai/en/stable/module_guides/workflow/#stepwise-execution

In [79]:
from llama_index.core.workflow import (
    Event,
    StartEvent,
    StopEvent,
    Workflow,
    step,
)

In [80]:
class JokeEvent(Event):
    """Event carrying the generated joke from `generate_joke` to `critique_joke`."""

    # Text of the generated joke.
    joke: str

class JokeFlow(Workflow):
    """Two-step workflow: write a joke about a topic, then critique that joke."""

    # Bind the notebook-level `llm` so the steps can reach it as `self.llm`.
    llm = llm

    @step
    async def generate_joke(self, ev: StartEvent) -> JokeEvent:
        """Ask the LLM for a joke about `ev.topic` and emit it as a `JokeEvent`."""
        completion = await self.llm.acomplete(
            f"Write your best joke about {ev.topic}."
        )
        return JokeEvent(joke=str(completion))

    @step
    async def critique_joke(self, ev: JokeEvent) -> StopEvent:
        """Ask the LLM to critique `ev.joke`; the critique is the workflow result."""
        critique = await self.llm.acomplete(
            f"Give a thorough analysis and critique of the following joke: {ev.joke}"
        )
        return StopEvent(result=str(critique))


w = JokeFlow(timeout=60, verbose=True)
result = await w.run(topic="pirates")
print(str(result))

Running step generate_joke
**********
Trace: completion
    |_CBEventType.LLM -> 1.824352 seconds
**********
Step generate_joke produced event JokeEvent
Running step critique_joke
**********
Trace: completion
    |_CBEventType.LLM -> 7.350807 seconds
**********
Step critique_joke produced event StopEvent
Certainly! Let’s break down and analyze this pirate-themed joke:

### **Structure and Delivery**
The joke follows a classic question-and-answer format, which is a common and effective setup for humor. The question sets up an expectation, creating curiosity and inviting the listener to think about the answer. The punchline then subverts expectations with a play on words, delivering the humor.

### **Wordplay and Humor Mechanism**
The humor in this joke hinges on a pun involving the phrase "wash up." In its literal sense, "wash up" refers to cleaning oneself, which ties into the idea of taking a shower. However, the punchline uses the idiomatic meaning of "wash up," which refers to being

# LANGCHAIN