In [1]:
import os
import warnings

# Silence every Python warning for the rest of the kernel session so the demo output stays readable.
warnings.simplefilter(action="ignore")
# Set before any client is created. NOTE(review): presumably this mutes gRPC log noise from the
# Google client — confirm against the gRPC environment-variable documentation.
os.environ["GRPC_VERBOSITY"] = "NONE"

# Multi Agent Building

Dependency injection, shown in [02.tool_calling_and_dependency_injection.ipynb](./02.tool_calling_and_dependency_injection.ipynb), allows us to build multi-agent systems with intuitive code. The `Agent` class has a `subagents` argument that accepts a `list[Agent]`; each subagent is dynamically converted into a tool that invokes that agent via the dependency injection feature. This notebook shows examples of building multi-agent systems.

# Prerequisites

Please make sure your environment variables and dependencies are ready for the LLM services you use. The names of the environment variables are arbitrary because langrila modules accept those names as arguments.

In [2]:
from dotenv import load_dotenv

# Load the API credentials from the env file two directories above this notebook.
# The path is relative to the kernel's working directory; the cell output is the call's return value.
load_dotenv("../../.env_api")

True

# Import modules

In [3]:
import uuid
from pathlib import Path

from langrila import Agent, InMemoryConversationMemory, JSONConversationMemory
from langrila.anthropic import AnthropicClient
from langrila.google import GoogleClient
from langrila.openai import OpenAIClient

# Instantiate client

In [4]:
# Every `*_env_name` argument below is the NAME of an environment variable, never the secret itself.

# For OpenAI
openai_client = OpenAIClient(api_key_env_name="OPENAI_API_KEY")

# For Azure OpenAI (not used by the agents in the cells visible in this notebook; shown for reference)
azure_openai_client = OpenAIClient(
    api_key_env_name="AZURE_API_KEY",
    api_type="azure",
    azure_api_version="2024-11-01-preview",
    azure_endpoint_env_name="AZURE_ENDPOINT",
    azure_deployment_id_env_name="AZURE_DEPLOYMENT_ID",
)

# For Gemini on Google AI Studio
google_dev_client = GoogleClient(api_key_env_name="GEMINI_API_KEY")

# For Gemini on Google Cloud VertexAI (no API key argument; only project and location are given)
vertexai_client = GoogleClient(
    api_type="vertexai",
    project_id_env_name="GOOGLE_CLOUD_PROJECT",
    location="us-central1",
)

# For Claude of Anthropic
anthropic_client = AnthropicClient(api_key_env_name="ANTHROPIC_API_KEY")

# For Claude of Amazon Bedrock (not used by the agents in the cells visible in this notebook; shown for reference)
claude_bedrock_client = AnthropicClient(
    api_type="bedrock",
    aws_access_key_env_name="AWS_ACCESS_KEY",
    aws_secret_key_env_name="AWS_SECRET_KEY",
    aws_region_env_name="AWS_REGION",
)

# Define tools

Using dummy tools.

In [5]:
import random
from typing import Literal


def power_disco_ball(power: bool) -> str:
    """
    Powers the spinning disco ball.

    Parameters
    ----------
    power : bool
        Whether to power the disco ball or not.

    Returns
    ----------
    str
        A message indicating whether the disco ball is spinning or stopped.
    """
    # Dummy tool: the result is a human-readable status message, not a boolean flag.
    return f"Disco ball is {'spinning!' if power else 'stopped.'}"


def start_music() -> str:
    """
    Turn on the music. The genre and volume are randomly selected; the BPM starts at 120.

    Returns
    ----------
    str
        A message indicating that the music is starting, including the genre, BPM, and volume.
    """
    music_genre = random.choice(["rock", "pop", "jazz", "classical", "hip-hop"])
    volume = random.uniform(0.2, 1.0)
    # The BPM is a fixed starting value, not a random draw.
    bpm = 120
    return f"Starting music! Genre: {music_genre}, BPM: {bpm}, Volume: {volume}"


def change_bpm(new_bpm: int) -> str:
    """
    Change the BPM of the music.

    Parameters
    ----------
    new_bpm : int
        The new BPM to set the music.

    Returns
    ----------
    str
        A message indicating the BPM the music is being changed to.
    """
    # Dummy tool: only reports the requested BPM as a message.
    return f"Changing BPM to {new_bpm}"


def change_music(
    genre: Literal["rock", "pop", "jazz", "classical", "hip-hop"],
    bpm: int,
) -> str:
    """
    Change the music genre and BPM.

    Parameters
    ----------
    genre : str
        The genre of music to play. Should be one of "rock", "pop", "jazz", "classical", or "hip-hop".
    bpm : int
        The BPM of the music.

    Returns
    ----------
    str
        A confirmation message naming the new genre and BPM.
    """
    # Dummy tool: build the confirmation text and hand it back unchanged.
    confirmation = "Changing music to {} with BPM {}".format(genre, bpm)
    return confirmation


def turn_light_on() -> str:
    """
    Turn the lights on.

    Returns
    ----------
    str
        A message saying the lights are on, with the brightness shown to two decimals.
    """
    # Brightness is drawn uniformly from [0.5, 1.0] on every call.
    level = random.uniform(0.5, 1.0)
    return f"Lights are now on! Brightness: {level:.2f}"


def dim_lights(brightness: float) -> str:
    """
    Dim the lights.

    Parameters
    ----------
    brightness : float
        The brightness level to set the lights. Should be between 0 and 1.

    Returns
    ----------
    str
        A message indicating the brightness the lights are now set to.
    """
    # Dummy tool: the value is echoed as given; the 0-1 range is not enforced here.
    return f"Lights are now set to {brightness}"


def adjust_volume(volume: float) -> str:
    """
    Adjust the volume of the music.

    Parameters
    ----------
    volume : float
        The volume level to set the music. Should be between 0 and 1.

    Returns
    ----------
    str
        A message indicating the volume the music is now set to.
    """
    # Dummy tool: the value is echoed as given; the 0-1 range is not enforced here.
    return f"Volume is now set to {volume}"


def stop_music() -> str:
    """
    Stop the music.

    Returns
    ----------
    str
        A fixed message saying the music is stopping.
    """
    # Dummy tool: nothing is actually stopped; only the status text is produced.
    status = "Stopping music!"
    return status


def stop_disco_ball() -> str:
    """
    Stop the disco ball power and spinning.

    Returns
    ----------
    str
        A fixed message saying the disco ball is powered off and no longer spinning.
    """
    # Dummy tool: only the status text is produced.
    status = "Stopping disco ball! Powered off and stopped spinning."
    return status


def adjust_lights(brightness: float) -> str:
    """
    Adjust the brightness of the lights.

    Parameters
    ----------
    brightness : float
        The brightness level to set the lights. Should be between 0 and 1.

    Returns
    ----------
    str
        A message indicating the brightness the lights are now set to.
    """
    # Dummy tool: the value is echoed as given; the 0-1 range is not enforced here.
    return f"Lights are now set to {brightness}"

# Multi-Agent

The `Agent` class accepts a `subagents` argument, which dynamically generates tools that run each subagent. Langrila supports multi-agent setups with multiple clients. In langrila, we can build orchestrator-type multi-agent systems, not graph-based ones. The orchestrator routes the execution of tools to task-specific agents, aggregates the results, and outputs the final answer.

## Task specific agent. 

In [6]:
# A fresh ID per run keeps each run's conversation files in their own directory.
memory_id = str(uuid.uuid4())  # Use your own memory ID
memory_path = f"./{memory_id}"

_memory_path = Path(memory_path)

# `exist_ok=True` already makes this call a no-op for an existing directory, so no separate
# existence check is needed (and a check-then-create sequence would be racy).
_memory_path.mkdir(parents=True, exist_ok=True)

# Lights specialist: Gemini through the Vertex AI client, restricted to the two light tools.
# Its conversation history is persisted as JSON inside the per-run memory directory.
# NOTE(review): no `name` is passed, yet the recorded usage output is keyed by 'lights_agent';
# presumably the name is derived from this variable binding — confirm before renaming it.
lights_agent = Agent(
    client=vertexai_client,
    model="gemini-2.0-flash-exp",
    temperature=0.0,
    tools=[turn_light_on, adjust_lights],
    conversation_memory=JSONConversationMemory(f"{memory_path}/lights_agent.json", exist_ok=True),
)

# Disco ball specialist: Claude through the Anthropic client, restricted to the two disco ball tools.
# Unlike the Gemini and OpenAI agents in this cell, this one also passes `max_tokens`.
disco_ball_agent = Agent(
    client=anthropic_client,
    model="claude-3-5-sonnet-20240620",
    temperature=0.0,
    tools=[power_disco_ball, stop_disco_ball],
    max_tokens=500,
    conversation_memory=JSONConversationMemory(
        f"{memory_path}/disco_ball_agent.json", exist_ok=True
    ),
)

# Music specialist: OpenAI client with the start/change/volume tools.
# `change_bpm` and `stop_music` are defined above but are not given to this agent.
music_agent = Agent(
    client=openai_client,
    model="gpt-4o-mini-2024-07-18",
    temperature=0.0,
    tools=[start_music, change_music, adjust_volume],
    conversation_memory=JSONConversationMemory(f"{memory_path}/music_agent.json", exist_ok=True),
)

## Orchestrator agent.

All you have to do to build a multi-agent system is pass the agents to the orchestrator agent. Planning mode is supported for multi-agent setups.

In [7]:
# Top-level orchestrator: it has no tools of its own and only routes work to the three subagents.
# With `planning=True` it is first prompted to draft a plan (see the DEBUG log of the next cell).
orchestrator = Agent(
    client=openai_client,
    model="gpt-4o-mini-2024-07-18",
    temperature=0.0,
    subagents=[lights_agent, disco_ball_agent, music_agent],
    planning=True,
    # Built from `memory_path`, like the subagents' memories, so every history file of this run
    # is addressed through the same directory variable.
    conversation_memory=JSONConversationMemory(f"{memory_path}/orchestrator.json", exist_ok=True),
)

In [8]:
# A single high-level request; the orchestrator decides which subagents to involve.
prompt = "Turn this place into a party mood!"

# Calls the LLM services (network access and valid credentials required).
response = orchestrator.generate_text(prompt=prompt)

[32m[2025-01-05 21:50:15][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Please make a concise plan to answer the following question/requirement, considering the conversation history.\nYou can invoke the sub-agents or tools to answer the questions/requirements shown in the capabilities section.\nAgent has no description while the tools have a description.\n\nQuestion/Requirement:\nTurn this place into a party mood!\n\nCapabilities:\n- lights_agent\n  - turn_light_on: Turn the lights on.\n  - adjust_lights: Adjust the brightness of the lights.\n- disco_ball_agent\n  - power_disco_ball: Powers the spinning dissco ball.\n  - stop_disco_ball: Stop the disco ball power and spinning.\n- music_agent\n  - start_music: Turn on the music. The genre, BPM, and volume are randomly selected.\n  - change_music: Change the music genre and BPM.\n  - adjust_volume: Adjust the volume of the music.\n')][0m
[32m[2025-01-05 21:50:15][0m [1mINFO | root: Generating text[0m
[32m[2025-01-05 21:50:18][0m

As you can see in the log messages above, the orchestrator agent assigned a subtask to each subagent and finally generated the answer.

In [9]:
# Show the text of the first content item in the orchestrator's final response.
print(response.contents[0].text)

The party mood has been successfully set! Here's what has been done:

1. **Lights**: The lights are now on and the brightness has been adjusted to a vibrant level (0.9).
2. **Disco Ball**: The disco ball is powered on and spinning, adding a fun visual element to the atmosphere.
3. **Music**: Upbeat music is playing with the genre set to Hip-hop, BPM at 120, and volume at 0.91.

Enjoy the party vibes! If you have any other requests or adjustments, feel free to let me know!


Usage is collected across the whole agent tree, including subagents.

In [10]:
# Per-agent token usage as (agent name, Usage) pairs; the top-level orchestrator appears as 'root'.
list(response.usage.items())

[('disco_ball_agent',
  Usage(model_name='claude-3-5-sonnet-20240620', prompt_tokens=990, output_tokens=149)),
 ('lights_agent',
  Usage(model_name='gemini-2.0-flash-exp', prompt_tokens=211, output_tokens=22)),
 ('root',
  Usage(model_name='gpt-4o-mini-2024-07-18', prompt_tokens=1954, output_tokens=420)),
 ('music_agent',
  Usage(model_name='gpt-4o-mini-2024-07-18', prompt_tokens=442, output_tokens=41))]

Top-level orchestrator name is `root`.

Individual subagents store their own inputs and outputs. You can access those conversation histories by calling `load_history()` on the agent of interest. Here is an example of accessing the history of `music_agent`.

In [11]:
# Index 2 is music_agent: the third entry of the `subagents` list passed to the orchestrator.
orchestrator.subagents[2]

Agent(name=music_agent)

In [12]:
# The prompts, tool calls, and responses recorded in music_agent's own conversation memory.
orchestrator.subagents[2].load_history()

[Prompt(type='Prompt', role='user', contents=[TextPrompt(text='Turn on the music with an upbeat genre, BPM, and volume.')], name=None),
 Response(type='Response', role='assistant', contents=[ToolCallResponse(name='start_music', args='{}', call_id='call_S0CJOkr6WxFcyhT595nIlGWb')], usage=Usage(model_name=None, prompt_tokens=0, output_tokens=0), raw=None, name='music_agent', is_last_chunk=None, prompt=None),
 Prompt(type='Prompt', role='user', contents=[ToolUsePrompt(output='Starting music! Genre: hip-hop, BPM: 120, Volume: 0.9105972755969132', error=None, call_id='call_S0CJOkr6WxFcyhT595nIlGWb', args='{}', name='start_music')], name=None),
 Response(type='Response', role='assistant', contents=[TextResponse(text='The music is now playing! Genre: Hip-hop, BPM: 120, Volume: 0.91. Enjoy the upbeat vibes!')], usage=Usage(model_name=None, prompt_tokens=0, output_tokens=0), raw=None, name='music_agent', is_last_chunk=None, prompt=None)]

# State in the agent

Agent state in existing agent frameworks has some issues with readability, special arguments, and traceability. Ideally, state should be expressed by both the dependencies between agents and the response schema, with no special mechanism required. This means the state is updated by the LLM based on the conversation history and the response schema, while its scope is limited by the agent's dependencies.

In langrila, the combination of the dependencies between agents and structured output can take the place of agent state. Note that planning mode is supported even in the multi-agent case.

## Response schema

In [13]:
from enum import Enum

from pydantic import BaseModel, Field


# State of the disco ball; used below as `response_schema_as_tool` for the disco ball agent.
# Both fields are required (`...` means no default).
# NOTE(review): documented with `#` comments on purpose — a class docstring would presumably be
# included in the generated schema description; confirm before adding one.
class DiscoBallSchema(BaseModel):
    power: bool = Field(..., description="Whether to power the disco ball.")
    spinning: bool = Field(..., description="Whether the disco ball is spinning.")


# Allowed music genres. The values are the same five strings that `start_music` draws from and
# that `change_music` accepts; `hip_hop` needs an underscore as a member name but keeps the
# hyphenated value "hip-hop".
class MusicGenre(str, Enum):
    rock = "rock"
    pop = "pop"
    jazz = "jazz"
    classical = "classical"
    hip_hop = "hip-hop"


# State of the music; used below as `response_schema_as_tool` for the nested music agent.
# All three fields are required.
class MusicSchema(BaseModel):
    genre: MusicGenre = Field(
        ...,
        description="The genre of music to play.",
    )
    # BPM is validated to lie within 60..180 inclusive.
    bpm: int = Field(
        ...,
        description="The BPM of the music.",
        ge=60,
        le=180,
    )
    # Volume is validated to lie within 0..1 inclusive.
    volume: float = Field(
        ...,
        description="The volume level to set the music to.",
        ge=0,
        le=1,
    )


# State of the lights; used below as `response_schema_as_tool` for the lights agent.
class LightsSchema(BaseModel):
    # Brightness is required and validated to lie within 0..1 inclusive.
    brightness: float = Field(
        ...,
        description="The brightness level to set the lights to.",
        ge=0,
        le=1,
    )


# Overall party state returned by the top-level orchestrator: one nested, required sub-schema per
# device, mirroring the three subagents.
class ResponseSchema(BaseModel):
    disco_ball: DiscoBallSchema = Field(..., description="The disco ball settings.")
    music: MusicSchema = Field(..., description="The music settings.")
    lights: LightsSchema = Field(..., description="The lights settings.")

## Task specific agent.

If you don't specify the conversation memory for a subagent, `InMemoryConversationMemory` is used internally by default, which is not persisted automatically. Each subagent always needs its own conversation memory because the state within the multi-agent system is kept or updated based on the conversation history and, if specified, the response schema. Please be aware that this internal memory is used even if you don't specify the conversation memory for the subagent.

In [14]:
# Rebinds `lights_agent` from the first example: now on the Google AI Studio client, with no
# explicit conversation memory (so the default in-memory one is used) and a structured state schema.
lights_agent = Agent(
    client=google_dev_client,
    model="gemini-2.0-flash-exp",
    temperature=0.0,
    tools=[turn_light_on, adjust_lights],
    response_schema_as_tool=LightsSchema,  # The state of the lights.
)

# Rebinds `disco_ball_agent` from the first example: now on the OpenAI client, with the default
# in-memory conversation memory and a structured state schema.
disco_ball_agent = Agent(
    client=openai_client,
    model="gpt-4o-2024-11-20",
    temperature=0.0,
    tools=[power_disco_ball, stop_disco_ball],
    max_tokens=500,
    response_schema_as_tool=DiscoBallSchema,  # The state of the disco ball.
)

# Leaf agent that can only start the music; it has no response schema and no explicit memory.
music_power_agent = Agent(
    client=openai_client,
    model="gpt-4o-mini-2024-07-18",
    temperature=0.0,
    tools=[start_music],
)

# Leaf agent that changes music that is already playing (genre, volume, BPM).
music_control_agent = Agent(
    client=google_dev_client,
    model="gemini-2.0-flash-exp",
    temperature=0.0,
    tools=[change_music, adjust_volume, change_bpm],
    planning=True,  # Planning mode is enabled for this subagent.
)

## Orchestrator agent

In [15]:
# Orchestrator agent as a subagent: it has no tools of its own and delegates to the two music
# leaf agents, reporting the resulting music state through MusicSchema.
# Rebinds `music_agent` from the first example.
music_agent = Agent(
    client=anthropic_client,
    model="claude-3-5-sonnet-20240620",
    temperature=0.0,
    subagents=[music_power_agent, music_control_agent],
    max_tokens=500,
    response_schema_as_tool=MusicSchema,  # The state of the music.
)

# Orchestrator agent: top of a two-level hierarchy (music_agent is itself an orchestrator).
# Rebinds `orchestrator` from the first example; history is kept in memory only for this one.
orchestrator = Agent(
    client=openai_client,
    model="gpt-4o-2024-11-20",
    temperature=0.0,
    subagents=[lights_agent, disco_ball_agent, music_agent],
    conversation_memory=InMemoryConversationMemory(),
    response_schema_as_tool=ResponseSchema,  # The state of the party.
    planning=True,
)

In [16]:
# First turn of the stateful example.
prompt = "Turn this place into a party mood."

# Calls the LLM services (network access and valid credentials required).
response = orchestrator.generate_text(prompt=prompt)

[32m[2025-01-05 21:50:35][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Please make a concise plan to answer the following question/requirement, considering the conversation history.\nYou can invoke the sub-agents or tools to answer the questions/requirements shown in the capabilities section.\nAgent has no description while the tools have a description.\n\nQuestion/Requirement:\nTurn this place into a party mood.\n\nCapabilities:\n- lights_agent\n  - turn_light_on: Turn the lights on.\n  - adjust_lights: Adjust the brightness of the lights.\n- disco_ball_agent\n  - power_disco_ball: Powers the spinning dissco ball.\n  - stop_disco_ball: Stop the disco ball power and spinning.\n- music_agent\n  - music_power_agent\n    - start_music: Turn on the music. The genre, BPM, and volume are randomly selected.\n  - music_control_agent\n    - change_music: Change the music genre and BPM.\n    - adjust_volume: Adjust the volume of the music.\n    - change_bpm: Change the BPM of the music.\n')]

In [17]:
# Per-agent token usage for this turn, including the nested music leaf agents.
list(response.usage.items())

[('music_agent',
  Usage(model_name='claude-3-5-sonnet-20240620', prompt_tokens=3547, output_tokens=472)),
 ('lights_agent',
  Usage(model_name='gemini-2.0-flash-exp', prompt_tokens=470, output_tokens=14)),
 ('music_power_agent',
  Usage(model_name='gpt-4o-mini-2024-07-18', prompt_tokens=283, output_tokens=60)),
 ('disco_ball_agent',
  Usage(model_name='gpt-4o-2024-11-20', prompt_tokens=786, output_tokens=47)),
 ('music_control_agent',
  Usage(model_name='gemini-2.0-flash-exp', prompt_tokens=1184, output_tokens=148)),
 ('root',
  Usage(model_name='gpt-4o-2024-11-20', prompt_tokens=2618, output_tokens=363))]

In [18]:
# With ResponseSchema set on the orchestrator, the response text is a JSON string (see the output).
print(response.contents[0].text)

{"disco_ball": {"power": true, "spinning": true}, "music": {"genre": "pop", "bpm": 120, "volume": 0.8}, "lights": {"brightness": 0.8}}


Validate the response against the schema

In [19]:
# Parse the JSON text into ResponseSchema; pydantic raises a validation error if it does not conform.
# NOTE(review): the variable name is misspelled ("resposne"); left unchanged in case other cells use it.
valid_resposne = ResponseSchema.model_validate_json(response.contents[0].text)
valid_resposne.model_dump()

{'disco_ball': {'power': True, 'spinning': True},
 'music': {'genre': <MusicGenre.pop: 'pop'>, 'bpm': 120, 'volume': 0.8},
 'lights': {'brightness': 0.8}}

The top-level orchestrator and each subagent have their own conversation memory.

In [20]:
# History held by the top-level orchestrator; its first entry is the generated planning prompt.
orchestrator.load_history()

[Prompt(type='Prompt', role='user', contents=[TextPrompt(text='Please make a concise plan to answer the following question/requirement, considering the conversation history.\nYou can invoke the sub-agents or tools to answer the questions/requirements shown in the capabilities section.\nAgent has no description while the tools have a description.\n\nQuestion/Requirement:\nTurn this place into a party mood.\n\nCapabilities:\n- lights_agent\n  - turn_light_on: Turn the lights on.\n  - adjust_lights: Adjust the brightness of the lights.\n- disco_ball_agent\n  - power_disco_ball: Powers the spinning dissco ball.\n  - stop_disco_ball: Stop the disco ball power and spinning.\n- music_agent\n  - music_power_agent\n    - start_music: Turn on the music. The genre, BPM, and volume are randomly selected.\n  - music_control_agent\n    - change_music: Change the music genre and BPM.\n    - adjust_volume: Adjust the volume of the music.\n    - change_bpm: Change the BPM of the music.\n')], name=None)

Conversation memory of subagent

In [21]:
# Index 2 is the nested music_agent (third entry of the orchestrator's `subagents` list).
orchestrator.subagents[2]

Agent(name=music_agent)

In [22]:
# History of the nested music_agent, including its routing call to music_power_agent.
orchestrator.subagents[2].load_history()

[Prompt(type='Prompt', role='user', contents=[TextPrompt(text='Start the music and ensure it is set to a party-friendly genre, BPM, and volume.')], name=None),
 Response(type='Response', role='assistant', contents=[TextResponse(text="Certainly! I'll start the music for you and make sure it's set to a party-friendly configuration. To do this, I'll use the music power agent to start the music. The agent will randomly select a genre, BPM, and volume, but I'll instruct it to focus on party-friendly settings."), ToolCallResponse(name='route_music_power_agent', args='{"instruction": "Start the music with settings suitable for a party. Choose a genre that\'s typically popular for parties (like pop, hip-hop, or rock), set the BPM to an energetic level (around 120-140 BPM), and set the volume to a moderately high level (around 0.7-0.8) to create a lively atmosphere without being too loud."}', call_id='toolu_0197rYxcv6RX9sSpBfA5rC4D')], usage=Usage(model_name=None, prompt_tokens=0, output_tokens

In [23]:
# One level deeper: index 0 of music_agent's own `subagents` list is music_power_agent.
orchestrator.subagents[2].subagents[0]

Agent(name=music_power_agent)

In [24]:
# History of the leaf agent, showing the instruction it received and its `start_music` tool call.
orchestrator.subagents[2].subagents[0].load_history()

[Prompt(type='Prompt', role='user', contents=[TextPrompt(text="Start the music with settings suitable for a party. Choose a genre that's typically popular for parties (like pop, hip-hop, or rock), set the BPM to an energetic level (around 120-140 BPM), and set the volume to a moderately high level (around 0.7-0.8) to create a lively atmosphere without being too loud.")], name=None),
 Response(type='Response', role='assistant', contents=[ToolCallResponse(name='start_music', args='{}', call_id='call_fbXGUmSJV4F4QWBhtHdAgmcg')], usage=Usage(model_name=None, prompt_tokens=0, output_tokens=0), raw=None, name='music_power_agent', is_last_chunk=None, prompt=None),
 Prompt(type='Prompt', role='user', contents=[ToolUsePrompt(output='Starting music! Genre: jazz, BPM: 120, Volume: 0.8883200024023981', error=None, call_id='call_fbXGUmSJV4F4QWBhtHdAgmcg', args='{}', name='start_music')], name=None),
 Response(type='Response', role='assistant', contents=[TextResponse(text="The music has started! It'

Next turn prompt

In [25]:
# Second turn on the same orchestrator object, so earlier history is still available to the agents.
prompt = "Prefer to a jazz music with a calm tempo. Please change the music to jazz and stop the disco ball."

# Calls the LLM services (network access and valid credentials required).
response = orchestrator.generate_text(prompt=prompt)

[32m[2025-01-05 21:51:00][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Please make a concise plan to answer the following question/requirement, considering the conversation history.\nYou can invoke the sub-agents or tools to answer the questions/requirements shown in the capabilities section.\nAgent has no description while the tools have a description.\n\nQuestion/Requirement:\nPrefer to a jazz music with a calm tempo. Please change the music to jazz and stop the disco ball.\n\nCapabilities:\n- lights_agent\n  - turn_light_on: Turn the lights on.\n  - adjust_lights: Adjust the brightness of the lights.\n- disco_ball_agent\n  - power_disco_ball: Powers the spinning dissco ball.\n  - stop_disco_ball: Stop the disco ball power and spinning.\n- music_agent\n  - music_power_agent\n    - start_music: Turn on the music. The genre, BPM, and volume are randomly selected.\n  - music_control_agent\n    - change_music: Change the music genre and BPM.\n    - adjust_volume: Adjust the volume of 

In [26]:
# Usage for the second turn; only the agents listed in the output below report usage this time.
list(response.usage.items())

[('disco_ball_agent',
  Usage(model_name='gpt-4o-2024-11-20', prompt_tokens=1292, output_tokens=46)),
 ('music_control_agent',
  Usage(model_name='gemini-2.0-flash-exp', prompt_tokens=2211, output_tokens=133)),
 ('root',
  Usage(model_name='gpt-4o-2024-11-20', prompt_tokens=4645, output_tokens=208)),
 ('music_agent',
  Usage(model_name='claude-3-5-sonnet-20240620', prompt_tokens=3339, output_tokens=230))]

Validate the response against the schema

In [27]:
# Re-validate the updated state after the second turn (rebinds the variable from the first validation).
# NOTE(review): the variable name is misspelled ("resposne"); left unchanged in case other cells use it.
valid_resposne = ResponseSchema.model_validate_json(response.contents[0].text)
valid_resposne.model_dump()

{'disco_ball': {'power': False, 'spinning': False},
 'music': {'genre': <MusicGenre.jazz: 'jazz'>, 'bpm': 70, 'volume': 0.8},
 'lights': {'brightness': 0.8}}

Conversation memory in top-level orchestrator.

In [28]:
# Orchestrator history after the second turn; it still begins with the first turn's planning prompt.
orchestrator.load_history()

[Prompt(type='Prompt', role='user', contents=[TextPrompt(text='Please make a concise plan to answer the following question/requirement, considering the conversation history.\nYou can invoke the sub-agents or tools to answer the questions/requirements shown in the capabilities section.\nAgent has no description while the tools have a description.\n\nQuestion/Requirement:\nTurn this place into a party mood.\n\nCapabilities:\n- lights_agent\n  - turn_light_on: Turn the lights on.\n  - adjust_lights: Adjust the brightness of the lights.\n- disco_ball_agent\n  - power_disco_ball: Powers the spinning dissco ball.\n  - stop_disco_ball: Stop the disco ball power and spinning.\n- music_agent\n  - music_power_agent\n    - start_music: Turn on the music. The genre, BPM, and volume are randomly selected.\n  - music_control_agent\n    - change_music: Change the music genre and BPM.\n    - adjust_volume: Adjust the volume of the music.\n    - change_bpm: Change the BPM of the music.\n')], name=None)

Conversation memory of subagent.

In [29]:
# Index 2 is the nested music_agent (third entry of the orchestrator's `subagents` list).
orchestrator.subagents[2]

Agent(name=music_agent)

In [30]:
# music_agent history after the second turn; it still begins with the first turn's instruction.
orchestrator.subagents[2].load_history()

[Prompt(type='Prompt', role='user', contents=[TextPrompt(text='Start the music and ensure it is set to a party-friendly genre, BPM, and volume.')], name=None),
 Response(type='Response', role='assistant', contents=[TextResponse(text="Certainly! I'll start the music for you and make sure it's set to a party-friendly configuration. To do this, I'll use the music power agent to start the music. The agent will randomly select a genre, BPM, and volume, but I'll instruct it to focus on party-friendly settings."), ToolCallResponse(name='route_music_power_agent', args='{"instruction": "Start the music with settings suitable for a party. Choose a genre that\'s typically popular for parties (like pop, hip-hop, or rock), set the BPM to an energetic level (around 120-140 BPM), and set the volume to a moderately high level (around 0.7-0.8) to create a lively atmosphere without being too loud."}', call_id='toolu_0197rYxcv6RX9sSpBfA5rC4D')], usage=Usage(model_name=None, prompt_tokens=0, output_tokens