# Libraries

## Import

In [1]:
# Libraries

import sys
sys.path.append('../data/ma-bench/')
sys.path.append('../data/tau-bench/')

import os
import json
import importlib
import argparse
import warnings
import re
import base64
import uuid

import boto3
from botocore.config import Config

# Strands imports
from strands import Agent, tool
from strands.models import BedrockModel
from strands.multiagent import GraphBuilder
from strands.telemetry.config import StrandsTelemetry

# Parameters

In [2]:
# Shared botocore client configuration, handed to every BedrockModel below:
# pins the AWS region and enables the "standard" retry mode with a generous
# attempt budget.
region_name = "us-west-2"
my_config = Config(
    region_name=region_name,
    signature_version='v4',
    retries=dict(max_attempts=50, mode='standard'),
)

# select domain
# The domain name is interpolated into the module paths imported below
# (tau_bench.envs.<domain>.*, mabench.environments.<domain>.tools_strands)
# and into the path of the task file, so it must match a package that exists
# under the directories added to sys.path.
domain = "airline"
# NOTE(review): the argparse override below is kept commented out; presumably
# because parse_args() would trip over the notebook kernel's own argv —
# confirm before re-enabling.
# # Parse command line arguments
# parser = argparse.ArgumentParser(description='Run agent with specified domain')
# parser.add_argument('--domain', type=str, default=domain, 
#                     help='Domain to use (e.g., "airline", "retail")')
# args = parser.parse_args()

# # Update domain if provided via command line
# domain = args.domain

######################### LANGFUSE SETUP ########################
# Langfuse credentials.
# setdefault() keeps any value already exported in the environment, so real
# keys never have to be pasted into the notebook; the bracketed placeholders
# are only a fallback when nothing is configured.
os.environ.setdefault("LANGFUSE_PUBLIC_KEY", "[ADD PUBLIC KEY HERE]")
os.environ.setdefault("LANGFUSE_SECRET_KEY", "[ADD SECRET KEY HERE]")
os.environ.setdefault("LANGFUSE_HOST", "https://us.cloud.langfuse.com")

# Build Basic Auth header: base64("<public key>:<secret key>")
LANGFUSE_AUTH = base64.b64encode(
    f"{os.environ['LANGFUSE_PUBLIC_KEY']}:{os.environ['LANGFUSE_SECRET_KEY']}".encode()
).decode()

# Configure OpenTelemetry endpoint & headers.
# rstrip("/") tolerates a host value that was exported with a trailing slash.
os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = os.environ["LANGFUSE_HOST"].rstrip("/") + "/api/public/otel/"
os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"Authorization=Basic {LANGFUSE_AUTH}"

# Initialize OpenTelemetry BEFORE creating Strands agent

strands_telemetry = StrandsTelemetry()
# NOTE(review): both exporter setup calls are commented out, so presumably no
# spans leave the process — re-enable one once real credentials are in place.
# strands_telemetry.setup_otlp_exporter()
# strands_telemetry.setup_console_exporter()  # Print traces to console
######################### LANGFUSE SETUP ########################

<strands.telemetry.config.StrandsTelemetry at 0x7f60c58dc580>

# Utils

In [3]:
def import_domain_tools(domain):
    """
    Collect the tool callables exposed by a domain's ``tools_strands`` package.

    Every non-dunder attribute name found on the package is treated as a
    candidate submodule. A candidate is kept only when that submodule imports
    successfully and contains an attribute with the same name as the
    submodule itself; anything else is skipped silently.

    Args:
        domain: domain name interpolated into the package path.

    Returns:
        dict mapping each tool name to the object of that name taken from
        its submodule.
    """
    package = importlib.import_module(f'mabench.environments.{domain}.tools_strands')
    candidates = [name for name in dir(package) if not name.startswith('__')]

    discovered = {}
    for name in candidates:
        try:
            # Each tool is expected to live in a submodule named after itself
            submodule = importlib.import_module(f'mabench.environments.{domain}.tools_strands.{name}')
            if hasattr(submodule, name):
                discovered[name] = getattr(submodule, name)
        except (ImportError, AttributeError):
            # Best effort: attributes that are not importable submodules are ignored
            continue

    return discovered


def run_user_agent(user, agent, max_turns=None):
    """
    Drive a conversation between the simulated user and the agent.

    The conversation opens with the user saying "Hi". On every turn the latest
    user message is sent to the agent, any <thinking> block is stripped from
    the agent's reply, and the remainder is forwarded to the simulated user.
    The loop ends once the user's message contains "###STOP###".

    Args:
        user: callable simulated user; invoked with the agent's reply text.
        agent: callable agent under test; must expose ``.messages``.
        max_turns: optional cap on the number of agent/user exchanges.
            ``None`` (the default) loops until the stop marker appears; pass
            an integer to guard against a simulated user that never emits it.

    Returns:
        ``agent.messages`` as it stands when the conversation ends.
    """
    with warnings.catch_warnings():
        # Python warnings raised during the conversation are suppressed
        warnings.simplefilter("ignore")

        user_response_text = "Hi"
        turns_taken = 0

        while "###STOP###" not in user_response_text:
            if max_turns is not None and turns_taken >= max_turns:
                break

            print("\n\n******** Agent ********\n")
            agent_response = agent(user_response_text)
            # Only the visible reply is forwarded; the thinking part is discarded
            _, agent_response_text = extract_thinking_and_response(str(agent_response))

            print("\n\n******** User *********\n")
            user_response = user(agent_response_text)
            _, user_response_text = extract_thinking_and_response(str(user_response))

            turns_taken += 1
        return agent.messages
    
def extract_thinking_and_response(text):
    """
    Split a model reply into its <thinking> content and the text after it.

    The match is case-insensitive and spans newlines. Only the first
    <thinking>...</thinking> pair is considered; text preceding it is not
    part of either returned piece.

    Returns:
        (thinking, response), both stripped. When no thinking block is
        present, thinking is "" and response is the stripped input.
    """
    found = re.search(r'<thinking>(.*?)</thinking>(.*)', text, re.DOTALL | re.IGNORECASE)
    if found is None:
        return "", text.strip()

    thinking, response = found.groups()
    return thinking.strip(), response.strip()

In [4]:
# Load the domain-specific pieces: the policy text (WIKI), the data and tasks
# modules (imported without keeping a reference), and the tool callables.
try:
    # The wiki module must expose a WIKI attribute
    wiki_module = importlib.import_module(f'tau_bench.envs.{domain}.wiki')
    WIKI = wiki_module.WIKI

    # Data and tasks modules: return values are discarded
    importlib.import_module(f'tau_bench.envs.{domain}.data')
    importlib.import_module(f'tau_bench.envs.{domain}.tasks')

    # Tool name -> tool object
    domain_tools = import_domain_tools(domain)
except ImportError as import_error:
    print(f"Error: Could not import modules for domain '{domain}'. Error: {import_error}")
    print("Available domains may include: airline, retail")
    sys.exit(1)
else:
    print(f"Successfully loaded modules for domain: {domain}")

Successfully loaded modules for domain: airline


# User

In [5]:
def user_prompt(instruction):
    """
    Render the system prompt for the simulated user.

    Args:
        instruction: task description the simulated user should act out; it is
            inserted verbatim between the role line and the rules list.

    Returns:
        The filled-in prompt string.
    """
    template = """
You are a user interacting with an agent.

{instruction}

Rules:
- generate a one line User Response to simulate the user's message (this message will be sent to the agent).
- Do not give away all the instruction at once. Only provide the information that is necessary for the current step.
- Do not hallucinate information that is not provided in the instruction. For example, if the agent asks for the order id but it is not mentioned in the instruction, do not make up an order id, just say you do not remember or have it.
- If the instruction goal is satisified, generate '###STOP###' as a standalone message without anything else to end the conversation.
- Do not repeat the exact instruction in the conversation. Instead, use your own words to convey the same information.
- Try to make the conversation as natural as possible, and stick to the personalities in the instruction.
"""
    return template.format(instruction=instruction)

def user_model():
    """
    Build the Bedrock model used for the simulated user.

    Uses the notebook-level ``region_name`` and ``my_config`` and a
    temperature of 0.0.
    """
    # Other model ids noted by the author:
    #   "anthropic.claude-3-5-sonnet-20240620-v1:0"
    #   "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
    settings = {
        "model_id": "anthropic.claude-3-sonnet-20240229-v1:0",
        "region_name": region_name,
        "max_tokens": 1024,
        "temperature": 0.0,
        "top_p": 1,
        "boto_client_config": my_config,
    }
    return BedrockModel(**settings)

def simulated_user_tracing(user_id, session_id, domain):
    """
    Build the trace attributes attached to the simulated-user agent.

    Both ids are converted to ``str`` first: callers may pass objects such as
    ``uuid.UUID``, and telemetry attribute values (and every element of a tag
    list) need to be primitive types rather than arbitrary objects.

    Args:
        user_id: identifier of the user the task belongs to.
        session_id: identifier of this conversation (str or UUID).
        domain: domain name, used to build the run tag.

    Returns:
        dict with "user.id", "session.id" and a "langfuse.tags" list holding
        the user id, the session id and the run tag.
    """
    user_id = str(user_id)
    session_id = str(session_id)

    return {
        "user.id": user_id,
        "session.id": session_id,
        "langfuse.tags": [
            user_id,
            session_id,
            f"awsStrands-singleAgent_multiTurn-{domain}",
        ],
    }

def simulated_user(instruction, user_id, session_id, domain):
    """
    Create the Strands Agent that plays the user side of the conversation.

    The agent gets the system prompt rendered from ``instruction``, the model
    from ``user_model()`` and the trace attributes from
    ``simulated_user_tracing``; no tools are attached.
    """
    system_prompt = user_prompt(instruction)
    bedrock_model = user_model()
    attributes = simulated_user_tracing(user_id, session_id, domain)
    agent_name = f"awsStrands-singleAgent_multiTurn_simulatedUser-{domain}-{user_id}-{session_id}"

    return Agent(
        name=agent_name,
        model=bedrock_model,
        system_prompt=system_prompt,
        trace_attributes=attributes,
    )

# Agent

In [6]:
# Flat list of the tool objects discovered for the selected domain
tools = [*domain_tools.values()]

def agent_prompt():
    """
    Render the system prompt for the agent under test.

    The prompt consists of a short role statement, an <instructions> section
    and a <policy> section filled with the notebook-level ``WIKI`` text.

    Returns:
        The filled-in prompt string.
    """
    # The <instructions> section is closed with a proper </instructions> tag
    # so it is delimited the same way as <policy>.
    system_prompt_template = """
You are a helpful assistant for a travel website. Help the user answer any questions.

<instructions>
- Remeber to check if the the airport city is in the state mentioned by the user. For example, Houston is in Texas.
- Infer about the the U.S. state in which the airport city resides. For example, Houston is in Texas.
- You should not use made-up or placeholder arguments.
</instructions>

<policy>
{policy}
</policy>
"""

    return system_prompt_template.format(policy=WIKI)


def agent_model():
    """
    Build the Bedrock model used for the agent under test.

    Uses the notebook-level ``region_name`` and ``my_config``, a temperature
    of 0.0, and non-streaming responses.
    """
    # Other model ids noted by the author:
    #   "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
    #   "openai.gpt-oss-120b-1:0"
    #   "us.deepseek.r1-v1:0"
    settings = {
        "model_id": "deepseek.v3-v1:0",
        "region_name": region_name,
        "max_tokens": 1024,
        "temperature": 0.0,
        "top_p": 1,
        "boto_client_config": my_config,
        # Streaming was disabled for DeepSeek R1 (doesn't support tool use in
        # streaming mode). NOTE(review): the selected id is now deepseek.v3 —
        # confirm the flag is still required.
        "streaming": False,
    }
    return BedrockModel(**settings)


def agent_tracing(user_id, session_id, domain):
    """
    Build the trace attributes attached to the agent under test.

    Both ids are converted to ``str`` first: callers may pass objects such as
    ``uuid.UUID``, and telemetry attribute values (and every element of a tag
    list) need to be primitive types rather than arbitrary objects.

    Args:
        user_id: identifier of the user the task belongs to.
        session_id: identifier of this conversation (str or UUID).
        domain: domain name, used to build the run tag.

    Returns:
        dict with "user.id", "session.id" and a "langfuse.tags" list holding
        the user id, the session id and the run tag.
    """
    user_id = str(user_id)
    session_id = str(session_id)

    return {
        "user.id": user_id,
        "session.id": session_id,
        "langfuse.tags": [
            user_id,
            session_id,
            f"awsStrands-singleAgent_multiTurn-{domain}",
        ],
    }


def react_agent(tools, user_id, session_id, domain):
    """
    Create the Strands Agent under test.

    The agent gets the system prompt from ``agent_prompt()``, the model from
    ``agent_model()``, the supplied ``tools`` and the trace attributes from
    ``agent_tracing``.
    """
    system_prompt = agent_prompt()
    bedrock_model = agent_model()
    attributes = agent_tracing(user_id, session_id, domain)
    agent_name = f"awsStrands-singleAgent_multiTurn-{domain}-{user_id}-{session_id}"

    return Agent(
        name=agent_name,
        model=bedrock_model,
        tools=tools,
        system_prompt=system_prompt,
        trace_attributes=attributes,
    )

In [None]:
# Scratch cell: the bare, undefined name that used to be here raised NameError
# and stopped "Restart & Run All" before the Run section.

# Run

In [7]:
# Location of the task definitions for the selected domain. (The variable is
# called output_path, but the file is only read here.)
output_path = os.path.join("..", "data", "tau-bench", "tau_bench", "envs", f"{domain}", "tasks_singleturn.json")
# Read as UTF-8 explicitly so the result does not depend on the platform's
# default locale encoding.
with open(output_path, "r", encoding="utf-8") as file:
    tasks = json.load(file)


In [8]:
# Runs a single task end to end. The commented-out loop header and `break`
# mark where this cell can be turned into a loop over every task.
# for index,task in enumerate(tasks):

index = 20
task = tasks[index]

# Banner: pad with '#' on both sides so the line is roughly 50 columns wide
index_str = str(index)
num_hashes = (50 - len(index_str) - 9) // 2
print(f"\n{'#' * num_hashes} Index:{index} {'#' * num_hashes}\n")

instruction = task['instruction']
print(f"Processing instruction: {instruction}")

user_id = task['user_id']
# Stored as a plain string: the id is placed into trace attributes and tags,
# which need primitive values rather than a uuid.UUID object.
session_id = str(uuid.uuid4())
print(f"User ID: {user_id}\tSession ID: {session_id}\tDomain:{domain}")

user = simulated_user(instruction, user_id, session_id, domain)
agent = react_agent(tools, user_id, session_id, domain)

messages = run_user_agent(user, agent)
print(messages)

    # break


################### Index:20 ###################

Processing instruction: Your user id is james_taylor_7043. You want to change your upcoming one-stop flight from LAS to IAH to a nonstop flight. Your reservation ID is 1N99U6. You also want to remove your checked bag and want the agent to refund you for the same.
User ID: james_taylor_7043	Session ID: 14eec3f0-2508-4bea-82c3-541e6c6f1479	Domain:airline


******** Agent ********

Hello! Welcome to the airline travel assistant. How can I help you with your travel plans today?

******** User *********

I need to change my upcoming one-stop flight from Las Vegas to Houston to a nonstop flight. My reservation number is 1N99U6.

******** Agent ********

Okay, let me look up your reservation details to change your flight from Las Vegas to Houston to a nonstop flight.
Tool #1: get_reservation_details


I see your reservation 1N99U6 is a round-trip economy flight from Las Vegas (LAS) to Houston (IAH) with one stop in Phoenix (PHX) on the outbou