In [1]:
!pip install textarena smithery anthropic httpx mcp --upgrade

Collecting textarena
  Downloading TextArena-0.5.9-py3-none-any.whl.metadata (16 kB)
Downloading TextArena-0.5.9-py3-none-any.whl (8.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: textarena
  Attempting uninstall: textarena
    Found existing installation: TextArena 0.5.8
    Uninstalling TextArena-0.5.8:
      Successfully uninstalled TextArena-0.5.8
Successfully installed textarena-0.5.9


In [10]:
import textarena as ta
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Get API key from environment variables
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
e2b_api_key= os.getenv("E2B_API_KEY")

In [5]:
from textarena.core import Agent
import textarena as ta
import asyncio
from typing import Optional

STANDARD_GAME_PROMPT = "You are a competitive game player. Make sure you read the game instructions carefully, and always follow the required format."

class AsyncAnthropicAgent(Agent):
    """Agent class using the Anthropic Claude API to generate responses asynchronously."""
    def __init__(self, model_name: str, system_prompt: Optional[str] = STANDARD_GAME_PROMPT, max_tokens: int = 1000, temperature: float = 0.9, verbose: bool = False):
        """
        Initialize the Anthropic agent.

        Args:
            model_name (str): The name of the Claude model (e.g., "claude-3-5-sonnet-20241022").
            system_prompt (Optional[str]): The system prompt to use (default: STANDARD_GAME_PROMPT).
            max_tokens (int): The maximum number of tokens to generate.
            temperature (float): The temperature for randomness in response generation.
            verbose (bool): If True, additional debug info will be printed.
        """
        super().__init__()
        self.model_name = model_name
        self.system_prompt = system_prompt
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.verbose = verbose
        
        try:
            import anthropic
        except ImportError:
            raise ImportError(
                "Anthropic package is required for AsyncAnthropicAgent. "
                "Install it with: pip install anthropic"
            )
            
        self.client = anthropic.AsyncAnthropic()
    
    async def _make_request(self, observation: str) -> str:
        """Make a single API request to Anthropic and return the generated message."""
        response = await self.client.messages.create(
            model=self.model_name,
            max_tokens=self.max_tokens,
            temperature=self.temperature,
            system=self.system_prompt,
            messages=[
                {"role": "user", "content": [{"type": "text", "text": observation}]}
            ]
        )
        
        return response.content[0].text.strip()
    
    async def _retry_request(self, observation: str, retries: int = 3, delay: int = 5) -> str:
        """
        Attempt to make an API request with retries.

        Args:
            observation (str): The input to process.
            retries (int): The number of attempts to try.
            delay (int): Seconds to wait between attempts.

        Raises:
            Exception: The last exception caught if all retries fail.
        """
        last_exception = None
        for attempt in range(1, retries + 1):
            try:
                response = await self._make_request(observation)
                if self.verbose:
                    print(f"\nObservation: {observation}\nResponse: {response}")
                return response
            except Exception as e:
                last_exception = e
                print(f"Attempt {attempt} failed with error: {e}")
                if attempt < retries:
                    await asyncio.sleep(delay)
        raise last_exception
    
    async def __call__(self, observation: str) -> str:
        """
        Process the observation using the Anthropic API and return the generated response.
        
        Args:
            observation (str): The input string to process.
        
        Returns:
            str: The generated response.
        """
        if not isinstance(observation, str):
            raise ValueError(f"Observation must be a string. Received type: {type(observation)}")
        return await self._retry_request(observation)

In [6]:
import textarena as ta
import smithery
import mcp
import os
import json


class MCPAgent(AsyncAnthropicAgent):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.url = smithery.create_smithery_url(
            "wss://server.smithery.ai/e2b/ws", {"e2bApiKey": os.environ["E2B_API_KEY"]}
        )

    async def _make_request(self, observation: str) -> str:
        """Make a single API request to Anthropic and return the generated message."""
        async with smithery.websocket_client(self.url) as streams:
            async with mcp.client.session.ClientSession(*streams) as session:

                try:
                    tools_result = await session.list_tools()
                    tools = tools_result.model_dump()["tools"]

                    tools = [
                        {"input_schema": tool.pop("inputSchema"), **tool}
                        for tool in tools
                        if "inputSchema" in tool
                    ]

                    print("Available tools:", tools)

                    final_response_text = ""
                    is_tool_call_pending = True
                    messages = [
                        {
                            "role": "user",
                            "content": [{"type": "text", "text": observation}],
                        }
                    ]

                    # Loop to handle multiple tool calls in a conversation
                    while is_tool_call_pending:
                        response = await self.client.messages.create(
                            model=self.model_name,
                            max_tokens=self.max_tokens,
                            temperature=self.temperature,
                            system=self.system_prompt,
                            messages=messages,
                            tools=tools,
                        )

                        print("Response:", response)

                        # Check if there's a tool_use in the response
                        is_tool_call_pending = False
                        for content_block in response.content:
                            if content_block.type == "tool_use":
                                is_tool_call_pending = True

                                tool_name = content_block.name
                                tool_input = content_block.input
                                tool_id = content_block.id

                                print(f"Tool called: {tool_name}")
                                print(f"Tool input: {json.dumps(tool_input, indent=2)}")

                                # Execute the tool using MCP session
                                try:
                                    tool_result = await session.call_tool(
                                        tool_name, tool_input
                                    )

                                    # Convert tool result to string format for Anthropic
                                    # The content must be a string, not an object
                                    tool_result_dict = tool_result.model_dump()
                                except Exception as e:
                                    if "MCP error" in str(e):
                                        tool_result_dict = {"error": str(e)}

                                result_str = json.dumps(tool_result_dict)
                                print(f"Tool result: {result_str}")

                                # Add tool call and result to messages
                                messages.append(
                                    {
                                        "role": "assistant",
                                        "content": [content_block.model_dump()],
                                    }
                                )

                                # Add tool response to messages - content must be a string
                                messages.append(
                                    {
                                        "role": "user",
                                        "content": [
                                            {
                                                "type": "tool_result",
                                                "tool_use_id": tool_id,
                                                "content": result_str,  # Now it's a string
                                            }
                                        ],
                                    }
                                )
                            elif content_block.type == "text":
                                # Accumulate text responses
                                final_response_text += content_block.text

                        # If no tool calls were made, we use the text response
                        if not is_tool_call_pending and not final_response_text:
                            final_response_text = response.content[0].text

                except Exception as e:

                    print(f"Error: {e}")
                    raise e

            return final_response_text.strip()

In [11]:
import textarena as ta

# Initialize agents
agents = {
    0: MCPAgent(model_name="claude-3-7-sonnet-20250219"),
    1: AsyncAnthropicAgent(model_name="claude-3-5-haiku-20241022"),
}

# Initialize environment from subset and wrap it
env = ta.make(env_id="SimpleNegotiation-v0")
env = ta.wrappers.LLMObservationWrapper(env=env)
env = ta.wrappers.SimpleRenderWrapper(
    env=env,
    player_names={0: "sonnet", 1: "haiku"},
)

env.reset(num_players=len(agents))
done = False
while not done:
    player_id, observation = env.get_observation()
    action = await agents[player_id](observation)
    done, info = env.step(action=action)
    print("step complete")
rewards = env.close()
print(rewards)

Available tools: [{'input_schema': {'type': 'object', 'properties': {'code': {'type': 'string'}}, 'required': ['code'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, 'name': 'run_code', 'description': 'Run python code in a secure sandbox by E2B. Using the Jupyter Notebook syntax.'}]
Response: Message(id='msg_0181eNcRMgAh1vvhZTeyfRj8', content=[TextBlock(citations=None, text="I'll help you play the Negotiation Game as Player 0. First, let me analyze the resources we have and their values to determine the best trading strategy.", type='text'), ToolUseBlock(id='toolu_01BAPA1YjdwUT8Lrn8JHbzUL', input={'code': '# Let\'s analyze our resources and their values\nresources = {\n    \'Wheat\': {\'qty\': 21, \'value\': 5},\n    \'Wood\': {\'qty\': 23, \'value\': 8},\n    \'Sheep\': {\'qty\': 6, \'value\': 18},\n    \'Brick\': {\'qty\': 21, \'value\': 24},\n    \'Ore\': {\'qty\': 23, \'value\': 33}\n}\n\n# Calculate total value\ntotal_value = sum(res[\'qty\'

step complete


step complete
Available tools: [{'input_schema': {'type': 'object', 'properties': {'code': {'type': 'string'}}, 'required': ['code'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, 'name': 'run_code', 'description': 'Run python code in a secure sandbox by E2B. Using the Jupyter Notebook syntax.'}]
Response: Message(id='msg_013iWYTfJD6oNaCsvQcLbWmn', content=[TextBlock(citations=None, text="I'll analyze this trade offer and decide if it's beneficial for me.", type='text'), ToolUseBlock(id='toolu_01Eo4nwz5T7CvjhNruGCU9iA', input={'code': '# My current resources and their values\nresources = {\n    "Wheat": {"Qty": 21, "Value": 5},\n    "Wood": {"Qty": 23, "Value": 8},\n    "Sheep": {"Qty": 6, "Value": 18},\n    "Brick": {"Qty": 21, "Value": 24},\n    "Ore": {"Qty": 23, "Value": 33}\n}\n\n# Calculate current total value\ncurrent_total_value = sum(res["Qty"] * res["Value"] for res in resources.values())\nprint(f"Current total value: {current_total_val

step complete


step complete
Available tools: [{'input_schema': {'type': 'object', 'properties': {'code': {'type': 'string'}}, 'required': ['code'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, 'name': 'run_code', 'description': 'Run python code in a secure sandbox by E2B. Using the Jupyter Notebook syntax.'}]
Response: Message(id='msg_01WEE2eCgXQ4q27CCBcA4MnG', content=[TextBlock(citations=None, text="I'll analyze this trade offer to determine if it's beneficial for me as Player 0.", type='text'), ToolUseBlock(id='toolu_014G6P9iQ1ydk4GxmhJ8zGWv', input={'code': '# My current resources after the previous trade\ncurrent_resources = {\n    "Wheat": {"Qty": 21 - 4, "Value": 5},\n    "Wood": {"Qty": 23 - 2, "Value": 8},\n    "Sheep": {"Qty": 6 + 1, "Value": 18},\n    "Brick": {"Qty": 21 + 2, "Value": 24},\n    "Ore": {"Qty": 23, "Value": 33}\n}\n\n# Display current resources\nprint("Current resources:")\nfor resource, details in current_resources.items():\n    pri

step complete


step complete
Available tools: [{'input_schema': {'type': 'object', 'properties': {'code': {'type': 'string'}}, 'required': ['code'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, 'name': 'run_code', 'description': 'Run python code in a secure sandbox by E2B. Using the Jupyter Notebook syntax.'}]
Response: Message(id='msg_01GGkj8zRYqqwj1hZYdRewp5', content=[TextBlock(citations=None, text="I'll analyze this trade offer to determine if it's beneficial for me as Player 0.", type='text'), ToolUseBlock(id='toolu_01YC1qwF5g5xHcJbehjbLohF', input={'code': '# Current resources after previous trades\n# Let\'s calculate my current resources after the previous trades\n\n# Starting resources\nwheat = 21\nwood = 23\nsheep = 6\nbrick = 21\nore = 23\n\n# First trade: I gave 4 Wheat, 2 Wood and received 2 Brick, 1 Sheep\nwheat -= 4  # Gave away 4 Wheat\nwood -= 2   # Gave away 2 Wood\nbrick += 2  # Received 2 Brick\nsheep += 1  # Received 1 Sheep\n\n# Second tra

step complete


step complete
Available tools: [{'input_schema': {'type': 'object', 'properties': {'code': {'type': 'string'}}, 'required': ['code'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, 'name': 'run_code', 'description': 'Run python code in a secure sandbox by E2B. Using the Jupyter Notebook syntax.'}]
Response: Message(id='msg_015uM61TgKyLZoERF7a1uei6', content=[TextBlock(citations=None, text="I'll analyze this trade offer to determine if it benefits me as Player 0.", type='text'), ToolUseBlock(id='toolu_019F2TQ5K4MKckyYkqC2nHpf', input={'code': '# Current resources after previous trades\nwheat = 17  # Started with 21, gave 4 in first trade\nwood = 21   # Started with 23, gave 2 in first trade\nsheep = 5   # Started with 6, got 1 in first trade, gave 2 in second trade\nbrick = 26  # Started with 21, got 2 in first trade, got 3 in second trade\nore = 22    # Started with 23, gave 1 in second trade\n\n# Resource values\nwheat_value = 5\nwood_value = 8\n

step complete


step complete
{0: 1, 1: -1}
