In [None]:
"""
LLM Agent Example for EldenGym

This notebook demonstrates using Claude (Anthropic) to play Elden Ring by:
1. Sending visual observations to Claude
2. Getting action decisions based on game state
3. Using multi-binary action space
"""

import base64
import io
import json
import re

import anthropic
import eldengym
import numpy as np
from PIL import Image

In [None]:
# Initialize Anthropic client.
# No key is passed explicitly so that no credential is ever stored in the
# notebook or its saved outputs: the SDK falls back to the ANTHROPIC_API_KEY
# environment variable. Export that variable before starting the kernel.
client = anthropic.Anthropic()


def frame_to_base64(frame, target_size=(768, 432), quality=85):
    """
    Encode a game frame as a base64 JPEG string for a Claude image block.

    Args:
        frame: numpy array indexed (H, W, C), channels in BGR order
        target_size: (width, height) of the encoded image; the frame is
            resampled with Lanczos whenever its size differs from this
            (the 768x432 default is a 16:9 ratio)
        quality: JPEG quality 1-100 handed to the encoder

    Returns:
        The JPEG bytes, base64-encoded and decoded to str.
    """
    # Flip the channel axis (BGR -> RGB) before building the PIL image
    picture = Image.fromarray(frame[:, :, ::-1])

    needs_resize = picture.size != target_size
    if needs_resize:
        picture = picture.resize(target_size, Image.Resampling.LANCZOS)
        original_wh = frame.shape[:2][::-1]  # (H, W) -> (W, H) for the log line
        print(f"Resized from {original_wh} to {target_size}")

    # JPEG keeps the payload small compared with a lossless format
    with io.BytesIO() as sink:
        picture.save(sink, format="JPEG", quality=quality, optimize=True)
        jpeg_bytes = sink.getvalue()

    print(f"Image size: {len(jpeg_bytes) / 1024:.1f} KB")

    return base64.b64encode(jpeg_bytes).decode()


def _extract_json_object(text):
    """Return the first JSON object in ``text``, fenced in a code block or bare."""
    # Prefer the contents of a ``` / ```json fence when one is present;
    # otherwise search the whole reply.
    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL)
    candidate = fenced.group(1) if fenced else text

    start = candidate.find("{")
    if start == -1:
        raise ValueError("No JSON found in response")

    # raw_decode stops at the end of the first complete object, so any prose
    # after the JSON is ignored. A malformed object raises
    # json.JSONDecodeError, which is itself a ValueError.
    parsed, _ = json.JSONDecoder().raw_decode(candidate, start)
    return parsed


def get_claude_action(
    env, observation, info, conversation_history=None, image_config=None
):
    """
    Get multi-binary action from Claude based on observation.

    Args:
        env: Environment instance (for action_keys)
        observation: Dict with 'frame' and memory attributes
        info: Dict with processed info (normalized HP, etc.)
        conversation_history: List of previous messages (None means no history)
        image_config: dict with 'target_size' and 'quality' keys

    Returns:
        Multi-binary action array, reasoning, description, messages

    Raises:
        ValueError: if the reply contains no JSON object or the JSON is
            malformed.
    """
    # None instead of a mutable [] default, so no list is shared across calls
    if conversation_history is None:
        conversation_history = []
    if image_config is None:
        image_config = {"target_size": (768, 432), "quality": 85}

    # Get available keys
    available_keys = list(env.action_keys)
    keys_text = "\n".join([f"  - {key}" for key in available_keys])

    # Create text description with new observation format
    obs_text = f"""You are playing Elden Ring against Margit. Analyze the image and game state:

GAME STATE:
- Boss HP: {info.get('boss_hp_normalized', 1.0) * 100:.1f}%
- Player HP: {info.get('player_hp_normalized', 1.0) * 100:.1f}%
- Boss Animation ID: {observation.get('NpcAnimId', 'unknown')}
- Player Animation ID: {observation.get('HeroAnimId', 'unknown')}

AVAILABLE KEYS (you can press multiple simultaneously):
{keys_text}

STRATEGY:
- Dodge when boss is attacking (watch for animation changes)
- Attack when close and boss is recovering
- Do not attack during boss windup
- Dodge timing is critical - not too early!
- Move to maintain good positioning

Respond with JSON containing:
{{
    "description": "what's happening in the fight",
    "reasoning": "why you're taking this action",
    "keys": ["KEY1", "KEY2", ...]  // list of keys to press this frame
}}"""

    # Build messages: prior turns followed by the new image + text turn
    messages = list(conversation_history) + [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/jpeg",
                        "data": frame_to_base64(
                            observation["frame"],
                            target_size=image_config["target_size"],
                            quality=image_config["quality"],
                        ),
                    },
                },
                {"type": "text", "text": obs_text},
            ],
        }
    ]

    # Get Claude's response
    response = client.messages.create(
        model="claude-sonnet-4-20250514", max_tokens=500, messages=messages
    )

    # Concatenate every text block rather than assuming content[0] is text
    response_text = "".join(
        block.text
        for block in response.content
        if getattr(block, "type", None) == "text"
    )

    # Parse JSON response (fenced or bare)
    response_json = _extract_json_object(response_text)

    print(response_json)

    # Convert keys to multi-binary action; names not in action_keys are ignored
    action = np.zeros(len(available_keys), dtype=np.int8)
    pressed_keys = response_json.get("keys") or []
    if isinstance(pressed_keys, str):
        # A bare string would otherwise be iterated character by character
        pressed_keys = [pressed_keys]
    for key in pressed_keys:
        if key in available_keys:
            action[available_keys.index(key)] = 1

    # The model does not always follow the schema exactly, so fall back to
    # non-empty placeholders instead of raising KeyError after the API call.
    reasoning = str(response_json.get("reasoning") or "(no reasoning provided)")
    description = str(
        response_json.get("description") or "(no description provided)"
    )

    return action, reasoning, description, messages


def play_game(env, num_steps=100, keep_history=False, image_config=None):
    """
    Play game with Claude using multi-binary actions.

    Each step asks Claude for an action, prints the decision, and steps the
    environment. The loop stops after num_steps or as soon as the environment
    reports terminated/truncated.

    Args:
        env: EldenGym environment
        num_steps: Maximum number of steps
        keep_history: Whether to maintain conversation context
        image_config: dict with resolution settings

    Returns:
        The last observation returned by the environment.
    """
    if image_config is None:
        image_config = {"target_size": (768, 432), "quality": 85}

    # Keep last 10 exchanges (user + assistant = 2 messages each) to avoid
    # token limits. The count is even so that, with strictly alternating
    # roles, the trimmed window still starts on a user message.
    max_history_messages = 20

    print(f"Starting game with image config: {image_config}")
    print(f"Available keys: {env.action_keys}")

    obs, info = env.reset()
    conversation_history = []

    for step in range(num_steps):
        print(f"\n{'='*60}")
        print(f"STEP {step}")
        print(f"{'='*60}")

        # Get action from Claude
        history = conversation_history if keep_history else []
        action, reasoning, description, messages = get_claude_action(
            env, obs, info, history, image_config
        )

        # Debug output
        player_hp = info.get("player_hp_normalized", 1.0) * 100
        boss_hp = info.get("boss_hp_normalized", 1.0) * 100

        print(f"Boss HP: {boss_hp:.1f}% | Player HP: {player_hp:.1f}%")
        print(f"Description: {description}")
        print(f"\nClaude's reasoning:\n{reasoning}")

        # Show which keys are being pressed
        pressed_keys = [env.action_keys[i] for i, val in enumerate(action) if val == 1]
        print(f"\nKeys pressed: {', '.join(pressed_keys) if pressed_keys else 'none'}")

        # Update conversation history if keeping context
        if keep_history:
            conversation_history = messages + [
                {"role": "assistant", "content": reasoning}
            ]
            if len(conversation_history) > max_history_messages:
                conversation_history = conversation_history[-max_history_messages:]

        # Step environment
        obs, reward, terminated, truncated, info = env.step(action)
        print(f"Reward: {reward:.2f}")

        if terminated or truncated:
            # The emoji are written as escapes so the source stays ASCII-safe;
            # the previous literals were mis-decoded UTF-8 bytes (mojibake).
            if info.get("boss_hp_normalized", 1.0) <= 0:
                print("\n\U0001F389 BOSS DEFEATED! You win!")
            else:
                print("\n\U0001F480 YOU DIED!")
            print("Episode finished.")
            break

    return obs

In [None]:
# Create environment with appropriate memory attributes for LLM
env = eldengym.make(
    "Margit-v0",
    memory_attributes=[
        "HeroHp",
        "HeroMaxHp",
        "NpcHp",
        "NpcMaxHp",
        "HeroAnimId",
        "NpcAnimId",
    ],
    frame_format="jpeg",
    frame_quality=85,
    max_steps=200,
)

# Run with Claude. try/finally ensures env.close() still runs when an API
# call or response parsing raises partway through the episode.
try:
    play_game(env, num_steps=50, keep_history=True)
finally:
    env.close()

Starting game with image config: {'target_size': (768, 432), 'quality': 85}

STEP 0
Resized from (1444, 864) to (768, 432)
Image size: 60.7 KB
{'description': 'The boss Margit, the Fell Omen is at full health (100%) and appears to be in an idle or neutral stance (animation 2002000). The player character is standing still at medium-long distance (36.8 units away) with full health (100%). Both combatants are at the start of the encounter.', 'reason': "At the start of the fight with distance at 36.8 units, I need to close the gap to engage the boss. The boss is in a neutral animation state (not attacking), so it's safe to move forward. I should approach cautiously to get into combat range while being ready to react to the boss's first attack pattern.", 'action': '1'}
Boss HP: 100.0% | Player HP: 100.0%
Distance: 36.8
Description: The boss Margit, the Fell Omen is at full health (100%) and appears to be in an idle or neutral stance (animation 2002000). The player character is standing stil

{'frame': array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        ...,
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]], shape=(864, 1444, 3), dtype=uint8),
 'boss_hp': 1.0,
 'player_hp': 0.0,
 'distance': np.float64(32.630220050577805),
 'boss_animation': 2003017,
 'player_a

In [None]:
# Optional: Adjust game speed for slower/faster gameplay
# env.client.set_game_speed(0.5)  # Slower (easier for LLM)
# env.client.set_game_speed(1.0)  # Normal speed