In [11]:
import anthropic
import base64
import io
from PIL import Image
from eldengym import EldenGymEnv
import json

In [None]:
# Initialize Anthropic client
client = anthropic.Anthropic(api_key="your-api-key")


def frame_to_base64(frame, target_size=(1440, 810), quality=85):
    """
    Convert numpy frame to base64 for Claude with resizing

    Args:
        frame: numpy array (H, W, C) in BGR format
        target_size: (width, height) - default 768x432 keeps 16:9 ratio
        quality: JPEG quality 1-100 (higher = better quality, larger size)
    """
    # Convert BGR to RGB (game frames come in BGR format)
    frame_rgb = frame[:, :, ::-1]
    img = Image.fromarray(frame_rgb)

    # Resize if needed
    if img.size != target_size:
        img = img.resize(target_size, Image.Resampling.LANCZOS)
        print(f"Resized from {frame.shape[:2][::-1]} to {target_size}")

    # Use JPEG for better compression on game frames
    buffer = io.BytesIO()
    img.save(buffer, format="JPEG", quality=quality, optimize=True)
    import os

    next_step = len(
        [f for f in os.listdir("debug") if f.startswith("step") and f.endswith(".jpg")]
    )
    img.save(f"debug/step{next_step}.jpg")
    size_kb = len(buffer.getvalue()) / 1024
    print(f"Image size: {size_kb:.1f} KB")

    return base64.b64encode(buffer.getvalue()).decode()


def get_claude_action(observation, conversation_history=[], image_config=None):
    """
    Get action from Claude based on observation

    Args:
        image_config: dict with 'target_size' and 'quality' keys
    """
    if image_config is None:
        image_config = {"target_size": (768, 432), "quality": 85}

    # print(observation)
    # Create text description
    obs_text = f"""You are playing a game of Elden Ring. Use the image to see the current state of the game. You also have access to theses information:
    BOSS HP: {observation['boss_hp']:.1%}
    PLAYER HP: {observation['player_hp']:.1%}
    DISTANCE: {observation['distance']:.1f}
    BOSS ANIMATION: {observation['boss_animation']}
    PLAYER ANIMATION: {observation['player_animation']}
    
    Available actions:
    0: no-op, 1: forward, 2: backward, 3: left, 4: right, 5: jump
    6: dodge_forward, 7: dodge_backward, 8: dodge_left, 9: dodge_right
    10: interact, 11: attack, 12: use_item
    
    Choose ONE action (just the number). Consider:
    - Do not just focus on closing the distance, dodging ranged and melee attacks is the most important thing
    - Attack when close and you see an opening
    - Do not attack when boss is winding up for an attack
    - Dodge when boss is attacking, do not dodge prematurely, windup for an attack is not an attack,
    - Move to maintain good distance if you are low on health and heal if you can
    - You will pick an action for every 500 ms, and are locked for that time
    Respond with json with the following keys: description, reason, action in the following format:
    {{
        "description": "description of the what the boss is doing and what the player is doing",
        "reason": "reasoning for the action",
        "action": "action number"
    }}
    """

    # Build messages
    messages = conversation_history + [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/jpeg",
                        "data": frame_to_base64(
                            observation["frame"],
                            target_size=image_config["target_size"],
                            quality=image_config["quality"],
                        ),
                    },
                },
                {"type": "text", "text": obs_text},
            ],
        }
    ]

    # Get Claude's response
    response = client.messages.create(
        model="claude-sonnet-4-5-20250929", max_tokens=500, messages=messages
    )

    response_text = response.content[0].text
    # Parse action (extract first number)
    import re

    json_match = re.search(r"```json\s*(.*?)\s*```", response_text, re.DOTALL)
    if json_match:
        json_str = json_match.group(1)
        response_json = json.loads(json_str)
    else:
        raise ValueError("No JSON found in response")

    print(response_json)

    action = int(response_json["action"])
    reason = response_json["reason"]
    description = response_json["description"]

    return action, reason, description, messages


# Main game loop
def play_game(env, num_steps=100, keep_history=False, image_config=None):
    """
    Play game with Claude

    Args:
        image_config: dict with resolution settings
            Examples:
            - {'target_size': (768, 432), 'quality': 85}  # Balanced (default)
            - {'target_size': (512, 288), 'quality': 75}  # Fast/cheap
            - {'target_size': (1024, 576), 'quality': 90} # High quality
    """
    if image_config is None:
        image_config = {"target_size": (768, 432), "quality": 85}

    print(f"Starting game with image config: {image_config}")

    obs, info = env.reset()
    conversation_history = []

    for step in range(num_steps):
        print(f"\n{'='*60}")
        print(f"STEP {step}")
        print(f"{'='*60}")

        # Get action from Claude
        history = conversation_history if keep_history else []
        action, reasoning, description, messages = get_claude_action(
            obs, history, image_config
        )

        # Debug output
        print(f"Boss HP: {obs['boss_hp']:.1%} | Player HP: {obs['player_hp']:.1%}")
        print(f"Distance: {obs['distance']:.1f}")
        print(f"Description: {description}")
        print(f"\nClaude's reasoning:\n{reasoning}")
        print(f"\nChosen action: {action}")

        # Update conversation history if keeping context
        if keep_history:
            conversation_history = messages + [
                {"role": "assistant", "content": reasoning}
            ]
            # Keep last 10 exchanges to avoid token limits
            if len(conversation_history) > 20:
                conversation_history = conversation_history[-20:]

        # Step environment
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        print(f"Reward: {reward:.2f}")

        if done:
            print("\n🎮 Episode finished!")
            break

    return obs

In [13]:
env = EldenGymEnv()
play_game(env, num_steps=50)

Starting game with image config: {'target_size': (768, 432), 'quality': 85}

STEP 0
Resized from (1444, 864) to (768, 432)
Image size: 60.7 KB
{'description': 'The boss Margit, the Fell Omen is at full health (100%) and appears to be in an idle or neutral stance (animation 2002000). The player character is standing still at medium-long distance (36.8 units away) with full health (100%). Both combatants are at the start of the encounter.', 'reason': "At the start of the fight with distance at 36.8 units, I need to close the gap to engage the boss. The boss is in a neutral animation state (not attacking), so it's safe to move forward. I should approach cautiously to get into combat range while being ready to react to the boss's first attack pattern.", 'action': '1'}
Boss HP: 100.0% | Player HP: 100.0%
Distance: 36.8
Description: The boss Margit, the Fell Omen is at full health (100%) and appears to be in an idle or neutral stance (animation 2002000). The player character is standing stil

{'frame': array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        ...,
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]], shape=(864, 1444, 3), dtype=uint8),
 'boss_hp': 1.0,
 'player_hp': 0.0,
 'distance': np.float64(32.630220050577805),
 'boss_animation': 2003017,
 'player_a

In [32]:
env.client.set_game_speed(1.0)