# Human Player Test for ASCII Map Environment

This notebook demonstrates how to control a single StagHunt agent from the keyboard (via `HumanPlayer`) in an environment generated from an ASCII map.


In [3]:
import hydra
import numpy as np
from omegaconf import DictConfig, OmegaConf

# sorrel imports
from sorrel.examples.staghunt_physical.agents_v2 import (
    StagHuntAgent,
    StagHuntObservation,
)
from sorrel.examples.staghunt_physical.entities import Empty, entity_list
from sorrel.examples.staghunt_physical.env import StagHuntEnv
from sorrel.examples.staghunt_physical.world import StagHuntWorld
from sorrel.action.action_spec import ActionSpec
from sorrel.models.human_player import HumanPlayer

In [4]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Human player test for ASCII map environment


In [None]:
def main(
    config_path: str = "../configs/config.yaml",
    max_invalid_inputs: int = 5,
) -> None:
    """Run an interactive, keyboard-driven StagHunt game on the ASCII-map world.

    The function builds a ``StagHuntWorld``/``StagHuntEnv`` pair from the
    configuration file, replaces the environment's agents with a single agent
    whose "model" asks the user for an action on every step, and then starts
    the experiment loop.

    Args:
        config_path: Path to the OmegaConf YAML configuration. The default is
            relative to the current working directory (the notebook folder).
        max_invalid_inputs: Number of unrecognised inputs tolerated per action
            prompt; one more than this raises ``KeyboardInterrupt``.

    Raises:
        KeyboardInterrupt: When the user types ``quit`` or exceeds
            ``max_invalid_inputs`` at an action prompt.

    NOTE(review): the output saved with this notebook shows the three
    diagnostic prints followed by ``IndexError: list index out of range`` and
    no later output. The origin of that error cannot be determined from this
    cell alone; the saved run reported 3 agents while a single agent is
    installed below, which may be related -- TODO confirm against
    ``StagHuntEnv``.
    """
    # Load ASCII map configuration
    config = OmegaConf.load(config_path)

    # Create world with ASCII map generation
    world = StagHuntWorld(config=config, default_entity=Empty())
    experiment = StagHuntEnv(world, config)

    print(f"World dimensions: {world.height}x{world.width}")
    print(f"Number of agents: {len(experiment.agents)}")
    print(f"Agent spawn points: {len(world.agent_spawn_points)}")

    # Create observation spec with dynamic dimensions
    observation_spec = StagHuntObservation(
        entity_list=entity_list,
        full_view=True,
        env_dims=(world.height, world.width),  # Use actual world dimensions
    )

    # Action names in index order; the position in this list is the action id
    # handed to the environment, so the key bindings below are derived from it
    # instead of repeating the numbers by hand.
    action_names = [
        "NOOP",
        "FORWARD",
        "BACKWARD",
        "STEP_LEFT",
        "STEP_RIGHT",
        "TURN_LEFT",
        "TURN_RIGHT",
        "ATTACK",
        "PUNISH",
    ]
    action_spec = ActionSpec(action_names)

    # Keyboard shortcut -> action name.
    key_bindings = {
        "w": "FORWARD",
        "s": "BACKWARD",
        "a": "TURN_LEFT",
        "d": "TURN_RIGHT",
        "q": "STEP_LEFT",
        "e": "STEP_RIGHT",
        "r": "ATTACK",
        "p": "PUNISH",
    }
    key_to_action = {
        key: action_names.index(name) for key, name in key_bindings.items()
    }
    noop_action = action_names.index("NOOP")

    # Create a custom HumanPlayer that works with StagHuntAgent
    class StagHuntHumanPlayer(HumanPlayer):
        """HumanPlayer variant that prompts with StagHunt-specific key bindings."""

        def __init__(self, input_size, action_space, memory_size):
            # Kept explicit so the keyword names used at the call site below
            # are guaranteed regardless of the parent's parameter names.
            super().__init__(input_size, action_space, memory_size)

        def take_action(self, state: np.ndarray):
            """Ask the user for the next action and return its integer id.

            ``state`` is accepted for interface compatibility and is not used.
            Input is stripped of surrounding whitespace and lower-cased before
            matching, so ``" W "`` is treated like ``"w"``.

            Raises:
                KeyboardInterrupt: On ``quit`` or too many invalid inputs.
            """
            if self.show:
                from IPython.display import clear_output

                clear_output(wait=True)

                # Simple ASCII visualization
                print("Current Environment State:")
                print("=" * 50)

                # The attributes read here are not assigned anywhere in this
                # cell; the details are printed only if they have been
                # attached to the player elsewhere.
                if hasattr(self, "world") and self.world is not None:
                    # Print basic world info
                    print(f"World dimensions: {self.world.height}x{self.world.width}")
                    print(
                        f"Agent location: {getattr(self, 'agent_location', 'Unknown')}"
                    )
                    print(
                        f"Agent orientation: {getattr(self, 'agent_orientation', 'Unknown')}"
                    )

                    # Print inventory if available
                    if hasattr(self, "agent_inventory"):
                        print(f"Inventory: {self.agent_inventory}")
                    print("=" * 50)

            # Get action from user with StagHunt-specific controls
            invalid_inputs = 0
            while True:
                choice = (
                    input(
                        "Select Action (w=FORWARD, s=BACKWARD, a=TURN_LEFT, d=TURN_RIGHT, q=STEP_LEFT, e=STEP_RIGHT, r=ATTACK, p=PUNISH, 0=NOOP): "
                    )
                    .strip()
                    .lower()
                )

                if choice in key_to_action:
                    return key_to_action[choice]
                # Numeric entry: accept any id listed by the parent player.
                if choice in [str(act) for act in self.action_list]:
                    return int(choice)
                if choice == "0":
                    return noop_action
                if choice == "quit":
                    raise KeyboardInterrupt("Quitting...")

                invalid_inputs += 1
                if invalid_inputs > max_invalid_inputs:
                    raise KeyboardInterrupt("Too many invalid inputs. Quitting...")
                print("Please try again. Possible actions are below.")
                print(
                    "Keys: w=FORWARD, s=BACKWARD, a=TURN_LEFT, d=TURN_RIGHT, q=STEP_LEFT, e=STEP_RIGHT, r=ATTACK, p=PUNISH, 0=NOOP"
                )
                print("Or enter action number (0-8) or 'quit'")

    # Create a custom StagHuntAgent that works with human player
    class StagHuntHumanAgent(StagHuntAgent):
        """StagHuntAgent that defers every decision to the human-player model."""

        def get_action(self, state: np.ndarray) -> int:
            """Return the action chosen by the human player for ``state``."""
            # For human player, we don't need memory stacking
            # Just pass the state directly to the model
            return self.model.take_action(state)

        def add_memory(
            self, state: np.ndarray, action: int, reward: float, done: bool
        ) -> None:
            """Discard the transition: a human player keeps no replay memory."""
            # For human player, we don't need to store experiences in memory
            # The human player doesn't learn from experience, so we can skip this
            pass

    # Create human player with dynamic dimensions
    # HumanPlayer expects (height, width, channels) format
    human_player = StagHuntHumanPlayer(
        input_size=(world.height, world.width, 3),  # 3 layers: terrain, dynamic, beam
        action_space=action_spec.n_actions,
        memory_size=1,
    )

    # Create human agent
    human_agent = StagHuntHumanAgent(
        observation_spec=observation_spec,
        action_spec=action_spec,
        model=human_player,
    )

    # Replace all agents with human agent for single-player mode
    experiment.override_agents(agents=[human_agent])

    print("\nStarting human player game with ASCII map environment...")
    print("Controls:")
    print("  w = FORWARD, s = BACKWARD")
    print("  a = TURN_LEFT, d = TURN_RIGHT")
    print("  q = STEP_LEFT, e = STEP_RIGHT")
    print("  r = ATTACK, p = PUNISH")
    print("  0 = NOOP, quit = Exit game")
    print("\nPress Enter to start...")
    input()

    experiment.run_experiment()

### Run the human player game


In [6]:
# Start the interactive session. main() blocks on input(): once for the
# "Press Enter to start..." prompt and then for every action selection.
main()

World dimensions: 24x25
Number of agents: 3
Agent spawn points: 88


IndexError: list index out of range