# Human Player Test for Ingroup Bias Game

This notebook allows you to play the ingroup bias game manually using keyboard controls.
You can test the game mechanics, observe agent behavior, and understand the coordination dynamics.


In [None]:
import numpy as np
from omegaconf import DictConfig, OmegaConf

# sorrel imports
from sorrel.examples.ingroupbias.agents import IngroupBiasAgent
from sorrel.examples.ingroupbias.entities import Empty, entity_list
from sorrel.examples.ingroupbias.env import IngroupBiasEnv
from sorrel.examples.ingroupbias.world import IngroupBiasWorld
from sorrel.action.action_spec import ActionSpec
from sorrel.observation.observation_spec import OneHotObservationSpec
from sorrel.models.human_player import HumanPlayer, HumanObservation

In [None]:
# Enable IPython's autoreload extension. Mode 2 re-imports all loaded modules
# (other than those excluded with %aimport below) before each cell executes,
# so edits to library code take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Exclude problematic modules from autoreload
# (a leading "-" in %aimport marks a module as "do not reload").
%aimport -tensorboard
%aimport -tensorboard.compat
%aimport -tensorboard.compat.tensorflow_stub

### Human player test for the ingroup bias game


In [None]:
def main(config_path):
    """Run an interactive, keyboard-controlled session of the ingroup bias game.

    Loads the experiment configuration, builds the world and environment,
    replaces the first agent with a human-controlled agent, prints the
    controls, and starts the experiment loop.

    Args:
        config_path: Path to the YAML configuration file; passed directly to
            ``OmegaConf.load``.

    Raises:
        KeyboardInterrupt: When the user types ``quit`` at the action prompt,
            or enters more than five invalid inputs within a single turn.
    """
    config = OmegaConf.load(config_path)
    world = IngroupBiasWorld(config=config, default_entity=Empty())
    experiment = IngroupBiasEnv(world, config)
    agents = experiment.agents

    # Use actual world dimensions instead of config dimensions
    observation_spec = HumanObservation(
        entity_list=entity_list,
        full_view=True,
        env_dims=(world.height, world.width),
    )
    # The position of each name in this list is the integer action index
    # returned by IngroupBiasHumanPlayer.take_action below.
    action_spec = ActionSpec(
        [
            "move_up",
            "move_down",
            "move_left",
            "move_right",
            "turn_left",
            "turn_right",
            "strafe_left",
            "strafe_right",
            "interact",
        ]
    )

    # Create a custom HumanPlayer that works with IngroupBiasAgent
    class IngroupBiasHumanPlayer(HumanPlayer):
        """Human-driven model that renders the world and reads actions from stdin."""

        # Keyboard shortcut -> action index (indices follow ``action_spec``).
        # NOTE: "0" is advertised to the user as a noop, but the action spec
        # has no noop entry, so it is sent as index 0 (move_up).
        KEY_TO_ACTION = {
            "w": 0,  # move_up
            "s": 1,  # move_down
            "a": 2,  # move_left
            "d": 3,  # move_right
            "q": 4,  # turn_left
            "e": 5,  # turn_right
            "z": 6,  # strafe_left
            "c": 7,  # strafe_right
            "r": 8,  # interact
            "0": 0,  # noop (same as move_up for simplicity)
        }

        # (colour, label) pairs shown in the plot legend, in display order.
        LEGEND_ENTRIES = (
            ("gray", "Wall"),
            ("brown", "Sand"),
            ("red", "Red Resource"),
            ("green", "Green Resource"),
            ("blue", "Blue Resource"),
            ("yellow", "Agent"),
            ("white", "Empty"),
        )

        def __init__(self, input_size, action_space, memory_size):
            super().__init__(input_size, action_space, memory_size)
            # Calculate the expected visual size (without extra features).
            # tile_size and num_channels are not set here; presumably they
            # are provided by HumanPlayer.__init__ -- verify against the base.
            self.visual_size = (
                input_size[0]
                * input_size[1]
                * input_size[2]
                * (self.tile_size**2)
                * self.num_channels
            )
            # The IngroupBiasAgent will provide visual_size + 4 extra features (inventory + ready)
            self.total_input_size = self.visual_size + 4

        def take_action(self, state: np.ndarray):
            """Render the world (if enabled) and prompt the user for an action.

            Args:
                state: Observation handed over by the agent. It is not used;
                    the display is rendered from ``self.world`` instead.

            Returns:
                The chosen action as an ``int`` index.

            Raises:
                KeyboardInterrupt: On ``quit`` or after more than five
                    invalid inputs.
            """
            if self.show:
                from IPython.display import clear_output

                clear_output(wait=True)

                from sorrel.utils.visualization import render_sprite, image_from_array
                import matplotlib.pyplot as plt

                # Render the world with all layers, then composite them into
                # a single image.
                layers = render_sprite(self.world, tile_size=[32, 32])
                composited = image_from_array(layers)
                composited_array = np.array(composited)
                print(f"World visualization shape: {composited_array.shape}")

                # Display the composited result
                plt.figure(figsize=(12, 12))
                plt.imshow(composited_array)
                plt.title("Ingroup Bias Environment - Human Player Test")
                plt.xlabel("X coordinate")
                plt.ylabel("Y coordinate")

                # Add legend: the rectangles are never drawn on the axes,
                # they only supply colour swatches and labels.
                legend_elements = [
                    plt.Rectangle((0, 0), 1, 1, facecolor=color, label=label)
                    for color, label in self.LEGEND_ENTRIES
                ]
                plt.legend(handles=legend_elements, loc="upper right")

                plt.show()

            # Get action from user
            action = None
            num_retries = 0
            while not isinstance(action, int):
                raw_input_ = input(
                    "Select Action (w=up, s=down, a=left, d=right, q=turn_left, e=turn_right, z=strafe_left, c=strafe_right, r=interact, 0=noop): "
                )
                # Normalise so stray whitespace or caps lock does not cost the
                # user one of their limited retries.
                action_ = raw_input_.strip().lower()

                if action_ in self.KEY_TO_ACTION:
                    action = self.KEY_TO_ACTION[action_]
                elif action_ in [str(act) for act in self.action_list]:
                    # Numeric entry of an action index.
                    action = int(action_)
                elif action_ == "quit":
                    raise KeyboardInterrupt("Quitting...")
                else:
                    num_retries += 1
                    if num_retries > 5:
                        raise KeyboardInterrupt("Too many invalid inputs. Quitting...")
                    print("Please try again. Possible actions are below.")
                    print(
                        "Keys: w=up, s=down, a=left, d=right, q=turn_left, e=turn_right, z=strafe_left, c=strafe_right, r=interact, 0=noop"
                    )
                    print("Or enter action number (0-8) or 'quit'")

            return action

    # Create a custom IngroupBiasAgent that bypasses the memory stacking issue
    class IngroupBiasHumanAgent(IngroupBiasAgent):
        """Agent variant that defers every decision to the human player model."""

        def get_action(self, state: np.ndarray) -> int:
            """Return the model's action for ``state`` without memory stacking."""
            # For human player, we don't need memory stacking
            # Just pass the state directly to the model
            return self.model.take_action(state)

        def add_memory(
            self, state: np.ndarray, action: int, reward: float, done: bool
        ) -> None:
            """Discard the transition; nothing is stored for the human player."""
            # The human player doesn't learn from experience, so there is
            # nothing to record (this also sidesteps the dimension mismatch).

        def can_act(self) -> bool:
            """Allow acting whenever the agent has not been removed."""
            # Human players can act when frozen (for testing), but not when removed
            return not self.is_removed

    # Create human player
    human_player = IngroupBiasHumanPlayer(
        input_size=(
            world.height,
            world.width,
            3,
        ),  # 3 layers: terrain, dynamic, beam
        action_space=action_spec.n_actions,
        memory_size=1,
    )

    # Store world reference for visualization
    human_player.world = world

    # Create human agent
    human_agent = IngroupBiasHumanAgent(
        observation_spec=observation_spec,
        action_spec=action_spec,
        model=human_player,
    )

    # Replace one of the agents with human player
    agents[0] = human_agent
    experiment.override_agents(agents)

    print("Starting Ingroup Bias Human Player Test")
    print("=" * 50)
    print("Controls:")
    print("  w = move up")
    print("  s = move down")
    print("  a = move left")
    print("  d = move right")
    print("  q = turn left")
    print("  e = turn right")
    print("  z = strafe left")
    print("  c = strafe right")
    print("  r = interact")
    print("  0 = noop")
    print("  quit = exit")
    print("=" * 50)
    print("\nGame Rules:")
    print("- Collect resources of your color group")
    print("- Interact with other agents to coordinate")
    print("- Avoid collecting resources of other groups")
    print("- You are the YELLOW agent in the visualization")
    print("\nStarting game...")

    experiment.run_experiment()

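### Turn-taking loop
On each turn, choose an action by key (`w`, `s`, `a`, `d`, `q`, `e`, `z`, `c`, `r`) or by index from [0, 1, 2, 3, 4, 5, 6, 7, 8] to act on the environment; type `quit` to stop.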


In [None]:
# Configuration file paths
# Selects the YAML config that main() loads; keep exactly one assignment
# active. The path is relative -- presumably to the notebook's working
# directory; verify before running from a different location.
config_path = "../configs/config_ascii_map.yaml"  # ASCII map version
# config_path = "../configs/config.yaml"  # Random generation version

In [None]:
# Start the interactive session. This prompts for keyboard input on each of
# the human agent's turns; type 'quit' at the prompt to stop.
main(config_path)

### Game Information

**Objective:** Collect resources that match your group color while coordinating with other agents.

**Mechanics:**
- **Movement:** Use WASD keys to move around the environment
- **Rotation:** Use Q/E keys to turn left/right
- **Strafing:** Use Z/C keys to strafe left/right
- **Interaction:** Use R key to interact with other agents
- **No Action:** Use 0 key for no operation (note: in this notebook `0` is sent as action index 0, which is the same action as move up)

**Group Dynamics:**
- Agents are assigned to color groups (red, green, blue)
- Collecting resources of your group color gives positive reward
- Collecting resources of other groups may give negative reward
- Coordination with same-group agents is beneficial

**Visual Legend:**
- **Gray:** Walls (impassable)
- **Brown:** Sand (traversable terrain)
- **Red:** Red resources
- **Green:** Green resources  
- **Blue:** Blue resources
- **Yellow:** Your agent
- **White:** Empty spaces

**Tips:**
- Try to coordinate with other agents of your group
- Avoid collecting resources of other groups
- Use interaction to communicate with other agents
- Observe how the AI agents behave and learn from their strategies
