In [21]:
import numpy as np
import random
import time
from enum import Enum
from typing import Tuple, Dict, List, Optional
from IPython.display import clear_output

class Action(Enum):
    UP = 0
    DOWN = 1
    LEFT = 2
    RIGHT = 3
    STAY = 4

    @staticmethod
    def get_all_actions():
        return [Action.UP, Action.DOWN, Action.LEFT, Action.RIGHT, Action.STAY]
    
    @staticmethod
    def random_action():
        return random.choice(Action.get_all_actions())
    
    def get_direction(self):
        if self == Action.UP:
            return (-1, 0)
        elif self == Action.DOWN:
            return (1, 0)
        elif self == Action.LEFT:
            return (0, -1)
        elif self == Action.RIGHT:
            return (0, 1)
        else:  # STAY
            return (0, 0)


class Player(Enum):
    A = 0
    B = 1
    
    def opponent(self):
        return Player.B if self == Player.A else Player.A


class SoccerGame:
    def __init__(self):
        # 4 rows x 5 columns grid
        self.rows = 4
        self.cols = 5
        
        # Initialize state
        self.reset()
        
        # Track game history
        self.history = []
    
    def reset(self):
        # Player positions: (row, col)
        self.player_positions = {
            Player.A: (0, 0),  # Player A starts at position (0,0)
            Player.B: (0, 4),  # Player B starts at position (0,4)
        }
        
        # Ball possession (randomly assigned)
        self.ball_possession = random.choice([Player.A, Player.B])
        
        # Game status
        self.game_over = False
        self.winner = None
        
        # Clear history
        self.history = []
        
        return self.get_state()
    
    def get_state(self):
        """Returns the current state of the game as a tuple:
        ((player_A_row, player_A_col), (player_B_row, player_B_col), ball_possession)"""
        return (
            self.player_positions[Player.A],
            self.player_positions[Player.B],
            self.ball_possession
        )
    
    def is_valid_position(self, row, col):
        """Check if a position is within the grid boundaries"""
        return 0 <= row < self.rows and 0 <= col < self.cols
    
    def get_next_position(self, player: Player, action: Action) -> Tuple[int, int]:
        """Calculate the next position for a player given an action"""
        curr_row, curr_col = self.player_positions[player]
        d_row, d_col = action.get_direction()
        
        next_row = curr_row + d_row
        next_col = curr_col + d_col
        
        # If the next position is outside the grid, stay in the current position
        if not self.is_valid_position(next_row, next_col):
            return curr_row, curr_col
        
        return next_row, next_col
    
    def is_goal_scored(self, player: Player) -> bool:
        """Check if a player has scored a goal"""
        row, col = self.player_positions[player]
        
        # Player has the ball and is in the goal area
        if self.ball_possession == player:
            if player == Player.A and col == self.cols - 1:  # Player A scores at rightmost column
                return True
            elif player == Player.B and col == 0:  # Player B scores at leftmost column
                return True
        return False
    
    def step(self, actions: Dict[Player, Action]) -> Tuple[Tuple, Dict[Player, float], bool, Dict]:
        """
        Execute one game step with the given actions for both players.
        
        Args:
            actions: Dictionary mapping players to their chosen actions
            
        Returns:
            next_state: The new game state
            rewards: Dictionary mapping players to their rewards
            done: Whether the game is over
            info: Additional information
        """
        if self.game_over:
            raise ValueError("Game is already over. Call reset() to start a new game.")
        
        # Store current state for history
        curr_state = self.get_state()
        
        # Calculate next positions for both players
        next_positions = {
            player: self.get_next_position(player, actions[player])
            for player in Player
        }
        
        # Process actions in random order
        players = list(Player)
        random.shuffle(players)
        
        for player in players:
            # Get the next position
            next_pos = next_positions[player]
            opponent = player.opponent()
            
            # Check for collision with the opponent
            if next_pos == self.player_positions[opponent]:
                # Check if opponent is stationary (didn't move from previous step)
                if actions[opponent] == Action.STAY:
                    # Opponent is stationary, player doesn't move and ball possession
                    # switches to the stationary player
                    next_positions[player] = self.player_positions[player]
                    if self.ball_possession == player:
                        self.ball_possession = opponent
                else:
                    # Both players are moving to the same square
                    # In this case, just update the position and no possession change
                    pass
        
        # Update positions
        self.player_positions = next_positions
        
        # Check for goal after updating positions
        rewards = {Player.A: 0.0, Player.B: 0.0}
        info = {"scoring_player": None}
        
        # Check if either player scored
        for player in Player:
            if self.is_goal_scored(player):
                # Game over, set rewards
                if player == Player.A:
                    rewards[Player.A] = 1.0
                    rewards[Player.B] = -1.0
                else:
                    rewards[Player.A] = -1.0
                    rewards[Player.B] = 1.0
                
                self.game_over = True
                self.winner = player
                info["scoring_player"] = player
                break
        
        # Get new state
        next_state = self.get_state()
        
        # Record history
        self.history.append({
            "state": curr_state,
            "actions": {p: a for p, a in actions.items()},
            "next_state": next_state,
            "rewards": rewards.copy()
        })
        
        return next_state, rewards, self.game_over, info
    
    def render(self, delay=0.5):
        """Display the current game state with dots representing empty positions"""
        # Clear previous output
        clear_output(wait=True)
        
        # Initialize grid with dots for empty spaces
        grid = [['· ' for _ in range(self.cols)] for _ in range(self.rows)]
        
        # Mark goal areas
        for row in range(self.rows):
            # B's goal (leftmost column)
            grid[row][0] = '. '
            # A's goal (rightmost column)
            grid[row][self.cols-1] = '. '
        
        # Mark player positions
        for player, (row, col) in self.player_positions.items():
            symbol = player.name
            if self.ball_possession == player:
                symbol += 'o'  # Add ball indicator
            grid[row][col] = symbol
        
        # Draw grid with border
        print("+" + "-" * (2 * self.cols + 1) + "+")
        for row in grid:
            print("| " + " ".join(f"{cell}" for cell in row) + " |")
        print("+" + "-" * (2 * self.cols + 1) + "+")
        
        # Print additional info
        print(f"Ball possession: Player {self.ball_possession.name}")
        
        # Print game status
        if self.game_over:
            print(f"Game over! Winner: Player {self.winner.name}")
        
        # Add delay for animation effect
        if delay > 0:
            time.sleep(delay)
    
    def get_valid_actions(self, player: Player):
        """Get list of valid actions for a player (all actions are always valid)"""
        return Action.get_all_actions()


class RandomAgent:
    """Agent that selects random actions"""
    def __init__(self, player: Player):
        self.player = player
    
    def get_action(self, state):
        return Action.random_action()


class FixedPolicyAgent:
    """Agent that follows a predefined policy mapping states to actions"""
    def __init__(self, player: Player, policy=None):
        self.player = player
        # Default policy is to stay in place
        self.policy = policy or (lambda state: Action.STAY)
    
    def get_action(self, state):
        return self.policy(state)


def simulate_game(env: SoccerGame, agent_a, agent_b, max_steps=100, render=True, delay=0.5):
    """Simulate a full game between two agents"""
    state = env.reset()
    done = False
    steps = 0
    
    if render:
        print("Initial state:")
        env.render(delay=0)
        print()
    
    while not done and steps < max_steps:
        # Get actions from both agents
        action_a = agent_a.get_action(state)
        action_b = agent_b.get_action(state)
        
        # Take step in environment
        state, rewards, done, info = env.step({Player.A: action_a, Player.B: action_b})
        
        steps += 1
        
        if render:
            print(f"Step {steps}:")
            print(f"Player A action: {action_a.name}")
            print(f"Player B action: {action_b.name}")
            env.render(delay=delay)
            if done:
                print(f"Rewards: A={rewards[Player.A]}, B={rewards[Player.B]}")
                print()
    
    if steps == max_steps and not done:
        print(f"Game reached maximum steps ({max_steps}) without a winner.")
    
    return {
        "steps": steps,
        "winner": env.winner,
        "history": env.history
    }


# Simple policy examples
def go_to_goal_policy(player, state):
    """Policy that attempts to head toward the goal"""
    player_pos, opponent_pos, ball_owner = state
    
    # If player doesn't have the ball, try to get it
    if ball_owner != player:
        # Move toward opponent
        opponent_row, opponent_col = opponent_pos
        player_row, player_col = player_pos
        
        if player_row < opponent_row:
            return Action.DOWN
        elif player_row > opponent_row:
            return Action.UP
        elif player_col < opponent_col:
            return Action.RIGHT
        elif player_col > opponent_col:
            return Action.LEFT
        else:
            return Action.STAY
    
    # Player has the ball, head toward goal
    if player == Player.A:
        # Player A's goal is on the right
        return Action.RIGHT
    else:
        # Player B's goal is on the left
        return Action.LEFT


# Example usage
# if __name__ == "__main__":
#     # Create environment
#     env = SoccerGame()
    
#     # Create agents
#     agent_a = RandomAgent(Player.A)
#     agent_b = RandomAgent(Player.B)
    
#     # Simulate a game
#     result = simulate_game(env, agent_a, agent_b, render=True, delay=0.5)
    
#     if result["winner"]:
#         print(f"Player {result['winner'].name} won after {result['steps']} steps!")
#     else:
#         print("No winner.")


# Run this in a Jupyter Notebook to visualize the game
def play_game_in_notebook():
    env = SoccerGame()
    
    # Create agents with simple policies
    agent_a = FixedPolicyAgent(Player.A, lambda state: go_to_goal_policy(Player.A, state))
    agent_b = FixedPolicyAgent(Player.B, lambda state: go_to_goal_policy(Player.B, state))
    
    result = simulate_game(env, agent_a, agent_b, render=True, delay=0.5)
    
    if result["winner"]:
        print(f"Player {result['winner'].name} won after {result['steps']} steps!")
    else:
        print("No winner.")

In [24]:
play_game_in_notebook()

+-----------+
| .  ·  ·  ·  B |
| .  ·  ·  ·  .  |
| .  ·  ·  ·  .  |
| .  ·  ·  ·  .  |
+-----------+
Ball possession: Player A
Game over! Winner: Player A
Rewards: A=1.0, B=-1.0

Player A won after 4 steps!
