### Random Notes Here

Train "12th man" RL agent to become a perfect tackler. Metric is deviance from this path.

GHOST IN THE MACHINE; concept of "ghosts" like in mario kart and other video games

Model from different positions - i.e. agent from defensive lineman initial starting position
    remove impossible scenarios based on distribution of distance covered and speed for example

# Creating an American Football Reinforcement Learning Enviornment 

In [1]:
import pygame
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pandas as pd

pygame 2.5.2 (SDL 2.28.3, Python 3.8.3)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Read in the tracking data
tracking = pd.read_csv("data/tracking_week_1.csv")

In [3]:
tracking.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022090800,56,35472.0,Rodger Saffold,1,2022-09-08 20:24:05.200000,76.0,BUF,left,88.37,27.27,1.62,1.15,0.16,231.74,147.9,
1,2022090800,56,35472.0,Rodger Saffold,2,2022-09-08 20:24:05.299999,76.0,BUF,left,88.47,27.13,1.67,0.61,0.17,230.98,148.53,pass_arrived
2,2022090800,56,35472.0,Rodger Saffold,3,2022-09-08 20:24:05.400000,76.0,BUF,left,88.56,27.01,1.57,0.49,0.15,230.98,147.05,
3,2022090800,56,35472.0,Rodger Saffold,4,2022-09-08 20:24:05.500000,76.0,BUF,left,88.64,26.9,1.44,0.89,0.14,232.38,145.42,
4,2022090800,56,35472.0,Rodger Saffold,5,2022-09-08 20:24:05.599999,76.0,BUF,left,88.72,26.8,1.29,1.24,0.13,233.36,141.95,


In [4]:
class PreprocessTrackingData():
    def __init__(self, df, gameId, playId):
        self.gameId = gameId
        self.playId = playId
        self.df = df
    
    def get_play(self):
        return self.df[(self.df['gameId'] == self.gameId) & (self.df['playId'] == self.playId)]

In [5]:
class FootballPlay(gym.Env):
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 10}

    def __init__(self, dataframe, render_mode=None, max_frames=100):
        '''
        Define the initial variables for the custom football field environment:
        field dimensions, hash marks, window size, and player tracking data.
        '''

        # DataFrame containing player positions for each frame
        self.dataframe = dataframe

        # Dimensions of a football field in grid units
        self.xmin = 0
        self.xmax = 160 / 3
        self.hash_right = 38.35
        self.hash_left = 12
        self.hash_width = 3.3

        self.size_x = int(self.xmax)
        self.size_y = int(self.hash_right + self.hash_width + (self.xmax - self.hash_left))

        # Window size for rendering
        self.window_size = 512  

        # Observations are dictionaries with the agent's and the target's location.
        self.observation_space = spaces.Dict(
            {
                "agent": spaces.Box(np.array([self.xmin, self.xmin]), np.array([self.size_x - 1, self.size_y - 1]), dtype=np.int32),
                "target": spaces.Box(np.array([self.xmin, self.xmin]), np.array([self.size_x - 1, self.size_y - 1]), dtype=np.int32),
            }
        )

        # Continuous action space: each action is a 2D vector with components in the range [-1, 1]
        self.action_space = spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)

        # Set the maximum number of frames per episode based on the tracking data
        self.max_frames = max_frames
        self.current_frame = 0

        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode

        self.window = None
        self.clock = None

    def _get_obs(self):
        return {"agent": self._agent_location, "target": self._target_location}
    
    def _get_info(self):
        return {
            "distance": np.linalg.norm(
                self._agent_location - self._target_location, ord=1
            )
        }
    
    def reset(self, seed=None, options=None):
        super().reset(seed=seed)

        # Initialize the player positions from the first frame of the dataframe
        self.player_positions = {nflId: np.array([row['x'], row['y']])
                                 for nflId, row in self.dataframe[self.dataframe['frameId'] == 1].iterrows()}

        self._agent_location = self.np_random.integers(0, [self.size_x, self.size_y], dtype=np.int32)
        self._target_location = self._agent_location
        while np.array_equal(self._target_location, self._agent_location):
            self._target_location = self.np_random.integers(0, [self.size_x, self.size_y], dtype=np.int32)

        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        # Reset the current frame for each iteration
        self.current_frame = 0

        return observation, info
    
    def step(self, action):
        # Normalize the action to make sure it's within the allowed range
        action = np.clip(action, self.action_space.low, self.action_space.high)

        # Update agent's location based on the action
        self._agent_location = np.clip(
            self._agent_location + action, [self.xmin, self.xmin], [self.size_x - 1, self.size_y - 1]
        ).astype(np.int32)

        # Update player positions for the current frame
        current_frame_data = self.dataframe[self.dataframe['frameId'] == self.current_frame]
        for nflId, row in current_frame_data.iterrows():
            self.player_positions[nflId] = np.array([row['x'], row['y']])

        self.current_frame += 1
        terminated = np.array_equal(self._agent_location, self._target_location) or self.current_frame >= self.max_frames
        reward = 1 if terminated else 0
        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, reward, terminated, False, info

    def _render_frame(self):
        if self.render_mode == "human":
            if self.window is None:
                pygame.init()
                self.window = pygame.display.set_mode((self.window_size, self.window_size))
                self.clock = pygame.time.Clock()

            self.window.fill((255, 255, 255))  # Fill the background with white

            # Set pixel size for each grid unit
            pix_square_size_x = self.window_size / self.size_x
            pix_square_size_y = self.window_size / self.size_y

            # Draw each player
            for nflId, position in self.player_positions.items():
                pygame.draw.circle(
                    self.window,
                    (0, 255, 0),  # Green color for players
                    (int(position[0] * pix_square_size_x), int(position[1] * pix_square_size_y)),
                    min(pix_square_size_x, pix_square_size_y) / 4,
                )

            pygame.display.flip()
            self.clock.tick(self.metadata["render_fps"])

    def render(self):
        if self.render_mode == "human":
            self._render_frame()

    def close(self):
        if self.window is not None:
            pygame.quit()
            self.window = None


In [6]:
gameId = 2022090800
playId = 56

In [7]:
preprocessor = PreprocessTrackingData(tracking, 2018090600, 75)
play = preprocessor.get_play()

In [8]:
max_frames = play['frameId'].max()

In [9]:
from gymnasium.envs.registration import register

register(
     id="FootballPlay-v0",
     entry_point=FootballPlay,
     max_episode_steps=300,
)

In [11]:
# Usage
# Create an instance of FootballPlay
env = FootballPlay(play, render_mode='human')

# Reset and step through the environment
observation, info = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # replace with your action logic
    observation, reward, done, _ = env.step(action)
    env.render()

ValueError: too many values to unpack (expected 4)