### Random Notes Here

Train a "12th man" RL agent to become a perfect tackler. The metric is the deviation from this agent's path.

GHOST IN THE MACHINE: the concept of "ghosts", as in Mario Kart and other video games.

Model from different positions, e.g. an agent starting from a defensive lineman's initial position.
    Remove impossible scenarios based on, for example, the distribution of distance covered and speed.

TODO : adjust window to match action space?

# Creating an American Football Reinforcement Learning Environment

In [47]:
import pygame
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pandas as pd

In [48]:
# Load the week 1 tracking data into a DataFrame.
# The path is relative, so it is resolved against the current working directory.
tracking = pd.read_csv("data/tracking_week_1.csv")

In [49]:
# Preview the first rows of the tracking data to check the columns.
tracking.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022090800,56,35472.0,Rodger Saffold,1,2022-09-08 20:24:05.200000,76.0,BUF,left,88.37,27.27,1.62,1.15,0.16,231.74,147.9,
1,2022090800,56,35472.0,Rodger Saffold,2,2022-09-08 20:24:05.299999,76.0,BUF,left,88.47,27.13,1.67,0.61,0.17,230.98,148.53,pass_arrived
2,2022090800,56,35472.0,Rodger Saffold,3,2022-09-08 20:24:05.400000,76.0,BUF,left,88.56,27.01,1.57,0.49,0.15,230.98,147.05,
3,2022090800,56,35472.0,Rodger Saffold,4,2022-09-08 20:24:05.500000,76.0,BUF,left,88.64,26.9,1.44,0.89,0.14,232.38,145.42,
4,2022090800,56,35472.0,Rodger Saffold,5,2022-09-08 20:24:05.599999,76.0,BUF,left,88.72,26.8,1.29,1.24,0.13,233.36,141.95,


In [50]:
class PreprocessTrackingData:
    """Select the rows of a single play from a tracking DataFrame.

    Attributes:
        df: DataFrame holding tracking rows; must have ``gameId`` and
            ``playId`` columns.
        gameId: Game identifier to match against the ``gameId`` column.
        playId: Play identifier to match against the ``playId`` column.
    """

    def __init__(self, df, gameId, playId):
        # Only store the inputs; filtering happens lazily in get_play().
        self.df = df
        self.gameId = gameId
        self.playId = playId

    def get_play(self):
        """Return the rows of ``df`` matching both ``gameId`` and ``playId``."""
        same_game = self.df['gameId'] == self.gameId
        same_play = self.df['playId'] == self.playId
        return self.df[same_game & same_play]

In [51]:
class FootballPlay(gym.Env):
    """Grid-world football field on which an agent chases a target.

    The field (120 x 53.3 yards including endzones) is discretised into
    ``size_x`` by ``size_y`` integer cells. Each episode places the agent and
    the target on distinct random cells. The agent moves with a continuous 2D
    action in ``[-1, 1]`` per axis, and its position is snapped to the nearest
    cell after every step. A single obstacle cell blocks movement.

    An episode terminates (reward 1) when the agent reaches the target cell
    and is truncated (reward 0) once ``max_frames`` steps have been taken.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 10}

    def __init__(self, render_mode=None, max_frames=100):
        """Set up the field, the spaces and the rendering state.

        Args:
            render_mode: ``None``, ``"human"`` or ``"rgb_array"``.
            max_frames: Number of steps after which an episode is truncated.
        """
        super().__init__()

        # Dimensions of a football field in yards
        self.xmin = 0
        self.xmax = 120  # Including endzones
        self.ymax = 53.3  # Standard width of a football field
        self.ymin = 0

        # Number of integer grid cells along each axis
        self.size_x = int(self.xmax)
        self.size_y = int(self.ymax)

        # Window size (pixels, square) for rendering
        self.window_size = 512

        # Observations are dictionaries with the agent's and the target's location.
        self.observation_space = spaces.Dict(
            {
                "agent": spaces.Box(np.array([self.xmin, self.ymin]), np.array([self.size_x - 1, self.size_y - 1]), dtype=np.int32),
                "target": spaces.Box(np.array([self.xmin, self.ymin]), np.array([self.size_x - 1, self.size_y - 1]), dtype=np.int32),
            }
        )

        # Continuous action space: each action is a 2D vector with components in the range [-1, 1]
        self.action_space = spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)

        # Obstacle location in field coordinates; the grid cell containing it blocks movement
        self.obstacle_location = np.array([88.37, 27.27])

        # Maximum number of steps per episode
        self.max_frames = max_frames
        self.current_frame = 0

        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode

        # Created lazily on the first "human" render
        self.window = None
        self.clock = None

    def _obstacle_cell(self):
        """Return the integer grid cell that contains ``obstacle_location``."""
        return np.floor(self.obstacle_location).astype(np.int32)

    def _sample_cell(self, forbidden):
        """Draw a random grid cell that is not equal to any cell in ``forbidden``."""
        while True:
            cell = self.np_random.integers(0, [self.size_x, self.size_y], dtype=np.int32)
            if not any(np.array_equal(cell, other) for other in forbidden):
                return cell

    def _get_obs(self):
        """Return the observation dict with the agent and target cells."""
        return {"agent": self._agent_location, "target": self._target_location}

    def _get_info(self):
        """Return the L1 (Manhattan) distance between agent and target."""
        return {
            "distance": np.linalg.norm(
                self._agent_location - self._target_location, ord=1
            )
        }

    def reset(self, seed=None, options=None):
        """Start a new episode with random, distinct agent and target cells.

        Neither the agent nor the target is placed on the obstacle cell, so
        the target is always reachable.

        Returns:
            ``(observation, info)`` for the initial state.
        """
        super().reset(seed=seed)

        # Reset the step counter before anything observes the new episode
        self.current_frame = 0

        obstacle_cell = self._obstacle_cell()
        self._agent_location = self._sample_cell([obstacle_cell])
        self._target_location = self._sample_cell([obstacle_cell, self._agent_location])

        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, info

    def step(self, action):
        """Move the agent by ``action`` and advance the episode by one frame.

        Args:
            action: 2D displacement; clipped to the action space bounds.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``terminated`` is True when the agent is on the target cell,
            ``truncated`` is True once ``max_frames`` steps have elapsed, and
            ``reward`` is 1 only when the target was reached.
        """
        # Make sure the action is within the allowed range
        action = np.clip(action, self.action_space.low, self.action_space.high)

        # Keep the move on the field, then snap to the nearest cell. Rounding
        # (rather than truncating) treats positive and negative displacements
        # symmetrically, so the agent is not biased towards the origin.
        lower = [self.xmin, self.ymin]
        upper = [self.size_x - 1, self.size_y - 1]
        candidate = np.rint(
            np.clip(self._agent_location + action, lower, upper)
        ).astype(np.int32)

        # Prevent movement if it would land on the obstacle's cell
        if np.array_equal(candidate, self._obstacle_cell()):
            candidate = self._agent_location

        self._agent_location = candidate
        self.current_frame += 1

        # Reaching the target ends the episode; running out of frames only truncates it
        terminated = bool(np.array_equal(self._agent_location, self._target_location))
        truncated = bool(self.current_frame >= self.max_frames)
        reward = 1 if terminated else 0
        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, reward, terminated, truncated, info

    def render(self):
        """Return the current frame as an array in ``"rgb_array"`` mode, else None."""
        if self.render_mode == "rgb_array":
            return self._render_frame()

    def _render_frame(self):
        """Draw the field, agent, target and obstacle.

        In ``"human"`` mode the frame is also shown in a pygame window and the
        call is throttled to ``metadata["render_fps"]``.

        Returns:
            The canvas pixels as a numpy array, transposed so that the first
            two axes of pygame's pixel array are swapped.
        """
        if self.window is None and self.render_mode == "human":
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode(
                (self.window_size, self.window_size)
            )
        if self.clock is None and self.render_mode == "human":
            self.clock = pygame.time.Clock()

        canvas = pygame.Surface((self.window_size, self.window_size))
        canvas.fill((255, 255, 255))  # Fill with white

        # Pixel size of one grid cell along each axis (the window is square, the field is not)
        pix_square_size_x = self.window_size / self.size_x
        pix_square_size_y = self.window_size / self.size_y

        # Draw the field
        field_color = (0, 128, 0)  # Dark green for the field
        pygame.draw.rect(
            canvas,
            field_color,
            pygame.Rect(0, 0, self.window_size, self.window_size)
        )

        # Draw a white yard line every 10 cells
        line_color = (255, 255, 255)
        for x in range(0, self.size_x, 10):
            pygame.draw.line(
                canvas,
                line_color,
                (x * pix_square_size_x, 0),
                (x * pix_square_size_x, self.window_size)
            )

        # Draw the agent at the centre of its cell
        pygame.draw.circle(
            canvas,
            (0, 0, 255),  # Blue color for the agent
            (
                int(self._agent_location[0] * pix_square_size_x + pix_square_size_x / 2),
                int(self._agent_location[1] * pix_square_size_y + pix_square_size_y / 2),
            ),
            int(pix_square_size_x / 2),
        )

        # Draw the target at the centre of its cell
        pygame.draw.circle(
            canvas,
            (255, 0, 0),  # Red color for the target
            (
                int(self._target_location[0] * pix_square_size_x + pix_square_size_x / 2),
                int(self._target_location[1] * pix_square_size_y + pix_square_size_y / 2),
            ),
            int(pix_square_size_x / 2),
        )

        # Draw the obstacle on the same grid cell that blocks movement in step()
        obstacle_cell = self._obstacle_cell()
        pygame.draw.rect(
            canvas,
            (0, 255, 0),  # Green color for the obstacle
            pygame.Rect(
                obstacle_cell[0] * pix_square_size_x,
                obstacle_cell[1] * pix_square_size_y,
                pix_square_size_x,
                pix_square_size_y,
            ),
        )

        if self.render_mode == "human":
            self.window.blit(canvas, (0, 0))
            pygame.display.flip()
            self.clock.tick(self.metadata["render_fps"])

        return np.transpose(np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2))

    def close(self):
        """Shut down the pygame window, if one was opened."""
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()
            # Forget the dead handles so close() is idempotent and a later
            # render re-creates the window instead of using a closed display.
            self.window = None
            self.clock = None


In [52]:
from gymnasium.envs.registration import register

# Register the environment class under the id "FootballPlay-v0".
# max_episode_steps=300 is the episode step limit stored with the registration.
# Re-running this cell re-registers the id, which produces the "Overriding
# environment" warning shown below.
register(
     id="FootballPlay-v0",
     entry_point=FootballPlay,
     max_episode_steps=300,
)

  logger.warn(f"Overriding environment {new_spec.id} already in registry.")


In [53]:
# Identifiers of the play to load from the tracking data
gameId = 2022090800
playId = 56

# Select the rows of `tracking` that belong to this game and play
preprocessor = PreprocessTrackingData(tracking, gameId, playId)
play_data = preprocessor.get_play()

# The largest frameId in the play is used as the episode length
max_frames = play_data['frameId'].max()

# Create the environment with a pygame window and the play's frame count as max_frames
env = FootballPlay(render_mode='human', max_frames=max_frames)

In [54]:
# Roll out 1000 episodes with uniformly random actions.
# try/finally guarantees the environment (and its pygame window) is closed
# even when the loop is interrupted, e.g. by a KeyboardInterrupt.
try:
    for _ in range(1000):
        observation, info = env.reset()
        done = False

        while not done:
            action = env.action_space.sample()  # Sample an action
            observation, reward, terminated, truncated, info = env.step(action)

            # The episode is over when it either terminated or was truncated
            done = terminated or truncated
finally:
    env.close()

KeyboardInterrupt: 

In [None]:
# Display the tracking rows selected for the chosen play.
play_data

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022090800,56,35472.0,Rodger Saffold,1,2022-09-08 20:24:05.200000,76.0,BUF,left,88.370000,27.270000,1.62,1.15,0.16,231.74,147.90,
1,2022090800,56,35472.0,Rodger Saffold,2,2022-09-08 20:24:05.299999,76.0,BUF,left,88.470000,27.130000,1.67,0.61,0.17,230.98,148.53,pass_arrived
2,2022090800,56,35472.0,Rodger Saffold,3,2022-09-08 20:24:05.400000,76.0,BUF,left,88.560000,27.010000,1.57,0.49,0.15,230.98,147.05,
3,2022090800,56,35472.0,Rodger Saffold,4,2022-09-08 20:24:05.500000,76.0,BUF,left,88.640000,26.900000,1.44,0.89,0.14,232.38,145.42,
4,2022090800,56,35472.0,Rodger Saffold,5,2022-09-08 20:24:05.599999,76.0,BUF,left,88.720000,26.800000,1.29,1.24,0.13,233.36,141.95,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,2022090800,56,,football,18,2022-09-08 20:24:06.900000,,football,left,78.160004,36.650002,0.07,0.27,0.01,,,tackle
502,2022090800,56,,football,19,2022-09-08 20:24:07.000000,,football,left,78.230003,36.389999,0.05,0.18,0.27,,,
503,2022090800,56,,football,20,2022-09-08 20:24:07.099999,,football,left,78.230003,36.389999,0.04,0.13,0.00,,,
504,2022090800,56,,football,21,2022-09-08 20:24:07.200000,,football,left,78.220001,36.389999,0.02,0.09,0.00,,,
