In [None]:
from __future__ import print_function

import time, sys, os, copy
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 
import numpy as np
import pandas as pd
from math import sqrt
import random
import subprocess
from collections import Counter

import matplotlib.pyplot as plt
import matplotlib.animation as animation
import matplotlib.lines as lines
import matplotlib.patches as patches

from multiprocessing import Pool
import multiprocessing

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, TimeDistributed, BatchNormalization
from tensorflow.keras.layers import LSTM, GRU
from tensorflow.keras.optimizers import RMSprop, Adagrad, Adam, SGD
#from keras.models import load_model
import tensorflow.keras.backend as K



import warnings
warnings.filterwarnings("ignore")

tracking_data = []
other = []

# Walk the competition input folder, echo every file path, and sort the paths
# into the weekly tracking files ('week' in the file name) and everything else.
for dirname, _, filenames in os.walk('../input/nfl-big-data-bowl-2021'):
    for filename in filenames:
        file_path = os.path.join(dirname, filename)
        print(file_path)
        bucket = tracking_data if 'week' in filename else other
        bucket.append(file_path)

In [None]:
### functions for plotting and unrolling 


def get_sequences_from_game(tracking, play_info, game_info, gameId, num_players=14):
    """Build one feature sequence per usable play of a single game.

    For every play of ``gameId`` found in ``tracking`` a 2-D array with one
    row per frame is assembled. Its columns are, for each tracked player in
    ``sorter`` order: ``x``/``y`` (shifted by the football's first-frame
    position), ``v_x``/``v_y`` (from ``compute_speed``) and the absolute x/y
    distance to the football; followed by the football's ``x``/``y`` (shifted
    by its own first-frame position), ``yardsLeft`` and ``down``.

    A play is skipped (its id is printed) when its row in ``play_info`` is
    missing or has no ``absoluteYardlineNumber``, when a position outside
    ``sorter`` occurs, when the number of distinct ``displayName`` values
    differs from ``num_players``, or when assembling the arrays fails.

    Args:
        tracking: tracking DataFrame (columns used: gameId, playId, frameId,
            team, position, displayName, playDirection, x, y, s, dir).
        play_info: play DataFrame (columns used: gameId, playId,
            absoluteYardlineNumber, down, possessionTeam).
        game_info: game DataFrame (columns used: gameId, homeTeamAbbr,
            visitorTeamAbbr). It is not modified.
        gameId: id of the game to process.
        num_players: required number of distinct ``displayName`` values per
            play (the football counts as one).

    Returns:
        ``(sequences, seq_play, added_plays, aborted_plays)``: the list of
        feature arrays, the matching ``play_info`` rows, and two lists of
        empty lists (one entry per game in ``tracking``) that are returned
        unfilled.
    """
    # Integer id per team, ordered by how often it appears as home team.
    # Derived from a temporary Series so the caller's frame stays untouched.
    teams = game_info['homeTeamAbbr'].astype('category').value_counts().index
    team_dict = {t: n for n, t in enumerate(teams)}
    gameIndex = tracking['gameId'].value_counts().index
    defensive_lineup = ['CB', 'SS', 'FS', 'S', 'MLB', 'OLB', 'ILB', 'LB', 'DB', 'DE']
    offensive_lineup = ['WR', 'RB', 'HB', 'TE', 'FB', 'QB']
    sorter = defensive_lineup + offensive_lineup + ['NaN']  # nan is football
    allowed_positions = set(sorter)
    sequences = []
    seq_play = []

    added_plays = [[] for _ in gameIndex]
    aborted_plays = [[] for _ in gameIndex]

    print('Starting with game: ' + str(gameId))
    start_game = time.time()
    # Filter the game once instead of rescanning the whole table per play.
    game_tracking = tracking.query('gameId == ' + str(gameId))
    team_info = game_info.query('gameId == ' + str(gameId))
    # The iteration order (value_counts) defines the order of `sequences`.
    playIds = game_tracking['playId'].value_counts().index
    for playId in playIds:

        ### compute meta info
        # Copy: columns are added below and must not write into `tracking`.
        play = game_tracking.query('playId == ' + str(playId)).copy()
        yardline = play_info.query('playId == ' + str(playId) + ' and gameId == ' + str(gameId))
        absoluteYardlineNumber = yardline['absoluteYardlineNumber'].to_numpy()
        if absoluteYardlineNumber.size == 0 or np.isnan(absoluteYardlineNumber).any():
            print(gameId)
            print(playId)
            continue

        # Compare first, then reduce: `.all()` on the raw string column does
        # not reliably return the direction itself.
        moving_left = (play['playDirection'] == 'left').all()
        yardsLeft = absoluteYardlineNumber if moving_left else 100 - absoluteYardlineNumber
        play['yardsLeft'] = np.ones(len(play)) * yardsLeft / 100
        play['down'] = np.ones(len(play)) * yardline['down'].to_numpy()

        ## compute team info: id of the team that is NOT in possession
        home_team = team_info['homeTeamAbbr'].to_numpy()[0]
        if yardline['possessionTeam'].to_numpy()[0] == home_team:
            team = team_dict[team_info['visitorTeamAbbr'].to_numpy()[0]]
        else:
            team = team_dict[home_team]
        play['teamInfo'] = np.ones(len(play)) * team

        positions_ok = set(play['position'].value_counts().index) <= allowed_positions
        players_ok = len(play['displayName'].value_counts().index) == num_players
        if not (positions_ok and players_ok):
            print(playId)
            continue

        try:
            play = play.apply(compute_speed, axis=1)
            football = play.query('team=="football"').sort_values('frameId')
            ball_x = football[['x']].to_numpy()
            ball_y = football[['y']].to_numpy()
            # Origin of the relative coordinates: football in its first frame.
            ball_x0 = football['x'].iloc[0]
            ball_y0 = football['y'].iloc[0]

            player_trajectories = []
            for position in sorter:
                at_position = play.loc[play['position'] == position]
                for name in at_position['displayName'].value_counts().index:
                    if name == 'Football':
                        continue
                    player_seq = at_position.loc[at_position['displayName'] == name].sort_values(['frameId'])
                    px = player_seq[['x']].to_numpy()
                    py = player_seq[['y']].to_numpy()
                    try:
                        dist_to_ball_x = np.absolute(px - ball_x)
                        dist_to_ball_y = np.absolute(py - ball_y)
                    except ValueError:
                        # Frame counts differ: repeat the player's last
                        # position once and retry.
                        dist_to_ball_x = np.absolute(np.vstack([px, px[-1]]) - ball_x)
                        dist_to_ball_y = np.absolute(np.vstack([py, py[-1]]) - ball_y)
                    player_seq['x'] -= ball_x0
                    player_seq['y'] -= ball_y0
                    features = player_seq[['x', 'y', 'v_x', 'v_y']].to_numpy()
                    player_trajectories.append(np.hstack((features, dist_to_ball_x, dist_to_ball_y)))

            football['x'] -= ball_x0
            football['y'] -= ball_y0
            football_seq = football[['x', 'y']].to_numpy()
            a = np.hstack(player_trajectories)
            # The meta columns are constant per play; take them from the
            # football rows instead of a leftover loop variable.
            a = np.hstack((a, football_seq,
                           np.expand_dims(football['yardsLeft'].to_numpy(), -1),
                           np.expand_dims(football['down'].to_numpy(), -1)))
            sequences.append(a)
            seq_play.append(yardline)
        except Exception:
            # Best effort: a play whose arrays cannot be assembled is reported
            # and skipped; KeyboardInterrupt/SystemExit still propagate.
            print(playId)
    print('Done with one game - Time Passed in Seconds: ' + str(time.time() - start_game))
    return sequences, seq_play, added_plays, aborted_plays




def create_football_field(linenumbers=True,
                          endzones=True,
                          highlight_line=False,
                          highlight_line_number=50,
                          highlighted_name='Line of Scrimmage',
                          fifty_is_los=False,
                          figsize=(12, 6.33)):
    """
    Function that plots the football field for viewing plays.
    Allows for showing or hiding endzones.

    The field is drawn as a 120 x 53.3 rectangle with a white line every
    10 units; the outer 10 units on each side are the end zones.

    Args:
        linenumbers: draw the yard numbers along both sidelines.
        endzones: shade both end zones and keep hash marks out of them.
        highlight_line: draw a yellow line at ``highlight_line_number``.
        highlight_line_number: yard line to highlight; 10 is added to
            account for the left end zone.
        highlighted_name: text written next to the highlighted line.
        fifty_is_los: draw a gold line at x=60 labelled as the player
            yardline at snap.
        figsize: size passed to ``plt.subplots``.

    Returns:
        The ``(fig, ax)`` pair of the created figure.
    """
    rect = patches.Rectangle((0, 0), 120, 53.3, linewidth=0.1,
                             edgecolor='r', facecolor='darkgreen', zorder=0)

    fig, ax = plt.subplots(1, figsize=figsize)
    ax.add_patch(rect)

    # One continuous polyline tracing every 10-unit line and the outline.
    plt.plot([10, 10, 10, 20, 20, 30, 30, 40, 40, 50, 50, 60, 60, 70, 70, 80,
              80, 90, 90, 100, 100, 110, 110, 120, 0, 0, 120, 120],
             [0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3,
              53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 53.3, 0, 0, 53.3],
             color='white')
    if fifty_is_los:
        plt.plot([60, 60], [0, 53.3], color='gold')
        plt.text(62, 50, '<- Player Yardline at Snap', color='gold')
    # Endzones
    if endzones:
        ez1 = patches.Rectangle((0, 0), 10, 53.3,
                                linewidth=0.1,
                                edgecolor='r',
                                facecolor='blue',
                                alpha=0.2,
                                zorder=0)
        # Rectangle takes (xy, width, height): the right end zone starts at
        # x=110 and is 10 wide, so it ends exactly at the field edge (120).
        ez2 = patches.Rectangle((110, 0), 10, 53.3,
                                linewidth=0.1,
                                edgecolor='r',
                                facecolor='blue',
                                alpha=0.2,
                                zorder=0)
        ax.add_patch(ez1)
        ax.add_patch(ez2)
    plt.xlim(0, 120)
    plt.ylim(-5, 58.3)
    plt.axis('off')
    if linenumbers:
        for x in range(20, 110, 10):
            # Numbers count up to midfield and down again on the other half.
            numb = x
            if x > 50:
                numb = 120 - x
            plt.text(x, 5, str(numb - 10),
                     horizontalalignment='center',
                     fontsize=20,  # fontname='Arial',
                     color='white')
            plt.text(x - 0.95, 53.3 - 5, str(numb - 10),
                     horizontalalignment='center',
                     fontsize=20,  # fontname='Arial',
                     color='white', rotation=180)
    if endzones:
        hash_range = range(11, 110)
    else:
        hash_range = range(1, 120)

    # Four rows of short hash marks: both sidelines and the two inner rows.
    for x in hash_range:
        ax.plot([x, x], [0.4, 0.7], color='white')
        ax.plot([x, x], [53.0, 52.5], color='white')
        ax.plot([x, x], [22.91, 23.57], color='white')
        ax.plot([x, x], [29.73, 30.39], color='white')

    if highlight_line:
        hl = highlight_line_number + 10
        plt.plot([hl, hl], [0, 53.3], color='yellow')
        plt.text(hl + 2, 50, '<- {}'.format(highlighted_name),
                 color='yellow')
    return fig, ax



# 0. Introduction 

In this report we present our analysis for the NFL Big Data Bowl competition.
One of the most interesting questions posed by the NFL was: 
* How does a defense react to certain types of offensive plays?


In this report, we want to provide a tool that can estimate how a defense reacts to specific offensive plays. 
Therefore:

* We leveraged imitation learning to simulate defensive team behavior [(Paper)](https://arxiv.org/abs/1703.03121)
* Analysed the player movement of the simulation
* Included meta data of the game state to change the behavior

### Case No. 1
Imagine you are the coach of the Denver Broncos - your team is a few yards away from scoring a touchdown and you and your team are, *of course*, interested in how the opposing team's defense is going to behave. So you sketch the offensive play, and you know how the defensive team is going to line up.
This could look like:

In [None]:
### load example data
plays = pd.read_csv(other[-1])
tracking = pd.read_csv(tracking_data[-1])
games = pd.read_csv(other[1])
play = tracking.query('gameId == 2018123013 and playId == 1930')
# Only frames 11-29 of the play are shown in this figure.
play_red = play.query('frameId >= 11 and frameId <= 29')
yardlineNumber = plays.query('gameId==' + str(2018123013) + ' and playId==' + str(1930))['yardlineNumber'].item()
fig, ax = create_football_field(highlight_line=True, highlight_line_number=yardlineNumber)
example_play_home = play_red.query('team=="home"')
example_play_away = play_red.query('team == "away"')
example_football = play_red.query('team == "football"')
# `label` names the series in the legend; `legend` is only an on/off switch,
# so passing the name there left `plt.legend()` without any entries.
example_play_home.plot(x='x', y='y', kind='scatter', ax=ax, color='orange', s=30, label='Home')
# The away team is not drawn in this figure.
example_football.plot(x='x', y='y', kind='scatter', ax=ax, color='red', s=30, label='football')

plt.title('Scatch of the play before throwing')
plt.legend()
plt.show()

After sketching your offensive play, you need your defensive coach to tell you how he would defend it. He would scribble something on the board, which helps you adapt your offensive play. This is how the players actually moved before the pass was thrown: 

In [None]:
# Same play and frame window as the sketch, now including the away team.
fig, ax = create_football_field(highlight_line=True, highlight_line_number=yardlineNumber)
example_play_home = play_red.query('team=="home"')
example_play_away = play_red.query('team == "away"')
example_football = play_red.query('team == "football"')
# `label` names the series in the legend; `legend` is only an on/off switch.
# Each series is labelled after the team it actually contains.
example_play_home.plot(x='x', y='y', kind='scatter', ax=ax, color='orange', s=30, label='Home')
example_play_away.plot(x='x', y='y', kind='scatter', ax=ax, color='blue', s=30, label='Away')
example_football.plot(x='x', y='y', kind='scatter', ax=ax, color='red', s=30, label='football')

plt.title('Players Movement until the Quaterback threw the forward pass')
plt.legend()
plt.show()

Would you have guessed the movement in a similar fashion? Would your defensive coach?

We present the **Average Defense Behavior Prediction Network**, which is able to simulate the average defensive team behavior.

# 1. Data

For training the model, we used 14 weeks of the regular season game data, 2 weeks for evaluation and one week for testing. 

The input data consists of all the plays conducted with 13 tracked players and the football.

For each player, we had the features: 
* absolute position,
* relative position to the ball,
* velocity in x and y coordinates 

For each timestep, we hand the model the information of:

* features of each player, 
* down, 
* yards left for the offensive team to get the next touchdown

We do NOT use the orientation of the players: the upper body orientation is hard to predict because it is only loosely correlated with the movement, and since it would also have to be estimated it did not really improve the prediction.

All the presented features result in a feature vector of length 82: 78 of those entries are features tied to specific players (13 players with 6 features each), 2 are the ball's absolute position, and the last 2 are the meta data.

The players are sorted according to their team and position. First we included the features of the defensive team, sorted by the following positions: 

1. 'CB'
2. 'SS'
3. 'FS'
4. 'S'
5. 'DB'
6. 'DE'

The offensive team is sorted as follows: 


1. 'WR'
2. 'RB'
3. 'TE'
4. 'FB'
5. 'QB'

As most positions are staffed more than once, we filled up the positions in the order of appearance. So if the defensive team looks e.g. like 2CB-2S-2DE-1DB, the positions were filled as CB-CB-S-S-DB-DE-DE. 


# 2. Model 

The problem of synthesising the defensive team can be formulated as predicting the next position of a single agent from a set of previous positions of this agent and its collaborators. Therefore, our dataset consists of sequences of demonstrations of the players' positions and the meta data described in (1. Data). We assume that the identity of every single agent does not change over a single demonstration - this means that the role does NOT change dynamically within the same sequence. This reduces our prediction problem, as we do not need to match the agents with their corresponding models in training.

This problem is described in [Ross et al.](https://arxiv.org/abs/1011.0686) and is a subarea of reinforcement learning -> Policy Learning. 
The problem is abstracted to learning the action $a_k$ of agent $k$ corresponding to policy $\pi_k$ as $\pi_k(s_k)=a_k$, where $s_k$ describes the state of agent $k$. This decentralized setting can be decomposed into minimizing $$\mathcal{L}_{imitation} = \sum_k \mathrm{E}_s[l(\pi_k(s_k))]$$ where $l$ denotes the loss function: in our case it is the L1 loss.

To prevent the model from deviating too much from the training distribution we use DAgger to train the estimator. DAgger works as follows: 
1. Predict the next action with the estimator from the baseline dataset
2. Update the training data with the prediction from 1. and create an updated dataset $\mathcal{D}_{star}$
3. Train the estimator with dataset $\mathcal{D}_{star}$

> The main idea is to use the learned policy's own prediction in the construction of subsequent states, thus simulating the test time performance during training. 
[Le et al.](https://arxiv.org/abs/1703.03121)

As estimator we use LSTM networks, as they are especially well suited to learning sequential information. The training data are all the plays from ball snap until the (bitter) end. We use a sequence length of 15 frames and train with a sequence overlap of 5 frames.
Overall we train with 62,000 training samples, each a 15-frame sequence with 82 features. The validation set contains 8,000 samples. The test set is the last week of the regular season.

# 3. Results

Due to a lack of time, we did not train our networks until the validation error stopped improving, nor did we build ensemble models -> those will be found in the GitHub repository belonging to this report. The models discussed here had a mean training error of 1.2m, were not pretrained with the pure imitation dataset and were not fully converged at the time of submitting the report.


Let's compare the estimated trajectories from ball snap to Quarterback forward pass with the true trajectories to get a feeling for the accuracy of the predictions:

In [None]:
#### Functions
def compute_speed(data):
    """Add velocity components and the velocity angle to a tracking record.

    ``data`` is any mapping-like record with a ``'dir'`` entry (degrees) and
    an ``'s'`` entry (speed), e.g. one row handed over by
    ``DataFrame.apply(..., axis=1)``. The record is updated in place and
    returned.

    Added entries:
        v_x: ``sin(dir) * s``
        v_y: ``cos(dir) * s``
        v_theta: angle of the velocity vector in radians in ``[-pi, pi]``,
            measured so that ``v_theta == radians(dir)`` (up to wrapping) for
            a moving object, and 0 when both components are 0.
    """
    heading = data['dir'] * np.pi / 180
    data['v_x'] = np.sin(heading) * data['s']
    data['v_y'] = np.cos(heading) * data['s']
    # arctan2 keeps the quadrant and handles v_y == 0; arctan(v_x / v_y)
    # maps opposite directions onto the same angle.
    data['v_theta'] = np.arctan2(data['v_x'], data['v_y'])
    return data

def roll_out(params):
    """Write predicted positions into a batch of feature vectors.

    Args:
        params: tuple ``(prev_feature_vector, legacy_feature_vector,
            pos_prediction, roleOrder)``.
            ``prev_feature_vector`` and ``legacy_feature_vector`` are 2-D
            arrays (one row per sample) whose first 78 columns hold 13
            players with 6 features each, and whose last 4 columns hold the
            ball position (2) and the meta data (2).
            ``pos_prediction`` is indexed as ``[player, sample, xy]``; only
            the first ``pos_prediction.shape[0]`` players are updated.
            ``roleOrder`` is accepted for compatibility and not used.

    Returns:
        A new 2-D array with the layout of ``legacy_feature_vector``. For
        every predicted player the 6 features become: predicted position,
        predicted position minus the player's position in
        ``prev_feature_vector``, and the absolute distance to the ball. The
        input arrays are left unchanged.
    """
    prev_feature_vector, legacy_feature_vector, pos_prediction, roleOrder = params
    n_players, n_player_fts = 13, 6
    n_player_cols = n_players * n_player_fts  # 78

    n_samples = legacy_feature_vector.shape[0]
    ball = legacy_feature_vector[:, -4:-2]
    meta_current = legacy_feature_vector[:, -2:]
    previous = prev_feature_vector[:, :n_player_cols].reshape(n_samples, n_players, n_player_fts)
    # Explicit copy: slicing + reshape would otherwise hand back a view and
    # every assignment below would silently modify the caller's array.
    new_matrix = np.array(legacy_feature_vector[:, :n_player_cols]).reshape(
        n_samples, n_players, n_player_fts)

    for person in range(pos_prediction.shape[0]):
        new_matrix[:, person, :2] = pos_prediction[person, :, :]  # position
        # NOTE(review): this is a per-step position difference, while
        # compute_speed derives v_x/v_y from the `s` column - confirm both
        # are meant to share one scale.
        new_matrix[:, person, 2:4] = pos_prediction[person, :, :] - previous[:, person, :2]  # velocity
        new_matrix[:, person, 4:] = np.abs(new_matrix[:, person, :2] - ball)
    new_matrix = new_matrix.reshape(n_samples, n_player_cols)
    return np.hstack([new_matrix, ball, meta_current])

def roll_out_val(params):
    """Write predicted positions into a single feature vector.

    Args:
        params: tuple ``(prev_feature_vector, legacy_feature_vector,
            pos_prediction, roleOrder)``.
            ``prev_feature_vector`` and ``legacy_feature_vector`` are 1-D
            arrays whose first 78 entries hold 13 players with 6 features
            each, and whose last 4 entries hold the ball position (2) and
            the meta data (2).
            ``pos_prediction`` is indexed as ``[player, xy]``; only the first
            ``pos_prediction.shape[0]`` players are updated.
            ``roleOrder`` is accepted for compatibility and not used.

    Returns:
        A new 1-D array with the layout of ``legacy_feature_vector``. For
        every predicted player the 6 features become: predicted position,
        predicted position minus the player's position in
        ``prev_feature_vector``, and the absolute distance to the ball. The
        input arrays are left unchanged.
    """
    prev_feature_vector, legacy_feature_vector, pos_prediction, roleOrder = params
    n_players, n_player_fts = 13, 6
    n_player_cols = n_players * n_player_fts  # 78

    ball = legacy_feature_vector[-4:-2]
    meta_current = legacy_feature_vector[-2:]
    previous = prev_feature_vector[:n_player_cols].reshape(n_players, n_player_fts)
    # Explicit copy: slicing + reshape would otherwise hand back a view and
    # every assignment below would silently modify the caller's array.
    new_matrix = np.array(legacy_feature_vector[:n_player_cols]).reshape(n_players, n_player_fts)

    for person in range(pos_prediction.shape[0]):
        new_matrix[person, :2] = pos_prediction[person, :]  # position
        new_matrix[person, 2:4] = pos_prediction[person, :] - previous[person, :2]  # velocity
        new_matrix[person, 4:] = np.abs(new_matrix[person, :2] - ball)
    return np.concatenate((new_matrix.flatten(), ball, meta_current))



In [None]:
def get_sequences_from_game(tracking, play_info, game_info, gameId, num_players=14):
    """Build one feature sequence per usable play of a single game.

    For every play of ``gameId`` found in ``tracking`` a 2-D array with one
    row per frame is assembled. Its columns are, for each tracked player in
    ``sorter`` order: ``x``/``y`` (shifted by the football's first-frame
    position), ``v_x``/``v_y`` (from ``compute_speed``) and the absolute x/y
    distance to the football; followed by the football's ``x``/``y`` (shifted
    by its own first-frame position), ``yardsLeft`` and ``down``.

    A play is skipped (its id is printed) when its row in ``play_info`` is
    missing or has no ``absoluteYardlineNumber``, when a position outside
    ``sorter`` occurs, when the number of distinct ``displayName`` values
    differs from ``num_players``, or when assembling the arrays fails.

    Args:
        tracking: tracking DataFrame (columns used: gameId, playId, frameId,
            team, position, displayName, playDirection, x, y, s, dir).
        play_info: play DataFrame (columns used: gameId, playId,
            absoluteYardlineNumber, down, possessionTeam).
        game_info: game DataFrame (columns used: gameId, homeTeamAbbr,
            visitorTeamAbbr). It is not modified.
        gameId: id of the game to process.
        num_players: required number of distinct ``displayName`` values per
            play (the football counts as one).

    Returns:
        ``(sequences, seq_play, added_plays, aborted_plays)``: the list of
        feature arrays, the matching ``play_info`` rows, and two lists of
        empty lists (one entry per game in ``tracking``) that are returned
        unfilled.
    """
    # Integer id per team, ordered by how often it appears as home team.
    # Derived from a temporary Series so the caller's frame stays untouched.
    teams = game_info['homeTeamAbbr'].astype('category').value_counts().index
    team_dict = {t: n for n, t in enumerate(teams)}
    gameIndex = tracking['gameId'].value_counts().index
    defensive_lineup = ['CB', 'SS', 'FS', 'S', 'MLB', 'OLB', 'ILB', 'LB', 'DB', 'DE']
    offensive_lineup = ['WR', 'RB', 'HB', 'TE', 'FB', 'QB']
    sorter = defensive_lineup + offensive_lineup + ['NaN']  # nan is football
    allowed_positions = set(sorter)
    sequences = []
    seq_play = []

    added_plays = [[] for _ in gameIndex]
    aborted_plays = [[] for _ in gameIndex]

    print('Starting with game: ' + str(gameId))
    start_game = time.time()
    # Filter the game once instead of rescanning the whole table per play.
    game_tracking = tracking.query('gameId == ' + str(gameId))
    team_info = game_info.query('gameId == ' + str(gameId))
    # The iteration order (value_counts) defines the order of `sequences`.
    playIds = game_tracking['playId'].value_counts().index
    for playId in playIds:

        ### compute meta info
        # Copy: columns are added below and must not write into `tracking`.
        play = game_tracking.query('playId == ' + str(playId)).copy()
        yardline = play_info.query('playId == ' + str(playId) + ' and gameId == ' + str(gameId))
        absoluteYardlineNumber = yardline['absoluteYardlineNumber'].to_numpy()
        if absoluteYardlineNumber.size == 0 or np.isnan(absoluteYardlineNumber).any():
            print(gameId)
            print(playId)
            continue

        # Compare first, then reduce: `.all()` on the raw string column does
        # not reliably return the direction itself.
        moving_left = (play['playDirection'] == 'left').all()
        yardsLeft = absoluteYardlineNumber if moving_left else 100 - absoluteYardlineNumber
        play['yardsLeft'] = np.ones(len(play)) * yardsLeft / 100
        play['down'] = np.ones(len(play)) * yardline['down'].to_numpy()

        ## compute team info: id of the team that is NOT in possession
        home_team = team_info['homeTeamAbbr'].to_numpy()[0]
        if yardline['possessionTeam'].to_numpy()[0] == home_team:
            team = team_dict[team_info['visitorTeamAbbr'].to_numpy()[0]]
        else:
            team = team_dict[home_team]
        play['teamInfo'] = np.ones(len(play)) * team

        positions_ok = set(play['position'].value_counts().index) <= allowed_positions
        players_ok = len(play['displayName'].value_counts().index) == num_players
        if not (positions_ok and players_ok):
            print(playId)
            continue

        try:
            play = play.apply(compute_speed, axis=1)
            football = play.query('team=="football"').sort_values('frameId')
            ball_x = football[['x']].to_numpy()
            ball_y = football[['y']].to_numpy()
            # Origin of the relative coordinates: football in its first frame.
            ball_x0 = football['x'].iloc[0]
            ball_y0 = football['y'].iloc[0]

            player_trajectories = []
            for position in sorter:
                at_position = play.loc[play['position'] == position]
                for name in at_position['displayName'].value_counts().index:
                    if name == 'Football':
                        continue
                    player_seq = at_position.loc[at_position['displayName'] == name].sort_values(['frameId'])
                    px = player_seq[['x']].to_numpy()
                    py = player_seq[['y']].to_numpy()
                    try:
                        dist_to_ball_x = np.absolute(px - ball_x)
                        dist_to_ball_y = np.absolute(py - ball_y)
                    except ValueError:
                        # Frame counts differ: repeat the player's last
                        # position once and retry.
                        dist_to_ball_x = np.absolute(np.vstack([px, px[-1]]) - ball_x)
                        dist_to_ball_y = np.absolute(np.vstack([py, py[-1]]) - ball_y)
                    player_seq['x'] -= ball_x0
                    player_seq['y'] -= ball_y0
                    features = player_seq[['x', 'y', 'v_x', 'v_y']].to_numpy()
                    player_trajectories.append(np.hstack((features, dist_to_ball_x, dist_to_ball_y)))

            football['x'] -= ball_x0
            football['y'] -= ball_y0
            football_seq = football[['x', 'y']].to_numpy()
            a = np.hstack(player_trajectories)
            # The meta columns are constant per play; take them from the
            # football rows instead of a leftover loop variable.
            a = np.hstack((a, football_seq,
                           np.expand_dims(football['yardsLeft'].to_numpy(), -1),
                           np.expand_dims(football['down'].to_numpy(), -1)))
            sequences.append(a)
            seq_play.append(yardline)
        except Exception:
            # Best effort: a play whose arrays cannot be assembled is reported
            # and skipped; KeyboardInterrupt/SystemExit still propagate.
            print(playId)
    print('Done with one game - Time Passed in Seconds: ' + str(time.time() - start_game))
    return sequences, seq_play, added_plays, aborted_plays



In [None]:


defensive_lineup = ['CB','SS', 'FS', 'S', 'MLB', 'OLB', 'ILB', 'LB', 'DB', 'DE']
### load models
model_list = 7  # number of policies that are built and loaded below
numOfPrevSteps = 15
featurelen = 82
print('Load models...')

#### Build one network per policy: two stacked bidirectional LSTMs followed by
#### a small dense head with a 2-value linear output. No weights are loaded
#### into these models in this cell.
policies = []
for _ in range(model_list):
    model = Sequential([
        tf.keras.layers.Bidirectional(LSTM(512, return_sequences=True)),
        tf.keras.layers.Bidirectional(LSTM(512, return_sequences=False)),
        Dense(32, activation='relu'),
        Dense(2, activation='linear'),
    ])
    model.compile(loss='mae', optimizer='adam')
    model.reset_states()
    policies.append(model)
print('Loaded!')

### loading pretrained models: first read all of them from the input dataset,
### then write each one to the working directory under the same name.
val_policies = [
    tf.keras.models.load_model('/kaggle/input/train-average-model/model_' + str(n))
    for n in range(model_list)
]
for n, trained_policy in enumerate(val_policies):
    trained_policy.save('model_' + str(n))

In [None]:
# Read the play, tracking and game tables (in that order) and turn every
# usable play of game 2018123013 into a feature sequence.
plays, tracking, games = (
    pd.read_csv(csv_path) for csv_path in (other[-1], tracking_data[-1], other[1])
)
sequences, seq_play, added_plays, aborted_plays = get_sequences_from_game(
    tracking, plays, games, gameId=2018123013)

In [None]:
### load example data
gId = 2018123013
pId = 3286
plays = pd.read_csv(other[-1])
tracking = pd.read_csv(tracking_data[-1])
games = pd.read_csv(other[1])
play = tracking.query('gameId ==' + str(gId) + ' and playId ==' + str(pId))

# Frame ids of the snap and of the forward pass delimit the plotted window.
frameId_snap = play.query('event == "ball_snap"').frameId.to_numpy()[0]
frameId_pass = play.query('event=="pass_forward"').frameId.to_numpy()[0]


play_red = play.query('frameId >= ' + str(frameId_snap) + ' and frameId <=' + str(frameId_pass))
# Look the play up by its ids so that yard line and title always belong to
# the play that is drawn, independent of the order of `seq_play`.
play_row = plays.query('gameId==' + str(gId) + ' and playId==' + str(pId))
yardlineNumber = play_row['yardlineNumber'].item()
fig, ax = create_football_field(highlight_line=True, highlight_line_number=yardlineNumber)
example_play_home = play_red.query('team=="home"')
example_play_away = play_red.query('team == "away"')
example_football = play_red.query('team == "football"')
# `label` names the series in the legend; `legend` is only an on/off switch.
example_play_home.plot(x='x', y='y', kind='scatter', ax=ax, color='orange', s=30, label='Home')
example_play_away.plot(x='x', y='y', kind='scatter', ax=ax, color='blue', s=30, label='Away')
example_football.plot(x='x', y='y', kind='scatter', ax=ax, color='red', s=30, label='football')

plt.title(play_row['playDescription'].item())
plt.legend()
plt.show()

## Visual Investigation 

Let's investigate the play where Keenum is passing deep left to Sutton and gained 25 yards.

Above we can see the play from ball snap to forward pass. Denver is shown in yellow while the LA Chargers are defending in blue.

Below we can see, how our algorithm is predicting the defensive sequence.


In [None]:
## forward pass through the model
# NOTE(review): index 15 is assumed to be the play selected via gId/pId in
# the plotting cells - confirm against seq_play[15].
# Work on a copy so the roll-out below does not overwrite the tracked data
# stored in `sequences`.
sample_seq = sequences[15].copy()
batchSize = 10
sequence_length = 15
k = 1
pred = []
for policy in val_policies:
    policy.reset_states()

# The first `sequence_length` frames only serve as history for the models.
for i in range(sequence_length, sample_seq.shape[0] - 2):  ### roll out over the whole play
    window = np.expand_dims(sample_seq[i - sequence_length:i, :], 0)
    # One (x, y) prediction per policy, stacked in policy order.
    next_prediction_all = np.vstack(
        [policy.predict(window).flatten() for policy in val_policies])
    pred.append(next_prediction_all)

    ## rollout
    # A single call writes the predictions of all policies; repeating it per
    # policy would reproduce exactly the same row.
    prev_feature = sample_seq[i + k, :].copy()
    legacy_feature = sample_seq[i + k + 1, :].copy()
    params = (prev_feature, legacy_feature, next_prediction_all, None)
    sample_seq[i + k + 1, :] = np.array(roll_out_val(params))

In [None]:
yardlineNumber = plays.query('gameId==' + str(gId) + ' and playId==' + str(pId))['yardlineNumber'].item()
fig, ax = create_football_field(highlight_line=True, highlight_line_number=yardlineNumber)
example_play_home = play_red.query('team=="home"')
example_play_away = play_red.query('team == "away"')
example_football = play_red.query('team == "football"')
example_football_ = play.query('team == "football"')

# `label` names the series in the legend; `legend` is only an on/off switch.
example_play_home.plot(x='x', y='y', kind='scatter', ax=ax, color='orange', s=30, label='Home')
# The tracked away team is not drawn; the blue points below are predictions.
example_football.plot(x='x', y='y', kind='scatter', ax=ax, color='red', s=30, label='football')

# The predictions are relative positions; shift them by the football position
# at frameId 1. The offset does not change, so it is computed once.
football_first = example_football_.query('frameId ==1')
offset = [football_first['x'].to_numpy(), football_first['y'].to_numpy()]
for p in pred[:frameId_pass]:
    ax.scatter(p[:, 0].flatten() + offset[0],
               p[:, 1].flatten() + offset[1], color='blue', s=30)


Seems like our model is a little bit stuttery and a few inches off, yet it already captures the movement of the players and their running destination. 


## How is Denver doing in our first example AND how are our policies doing?
Well, to answer this question just have a look at our model's prediction for the respective play


In [None]:
## forward pass through the model
# NOTE(review): index 0 is assumed to be the play selected via gId/pId below
# - confirm against seq_play[0].
# Work on a copy so the roll-out below does not overwrite the tracked data
# stored in `sequences`.
sample_seq = sequences[0].copy()

gId = 2018123013
pId = 1930

batchSize = 10
sequence_length = 15
k = 1
pred = []
for policy in val_policies:
    policy.reset_states()

# The first `sequence_length` frames only serve as history for the models.
for i in range(sequence_length, sample_seq.shape[0] - 2):  ### roll out over the whole play
    window = np.expand_dims(sample_seq[i - sequence_length:i, :], 0)
    # One (x, y) prediction per policy, stacked in policy order.
    next_prediction_all = np.vstack(
        [policy.predict(window).flatten() for policy in val_policies])
    pred.append(next_prediction_all)

    ## rollout
    # A single call writes the predictions of all policies; repeating it per
    # policy would reproduce exactly the same row.
    prev_feature = sample_seq[i + k, :].copy()
    legacy_feature = sample_seq[i + k + 1, :].copy()
    params = (prev_feature, legacy_feature, next_prediction_all, None)
    sample_seq[i + k + 1, :] = np.array(roll_out_val(params))

play = tracking.query('gameId ==' + str(gId) + ' and playId ==' + str(pId))

# Frame ids of the snap and of the forward pass delimit the plotted window.
frameId_snap = play.query('event == "ball_snap"').frameId.to_numpy()[0]
frameId_pass = play.query('event=="pass_forward"').frameId.to_numpy()[0]


play_red = play.query('frameId >= ' + str(frameId_snap) + ' and frameId <=' + str(frameId_pass))


# Look the play up by its ids so that yard line and title always belong to
# the play that is drawn.
play_row = plays.query('gameId==' + str(gId) + ' and playId==' + str(pId))
yardlineNumber = play_row['yardlineNumber'].item()
fig, ax = create_football_field(highlight_line=True, highlight_line_number=yardlineNumber)
example_play_home = play_red.query('team=="home"')
example_play_away = play_red.query('team == "away"')
example_football = play_red.query('team == "football"')
example_football_ = play.query('team == "football"')

# `label` names the series in the legend; `legend` is only an on/off switch.
example_play_home.plot(x='x', y='y', kind='scatter', ax=ax, color='orange', s=30, label='Home')
# The tracked away team is not drawn; the blue points below are predictions.
example_football.plot(x='x', y='y', kind='scatter', ax=ax, color='red', s=30, label='football')

# The predictions are relative positions; shift them by the football position
# at frameId 1. The offset does not change, so it is computed once.
football_first = example_football_.query('frameId ==1')
offset = [football_first['x'].to_numpy(), football_first['y'].to_numpy()]
for p in pred[:frameId_pass]:
    ax.scatter(p[:, 0].flatten() + offset[0],
               p[:, 1].flatten() + offset[1], color='blue', s=30)
plt.title(play_row['playDescription'].item())
plt.legend()
plt.show()


The model seems to be a little bit more defensive than the players actually would be, but it predicts the movement of the players for this simple formation well, while in the previous (more complicated) play the defensive positions were predicted worse.

# 4. Summary and Future Outlook

We trained a neural network which is able to capture the general movement of players. Theoretically, it is possible to sketch offensive plays and let the model generate the defensive player movement. The models are not fully converged and we assume that after a few more hours of training we are going to have a more precise model without any wiggling. With the current model it is not really useful to change meta data such as the down, yards-to-go or player alignment, as the details of the predicted position would vanish in the noise of the model, yet I am confident future models in the [Repository](https://github.com/marcimarc1/NFL2021) are going to be able to capture small differences in defender movement.

# -1. Links and Resources
[Repo](https://github.com/marcimarc1/NFL2021)

[DAgger](https://arxiv.org/abs/1011.0686)

[Imitation Learning](https://arxiv.org/abs/1703.03121)
