In [None]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Load the competition training table from the Kaggle input directory.
train_df = pd.read_csv('/kaggle/input/nfl-big-data-bowl-2020/train.csv')

In [None]:
def get_horizontal_and_vertical(df, af, f, hf, vf):
    """Split a magnitude column into two signed components using an angle column.

    For every row with a non-null angle the frame is updated in place with

        df[vf] = cos(angle) * df[f]
        df[hf] = sin(angle) * df[f]

    where the angle is read from ``df[af]`` in degrees.

    The previous implementation handled each 90-degree quadrant separately,
    but all four branches reduce to the same two expressions above.  Its
    quadrant masks only covered ``0 <= angle < 360``, so an angle of exactly
    360 (or any other out-of-range value) was silently left unset.  Using the
    periodic cos/sin directly covers those rows as well.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to update in place.
    af : str
        Name of the angle column (degrees).
    f : str
        Name of the magnitude column to decompose.
    hf : str
        Name of the output column receiving the sine component.
    vf : str
        Name of the output column receiving the cosine component.

    Returns
    -------
    None
        ``df`` is modified in place; rows with a missing angle are not touched.
    """
    # Rows without an angle keep whatever the output columns already hold.
    has_angle = df[af].notna()
    radians = np.deg2rad(df.loc[has_angle, af])
    magnitude = df.loc[has_angle, f]

    df.loc[has_angle, vf] = np.cos(radians) * magnitude
    df.loc[has_angle, hf] = np.sin(radians) * magnitude

def preprocessing(df):
    """Normalise the raw tracking frame and derive per-row helper columns.

    Steps, in order:

    1. Map the alternative team abbreviations (ARZ/BLT/CLV/HST) in
       ``PossessionTeam`` and ``FieldPosition`` to ARI/BAL/CLE/HOU.
    2. Flag the rusher row (``IsRushing``), attach the rusher's ``Team`` to
       every row of the play as ``AttackingTeam`` and derive ``IsOffense``.
    3. Mirror plays whose ``PlayDirection`` is ``'left'`` so that every play
       reads ``'right'``: ``X``/``Y`` are reflected and ``Orientation``/``Dir``
       are rotated by 180 degrees.  ``X`` is then shifted by -10 for all rows.
    4. For ``Season == 2017`` rows ``Orientation`` is overwritten with ``Dir``.
    5. ``YardLine`` is converted to ``100 - YardLine`` when ``FieldPosition``
       differs from ``PossessionTeam``.
    6. ``XFromYardLine``, ``YFromMidLIne``, ``AttackingTeamAbbr``,
       ``DefendingTeamAbbr`` and ``ATDFPair`` are added.

    Changes compared with the earlier version:

    * ``YFromMidLIne`` used to subtract ``160 / 3``.  That is the value used
      as the full width when mirroring ``Y``, so the distance from the middle
      line was off by half a field width.  It now subtracts half of that.
    * The input frame is copied first.  Previously the abbreviation fixes and
      the ``IsRushing`` column were written into the caller's frame while the
      rest ended up only in the returned (merged) frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame.  Columns read: PossessionTeam, FieldPosition, NflIdRusher,
        NflId, PlayId, Team, PlayDirection, X, Y, Orientation, Dir, Season,
        YardLine, HomeTeamAbbr, VisitorTeamAbbr.

    Returns
    -------
    pandas.DataFrame
        A new frame; the argument is left unchanged.  The function is meant
        to be applied once to raw data (a second application would shift
        ``X`` again).
    """
    field_width = 160 / 3

    # Never write into the caller's frame.
    df = df.copy()

    abbreviation_fixes = {'ARZ': 'ARI', 'BLT': 'BAL', 'CLV': 'CLE', 'HST': 'HOU'}
    for column in ('PossessionTeam', 'FieldPosition'):
        df[column] = df[column].replace(abbreviation_fixes)

    # define attacking side: the team of the rusher attacks
    df['IsRushing'] = df['NflIdRusher'] == df['NflId']
    attacking_team = (
        df.loc[df['IsRushing']]
        .groupby(['PlayId'])['Team']
        .first()
        .reset_index()
        .rename(columns={'Team': 'AttackingTeam'})
    )
    df = pd.merge(df, attacking_team, on='PlayId', how='left')
    df['IsOffense'] = df['Team'] == df['AttackingTeam']

    # make all attacking team rushing to the right from the left.
    moving_left = df['PlayDirection'] == 'left'
    df.loc[moving_left, 'PlayDirection'] = 'right'

    df.loc[moving_left, 'X'] = 120 - df.loc[moving_left, 'X']
    df['X'] -= 10
    df.loc[moving_left, 'Y'] = field_width - df.loc[moving_left, 'Y']

    for angle_column in ('Orientation', 'Dir'):
        df.loc[moving_left, angle_column] = np.mod(180 + df.loc[moving_left, angle_column], 360)

    # 2017 rows: use the direction of motion in place of the recorded orientation.
    season_2017 = df['Season'] == 2017
    df.loc[season_2017, 'Orientation'] = df.loc[season_2017, 'Dir']

    # A missing FieldPosition never equals PossessionTeam, so those rows are converted too.
    other_half = df['FieldPosition'].fillna('') != df['PossessionTeam']
    df.loc[other_half, 'YardLine'] = 50 + (50 - df.loc[other_half, 'YardLine'])

    df['XFromYardLine'] = df['X'] - df['YardLine']
    # Column name (including its capitalisation) is kept for downstream compatibility.
    df['YFromMidLIne'] = df['Y'] - field_width / 2

    # team historical statistic
    attacking_home = df['AttackingTeam'] == 'home'
    attacking_away = df['AttackingTeam'] == 'away'
    # Rows that are neither 'home' nor 'away' stay missing, as before.
    df['AttackingTeamAbbr'] = df['HomeTeamAbbr'].where(
        attacking_home, df['VisitorTeamAbbr'].where(attacking_away))
    df['DefendingTeamAbbr'] = df['VisitorTeamAbbr'].where(
        attacking_home, df['HomeTeamAbbr'].where(attacking_away))

    df['ATDFPair'] = df['AttackingTeamAbbr'] + df['DefendingTeamAbbr']
    return df

# Rebind train_df to the preprocessed frame returned by preprocessing().
# Run this once per fresh load: preprocessing() shifts X by -10 on every call.
train_df = preprocessing(train_df)

In [None]:
def _per_play_arrays(frame, column):
    """Return a 1-D object array holding one value array per play (sorted by PlayId)."""
    grouped = frame.groupby('PlayId', sort=True)[column]
    arrays = np.empty(grouped.ngroups, dtype=object)
    for position, (_, values) in enumerate(grouped):
        arrays[position] = values.to_numpy()
    return arrays


def pitch_control_process(train_df):
    """Collect the per-play player arrays consumed by the pitch-control code.

    Every returned array has one entry per play, ordered by ascending
    ``PlayId``.  The first eight are object arrays whose entries are the
    per-player value arrays of that play; the rusher arrays hold one float
    per play.

    Fixes compared with the earlier version:

    * ``speed_defense`` was built from the offense rows (copy-paste slip);
      it now really contains the defenders' speeds.
    * ``rusher_X`` was the absolute ``X`` while ``X_offense`` / ``X_defense``
      are measured from the play's ``YardLine``.  The rusher position is now
      expressed in the same yard-line-relative frame, so distances between
      players and the rusher are meaningful.
    * The rusher arrays used to follow row order while the grouped arrays
      follow sorted ``PlayId`` order; they are now aligned explicitly (plays
      without a rusher row get NaN instead of shifting every later play).
    * Degrees are converted with ``np.deg2rad`` instead of a truncated pi
      literal, and two unused intermediate frames were dropped.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Frame with the columns PlayId, YardLine, IsOffense, NflId,
        NflIdRusher, X, Y, Orientation (degrees) and S.

    Returns
    -------
    tuple
        ``(X_offense, Y_offense, X_defense, Y_defense, angle_offense,
        angle_defense, speed_offense, speed_defense, rusher_X, rusher_Y)``.
        X values are relative to the play's first ``YardLine`` value and
        angles are in radians.
    """
    # First YardLine of the play, broadcast to every row of that play.
    yard_line = train_df.groupby('PlayId')['YardLine'].transform('first')

    players = pd.DataFrame({
        'PlayId': train_df['PlayId'],
        'X': train_df['X'] - yard_line,
        'Y': train_df['Y'],
        'angle': np.deg2rad(train_df['Orientation']),
        'S': train_df['S'],
    })

    is_offense = train_df['IsOffense'].astype(bool)
    offense = players[is_offense]
    defense = players[~is_offense]

    X_offense = _per_play_arrays(offense, 'X')
    Y_offense = _per_play_arrays(offense, 'Y')

    X_defense = _per_play_arrays(defense, 'X')
    Y_defense = _per_play_arrays(defense, 'Y')

    angle_offense = _per_play_arrays(offense, 'angle')
    angle_defense = _per_play_arrays(defense, 'angle')

    speed_offense = _per_play_arrays(offense, 'S')
    speed_defense = _per_play_arrays(defense, 'S')

    # One rusher row per play, re-ordered to match the grouped arrays above.
    is_rusher = is_offense & (train_df['NflIdRusher'] == train_df['NflId'])
    offense_play_ids = np.sort(offense['PlayId'].unique())
    rusher = (
        players[is_rusher]
        .drop_duplicates('PlayId')
        .set_index('PlayId')
        .reindex(offense_play_ids)
    )
    rusher_X = rusher['X'].to_numpy()
    rusher_Y = rusher['Y'].to_numpy()

    return X_offense, Y_offense, X_defense, Y_defense, angle_offense, angle_defense, speed_offense, speed_defense, rusher_X, rusher_Y

In [None]:
# Unpack the per-play arrays (positions, angles, speeds, rusher location) built from train_df.
X_offense, Y_offense, X_defense, Y_defense, angle_offense, angle_defense, speed_offense, speed_defense, rusher_X, rusher_Y = pitch_control_process(train_df)

In [None]:
# Quick look at the rusher x-coordinates returned by pitch_control_process.
rusher_X

In [None]:
def radius_calc(dist_to_ball):
    '''Influence radius as a function of the distance to the ball.

    The curve is a hand-picked stand-in (the author notes that the paper gives
    no exact formula), so treat it as a tunable parameter rather than a fixed law:

    * below 15 it grows cubically from 4: ``4 + dist_to_ball ** 3 / 560``
    * from 15 onwards it is constant: ``4 + 6 = 10``

    Works on scalars and numpy arrays alike (element-wise).
    '''
    beyond_cutoff = dist_to_ball >= 15
    within_cutoff = dist_to_ball < 15
    # Boolean masks act as 0/1 switches selecting the branch for each element.
    cubic_growth = (dist_to_ball ** 3) / 560 * within_cutoff
    return 4 + 6 * beyond_cutoff + cubic_growth

def compute_influence(speed, x_coord, y_coord, theta, rusher_X, rusher_Y):
    '''Compute the Gaussian influence parameters of every player of one team.

    Each player gets a bivariate normal whose covariance is ``R S S R^T``:
    ``S`` stretches the influence radius along the facing direction by
    ``1 + S_ratio`` and shrinks it across by ``1 - S_ratio``, and ``R`` rotates
    by ``theta``.  The mean is pushed ahead of the player by half the speed
    vector.

    Changes compared with the earlier version:

    * The scaling matrix used ``np.zeros(11)``, so only teams of exactly 11
      players worked.  Any number of players (or scalars) is accepted now.
    * ``np.linalg.inv(R.T).T`` was replaced by the plain transpose, which is
      the same matrix for a rotation, and the chained einsums by stacked
      matrix products.

    Parameters
    ----------
    speed, x_coord, y_coord, theta : array-like
        Per-player speed, position and facing angle (radians); broadcast
        against each other.
    rusher_X, rusher_Y : float
        Reference position used for the distance that drives ``radius_calc``;
        must be in the same coordinate frame as ``x_coord`` / ``y_coord``.

    Returns
    -------
    player_influence : ndarray, shape (n,)
        Density value of each player's Gaussian at the player's own position
        (used by ``func`` as a normaliser).
    var_x, var_y : ndarray, shape (n,)
        Square roots of the covariance diagonal, i.e. standard deviations
        despite the names.
    cor : ndarray, shape (n,)
        Correlation coefficient of each covariance matrix.
    mu_play : ndarray, shape (n, 2)
        Mean of each Gaussian.
    '''
    speed, x_coord, y_coord, theta = np.broadcast_arrays(
        *(np.atleast_1d(np.asarray(values, dtype=float))
          for values in (speed, x_coord, y_coord, theta)))

    player_coords = np.stack([x_coord, y_coord], axis=-1)           # (n, 2)
    # 13 is used as the maximum speed.  NOTE(review): the original comment
    # says m/s; confirm the unit of `speed`.
    S_ratio = (speed / 13) ** 2
    dist_to_ball = np.sqrt(np.square(x_coord - rusher_X) + np.square(y_coord - rusher_Y))
    RADIUS = radius_calc(dist_to_ball)                     # to be discussed/improved

    cos_t, sin_t = np.cos(theta), np.sin(theta)
    zeros = np.zeros_like(RADIUS, dtype=float)

    # Stacks of 2x2 matrices, one per player: shape (n, 2, 2).
    S_matrix = np.stack([
        np.stack([RADIUS * (1 + S_ratio), zeros], axis=-1),
        np.stack([zeros, RADIUS * (1 - S_ratio)], axis=-1),
    ], axis=-2)
    R_matrix = np.stack([
        np.stack([cos_t, -sin_t], axis=-1),
        np.stack([sin_t, cos_t], axis=-1),
    ], axis=-2)
    COV_matrix = R_matrix @ S_matrix @ S_matrix @ np.swapaxes(R_matrix, -1, -2)

    var_x = np.sqrt(COV_matrix[:, 0, 0])
    var_y = np.sqrt(COV_matrix[:, 1, 1])
    cor = COV_matrix[:, 0, 1] / var_x / var_y

    # NOTE: (1 / 2 * np.pi) evaluates to pi / 2, not 1 / (2 * pi).  func()
    # multiplies its densities by the very same factor and then divides by
    # player_influence, so the constant cancels.  Keep the two in sync: do not
    # change it here without changing func() as well.
    norm_fact = (1 / 2 * np.pi) * (1 / np.sqrt(np.linalg.det(COV_matrix)))

    mu_play = player_coords + np.stack([speed * cos_t, speed * sin_t], axis=-1) / 2

    # Quadratic form (p - mu)^T COV^-1 (p - mu) evaluated at the player's own position.
    offset = player_coords - mu_play
    intermed_scalar_player = np.einsum('ni,nij,nj->n', offset, np.linalg.inv(COV_matrix), offset)
    player_influence = norm_fact * np.exp(- .5 * intermed_scalar_player)

    return player_influence, var_x, var_y, cor, mu_play



def _team_influence(xx, yy, var_x, var_y, cor, player_influence, mu_play):
    '''Sum of the players' normalised Gaussian influences on the (yy, xx) grid.'''
    xx = np.asarray(xx, dtype=float)
    yy = np.asarray(yy, dtype=float)
    mu_play = np.asarray(mu_play, dtype=float)

    # Per-player parameters as (n, 1, 1) so they broadcast over the grid.
    sd_x = np.asarray(var_x, dtype=float).reshape(-1, 1, 1)
    sd_y = np.asarray(var_y, dtype=float).reshape(-1, 1, 1)
    rho = np.asarray(cor, dtype=float).reshape(-1, 1, 1)
    peak = np.asarray(player_influence, dtype=float).reshape(-1, 1, 1)

    dx = xx.reshape(1, 1, -1) - mu_play[:, 0].reshape(-1, 1, 1)     # (n, 1, nx)
    dy = yy.reshape(1, -1, 1) - mu_play[:, 1].reshape(-1, 1, 1)     # (n, ny, 1)

    one_minus_rho2 = 1 - np.square(rho)
    quadratic = (np.square(dx) / np.square(sd_x)
                 + np.square(dy) / np.square(sd_y)
                 - 2 * rho * dx * dy / (sd_x * sd_y))

    # NOTE: (1 / 2 * np.pi) is pi / 2, not 1 / (2 * pi).  It is kept on purpose:
    # compute_influence() scales player_influence by the same factor, so the
    # constant cancels in the ratio below.  Change both places together.
    density = ((1 / 2 * np.pi) / (sd_x * sd_y * np.sqrt(one_minus_rho2))
               * np.exp(-0.5 / one_minus_rho2 * quadratic))

    return np.sum(density / peak, axis=0)                           # (ny, nx)


def func(xx,yy,off_var_x,off_var_y,off_cor,off_player_influence,off_mu_play,def_var_x,def_var_y,def_cor,def_player_influence,def_mu_play):
    '''Evaluate the pitch-control surface on a rectangular grid.

    Every player contributes a bivariate-normal influence, normalised so that
    it equals 1 at the point where ``*_player_influence`` was evaluated.  The
    surface is the logistic function of (offence influence - defence
    influence): 0.5 means contested, values towards 1 favour the offence and
    values towards 0 favour the defence.

    Fix: the earlier one-line version ADDED the defensive influence to the
    offensive one, so every value was above 0.5 and defenders increased the
    offence's control.  The defence term is now subtracted.

    Parameters
    ----------
    xx, yy : 1-D array-like
        Grid coordinates along x and y.
    off_var_x, off_var_y, off_cor, off_player_influence : array-like, shape (n_off,)
        Per-player standard deviations, correlation and normaliser for the
        offence, as returned by ``compute_influence``.
    off_mu_play : array-like, shape (n_off, 2)
        Per-player Gaussian means for the offence.
    def_* : same quantities for the defence.

    Returns
    -------
    ndarray, shape (len(yy), len(xx))
        Pitch-control values in (0, 1).
    '''
    offense = _team_influence(xx, yy, off_var_x, off_var_y, off_cor, off_player_influence, off_mu_play)
    defense = _team_influence(xx, yy, def_var_x, def_var_y, def_cor, def_player_influence, def_mu_play)
    return 1 / (1 + np.exp(-(offense - defense)))




In [None]:
# Evaluation grid shared by every play.  It is built once instead of on every
# loop iteration.  x is in the same frame as X_offense / X_defense and y in
# the same frame as Y_offense / Y_defense.
grid_x = np.linspace(-5, 10, 60)
grid_y = np.linspace(0, 50, 100)

# One (len(grid_y), len(grid_x)) surface per play, in the order of X_offense.
pitch_control = []

for i in range(len(X_offense)):
    # Gaussian influence parameters of both teams for play i.
    off_player_influence, off_var_x, off_var_y, off_cor, off_mu_play = compute_influence(
        speed_offense[i], X_offense[i], Y_offense[i], angle_offense[i], rusher_X[i], rusher_Y[i])
    def_player_influence, def_var_x, def_var_y, def_cor, def_mu_play = compute_influence(
        speed_defense[i], X_defense[i], Y_defense[i], angle_defense[i], rusher_X[i], rusher_Y[i])

    pitch_control.append(func(
        grid_x, grid_y,
        off_var_x, off_var_y, off_cor, off_player_influence, off_mu_play,
        def_var_x, def_var_y, def_cor, def_player_influence, def_mu_play))

In [None]:
import matplotlib.pyplot as plt

# X and Y were never defined, so this cell raised NameError on a fresh kernel.
# They must be the grid coordinates that were passed to func() when
# pitch_control was built: 60 points in x over [-5, 10], 100 in y over [0, 50].
X = np.linspace(-5, 10, 60)
Y = np.linspace(0, 50, 100)

plt.pcolor(X, Y, pitch_control[7])
plt.colorbar(label='pitch control')
plt.xlabel('x (grid coordinate)')
plt.ylabel('y (grid coordinate)')
plt.title('Pitch control surface, play index 7');

In [None]:
# features_array = np.array(pitch_control ).reshape(-1,np.array(pitch_control).shape[1]*np.array(pitch_control).shape[2])