In [None]:
# Apparently you may use different seed values at each stage
seed_value= 0

# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
# NOTE: the interpreter reads PYTHONHASHSEED once at start-up, so this assignment only
# reaches processes spawned from this kernel; it does not change hashing in the kernel itself.
import os
os.environ['PYTHONHASHSEED']=str(seed_value)

# 2. Set the `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)

# 3. Set the `numpy` pseudo-random generator at a fixed value
import numpy as np
np.random.seed(seed_value)
from scipy import constants
import scipy
# 4. Set the `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
# TensorFlow 2.x exposes the global seed as tf.random.set_seed, while 1.x only has
# tf.set_random_seed; pick whichever the installed version provides.
if hasattr(tf, 'random') and hasattr(tf.random, 'set_seed'):
    tf.random.set_seed(seed_value)
else:
    tf.set_random_seed(seed_value)

import pandas as pd
from scipy.spatial import Voronoi, voronoi_plot_2d
from scipy.spatial import ConvexHull

import sklearn.metrics as mtr
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
import tqdm
from tqdm import tqdm_notebook
import keras
import sys
# Print full arrays / wide frames instead of truncated summaries.
np.set_printoptions(threshold=sys.maxsize)
pd.set_option('display.max_columns', 300)
pd.set_option('display.max_rows', 150)

import warnings
warnings.filterwarnings("ignore",message="All-NaN slice encountered")
from keras.models import Sequential
from keras.callbacks import Callback, EarlyStopping
from keras.layers import Dense,BatchNormalization,Dropout
from keras.optimizers import RMSprop,Adam, SGD
from keras.layers import Dense, Activation
from keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import train_test_split, GroupKFold, KFold, GroupShuffleSplit,RepeatedKFold
import keras.backend as K
import datetime

## 1. Load Data

In [None]:
import os
# Print the path of every file found under /kaggle/input.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# WindSpeed is forced to dtype 'object'; all other columns keep the dtype pandas infers.
train = pd.read_csv('../input/nfl-big-data-bowl-2020/train.csv', dtype={'WindSpeed': 'object'})
# Distinct (GameId, PlayId, Yards) combinations found in the training frame.
outcomes = train[['GameId','PlayId','Yards']].drop_duplicates()

## 2. Preprocessing

In [None]:
def Preprocessing(train):
    """
    Clean the tracking data and add frame-wide engineered columns.

    The frame is modified in place and the same object is returned.

    Steps:
      * fill missing ``DefendersInTheBox`` with 7 and missing ``Orientation`` / ``Dir``
        with the column mean;
      * convert ``PlayerHeight`` from "feet-inches" text to inches and add ``PlayerBMI``;
      * harmonise team abbreviations across ``PossessionTeam``, ``HomeTeamAbbr`` and
        ``VisitorTeamAbbr``;
      * rescale 2017 speed and rotate 2017 orientation by 90 degrees;
      * add the boolean ``Rusher`` column (row belongs to the ball carrier).

    Because ``PlayerHeight`` is parsed from text, the function cannot be applied twice
    to the same frame.
    """
    # Impute Nulls for DefendersInTheBox.
    # Assign the result back: `col.fillna(..., inplace=True)` acts on a temporary
    # selection and is a no-op under pandas Copy-on-Write.
    train['DefendersInTheBox'] = train['DefendersInTheBox'].fillna(7)

    train['Orientation'] = train['Orientation'].fillna(np.nanmean(train['Orientation']))
    train['Dir'] = train['Dir'].fillna(np.nanmean(train['Dir']))

    # PlayerHeight: in inches, 1ft=12in; PlayerBMI
    def _height_to_inches(height):
        parts = height.split('-')
        return 12 * int(parts[0]) + int(parts[1])

    train['PlayerHeight'] = train['PlayerHeight'].apply(_height_to_inches)
    train['PlayerBMI'] = 703*(train['PlayerWeight']/(train['PlayerHeight'])**2)

    # Team name mismatch https://www.kaggle.com/c/nfl-big-data-bowl-2020/discussion/112303#latest-667885
    map_abbr = {'ARI': 'ARZ', 'BAL': 'BLT', 'CLE': 'CLV', 'HOU': 'HST'}
    for abb in train['PossessionTeam'].unique():
        map_abbr[abb] = abb

    # A code missing from the mapping (e.g. a team that never has possession in a small
    # frame) keeps its original value instead of being turned into NaN by `.map`.
    for team_col in ('PossessionTeam', 'HomeTeamAbbr', 'VisitorTeamAbbr'):
        train[team_col] = train[team_col].map(map_abbr).fillna(train[team_col])

    # Speed / orientation corrections applied to the 2017 season only
    is_2017 = train['Season'] == 2017
    train.loc[is_2017, 'S'] = (train.loc[is_2017, 'S'] - 2.4355) / 1.2930 * 1.4551 + 2.7570
    train.loc[is_2017, 'Orientation'] = (train.loc[is_2017, 'Orientation'] + 90) % 360
    train['Rusher'] = train['NflIdRusher'] == train['NflId']
    return train

## 3. Feature Engineering

In [None]:

def create_features(df, deploy=False):
    
    def strtoseconds(txt):
        """
        Convert a colon-separated "a:b:c" string into the number 60*a + b + c/60.
        """
        parts = txt.split(':')
        return 60 * int(parts[0]) + int(parts[1]) + int(parts[2]) / 60

    def new_X(x_coordinate, play_direction):
        """
        Return the X coordinate mirrored as 120 - X when the play direction is 'left';
        any other direction leaves X untouched.
        """
        return 120.0 - x_coordinate if play_direction == 'left' else x_coordinate

    def new_line(rush_team, field_position, yardline):
        """
        Translate a yard line into a field coordinate.

        When `rush_team` equals `field_position` the result is 10 + yardline
        (10-yard end zone plus the line of scrimmage); otherwise it is
        60 + (50 - yardline) (half the field plus the distance from midfield).
        """
        ball_on_own_side = rush_team == field_position
        if not ball_on_own_side:
            return 60.0 + (50 - yardline)
        return 10.0 + yardline

    def new_orientation(angle, play_direction):
        """
        Mirror an angle as 360 - angle for plays whose direction is 'left'
        (a result of exactly 360 becomes 0); other directions return the angle as is.
        """
        if play_direction != 'left':
            return angle
        mirrored = 360.0 - angle
        return 0.0 if mirrored == 360.0 else mirrored

    def euclidean_distance(x1,y1,x2,y2):
        """
        Straight-line distance between the points (x1, y1) and (x2, y2).
        """
        return np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)

    def voronoi_rusher_vertices(play_id, train):
        """
        Build the Voronoi diagram of one play and return the rusher's cell.

        The players of `play_id` are taken from `train` (columns ``PlayId``, ``X``, ``Y``,
        ``Rusher``). Every point is additionally reflected across the lines y = 0,
        y = 160/3, x = 10 and x = 110 before the diagram is computed.

        Returns:
            list of vertex coordinates (one array per vertex) of the cell belonging
            to the row flagged ``Rusher``.

        Raises:
            ValueError: when the play has no row flagged ``Rusher`` or the rusher's
                cell is unbounded (previously this surfaced as an UnboundLocalError).
        """
        play = train[train.PlayId == play_id]
        # Cast to float: with integer coordinates the 320/3 reflection would be truncated.
        xy = play[['X', 'Y']].values.astype(float)

        mirror_y_low = xy.copy()
        mirror_y_low[:, 1] = - xy[:, 1]
        mirror_y_high = xy.copy()
        mirror_y_high[:, 1] = 320/3 - xy[:, 1]
        mirror_x_low = xy.copy()
        mirror_x_low[:, 0] = 20 - xy[:, 0]
        mirror_x_high = xy.copy()
        mirror_x_high[:, 0] = 220 - xy[:, 0]
        points = np.concatenate((xy, mirror_y_low, mirror_y_high, mirror_x_low, mirror_x_high), axis=0)

        vor = Voronoi(points)
        rusher_polygon = None
        # Only the rows flagged as rusher matter; the original points come first in `points`,
        # so their row position is also their point index in the diagram.
        for r in np.flatnonzero(play['Rusher'].values):
            region = vor.regions[vor.point_region[r]]
            if -1 not in region:  # -1 marks a vertex at infinity, i.e. an unbounded cell
                rusher_polygon = [vor.vertices[i] for i in region]

        if rusher_polygon is None:
            raise ValueError('no bounded Voronoi cell found for the rusher of play {}'.format(play_id))
        return rusher_polygon
    
    def voronoi_volumes(play_id, train):
        """
        Area of every player's Voronoi cell for one play.

        The players of `play_id` are taken from `train` (columns ``PlayId``, ``X``, ``Y``).
        Every point is additionally reflected across the lines y = 0, y = 160/3, x = 10
        and x = 110 before the diagram is computed.

        Returns:
            numpy array with one entry per player row (in row order). Each entry is the
            ``ConvexHull(...).volume`` of the player's cell, which for 2-D points is the
            area; unbounded cells are left at 0.
        """
        play = train[train.PlayId == play_id]
        # Cast to float: with integer coordinates the 320/3 reflection would be truncated.
        xy = play[['X', 'Y']].values.astype(float)
        n_points = xy.shape[0]

        mirror_y_low = xy.copy()
        mirror_y_low[:, 1] = - xy[:, 1]
        mirror_y_high = xy.copy()
        mirror_y_high[:, 1] = 320/3 - xy[:, 1]
        mirror_x_low = xy.copy()
        mirror_x_low[:, 0] = 20 - xy[:, 0]
        mirror_x_high = xy.copy()
        mirror_x_high[:, 0] = 220 - xy[:, 0]
        points = np.concatenate((xy, mirror_y_low, mirror_y_high, mirror_x_low, mirror_x_high), axis=0)

        vor = Voronoi(points)
        volume = np.zeros(n_points)

        # The original points come first in `points`, so index r addresses player row r.
        for r in range(n_points):
            region = vor.regions[vor.point_region[r]]
            if -1 not in region:  # -1 marks a vertex at infinity, i.e. an unbounded cell
                volume[r] = ConvexHull(vor.vertices[region]).volume

        return volume
    
    def voronoi_volumes_handoff(play_id, train):
        """
        Area of every player's Voronoi cell for one play, using the projected
        ``X_handoff`` / ``Y_handoff`` coordinates.

        The players of `play_id` are taken from `train` (columns ``PlayId``,
        ``X_handoff``, ``Y_handoff``). Every point is additionally reflected across the
        lines y = 0, y = 160/3, x = 10 and x = 110 before the diagram is computed.

        Returns:
            numpy array with one entry per player row (in row order). Each entry is the
            ``ConvexHull(...).volume`` of the player's cell, which for 2-D points is the
            area; unbounded cells are left at 0.
        """
        play = train[train.PlayId == play_id]
        # Cast to float: with integer coordinates the 320/3 reflection would be truncated.
        xy = play[['X_handoff', 'Y_handoff']].values.astype(float)
        n_points = xy.shape[0]

        mirror_y_low = xy.copy()
        mirror_y_low[:, 1] = - xy[:, 1]
        mirror_y_high = xy.copy()
        mirror_y_high[:, 1] = 320/3 - xy[:, 1]
        mirror_x_low = xy.copy()
        mirror_x_low[:, 0] = 20 - xy[:, 0]
        mirror_x_high = xy.copy()
        mirror_x_high[:, 0] = 220 - xy[:, 0]
        points = np.concatenate((xy, mirror_y_low, mirror_y_high, mirror_x_low, mirror_x_high), axis=0)

        vor = Voronoi(points)
        volume = np.zeros(n_points)

        # The original points come first in `points`, so index r addresses player row r.
        for r in range(n_points):
            region = vor.regions[vor.point_region[r]]
            if -1 not in region:  # -1 marks a vertex at infinity, i.e. an unbounded cell
                volume[r] = ConvexHull(vor.vertices[region]).volume

        return volume
    
    
    def back_direction(orientation):
        """Return 1 when the angle is strictly greater than 180 degrees, otherwise 0."""
        return 1 if orientation > 180.0 else 0
        
    def move_direction(orientation):
        """Return 1 when the angle lies strictly between 90 and 270 degrees, otherwise 0."""
        if 90.0 < orientation < 270.0:
            return 1
        return 0
        
    def update_yardline(df):
        """
        Recompute the yard line for every rusher row.

        Keeps the rows where ``NflId == NflIdRusher`` and replaces ``YardLine`` with
        ``new_line(PossessionTeam, FieldPosition, YardLine)``.

        Returns:
            DataFrame with the columns ``GameId``, ``PlayId`` and ``YardLine``.
        """
        rusher_rows = df[df['NflId'] == df['NflIdRusher']].copy()
        # Iterate the columns by name: the former `x[0], x[1], x[2]` positional access on a
        # label-indexed row is deprecated in pandas and will be treated as label lookup.
        adjusted = [
            new_line(team, side, line)
            for team, side, line in zip(rusher_rows['PossessionTeam'],
                                        rusher_rows['FieldPosition'],
                                        rusher_rows['YardLine'])
        ]
        rusher_rows['YardLine'] = pd.Series(adjusted, index=rusher_rows.index, dtype=float)
        return rusher_rows[['GameId','PlayId','YardLine']]

    def update_orientation(df, yardline):
        """
        Update position and angles for all players and attach the recomputed yard line.

        ``X`` goes through ``new_X`` and ``Orientation`` / ``Dir`` through
        ``new_orientation``, each together with the row's ``PlayDirection``. These three
        columns are overwritten on the frame passed in. The old ``YardLine`` column is
        then dropped and `yardline` is inner-joined on ``GameId`` / ``PlayId``.

        Returns:
            the merged DataFrame (a new object).
        """
        play_direction = df['PlayDirection']
        # Iterate the columns directly: the former `x[0], x[1]` positional access on a
        # label-indexed row is deprecated in pandas and will be treated as label lookup.
        df['X'] = [new_X(x, direction) for x, direction in zip(df['X'], play_direction)]
        df['Orientation'] = [new_orientation(angle, direction)
                             for angle, direction in zip(df['Orientation'], play_direction)]
        df['Dir'] = [new_orientation(angle, direction)
                     for angle, direction in zip(df['Dir'], play_direction)]

        df = df.drop('YardLine', axis=1)
        df = pd.merge(df, yardline, on=['GameId','PlayId'], how='inner')

        return df
    
    def update_X_Y_when_handoff(df, seconds_ahead=1):
        """
        Project every player's position `seconds_ahead` seconds forward along ``Dir``.

        Adds ``X_handoff`` and ``Y_handoff`` to the frame passed in:
            X_handoff = X +/- |sin(Dir)| * S * seconds_ahead
            Y_handoff = Y +/- |cos(Dir)| * S * seconds_ahead
        with the signs chosen per 90-degree quadrant of ``Dir``. Rows whose ``Dir`` is
        NaN or outside [0, 360] get 0 in both columns.

        The previous implementation parsed ``TimeHandoff`` / ``TimeSnap`` and computed
        their difference, but immediately overwrote it with the constant 1; that dead
        computation is removed and the constant is exposed as `seconds_ahead`.
        NOTE(review): if the real snap-to-handoff delay was intended, pass it in here.

        Returns:
            a new DataFrame without ``TimeHandoff``, ``TimeSnap`` (and ``TimeDelta`` if
            the input carried such a column).
        """
        direction = df['Dir']
        heading = np.deg2rad(direction)
        step_x = np.abs(np.sin(heading)) * df['S'] * seconds_ahead
        step_y = np.abs(np.cos(heading)) * df['S'] * seconds_ahead

        # Quadrant masks, matching the original branch boundaries exactly.
        first = (direction >= 0) & (direction < 90)
        second = (direction >= 90) & (direction < 180)
        third = (direction >= 180) & (direction < 270)
        fourth = (direction >= 270) & (direction <= 360)

        # np.where keeps both columns float from the start (no int column receiving floats)
        # and leaves rows outside every quadrant at 0.
        df['X_handoff'] = np.where(first | second, df['X'] + step_x,
                                   np.where(third | fourth, df['X'] - step_x, 0.0))
        df['Y_handoff'] = np.where(first | fourth, df['Y'] + step_y,
                                   np.where(second | third, df['Y'] - step_y, 0.0))

        trimmed = df.drop(['TimeHandoff', 'TimeSnap'], axis=1)
        return trimmed.drop(columns='TimeDelta', errors='ignore')
        
    # Play level: GameId, PlayId and the yard line recomputed from the rusher rows
    yardline = update_yardline(df)
    
    # Update X, Orientation and Dir for all players and swap in the recomputed yard line
    df = update_orientation(df, yardline)
    
    # Add the projected X_handoff / Y_handoff columns; the timestamp columns are dropped
    df = update_X_Y_when_handoff(df)
    
    def back_features(df):
        """
        Ball-carrier features: one row per rusher row (``NflId == NflIdRusher``).

        Columns prefixed ``back_`` describe the rusher (position, angles, speed,
        acceleration, distance behind the yard line, momentum / force proxies).
        The ``voronoi_*`` columns describe the rusher's Voronoi cell, computed once with
        all players and once (``*_def_only``) with only the opposing team plus the rusher:
        cell area (``ConvexHull(...).volume``, which is the area for 2-D points) and the
        smallest / largest X among the cell's vertices.

        Expects the ``Rusher`` column required by ``voronoi_rusher_vertices``.

        Returns:
            DataFrame keyed by ``GameId`` / ``PlayId`` with the engineered columns.
        """
        is_carrier = df['NflId'] == df['NflIdRusher']
        # .copy(): columns are added below, and writing into a filtered selection raises
        # SettingWithCopyWarning. The former full-frame copy of `df` was only read from,
        # so it is no longer made.
        carriers = df.loc[is_carrier, ['GameId','PlayId','NflIdRusher','X','Y','S', 'A','X_handoff','Y_handoff','PlayerWeight',
                                       'Orientation','Dir','YardLine', 'DefendersInTheBox']].copy()

        rusher = df.loc[is_carrier, ['GameId','PlayId','Team','X','Y']].copy()
        rusher.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY']

        # Opposing team plus the rusher himself: input of the "def_only" diagrams
        defense = pd.merge(df, rusher, on=['GameId','PlayId'], how='inner')
        defense = defense[(defense['Team'] != defense['RusherTeam'])|(defense['NflId'] == defense['NflIdRusher'])]

        area = []
        voronoi_close_x = []
        voronoi_far_x = []
        def_only_area = []
        def_only_voronoi_close_x = []
        def_only_voronoi_far_x = []

        # The lists are assigned positionally below, which relies on one rusher row per play.
        for play in carriers['PlayId'].unique().tolist():
            vertices = voronoi_rusher_vertices(play, df)
            area.append(ConvexHull(vertices).volume)
            voronoi_close_x.append(np.min([ver[0] for ver in vertices]))
            voronoi_far_x.append(np.max([ver[0] for ver in vertices]))

            def_only_vertices = voronoi_rusher_vertices(play, defense)
            def_only_area.append(ConvexHull(def_only_vertices).volume)
            def_only_voronoi_close_x.append(np.min([ver[0] for ver in def_only_vertices]))
            def_only_voronoi_far_x.append(np.max([ver[0] for ver in def_only_vertices]))

        carriers['voronoi_rb_area'] = area
        carriers['voronoi_close_x'] = voronoi_close_x
        carriers['voronoi_far_x'] = voronoi_far_x

        carriers['voronoi_rb_area_def_only'] = def_only_area
        carriers['voronoi_close_x_def_only'] = def_only_voronoi_close_x
        carriers['voronoi_far_x_def_only'] = def_only_voronoi_far_x
        # Ratio def-only area / full area; falls back to the def-only area when the full area is 0.
        area_ratio = carriers['voronoi_rb_area_def_only'] / carriers['voronoi_rb_area']
        carriers['voronoi_rb_area_change'] = area_ratio.where(carriers['voronoi_rb_area'] != 0,
                                                              carriers['voronoi_rb_area_def_only'])

        carriers['back_from_scrimmage'] = carriers['YardLine'] - carriers['X']
        carriers['back_from_scrimmage_handoff'] = carriers['YardLine'] - carriers['X_handoff']

        carriers['back_oriented_down_field'] = carriers['Orientation'].apply(back_direction)
        carriers['back_moving_down_field'] = carriers['Dir'].apply(back_direction)

        # NOTE(review): the weight is converted lb -> kg and then divided by g; kept as is
        # because it only rescales the feature, but confirm the division is intended.
        carriers['PlayerWeightKG'] = carriers["PlayerWeight"] * 0.45359237 / scipy.constants.g
        # 0.9144 is the yard -> metre factor applied to S and A
        carriers['back_momentum'] =  carriers['PlayerWeightKG'] * carriers['S'] * 0.9144
        carriers['back_force'] = carriers['PlayerWeightKG'] * carriers['A'] * 0.9144

        carriers = carriers.rename(columns={'X':'back_X',
                                            'Y':'back_Y',
                                            'Orientation':'back_Orientation',
                                            'Dir':'back_Dir',
                                            'A':'back_A',
                                            'S':'back_S'})

        carriers = carriers[['GameId','PlayId','back_X','back_Y', 'back_Orientation',
                             'back_Dir', 'back_A', 'back_S',
                             'back_from_scrimmage','back_from_scrimmage_handoff',
                             'back_oriented_down_field','back_moving_down_field',
                             'voronoi_rb_area','voronoi_rb_area_change','voronoi_close_x', 'voronoi_far_x',
                             'voronoi_rb_area_def_only','voronoi_close_x_def_only', 'voronoi_far_x_def_only','back_momentum','back_force']]
        return carriers
    
    def Bowen_features(df):
        """
        Per-play features based on player position groups.

        For each play the players are split into the rusher's team (offense) and the
        other team (defense). "Core" defenders exclude FS/S/CB/SS/SAF; "core" offense
        excludes WR/RB/HB/QB. ``RB`` means the offensive players whose ``Position`` is
        'RB'. NOTE(review): that is not necessarily the ball carrier; plays without an
        RB yield NaN for the RB-based features.

        Side effect: adds an ``IsRusher`` column to the frame passed in.

        Fixes compared with the previous version:
          * ``RB_BMI`` was overwritten on every play by a scalar computed from the global
            ``train`` frame's mean height, so every row received the last play's value;
            it is now collected per play from the RB's own weight and height.
          * plays are iterated with a groupby instead of fixed 22-row slices, so a play
            with a different number of rows can no longer shift all later plays.

        Returns:
            DataFrame with one row per (GameId, PlayId), ordered by PlayId.
        """
        df['IsRusher'] = df['NflId'] == df['NflIdRusher']
        df = df[['GameId', 'PlayId', 'Position', 'X_handoff', 'Y_handoff', 'S', 'A','Dir','IsRusher', 'Team','X','Y','PlayerWeight','PlayerHeight']].sort_values(by='PlayId').copy()

        # Frame-wide QB averages, used when a play has no QB row
        all_qbs = df[df['Position'] == 'QB']
        qb_S_mean = all_qbs['S'].mean()
        qb_A_mean = all_qbs['A'].mean()

        records = []
        for _, df_play in df.groupby(['GameId', 'PlayId'], sort=False):
            rusher_team = df_play.loc[df_play['IsRusher'], 'Team'].values[0]
            train_def = df_play[df_play['Team'] != rusher_team].copy()
            train_off = df_play[df_play['Team'] == rusher_team].copy()
            RB = train_off[train_off['Position'] == 'RB'].copy()
            def_core = train_def[~train_def['Position'].isin(['FS', 'S', 'CB', 'SS', 'SAF'])].reset_index(drop=True).copy()
            off_core = train_off[~train_off['Position'].isin(['WR','RB','HB','QB'])].reset_index(drop=True).copy()

            # Time each core defender needs to reach the RB's projected position at
            # current speed; stationary defenders get the slowest time of the group.
            def_core['distance_to_RB'] = euclidean_distance(def_core['X_handoff'], def_core['Y_handoff'],
                                                            RB['X_handoff'].mean(), RB['Y_handoff'].mean())
            time_to_rb = (def_core['distance_to_RB'] / def_core['S']).where(def_core['S'] != 0)
            def_core['time_to_RB'] = time_to_rb.fillna(value=time_to_rb.max())
            closest_def = def_core[def_core['time_to_RB'] == def_core['time_to_RB'].min()]

            # RB speed split into its two components (uses every 'RB' row of the play)
            play_rbs = df_play[df_play['Position'] == 'RB']
            rb_s = play_rbs['S'].mean()
            rb_d = play_rbs['Dir'].mean()

            # Force proxy of the core defenders (weight lb -> kg, yards -> metres).
            # NOTE(review): the previous version also computed min/max/mean/std of a
            # matching momentum proxy (weight * S) but never wrote them to the output;
            # that dead computation is dropped. Add the columns here if they are wanted.
            def_core['PlayerWeightKG'] = def_core["PlayerWeight"] * 0.45359237 / scipy.constants.g
            def_core['back_force'] = def_core['PlayerWeightKG'] * def_core['A'] * 0.9144

            # QB speed / acceleration, falling back to the frame-wide mean
            play_qbs = df_play[df_play['Position'] == 'QB']
            if len(play_qbs) < 1:
                play_qb_S, play_qb_A = qb_S_mean, qb_A_mean
            else:
                play_qb_S, play_qb_A = play_qbs['S'].mean(), play_qbs['A'].mean()

            def_dir_spread = def_core['Dir'].max() - def_core['Dir'].min()
            off_dir_spread = off_core['Dir'].max() - off_core['Dir'].min()

            # Keys are listed in the column order of the returned frame.
            records.append({
                'qb_S': play_qb_S,
                'qb_A': play_qb_A,
                'RB_v_vertical': abs(np.cos(np.deg2rad(rb_d)))*rb_s,
                'RB_v_horizontal': abs(np.sin(np.deg2rad(rb_d)))*rb_s,
                # Travel time from core defense to RB
                'def_closest_time_avg': def_core['time_to_RB'].mean(),
                'def_closest_time_min': def_core['time_to_RB'].min(),
                'def_closest_time_max': def_core['time_to_RB'].max(),
                'def_closest_time_std': np.std(def_core['time_to_RB']),
                # Defense Dir degree vs. Offense Dir degree
                'off_def_dir_degree_rate': 1 if def_dir_spread > off_dir_spread else 0,
                'def_dir_degree_std': np.std(def_core['Dir']),
                'off_dir_degree_std': np.std(off_core['Dir']),
                # Strategy
                'RB_strategy': 1 if RB['X'].mean() > train_off['X'].min() else 0,
                'def_treat': np.nanpercentile(def_core['time_to_RB'],25),
                # X,Y travel distance for off and def
                'def_Y_travel_distance': sum(abs(def_core['Y'] - def_core['Y_handoff'])),
                'def_X_travel_distance': sum(abs(def_core['X'] - def_core['X_handoff'])),
                'off_Y_travel_distance': sum(abs(off_core['Y'] - off_core['Y_handoff'])),
                'off_X_travel_distance': sum(abs(off_core['X'] - off_core['X_handoff'])),
                # Closest def X Y A S
                'closest_def_X': closest_def['X'].mean(),
                'closest_def_Y': closest_def['Y'].mean(),
                'closest_def_A': closest_def['A'].mean(),
                'closest_def_S': closest_def['S'].mean(),
                'RB_travel_distance': euclidean_distance(RB['X'].mean(),RB['Y'].mean(),RB['X_handoff'].mean(),RB['Y_handoff'].mean()),
                'RB_home_or_away': 1 if sum(RB['Team'] == 'home')>0 else 0,
                'RB_BMI': 703*(RB['PlayerWeight'].mean()/(RB['PlayerHeight'].mean())**2),
                # defense behind offense: number of defenders with x < mean(x) of offense
                'def_behind_off': train_def[train_def['X'] < train_off['X'].mean()].shape[0],
                'def_behind_off_handoff': train_def[train_def['X_handoff'] < train_off['X_handoff'].mean()].shape[0],
                'def_core_force_min': def_core['back_force'].min(),
                'def_core_force_max': def_core['back_force'].max(),
                'def_core_force_mean': def_core['back_force'].mean(),
                'def_core_force_std': np.std(def_core['back_force']),
                # def convex hull (for 2-D points ConvexHull.volume is the enclosed area)
                'def_core_convexhull': ConvexHull(def_core[['X', 'Y']].values).volume,
                'def_convexhull': ConvexHull(train_def[['X', 'Y']].values).volume,
                'def_core_convexhull_handoff': ConvexHull(def_core[['X_handoff', 'Y_handoff']].values).volume,
                'def_convexhull_handoff': ConvexHull(train_def[['X_handoff', 'Y_handoff']].values).volume,
            })

        plays = df[['GameId', 'PlayId']].drop_duplicates().copy()
        # One record per play, in the same (PlayId-sorted) order as `plays`; a length
        # mismatch raises here instead of silently misaligning features.
        features = pd.DataFrame(records, index=plays.index)
        return pd.concat([plays, features], axis=1)
    
    def all_distance_relative_to_back(df):
        """
        Distance of every non-rusher player to the rusher, using the projected
        ``X_handoff`` / ``Y_handoff`` coordinates.

        Returns:
            DataFrame with one row per (GameId, PlayId) and the min / max / mean / std
            of those distances (``all_min_dist`` ... ``all_std_dist``).
        """
        rusher = df.loc[df['NflId'] == df['NflIdRusher'], ['GameId','PlayId','Team','X_handoff','Y_handoff']].copy()
        rusher.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY']

        all_players = pd.merge(df,rusher,on=['GameId','PlayId'],how='inner')
        all_players = all_players.loc[all_players['NflId'] != all_players['NflIdRusher'],
                                      ['GameId','PlayId','X_handoff','Y_handoff','RusherX','RusherY']].copy()

        # Column-wise call: avoids the row-wise apply whose `x[0]..x[3]` positional access
        # on a label-indexed row is deprecated in pandas.
        all_players['all_dist_to_back'] = euclidean_distance(all_players['X_handoff'], all_players['Y_handoff'],
                                                             all_players['RusherX'], all_players['RusherY'])

        all_players = all_players.groupby(['GameId','PlayId'])\
                                 .agg({'all_dist_to_back':['min','max','mean','std']})\
                                 .reset_index()

        all_players.columns = ['GameId','PlayId','all_min_dist','all_max_dist','all_mean_dist','all_std_dist']

        return all_players
    
    
    def offense_distance_relative_to_back(df):
        """
        Distance of the rusher's team-mates to the rusher, using ``X`` / ``Y``.

        Returns:
            DataFrame with one row per (GameId, PlayId) and the min / max / mean / std
            of those distances (``off_min_dist`` ... ``off_std_dist``).
        """
        rusher = df.loc[df['NflId'] == df['NflIdRusher'], ['GameId','PlayId','Team','X','Y']].copy()
        rusher.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY']

        offense = pd.merge(df,rusher,on=['GameId','PlayId'],how='inner')
        is_teammate = (offense['Team'] == offense['RusherTeam']) & (offense['NflId'] != offense['NflIdRusher'])
        offense = offense.loc[is_teammate, ['GameId','PlayId','X','Y','RusherX','RusherY']].copy()
        # Column-wise call: avoids the row-wise apply whose `x[0]..x[3]` positional access
        # on a label-indexed row is deprecated in pandas.
        offense['off_dist_to_back'] = euclidean_distance(offense['X'], offense['Y'],
                                                         offense['RusherX'], offense['RusherY'])

        offense = offense.groupby(['GameId','PlayId'])\
                         .agg({'off_dist_to_back':['min','max','mean','std']})\
                         .reset_index()
        offense.columns = ['GameId','PlayId','off_min_dist','off_max_dist','off_mean_dist','off_std_dist']

        return offense
    
    def offense_distance_relative_to_back_handoff(df):
        """
        Distance of the rusher's team-mates to the rusher, using the projected
        ``X_handoff`` / ``Y_handoff`` coordinates.

        Returns:
            DataFrame with one row per (GameId, PlayId) and the min / max / mean / std
            of those distances (``off_min_dist_handoff`` ... ``off_std_dist_handoff``).
        """
        rusher = df.loc[df['NflId'] == df['NflIdRusher'], ['GameId','PlayId','Team','X_handoff','Y_handoff']].copy()
        rusher.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY']

        offense = pd.merge(df,rusher,on=['GameId','PlayId'],how='inner')
        is_teammate = (offense['Team'] == offense['RusherTeam']) & (offense['NflId'] != offense['NflIdRusher'])
        offense = offense.loc[is_teammate, ['GameId','PlayId','X_handoff','Y_handoff','RusherX','RusherY']].copy()
        # Column-wise call: avoids the row-wise apply whose `x[0]..x[3]` positional access
        # on a label-indexed row is deprecated in pandas.
        offense['off_dist_to_back'] = euclidean_distance(offense['X_handoff'], offense['Y_handoff'],
                                                         offense['RusherX'], offense['RusherY'])

        offense = offense.groupby(['GameId','PlayId'])\
                         .agg({'off_dist_to_back':['min','max','mean','std']})\
                         .reset_index()
        offense.columns = ['GameId','PlayId','off_min_dist_handoff','off_max_dist_handoff','off_mean_dist_handoff','off_std_dist_handoff']

        return offense
    
    def defense_distance_relative_to_back(df):
        """
        Distance of every opposing-team player to the rusher, using ``X`` / ``Y``.

        Returns:
            DataFrame with one row per (GameId, PlayId) and the min / max / mean / std
            of those distances (``def_min_dist`` ... ``def_std_dist``).
        """
        rusher = df.loc[df['NflId'] == df['NflIdRusher'], ['GameId','PlayId','Team','X','Y']].copy()
        rusher.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY']

        defense = pd.merge(df,rusher,on=['GameId','PlayId'],how='inner')
        defense = defense.loc[defense['Team'] != defense['RusherTeam'],
                              ['GameId','PlayId','X','Y','RusherX','RusherY']].copy()
        # Column-wise call: avoids the row-wise apply whose `x[0]..x[3]` positional access
        # on a label-indexed row is deprecated in pandas.
        defense['def_dist_to_back'] = euclidean_distance(defense['X'], defense['Y'],
                                                         defense['RusherX'], defense['RusherY'])

        defense = defense.groupby(['GameId','PlayId'])\
                         .agg({'def_dist_to_back':['min','max','mean','std']})\
                         .reset_index()
        defense.columns = ['GameId','PlayId','def_min_dist','def_max_dist','def_mean_dist','def_std_dist']

        return defense
    
    def defense_distance_relative_to_back_handoff(df):
        """
        Distance of every opposing-team player to the rusher, using the projected
        ``X_handoff`` / ``Y_handoff`` coordinates.

        Returns:
            DataFrame with one row per (GameId, PlayId) and the min / max / mean / std
            of those distances (``def_min_dist_handoff`` ... ``def_std_dist_handoff``).
        """
        rusher = df.loc[df['NflId'] == df['NflIdRusher'], ['GameId','PlayId','Team','X_handoff','Y_handoff']].copy()
        rusher.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY']

        defense = pd.merge(df,rusher,on=['GameId','PlayId'],how='inner')
        defense = defense.loc[defense['Team'] != defense['RusherTeam'],
                              ['GameId','PlayId','X_handoff','Y_handoff','RusherX','RusherY']].copy()
        # Column-wise call: avoids the row-wise apply whose `x[0]..x[3]` positional access
        # on a label-indexed row is deprecated in pandas.
        defense['def_dist_to_back'] = euclidean_distance(defense['X_handoff'], defense['Y_handoff'],
                                                         defense['RusherX'], defense['RusherY'])

        defense = defense.groupby(['GameId','PlayId'])\
                         .agg({'def_dist_to_back':['min','max','mean','std']})\
                         .reset_index()
        defense.columns = ['GameId','PlayId','def_min_dist_handoff','def_max_dist_handoff','def_mean_dist_handoff','def_std_dist_handoff']

        return defense
        
    def defense_vor_area(df):
        """
        Voronoi-cell area statistics of the opposing team, with all players in the diagram.

        ``voronoi_volumes`` is evaluated once per play on all of that play's rows; the
        areas of the players who are not on the rusher's team are then summarised.

        Returns:
            DataFrame with one row per (GameId, PlayId) and the min / max / mean / std /
            sum of those areas (``def_vor_area_min`` ... ``def_vor_area_sum``).
        """
        # Fill a float buffer by row position. Each play's rows are handed to the helper
        # directly, so it no longer has to mask the whole frame once per play, and no
        # float values are written into an int-initialised column.
        volumes = np.zeros(len(df))
        for play, rows in df.groupby('PlayId', sort=False).indices.items():
            volumes[rows] = voronoi_volumes(play, df.iloc[rows])
        df_vor = df.assign(voronoi_volumes=volumes)  # new frame; the caller's df is untouched
        df_vor['voronoi_volumes'] = df_vor['voronoi_volumes'].fillna(0)

        rusher = df_vor.loc[df_vor['NflId'] == df_vor['NflIdRusher'], ['GameId','PlayId','Team','X','Y']].copy()
        rusher.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY']

        defense = pd.merge(df_vor,rusher,on=['GameId','PlayId'],how='inner')
        defense = defense[defense['Team'] != defense['RusherTeam']][['GameId','PlayId','X','Y','RusherX','RusherY','voronoi_volumes']]

        # The distance-to-rusher column computed here previously was never used and has been removed.
        defense = defense.groupby(['GameId','PlayId'])\
                         .agg({'voronoi_volumes':['min','max','mean','std','sum']})\
                         .reset_index()

        defense.columns = ['GameId','PlayId','def_vor_area_min','def_vor_area_max','def_vor_area_mean','def_vor_area_std','def_vor_area_sum']
        return defense
    
    def defense_only_vor_area(df):
        """
        Voronoi-cell area statistics when only the opposing team and the rusher are in
        the diagram.

        ``voronoi_volumes`` is evaluated once per play on the rows of the opposing team
        plus the rusher; the resulting areas (the rusher's own cell included) are then
        summarised.

        Returns:
            DataFrame with one row per (GameId, PlayId) and the min / max / mean / std /
            sum of those areas (``def_only_vor_area_min`` ... ``def_only_vor_area_sum``).
        """
        rusher = df.loc[df['NflId'] == df['NflIdRusher'], ['GameId','PlayId','Team','X','Y']].copy()
        rusher.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY']

        defense = pd.merge(df,rusher,on=['GameId','PlayId'],how='inner')
        # .copy(): a column is added below; the merge already produced a new frame, so
        # the former full copy of `df` is not needed.
        defense = defense[(defense['Team'] != defense['RusherTeam'])
                          | (defense['NflId'] == defense['NflIdRusher'])].copy()

        # Fill a float buffer by row position. Each play's rows are handed to the helper
        # directly, so it no longer has to mask the whole frame once per play, and no
        # float values are written into an int-initialised column.
        volumes = np.zeros(len(defense))
        for play, rows in defense.groupby('PlayId', sort=False).indices.items():
            volumes[rows] = voronoi_volumes(play, defense.iloc[rows])
        defense['def_only_voronoi_volumes'] = volumes
        defense['def_only_voronoi_volumes'] = defense['def_only_voronoi_volumes'].fillna(0)

        defense = defense.groupby(['GameId','PlayId'])\
                         .agg({'def_only_voronoi_volumes':['min','max','mean','std','sum']})\
                         .reset_index()

        defense.columns = ['GameId','PlayId','def_only_vor_area_min','def_only_vor_area_max','def_only_vor_area_mean','def_only_vor_area_std','def_only_vor_area_sum']
        return defense
        

    def defense_to_line_features(df):
        """
        Signed X distance (X - YardLine) of each opposing-team player to the yard line,
        summarised per play as min / max / mean / std.
        """
        carrier_rows = df['NflId'] == df['NflIdRusher']
        ball_carrier = df[carrier_rows][['GameId','PlayId','Team','X','Y']]
        ball_carrier.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY']

        merged = pd.merge(df, ball_carrier, on=['GameId','PlayId'], how='inner')
        on_other_team = merged['Team'] != merged['RusherTeam']
        defenders = merged[on_other_team][['GameId','PlayId','X','Y','RusherX','RusherY', 'YardLine']]

        defenders['def_dist_to_line'] = defenders['X'] - defenders['YardLine']
        summary = defenders.groupby(['GameId','PlayId']).agg({'def_dist_to_line':['min','max','mean','std']})
        summary = summary.reset_index()
        summary.columns = ['GameId','PlayId','def_toline_min_dist','def_toline_max_dist','def_toline_mean_dist','def_toline_std_dist']

        return summary
    
    
    def defense_to_line_features_handoff(df):
        """
        Defense distance to line, based on the `X_handoff` coordinate
        (described by the original author as the position after 1s).

        For every player who is not on the rusher's team, compute
        `X_handoff - YardLine` and summarise it per play (min / max / mean / std).
        """
        keys = ['GameId','PlayId']

        # One row per play describing the rusher
        carrier = df.loc[df['NflId'] == df['NflIdRusher'], keys + ['Team','X_handoff','Y_handoff']]
        carrier = carrier.rename(columns={'Team': 'RusherTeam',
                                          'X_handoff': 'RusherX',
                                          'Y_handoff': 'RusherY'})

        merged = pd.merge(df, carrier, on=keys, how='inner')
        opponents = merged.loc[merged['Team'] != merged['RusherTeam'],
                               keys + ['X_handoff','Y_handoff','RusherX','RusherY','YardLine']]

        gaps = opponents.assign(def_dist_to_line=opponents['X_handoff'] - opponents['YardLine'])
        stats = gaps.groupby(keys)['def_dist_to_line']\
                    .agg(['min','max','mean','std'])\
                    .reset_index()
        stats.columns = ['GameId','PlayId',
                         'def_toline_min_dist_handoff','def_toline_max_dist_handoff',
                         'def_toline_mean_dist_handoff','def_toline_std_dist_handoff']

        return stats
    
    
    def defense_in_back_way(df):
        """
        number of defense players in rusher's way

        A defender (player not on the rusher's team) is counted when
          * `back_moving_lower == 1` and his Y is less than or equal to the rusher's Y, or
          * `back_moving_lower == 0` and his Y is greater than the rusher's Y.
        `back_moving_lower` is the value `move_direction` returns for the rusher's `Dir`.

        Returns one row per (GameId, PlayId) with column `defense_in_back_way_count`.
        """
        keys = ['GameId','PlayId']

        # Explicit copies: new columns are added below, and assigning on a
        # filtered slice triggers pandas' SettingWithCopyWarning.
        rusher = df.loc[df['NflId'] == df['NflIdRusher'], keys + ['Team','X','Y','Dir']].copy()
        rusher['back_moving_lower'] = rusher['Dir'].apply(lambda x: move_direction(x))
        rusher.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY','RusherDir','back_moving_lower']

        defense = pd.merge(df, rusher, on=keys, how='inner')
        defense = defense.loc[defense['Team'] != defense['RusherTeam'],
                              keys + ['X','Y','Dir','RusherX','RusherY','back_moving_lower']].copy()
        defense['def_back_Y_diff'] = defense['Y'] - defense['RusherY']

        # Vectorised form of the former row-wise lambda; identical truth table
        # (NaN compares False on every branch, so such rows count as 0), and it
        # also works when `defense` has no rows.
        heading_lower = defense['back_moving_lower'] == 1
        heading_upper = defense['back_moving_lower'] == 0
        in_way = (heading_lower & (defense['def_back_Y_diff'] <= 0)) \
                 | (heading_upper & (defense['def_back_Y_diff'] > 0))
        defense['defense_in_back_way'] = in_way.astype('int64')

        counts = defense.groupby(keys)['defense_in_back_way'].sum().reset_index()
        counts.columns = ['GameId','PlayId','defense_in_back_way_count']
        return counts
    
    def defense_speed(df):
        """
        Defense players speed features

        Per play, the min / max / mean of `S` over the players who are not on
        the rusher's team.

        The previous version also selected `A`, built an `AcceleratedSpeed`
        (`S + A`) column and carried the rusher's X/Y, but none of these reached
        the output; that dead code is removed, the result is unchanged.

        Returns one row per (GameId, PlayId) with columns
        `Defense_S_min`, `Defense_S_max`, `Defense_S_mean`.
        """
        keys = ['GameId','PlayId']

        # Only the rusher's team is needed to tell offense from defense
        rusher_team = df.loc[df['NflId'] == df['NflIdRusher'], keys + ['Team']]
        rusher_team = rusher_team.rename(columns={'Team': 'RusherTeam'})

        merged = pd.merge(df, rusher_team, on=keys, how='inner')
        defenders = merged.loc[merged['Team'] != merged['RusherTeam'], keys + ['S']]

        stats = defenders.groupby(keys)['S'].agg(['min','max','mean']).reset_index()
        stats.columns = ['GameId','PlayId','Defense_S_min','Defense_S_max','Defense_S_mean']
        return stats
    
    def static_features(df):
        """
        Play-level features taken from the rusher's row of each play.

        Missing `DefendersInTheBox`, the home-minus-visitor score difference and
        the `DefendersInTheBox / Distance` ratio are filled with their column mean.
        `Quarter` is recoded to 0 for quarters 1 and 3 and 1 otherwise;
        `Season` is recoded to 1 for 2017 and 2 otherwise.
        """
        wanted = ['GameId','PlayId','Dis','YardLine','Quarter','Down',
                  'Distance','DefendersInTheBox','Season','HomeScoreBeforePlay','VisitorScoreBeforePlay']
        rusher_rows = df['NflId'] == df['NflIdRusher']
        feats = df.loc[rusher_rows, wanted].drop_duplicates()

        # Mean imputation of the box count
        box = feats['DefendersInTheBox']
        feats['DefendersInTheBox'] = box.fillna(np.mean(box))

        # Score difference before the play (home - visitor)
        score_gap = feats['HomeScoreBeforePlay'] - feats['VisitorScoreBeforePlay']
        feats['ScoreDiffBeforePlay'] = score_gap.fillna(np.mean(score_gap))

        # Defenders in the box relative to the distance to go
        box_per_yard = feats['DefendersInTheBox'] / feats['Distance']
        feats['DefendersInTheBox_vs_Distance'] = box_per_yard.fillna(np.mean(box_per_yard))

        # Quarter adjustment: quarters 1 and 3 -> 0, everything else -> 1
        feats['Quarter'] = feats['Quarter'].apply(lambda q: 0 if q in (1, 3) else 1)

        # Season adjustment: 2017 -> 1, any other season -> 2
        feats['Season'] = feats['Season'].apply(lambda s: 2 if s != 2017 else 1)

        return feats

    def defense_closest_player_feature(df):
        """
        closest defense features

        For each play, pick the player of the non-rushing team with the smallest
        `euclidean_distance` to the rusher and derive:
          * his S, A, X, Y, Dir (renamed with the `closest_def_` prefix),
          * `closest_def_RB_Dir`: absolute difference between his Dir and the
            rusher's Dir, folded into the range 0 to 180,
          * `closest_def_time_to_RB`: distance / S (mean-filled when S is 0),
          * `closest_def_s_to_RB_s`: S / rusher S (mean-filled when rusher S is 0),
          * x / y offsets to the rusher, and momentum / force terms.

        Returns one row per (GameId, PlayId).
        """
        keys = ['GameId','PlayId']

        rusher = df.loc[df['NflId'] == df['NflIdRusher'], keys + ['Team','X','Y','S','Dir']].copy()
        rusher.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY','RusherS','RusherDir']

        defense = pd.merge(df, rusher, on=keys, how='inner')
        defense = defense.loc[defense['Team'] != defense['RusherTeam'],
                              keys + ['X','Y','S','A','Dir','PlayerWeight','RusherX','RusherY','RusherS','RusherDir']].copy()

        # Access row values by label: integer keys such as row[0] on a
        # label-indexed row rely on a positional fallback deprecated in pandas 2.1.
        defense['off_dist_to_back'] = defense[['X','Y','RusherX','RusherY']].apply(
            lambda row: euclidean_distance(row['X'], row['Y'], row['RusherX'], row['RusherY']), axis=1)

        closest = defense.loc[defense.groupby(keys)['off_dist_to_back'].idxmin()].copy()

        # Angular gap between the two movement directions, folded into the range 0 to 180
        dir_gap = (closest['Dir'] - closest['RusherDir']).abs()
        closest['closest_def_RB_Dir'] = dir_gap.where(dir_gap <= 180, 360 - dir_gap)

        # A zero speed gives NaN (not inf); NaN is then replaced by the column mean
        time_to_rb = closest['off_dist_to_back'] / closest['S'].where(closest['S'] != 0)
        closest['closest_def_time_to_RB'] = time_to_rb.fillna(value=time_to_rb.mean())

        speed_ratio = closest['S'] / closest['RusherS'].where(closest['RusherS'] != 0)
        closest['closest_def_s_to_RB_s'] = speed_ratio.fillna(value=speed_ratio.mean())

        closest['closest_defense_rusher_xdiff'] = closest['X'] - closest['RusherX']
        closest['closest_defense_rusher_ydiff'] = closest['Y'] - closest['RusherY']

        # 0.45359237 is the pound-to-kilogram factor and 0.9144 the yard-to-metre factor.
        # NOTE(review): dividing the mass by g is kept exactly as before so the
        # feature values do not change - confirm it matches the rusher-side features.
        weight_term = closest['PlayerWeight'] * 0.45359237 / scipy.constants.g
        closest['closest_def_momentum'] = weight_term * closest['S'] * 0.9144
        closest['closest_def_force'] = weight_term * closest['A'] * 0.9144

        closest = closest[['GameId','PlayId','S','A','X','Y','Dir',
                           'closest_def_time_to_RB','closest_def_s_to_RB_s',
                           'closest_defense_rusher_xdiff','closest_defense_rusher_ydiff',
                           'closest_def_RB_Dir','closest_def_momentum','closest_def_force']]

        return closest.rename(columns={'S': 'closest_def_S', 'A': 'closest_def_A',
                                       'X': 'closest_def_X', 'Y': 'closest_def_Y',
                                       'Dir': 'closest_def_Dir'})
        
    def defense_closest_player_feature_handoff(df):
        """
        closest defense features after 1 s

        Same idea as the non-handoff variant, but the closest defender is found
        with the `X_handoff` / `Y_handoff` coordinates. For each play it returns:
          * `closest_def_time_to_RB_handoff`: distance / S (mean-filled when S is 0),
          * `closest_def_s_to_RB_s_handoff`: S / rusher S (mean-filled when rusher S is 0),
          * `closest_defense_rusher_xdiff_handoff` / `..._ydiff_handoff`,
          * `closest_def_RB_Dir_handoff`: absolute difference between the two
            `Dir` values, folded into the range 0 to 180.
        """
        keys = ['GameId','PlayId']

        rusher = df.loc[df['NflId'] == df['NflIdRusher'],
                        keys + ['Team','X_handoff','Y_handoff','S','Dir']].copy()
        rusher.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY','RusherS','RusherDir']

        defense = pd.merge(df, rusher, on=keys, how='inner')
        defense = defense.loc[defense['Team'] != defense['RusherTeam'],
                              keys + ['X_handoff','Y_handoff','S','A','Dir','RusherX','RusherY','RusherS','RusherDir']].copy()

        # Access row values by label: integer keys such as row[0] on a
        # label-indexed row rely on a positional fallback deprecated in pandas 2.1.
        defense['off_dist_to_back_handoff'] = defense[['X_handoff','Y_handoff','RusherX','RusherY']].apply(
            lambda row: euclidean_distance(row['X_handoff'], row['Y_handoff'], row['RusherX'], row['RusherY']), axis=1)

        closest = defense.loc[defense.groupby(keys)['off_dist_to_back_handoff'].idxmin()].copy()

        # Angular gap between the two movement directions, folded into the range 0 to 180
        dir_gap = (closest['Dir'] - closest['RusherDir']).abs()
        closest['closest_def_RB_Dir_handoff'] = dir_gap.where(dir_gap <= 180, 360 - dir_gap)

        # A zero speed gives NaN (not inf); NaN is then replaced by the column mean
        time_to_rb = closest['off_dist_to_back_handoff'] / closest['S'].where(closest['S'] != 0)
        closest['closest_def_time_to_RB_handoff'] = time_to_rb.fillna(value=time_to_rb.mean())

        speed_ratio = closest['S'] / closest['RusherS'].where(closest['RusherS'] != 0)
        closest['closest_def_s_to_RB_s_handoff'] = speed_ratio.fillna(value=speed_ratio.mean())

        closest['closest_defense_rusher_xdiff_handoff'] = closest['X_handoff'] - closest['RusherX']
        closest['closest_defense_rusher_ydiff_handoff'] = closest['Y_handoff'] - closest['RusherY']

        # The selected names are already the final output names
        return closest[['GameId','PlayId',
                        'closest_def_time_to_RB_handoff','closest_def_s_to_RB_s_handoff',
                        'closest_defense_rusher_xdiff_handoff','closest_defense_rusher_ydiff_handoff',
                        'closest_def_RB_Dir_handoff']]
    
    def offense_closest_player_feature(df):
        """
        closest offense features

        For each play, pick the rusher's teammate (same team, not the rusher
        himself) with the smallest `euclidean_distance` to the rusher and return
        his S, A, X, Y, Dir (with the `closest_off_` prefix), his x / y offsets
        to the rusher, and momentum / force terms.
        """
        keys = ['GameId','PlayId']

        rusher = df.loc[df['NflId'] == df['NflIdRusher'], keys + ['Team','X','Y','S','Dir']].copy()
        rusher.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY','RusherS','RusherDir']

        merged = pd.merge(df, rusher, on=keys, how='inner')
        same_team = merged['Team'] == merged['RusherTeam']
        not_rusher = merged['NflId'] != merged['NflIdRusher']
        teammates = merged.loc[same_team & not_rusher,
                               keys + ['X','Y','S','A','Dir','PlayerWeight','RusherX','RusherY','RusherS','RusherDir']].copy()

        # Access row values by label: integer keys such as row[0] on a
        # label-indexed row rely on a positional fallback deprecated in pandas 2.1.
        teammates['off_dist_to_back'] = teammates[['X','Y','RusherX','RusherY']].apply(
            lambda row: euclidean_distance(row['X'], row['Y'], row['RusherX'], row['RusherY']), axis=1)

        closest = teammates.loc[teammates.groupby(keys)['off_dist_to_back'].idxmin()].copy()

        closest['closest_offense_rusher_xdiff'] = closest['X'] - closest['RusherX']
        closest['closest_offense_rusher_ydiff'] = closest['Y'] - closest['RusherY']

        # 0.45359237 is the pound-to-kilogram factor and 0.9144 the yard-to-metre factor.
        # NOTE(review): dividing the mass by g is kept exactly as before so the
        # feature values do not change - confirm it matches the rusher-side features.
        weight_term = closest['PlayerWeight'] * 0.45359237 / scipy.constants.g
        closest['closest_off_momentum'] = weight_term * closest['S'] * 0.9144
        closest['closest_off_force'] = weight_term * closest['A'] * 0.9144

        closest = closest[['GameId','PlayId','S','A','X','Y','Dir',
                           'closest_offense_rusher_xdiff','closest_offense_rusher_ydiff',
                           'closest_off_momentum','closest_off_force']]

        return closest.rename(columns={'S': 'closest_off_S', 'A': 'closest_off_A',
                                       'X': 'closest_off_X', 'Y': 'closest_off_Y',
                                       'Dir': 'closest_off_Dir'})
    
    
    def offense_closest_player_feature_handoff(df):
        """
        closest offense features after 1s

        Same idea as the non-handoff variant, but the closest teammate is found
        with the `X_handoff` / `Y_handoff` coordinates. Returns, per play, his
        handoff coordinates (as `closest_off_X_handoff` / `closest_off_Y_handoff`),
        his x / y offsets to the rusher, and momentum / force terms.
        """
        keys = ['GameId','PlayId']

        rusher = df.loc[df['NflId'] == df['NflIdRusher'],
                        keys + ['Team','X_handoff','Y_handoff','S','Dir']].copy()
        rusher.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY','RusherS','RusherDir']

        merged = pd.merge(df, rusher, on=keys, how='inner')
        same_team = merged['Team'] == merged['RusherTeam']
        not_rusher = merged['NflId'] != merged['NflIdRusher']
        teammates = merged.loc[same_team & not_rusher,
                               keys + ['X_handoff','Y_handoff','S','A','Dir','PlayerWeight',
                                       'RusherX','RusherY','RusherS','RusherDir']].copy()

        # Access row values by label: integer keys such as row[0] on a
        # label-indexed row rely on a positional fallback deprecated in pandas 2.1.
        teammates['off_dist_to_back'] = teammates[['X_handoff','Y_handoff','RusherX','RusherY']].apply(
            lambda row: euclidean_distance(row['X_handoff'], row['Y_handoff'], row['RusherX'], row['RusherY']), axis=1)

        closest = teammates.loc[teammates.groupby(keys)['off_dist_to_back'].idxmin()].copy()

        closest['closest_offense_rusher_xdiff_handoff'] = closest['X_handoff'] - closest['RusherX']
        closest['closest_offense_rusher_ydiff_handoff'] = closest['Y_handoff'] - closest['RusherY']

        # 0.45359237 is the pound-to-kilogram factor and 0.9144 the yard-to-metre factor.
        # NOTE(review): dividing the mass by g is kept exactly as before so the
        # feature values do not change - confirm it matches the rusher-side features.
        weight_term = closest['PlayerWeight'] * 0.45359237 / scipy.constants.g
        closest['closest_off_momentum_handoff'] = weight_term * closest['S'] * 0.9144
        closest['closest_off_force_handoff'] = weight_term * closest['A'] * 0.9144

        closest = closest[['GameId','PlayId','X_handoff','Y_handoff',
                           'closest_offense_rusher_xdiff_handoff','closest_offense_rusher_ydiff_handoff',
                           'closest_off_momentum_handoff','closest_off_force_handoff']]

        return closest.rename(columns={'X_handoff': 'closest_off_X_handoff',
                                       'Y_handoff': 'closest_off_Y_handoff'})
    
    
    
    def combine_features(static_feats, back_feats, all_dis_to_back, off_to_back, off_to_back_handoff, def_to_back, def_to_back_handoff, def_to_line,def_to_line_handoff,
                         bowen_feature, def_in_back_way_count, def_speed,def_vor_area, def_only_vor_area, 
                         def_closest_features, def_closest_features_handoff, off_closest_features, off_closest_features_handoff,
                         deploy=deploy):
        """
        Inner-join every feature table on (GameId, PlayId), starting from `static_feats`.

        The tables are merged in argument order; that order decides which side
        receives pandas' `_x` / `_y` suffix when two tables share a column name.
        When `deploy` is falsy the `outcomes` table is joined last.
        """
        join_keys = ['GameId','PlayId']

        tables_in_order = [
            back_feats,                     # game feature join on back_features
            all_dis_to_back,                # all distance to back
            off_to_back,                    # offense distance to back
            off_to_back_handoff,
            def_to_back,                    # defense distance to back
            def_to_back_handoff,
            def_to_line,                    # defense distance to yard line
            def_to_line_handoff,
            bowen_feature,                  # Bowen feature
            def_in_back_way_count,          # def_in_back_way_count
            def_speed,                      # defense speed
            def_vor_area,                   # defense voronoi area
            def_only_vor_area,
            def_closest_features,           # closest defense/offense features
            def_closest_features_handoff,
            off_closest_features,
            off_closest_features_handoff,
        ]

        df = static_feats
        for table in tables_in_order:
            df = pd.merge(df, table, on=join_keys, how='inner')

        if not deploy:
            df = pd.merge(df, outcomes, on=join_keys, how='inner')

        return df
    
    # Build every per-play feature table from the same player-level frame `df`.
    # The resulting tables are joined on ['GameId','PlayId'] by combine_features.
    # NOTE(review): several helpers called here are defined outside this section;
    # keep the call order unless they are confirmed not to modify `df`.

    # game level
    static_feats = static_features(df)
    
    # Player Level: rusher position related info
    back_feats = back_features(df)
    
    # All other players distance to rusher - using x y after 1s
    all_dis_to_back = all_distance_relative_to_back(df)
    
    # offense player info relative to the rusher
    off_to_back = offense_distance_relative_to_back(df)
    off_to_back_handoff = offense_distance_relative_to_back_handoff(df)
    
    # defense player info relative to the rusher
    def_to_back = defense_distance_relative_to_back(df)
    def_to_back_handoff = defense_distance_relative_to_back_handoff(df)
    
    # defense players' distance to the line (snapshot and handoff coordinates)
    def_to_line = defense_to_line_features(df)
    def_to_line_handoff = defense_to_line_features_handoff(df)
    
    # Bowen feature
    bowen_feature = Bowen_features(df)
    
    # number of defenders in the rusher's way
    def_in_back_way_count = defense_in_back_way(df)
    
    # defense speed statistics
    def_speed = defense_speed(df)
    
    # closest defender and closest offensive teammate to the rusher
    def_closest_features = defense_closest_player_feature(df)
    def_closest_features_handoff = defense_closest_player_feature_handoff(df)
    
    off_closest_features = offense_closest_player_feature(df)
    off_closest_features_handoff = offense_closest_player_feature_handoff(df)
    
    # voronoi features
    def_vor_area = defense_vor_area(df)
    def_only_vor_area = defense_only_vor_area(df)
    
    # Join everything; the argument order below is the merge order
    basetable = combine_features(static_feats, back_feats, all_dis_to_back, off_to_back, off_to_back_handoff, def_to_back, def_to_back_handoff, def_to_line, def_to_line_handoff,
                                 bowen_feature, def_in_back_way_count, def_speed, def_vor_area,def_only_vor_area, 
                                 def_closest_features, def_closest_features_handoff, off_closest_features, off_closest_features_handoff, deploy=deploy)
    
    # New feature
    basetable['def/off_X_travel_distance'] = basetable.apply(lambda x: x['def_X_travel_distance'] / x['off_X_travel_distance'] if x['off_X_travel_distance'] != 0 else x['def_X_travel_distance'], axis=1)
    basetable['def/off_Y_travel_distance'] = basetable.apply(lambda x: x['def_Y_travel_distance'] / x['off_Y_travel_distance'] if x['off_Y_travel_distance'] != 0 else x['def_Y_travel_distance'], axis=1)
    
    basetable['off_min_dist/handoff'] = basetable.apply(lambda x: x['off_min_dist'] / x['off_min_dist_handoff'] if x['off_min_dist_handoff'] != 0 else x['off_min_dist'], axis=1)
    basetable['def_min_dist/handoff'] = basetable.apply(lambda x: x['def_min_dist'] / x['def_min_dist_handoff'] if x['def_min_dist_handoff'] != 0 else x['def_min_dist'], axis=1)
    
    basetable['off_max_dist/handoff'] = basetable.apply(lambda x: x['off_max_dist'] / x['off_max_dist_handoff'] if x['off_max_dist_handoff'] != 0 else x['off_max_dist'], axis=1)
    basetable['def_max_dist/handoff'] = basetable.apply(lambda x: x['def_max_dist'] / x['def_max_dist_handoff'] if x['def_max_dist_handoff'] != 0 else x['def_max_dist'], axis=1)
    
    basetable['off_mean_dist/handoff'] = basetable.apply(lambda x: x['off_mean_dist'] / x['off_mean_dist_handoff'] if x['off_mean_dist_handoff'] != 0 else x['off_mean_dist'], axis=1)
    basetable['def_mean_dist/handoff'] = basetable.apply(lambda x: x['def_mean_dist'] / x['def_mean_dist_handoff'] if x['def_mean_dist_handoff'] != 0 else x['def_mean_dist'], axis=1)
    
    basetable['off_std_dist/handoff'] = basetable.apply(lambda x: x['off_std_dist'] / x['off_std_dist_handoff'] if x['off_std_dist_handoff'] != 0 else x['off_std_dist'], axis=1)
    basetable['def_std_dist/handoff'] = basetable.apply(lambda x: x['def_std_dist'] / x['def_std_dist_handoff'] if x['def_std_dist_handoff'] != 0 else x['def_std_dist'], axis=1)
    
    basetable['def_toline_min_dist/handoff'] = basetable.apply(lambda x: x['def_toline_min_dist'] / x['def_toline_min_dist_handoff'] if x['def_toline_min_dist_handoff'] != 0 else x['def_toline_min_dist'], axis=1)
    basetable['def_toline_max_dist/handoff'] = basetable.apply(lambda x: x['def_toline_max_dist'] / x['def_toline_max_dist_handoff'] if x['def_toline_max_dist_handoff'] != 0 else x['def_toline_max_dist'], axis=1)
    basetable['def_toline_mean_dist/handoff'] = basetable.apply(lambda x: x['def_toline_mean_dist'] / x['def_toline_mean_dist_handoff'] if x['def_toline_mean_dist_handoff'] != 0 else x['def_toline_mean_dist'], axis=1)
    basetable['def_toline_std_dist/handoff'] = basetable.apply(lambda x: x['def_toline_std_dist'] / x['def_toline_std_dist_handoff'] if x['def_toline_std_dist_handoff'] != 0 else x['def_toline_std_dist'], axis=1)

    basetable['closest_def_time_to_RB/handoff'] = basetable.apply(lambda x: x['closest_def_time_to_RB'] / x['closest_def_time_to_RB_handoff'] if x['closest_def_time_to_RB_handoff'] != 0 else x['closest_def_time_to_RB'], axis=1)
    basetable['closest_def_s_to_RB_s/handoff'] = basetable.apply(lambda x: x['closest_def_s_to_RB_s'] / x['closest_def_s_to_RB_s_handoff'] if x['closest_def_s_to_RB_s_handoff'] != 0 else x['closest_def_s_to_RB_s'], axis=1)
    basetable['closest_defense_rusher_xdiff/handoff'] = basetable.apply(lambda x: x['closest_defense_rusher_xdiff'] / x['closest_defense_rusher_xdiff_handoff'] if x['closest_defense_rusher_xdiff_handoff'] != 0 else x['closest_defense_rusher_xdiff'], axis=1)
    basetable['closest_defense_rusher_ydiff/handoff'] = basetable.apply(lambda x: x['closest_defense_rusher_ydiff'] / x['closest_defense_rusher_ydiff_handoff'] if x['closest_defense_rusher_ydiff_handoff'] != 0 else x['closest_defense_rusher_ydiff'], axis=1)
    basetable['closest_def_RB_Dir/handoff'] = basetable.apply(lambda x: x['closest_def_RB_Dir'] / x['closest_def_RB_Dir_handoff'] if x['closest_def_RB_Dir_handoff'] != 0 else x['closest_def_RB_Dir'], axis=1)
    
    basetable['back_def_momentum'] = basetable.apply(lambda x: x['back_momentum'] / x['closest_def_momentum'] if x['closest_def_momentum'] != 0 else x['back_momentum'], axis=1)
    basetable['back_def_force'] = basetable.apply(lambda x: x['back_force'] / x['closest_def_force'] if x['closest_def_force'] != 0 else x['back_force'], axis=1)
    
    # handle missing value
    null_col = basetable.isnull().sum()[basetable.isnull().sum()>0].index
    if len(null_col) >0:
        for col in null_col:
            basetable[col].fillna(value=basetable[col].dropna().mean(),inplace=True)
    else:
        pass

    return basetable

In [None]:
def transform_y(y_train):
    """
    One-hot encode yardage values into 199 classes.

    Row i of the returned (len(y_train), 199) array is all zeros except for a 1
    at column `99 + y_train[i]`.
    """
    one_hot = np.zeros((len(y_train), 199))
    # Iterating over a 2-D array yields row views, so writing to `row` updates `one_hot`
    for row, gained in zip(one_hot, y_train.tolist()):
        row[99 + gained] = 1
    return one_hot

In [None]:
%%time 
# Clean the raw training data and build the per-play feature table (timed by the cell magic).
# NOTE(review): the second argument is presumably the `deploy` flag - confirm against create_features.
train_basetable = create_features(Preprocessing(train), False)

In [None]:
import seaborn as sns
# Columns that must not be fed to the scaler / model.
# Every entry needs its own comma: Python silently concatenates adjacent string
# literals, which previously merged 'RB_BMI' and 'closest_def_S_y' into a single
# non-existent name, so neither column was actually excluded.
tem_exclude = ['GameId','PlayId','Yards','RB_BMI',
               'closest_def_S_y','closest_def_X_y','closest_def_A_y','closest_def_Y_y', # Duplicated columns
               # Distribution is bad
               'back_oriented_down_field','back_moving_down_field']
tem_include = [col for col in train_basetable.columns if col not in tem_exclude]

In [None]:
# Model inputs (every non-excluded column) and the regression target
X = train_basetable[tem_include].copy()
yards = train_basetable.Yards

# Standardise each input column; the scaled frame keeps the input column names
t_scaler = StandardScaler()
X_scaled = pd.DataFrame(t_scaler.fit_transform(X), columns=X.columns)

In [None]:
# One-hot encode the target: one row per play, 199 columns, a 1 at index 99 + yards
y = transform_y(yards)

## 4. Feature Selection

We mainly used `RFECV` (recursive feature elimination with cross-validation) from `sklearn.feature_selection` to select the features listed below.

In [None]:
# Selected model inputs, grouped by theme. The order of the names is preserved.
# NOTE(review): the `_x` names are presumably merge suffixes for column names
# shared by two feature tables - confirm they still exist if the merge order changes.
feature_select = [
    # Play context
    'Dis', 'YardLine', 'Season', 'DefendersInTheBox_vs_Distance',

    # Defense speed
    'Defense_S_mean', 'Defense_S_max', 'Defense_S_min',

    # Closest defender
    'closest_def_S_x', 'closest_def_A_x', 'closest_def_X_x',
    'closest_defense_rusher_ydiff', 'closest_defense_rusher_xdiff',
    'closest_defense_rusher_ydiff_handoff', 'closest_defense_rusher_xdiff_handoff',
    'closest_def_s_to_RB_s', 'closest_def_Dir',
    'closest_def_RB_Dir', 'closest_def_RB_Dir_handoff',
    'closest_def_time_to_RB', 'closest_def_time_to_RB_handoff',
    'def_closest_time_max', 'def_closest_time_min',
    'defense_in_back_way_count',

    # RB features
    'RB_v_horizontal', 'RB_v_vertical', 'RB_travel_distance',
    'back_from_scrimmage', 'back_from_scrimmage_handoff',
    'back_Dir', 'back_X', 'back_S', 'back_A',

    # QB
    'qb_S',

    # Voronoi
    'def_vor_area_max', 'def_vor_area_std', 'def_vor_area_min',
    'def_only_vor_area_min', 'def_only_vor_area_std', 'def_only_vor_area_max',
    'voronoi_rb_area', 'voronoi_rb_area_def_only',

    # Travel distance
    'off_X_travel_distance', 'off_Y_travel_distance',
    'def_X_travel_distance', 'def_Y_travel_distance',
    'def/off_Y_travel_distance', 'def/off_X_travel_distance',

    'def_treat', 'off_dir_degree_std', 'def_dir_degree_std',

    # All players distance
    'all_min_dist', 'all_mean_dist',

    # Offense distance
    'off_min_dist', 'off_max_dist_handoff', 'off_mean_dist_handoff',
    'off_mean_dist/handoff', 'off_std_dist/handoff', 'off_min_dist/handoff',

    # Defense distance
    'def_min_dist', 'def_min_dist_handoff',
    'def_std_dist', 'def_std_dist_handoff',
    'def_mean_dist', 'def_mean_dist_handoff',
    'def_max_dist', 'def_max_dist_handoff',
    'def_max_dist/handoff', 'def_min_dist/handoff',
    'def_std_dist/handoff', 'def_mean_dist/handoff',

    # Distance to line
    'def_toline_std_dist', 'def_toline_mean_dist',
    'def_toline_min_dist', 'def_toline_max_dist',
    'def_toline_mean_dist_handoff', 'def_toline_max_dist_handoff',
    'def_toline_min_dist_handoff', 'def_toline_std_dist_handoff',

    'def_behind_off', 'def_behind_off_handoff',

    'def_core_convexhull', 'def_convexhull_handoff',

    # Momentum / force
    'closest_off_momentum_handoff', 'closest_off_momentum',
    'back_momentum', 'back_force',
]

print(len(feature_select))