In [1]:
import numpy as np
import pandas as pd
import sklearn.metrics as mtr
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import datetime
from string import punctuation
import re
import tensorflow_probability as tfp
from scipy.spatial import Voronoi

pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 150)



from keras.layers import Dense
from keras.models import Sequential
from keras.callbacks import Callback, EarlyStopping
from keras.layers import Dropout,  BatchNormalization,  Activation
from keras.optimizers import Adam

Using TensorFlow backend.


In [2]:
train = pd.read_csv('/kaggle/input/nfl-big-data-bowl-2020/train.csv', dtype={'WindSpeed': 'object'})


outcomes = train[['GameId','PlayId','Yards']].drop_duplicates()

In [3]:
cols_to_use = ['OffenseFormation']
of_index = {o:i for i,o in enumerate(train['OffenseFormation'].unique())}
#op_index = {o:i for i,o in enumerate(train['OffensePersonnel'].unique())}
#dp_index = {o:i for i,o in enumerate(train['DisplayName'].unique())}
week_index = {o:i for i,o in enumerate(train['Quarter'].unique())}

train['OffenseFormation'] = train['OffenseFormation'].apply(lambda x: of_index[x])
#train['OffensePersonnel'] = train['OffensePersonnel'].apply(lambda x: op_index[x])
#train['DisplayName'] = train['DisplayName'].apply(lambda x: dp_index[x])
train['Quarter'] = train['Quarter'].apply(lambda x: week_index[x])

In [4]:
def create_features_orig(df, deploy=False):
    def new_X(x_coordinate, play_direction):
        if play_direction == 'left':
            return 120.0 - x_coordinate
        else:
            return x_coordinate

    def new_line(rush_team, field_position, yardline):
        if rush_team == field_position:
            # offense starting at X = 0 plus the 10 yard endzone plus the line of scrimmage
            return 10.0 + yardline
        else:
            # half the field plus the yards between midfield and the line of scrimmage
            return 60.0 + (50 - yardline)

    def new_orientation(angle, play_direction):
        if play_direction == 'left':
            new_angle = 360.0 - angle
            if new_angle == 360.0:
                new_angle = 0.0
            return new_angle
        else:
            return angle

    def euclidean_distance(x1,y1,x2,y2):
        x_diff = (x1-x2)**2
        y_diff = (y1-y2)**2

        return np.sqrt(x_diff + y_diff)

    def back_direction(orientation):
        if orientation > 180.0:
            return 1
        else:
            return 0
    def get_voronoi(play_id, df=df):
        df = df[df.PlayId == play_id]
        xy = df[['std_X', 'std_Y']].values
        n_points = xy.shape[0]
        xy1 = xy.copy()
        xy1[:,1] = - xy[:,1]
        xy2 = xy.copy()
        xy2[:,1] = 320/3 - xy[:,1]
        xy3 = xy.copy()
        xy3[:,0] = 20 - xy[:,0]
        xy4 = xy.copy()
        xy4[:,0] = 220 - xy[:,0]
        xy = np.concatenate((xy, xy1, xy2, xy3, xy4), axis=0)
        offense = df.IsOnOffense.values

        rusher_val = df.IsRusher.values
        vor = Voronoi(xy)
        offense_area = 0
        defense_area = 0
        rusher_area = 0
        for r in range(n_points):
            region = vor.regions[vor.point_region[r]]


            if not -1 in region:
                polygon = [vor.vertices[i] for i in region]
                x_values, y_values = np.array([a[0] for a in polygon]),np.array([a[1] for a in polygon])
                if offense[r]:
                    offense_area += PolyArea(x_values, y_values)
                    if rusher_val[r]:
                        rusher_area = PolyArea(x_values, y_values)
                else:
                    defense_area += PolyArea(x_values, y_values)

        return offense_area, defense_area, rusher_area

    def PolyArea(x,y):
        return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1)))

    def update_yardline(df):
        new_yardline = df[df['NflId'] == df['NflIdRusher']]
        new_yardline['YardLine'] = new_yardline[['PossessionTeam','FieldPosition','YardLine']].apply(lambda x: new_line(x[0],x[1],x[2]), axis=1)
        new_yardline = new_yardline[['GameId','PlayId','YardLine']]

        return new_yardline

    def update_orientation(df, yardline):
     
        df['Orientation'] = df[['Orientation','PlayDirection']].apply(lambda x: new_orientation(x[0],x[1]), axis=1)
        df['Dir'] = df[['Dir','PlayDirection']].apply(lambda x: new_orientation(x[0],x[1]), axis=1)

        df = df.drop('YardLine', axis=1)
        df = pd.merge(df, yardline, on=['GameId','PlayId'], how='inner')

        return df
    
    def add_voronoi(df):
        df['OffenseTeam'] = np.repeat(df.loc[(df['NflId'] == df['NflIdRusher']), 'Team'].values, 22)
    
        df['std_X'] = df.X
        df.loc[df.PlayDirection == 'left', 'std_X'] = 120 - df.loc[train.PlayDirection == 'left', 'X'] 
        df['std_Y'] = df.Y
        df.loc[df.PlayDirection == 'left', 'std_X'] = 160/3 - df.loc[train.PlayDirection == 'left', 'Y'] 
        df['IsRusher'] = (df['NflIdRusher'] == df['NflId'])
        df['IsOnOffense'] = (df['Team'] == df['OffenseTeam'])


        voronoi_stats = []    
        for pid in df.PlayId.unique():
            voi = get_voronoi(pid, df)
            voronoi_stats.append(voi)
            
        df = pd.concat([df, pd.DataFrame(np.repeat(np.array(voronoi_stats), 22, axis=0), columns=['off_voi', 'def_voi', 'rusher_voi'])], axis=1)
            
        return df
 

    def projection_features(df):
        rad = 2 * np.pi * (90 - df[['Orientation']]) / 360
        v_0 = df['S'].values * np.cos(rad).values.reshape(-1)
        v_1 = np.sin(rad).values.reshape(-1)

        a_0 = df['A'].values * np.cos(rad).values.reshape(-1)
        a_1 = np.sin(rad)

        df['v_0'] = v_0
        df['v_1'] = v_1
        df['a_0'] = a_0
        df['a_1'] = a_1
        
        return df

    def back_features(df):
        carriers = df[df['NflId'] == df['NflIdRusher']][['GameId','PlayId','NflIdRusher','X','Y','Orientation','Dir','YardLine']]
        carriers['back_from_scrimmage'] = carriers['YardLine'] - carriers['X']
        carriers['back_oriented_down_field'] = carriers['Orientation'].apply(lambda x: back_direction(x))
        carriers['back_moving_down_field'] = carriers['Dir'].apply(lambda x: back_direction(x))
        carriers = carriers.rename(columns={'X':'back_X',
                                            'Y':'back_Y'})
        carriers = carriers[['GameId','PlayId','NflIdRusher','back_X','back_Y','back_from_scrimmage','back_oriented_down_field','back_moving_down_field']]

        return carriers

    def features_relative_to_back(df, carriers):
        player_distance = df[['GameId','PlayId','NflId','X','Y']]
        player_distance = pd.merge(player_distance, carriers, on=['GameId','PlayId'], how='inner')
        player_distance = player_distance[player_distance['NflId'] != player_distance['NflIdRusher']]
        player_distance['dist_to_back'] = player_distance[['X','Y','back_X','back_Y']].apply(lambda x: euclidean_distance(x[0],x[1],x[2],x[3]), axis=1)

        player_distance = player_distance.groupby(['GameId','PlayId','back_from_scrimmage','back_oriented_down_field','back_moving_down_field'])\
                                         .agg({'dist_to_back':['min','max','mean','std']})\
                                         .reset_index()
        player_distance.columns = ['GameId','PlayId','back_from_scrimmage','back_oriented_down_field','back_moving_down_field',
                                   'min_dist','max_dist','mean_dist','std_dist']

        return player_distance

    def defense_features(df):
        rusher = df[df['NflId'] == df['NflIdRusher']][['GameId','PlayId','Team','X','Y']]
        rusher.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY']

        defense = pd.merge(df,rusher,on=['GameId','PlayId'],how='inner')
        defense = defense[defense['Team'] != defense['RusherTeam']][['GameId','PlayId','X','Y','RusherX','RusherY']]
        defense['def_dist_to_back'] = defense[['X','Y','RusherX','RusherY']].apply(lambda x: euclidean_distance(x[0],x[1],x[2],x[3]), axis=1)

        defense = defense.groupby(['GameId','PlayId'])\
                         .agg({'def_dist_to_back':['min','max','mean','std']})\
                         .reset_index()
        defense.columns = ['GameId','PlayId','def_min_dist','def_max_dist','def_mean_dist','def_std_dist']


        return defense

    def static_features(df):
        static_features = df[df['NflId'] == df['NflIdRusher']][['GameId','PlayId','X','Y','S','A','Dis','Orientation','Dir','Quarter', 'Down',
                                                            'YardLine','Distance','DefendersInTheBox']].drop_duplicates()
        static_features['DefendersInTheBox'] = static_features['DefendersInTheBox'].fillna(np.mean(static_features['DefendersInTheBox']))

        return static_features
    
    def other_features(df):
        otf = df[['GameId','PlayId','NflId','X','Y', 'Team', 'A', 'PossessionTeam', 'S', 'NflIdRusher', 
                  'OffensePersonnel', 'DefensePersonnel', 'Dir', 'Dis', 'PlayerWeight', 'PlayerHeight',
                 'GameWeather', 'TimeHandoff', 'TimeSnap', 'PlayerBirthDate', 'StadiumType']]
        
        otf['IsRusher'] = (otf['NflId'] == otf['NflIdRusher'])
        otf['OffenseTeam'] = np.repeat(otf.loc[otf['IsRusher']==True, 'Team'].values, 22)
        

        
        otf['IsOnOffense'] = (otf['Team'] == otf['OffenseTeam'])
        otf['IsOnOffense'] = otf['IsOnOffense'].apply(int)



        
        average_offense_x = np.mean(otf.loc[otf['IsOnOffense'] == 1, 'X'].values.reshape((-1, 11)), axis=1)
        try:
            otf['OffenseCentroid_X'] = np.repeat(average_offense_x, 22)
        except:
            print(average_offense_x)
            print(otf.shape)
            print(otf.head())

        average_defense_x = np.mean(otf.loc[otf['IsOnOffense'] == 0, 'X'].values.reshape((-1, 11)), axis=1)
        otf['DefenseCentroid_X'] = np.repeat(average_defense_x, 22)\

        average_offense_y = np.mean(otf.loc[otf['IsOnOffense'] == 1, 'Y'].values.reshape((-1, 11)), axis=1)
        otf['OffenseCentroid_Y'] = np.repeat(average_offense_x, 22)

        average_defense_x = np.mean(otf.loc[otf['IsOnOffense'] == 0, 'Y'].values.reshape((-1, 11)), axis=1)
        otf['DefenseCentroid_Y'] = np.repeat(average_defense_x, 22)

      

        def find_rusher_distance_to_ocent(row):

            return np.sqrt((row['X'] - row['OffenseCentroid_X'])**2 + (row['Y'] - row['OffenseCentroid_Y'])**2)

        def find_rusher_distance_to_dcent(row):
            return np.sqrt((row['X'] - row['DefenseCentroid_X'])**2 + (row['Y'] - row['DefenseCentroid_Y'])**2)

        rusher_centroids = otf.loc[otf['IsRusher'] == True, ['X', 'Y', 'OffenseCentroid_X','OffenseCentroid_Y','DefenseCentroid_X','DefenseCentroid_Y' ]]
        otf['runner_distance_to_ocent'] = np.repeat(rusher_centroids.apply(find_rusher_distance_to_ocent, axis=1).values, 22)
        otf['runner_distance_to_dcent'] = np.repeat(rusher_centroids.apply(find_rusher_distance_to_dcent, axis=1).values, 22)
       
        otf['time_to_tackle'] = 0
        
        otf['S'] = otf['S'].replace(to_replace=0, value=otf['S'].mean())
        
        runner_speeds = otf.loc[otf['IsRusher'] == True,'S']
        otf['RunnerSpeed'] =  np.repeat(np.array(runner_speeds),22)

        otf['RunnerSpeed'] = otf['RunnerSpeed'].replace(to_replace=0, value=otf['RunnerSpeed'].mean())
        
        otf['Rusher_X'] =  np.repeat(rusher_centroids['X'].values,22)
        otf['Rusher_Y'] =  np.repeat(rusher_centroids['Y'].values,22)

        
        def find_distance_from_runner(row):
            return np.sqrt((row['X'] - row['Rusher_X'])**2 + (row['Y'] - row['Rusher_Y'])**2)

        otf['distance_from_runner'] = otf.apply(find_distance_from_runner, axis=1)

        otf.loc[otf['IsOnOffense']==0, 'time_to_tackle'] = otf[otf['IsOnOffense']==0].apply(lambda row: row['distance_from_runner']/(row['S'] + row['RunnerSpeed']), axis=1)
        otf['min_time_to_tackle'] = np.repeat(np.min(otf.loc[otf['IsOnOffense']==0, 'time_to_tackle'].values.reshape(-1, 11), axis=1), 22)
        otf['average_time_to_tackle'] = np.repeat(np.mean(otf.loc[otf['IsOnOffense']==0, 'time_to_tackle'].values.reshape(-1, 11), axis=1), 22)
        
        defense_x_indexes = np.where(otf['IsOnOffense'] == 0)[0][::11]
        
        closest_defendor_indexes = np.argmin(otf.loc[otf['IsOnOffense'] == 0,'distance_from_runner'].values.reshape(-1,11), axis=1) + defense_x_indexes
        otf['IsClosestDefendor'] = 0
        otf.loc[closest_defendor_indexes, 'IsClosestDefendor'] = 1

        runner_speeds = otf.loc[otf['IsRusher'] == True,'S']
        firstdefensor_speeds = otf.loc[otf['IsClosestDefendor'] == 1,'S']
        firstdefensor_speeds.replace([np.nan, 0], 1)
        otf['runner_vs_1stdefensor_speed'] = np.repeat(runner_speeds.values / firstdefensor_speeds.values ,22)
        otf['runner_vs_1stdefensor_speed'].fillna(np.mean(otf['runner_vs_1stdefensor_speed'].values))
        

        
        
        otf['off_average_acc'] = np.repeat(np.mean(otf.loc[otf['IsOnOffense']==1, 'A'].values.reshape(-1, 11), axis=1), 22)
        otf['off_std_acc'] = np.repeat(np.std(otf.loc[otf['IsOnOffense']==1, 'A'].values.reshape(-1, 11), axis=1), 22)
        otf['def_average_acc'] = np.repeat(np.mean(otf.loc[otf['IsOnOffense']==0, 'A'].values.reshape(-1, 11), axis=1), 22)
        otf['def_std_acc'] = np.repeat(np.std(otf.loc[otf['IsOnOffense']==0, 'A'].values.reshape(-1, 11), axis=1), 22)
        
        otf['off_average_speed'] = np.repeat(np.mean(otf.loc[otf['IsOnOffense']==1, 'S'].values.reshape(-1, 11), axis=1), 22)
        otf['def_average_speed'] = np.repeat(np.mean(otf.loc[otf['IsOnOffense']==0, 'S'].values.reshape(-1, 11), axis=1), 22)
        
        runner_speeds = otf.loc[otf['IsRusher'] == True,'A']
        otf['1stdefensor_acc'] = np.repeat(otf.loc[otf['IsClosestDefendor'] == True,'A'].values, 22)
        firstdefensor_speeds = otf.loc[otf['IsClosestDefendor'] == 1,'A']
        firstdefensor_speeds.replace([np.nan, 0], 1, inplace=True)
        otf['runner_vs_1stdefensor_acc'] = np.repeat(runner_speeds.values / firstdefensor_speeds.values ,22)
        otf['runner_vs_1stdefensor_acc'].replace([np.nan, np.inf, -np.inf], 999, inplace=True)
     
     
        ###
        
        otf['1stdefensor_dir'] = np.repeat(otf.loc[otf['IsClosestDefendor'] == True,'Dir'].values, 22)
        
        otf['fe1'] = pd.Series(np.sqrt(np.absolute(np.square(otf.X.values) - np.square(otf.Y.values))))
        otf['fe5'] = np.square(otf['S'].values) + 2 * otf['A'].values * otf['Dis'].values  # N
        otf['fe7'] = np.arccos(np.clip(otf['X'].values / otf['Y'].values, -1, 1))  # N
        otf['fe8'] = otf['S'].values / np.clip(otf['fe1'].values, 0.6, None)
        radian_angle = (90 - otf['Dir']) * np.pi / 180.0
        otf['fe10'] = np.abs(otf['S'] * np.cos(radian_angle))
        otf['fe11'] = np.abs(otf['S'] * np.sin(radian_angle))
        otf['fe12'] = otf['S']*otf['PlayerWeight']

        features_to_take = ['GameId','PlayId', 
                            'runner_distance_to_ocent', 'runner_distance_to_dcent', 'min_time_to_tackle', 'average_time_to_tackle',
                             'off_average_acc', 'def_average_acc', 'off_average_speed', 'def_average_speed',
                              'runner_vs_1stdefensor_speed' ,
                            'IsRusher'
                            ] + ['fe1', 'fe5', 'fe7', 'fe8' , 'fe10', 'fe11','fe12']
        
    

        other_features = otf[features_to_take]

        return other_features
    


    def combine_features(relative_to_back, defense, static, speed_frame, other_feats, deploy=deploy):
        df = pd.merge(relative_to_back,defense,on=['GameId','PlayId'],how='inner')
        df = pd.merge(df,static,on=['GameId','PlayId'],how='inner')
        df = pd.merge(df, speed_frame, on=['GameId','PlayId'],how='inner')
        df = pd.merge(df, other_feats, on=['GameId','PlayId'],how='inner')
        #f = pd.merge(df, personnel_feats, on=['GameId','PlayId'],how='inner')
        if not deploy:
            df = pd.merge(df, outcomes, on=['GameId','PlayId'], how='inner')

        return df
    
    


    df = projection_features(df)
    speed_frame= df[['GameId','PlayId',
   
                     'a_0','a_1','v_0','v_1', 'Temperature', 'Humidity']]
    
    yardline = update_yardline(df)
    df = update_orientation(df, yardline)


  

                
    other_feats = other_features(df)
    back_feats = back_features(df)
    rel_back = features_relative_to_back(df, back_feats)
    def_feats = defense_features(df)
    static_feats = static_features(df)

    
    basetable = combine_features(rel_back, def_feats, static_feats,  speed_frame, other_feats,  deploy=deploy)
    basetable[['a_0','a_1','v_0','v_1']] = basetable[['a_0','a_1','v_0','v_1']].fillna(0)
    basetable['Temperature'] = basetable['Temperature'].fillna(60.436442)
    basetable['Humidity'] = basetable['Humidity'].fillna(59.0)
    basetable = basetable[basetable['IsRusher'] ==True]
    basetable.drop(['IsRusher'], axis=1, inplace=True)

 

    return basetable

In [5]:
def add_logs(basetable):
   
    log_basetable = pd.DataFrame(np.log(basetable.values), columns=['LOG_' + str(a) for a in basetable.columns])
    log_basetable.replace([np.inf, -np.inf], np.nan, inplace=True)
    #log_basetable.dropna(axis=1, inplace=True)
    log_basetable.fillna(0, inplace=True)



    basetable = pd.concat([pd.DataFrame(basetable.values, columns=basetable.columns), log_basetable], axis=1)
    
    return basetable

In [6]:
%time train_basetable1 = create_features_orig(train)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_gui

CPU times: user 3min 17s, sys: 25.2 s, total: 3min 42s
Wall time: 3min 35s


In [7]:
train_basetable1.fillna(0, inplace=True)

In [8]:
for col in train_basetable1.columns:
    if train_basetable1[col].dtype == 'object':
        train_basetable1.drop(col, axis=1, inplace=True)

In [9]:


%time train_basetable = add_logs(train_basetable1)



  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


CPU times: user 1.07 s, sys: 496 ms, total: 1.56 s
Wall time: 1.27 s


In [10]:
X = train_basetable.copy()
yards = X.Yards

y = np.zeros((yards.shape[0], 199))
for idx, target in enumerate(list(yards)):

    y[int(idx)][99 + int(target)] = 1

X.drop(['Yards', 'GameId', 'PlayId', 'LOG_GameId', 'LOG_PlayId','LOG_Yards'], axis=1, inplace=True)
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [11]:
class Metric(Callback):
    def __init__(self, model, callbacks, data):
        super().__init__()
        self.model = model
        self.callbacks = callbacks
        self.data = data

    def on_train_begin(self, logs=None):
        for callback in self.callbacks:
            callback.on_train_begin(logs)

    def on_train_end(self, logs=None):
        for callback in self.callbacks:
            callback.on_train_end(logs)

    def on_epoch_end(self, batch, logs=None):
#         X_train, y_train = self.data[0][0], self.data[0][1]
#         y_pred = self.model.predict(X_train)
#         y_true = np.clip(np.cumsum(y_train, axis=1), 0, 1)
#         y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
#         tr_s = ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * X_train.shape[0])
#         tr_s = np.round(tr_s, 6)
        logs['tr_CRPS'] = 0

        a_val,  X_valid,  y_valid = self.data[1][0][0], self.data[1][0][1], self.data[1][1]
        
        y_pred = self.model.predict([a_val, X_valid])
        
        def to_dist(val):
            y_pred = np.zeros((val.shape[0], 199))
            for idx, target in enumerate(list(val)):
                y_pred[int(idx)][99 + int(target):] = 1
            return y_pred
    
        y_true = np.clip(np.cumsum(y_valid, axis=1), 0, 1)
        y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
        val_s = ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * X_valid.shape[0])
        val_s = np.round(val_s, 6)
        logs['val_CRPS'] = val_s
        print('tr CRPS', 'Grr', 'val CRPS', val_s)

        for callback in self.callbacks:
            callback.on_epoch_end(batch, logs)

In [12]:
import keras.backend as K
from keras.layers import Input, Embedding, Reshape, Concatenate, Flatten, GaussianNoise
from keras.models import Model

In [13]:
from keras.models import Sequential
from keras import layers
from keras import backend as K


from keras.backend import sigmoid, tanh, softplus
def swish(x, beta = 1):
     return (x * sigmoid(beta * x))
    
def mish(x):
    return(x * tanh(softplus(x)))

from keras.utils.generic_utils import get_custom_objects

get_custom_objects().update({'swish': Activation(swish), 'mish':Activation(mish)})

In [14]:
from keras import regularizers

def get_model0():
    

    
    embeddings = []
    inputs = []
    #OffenseFormation
    input0 = Input(shape=(1,))
    embedding0 = Embedding(7, 3, input_length=1)(input0)
    embedding0 = Reshape(target_shape=(3,))(embedding0)
  
    inputs.append(input0)
    embeddings.append(embedding0)
    




    input4 = Input(shape=(X.shape[1],))
    inputs.append(input4)
    
    x = Concatenate()(inputs)


    x= Dense(512, activation='mish')(x)
    #model.add(ReLU())
    x = BatchNormalization()(x)
    
    x = Dropout(0.68)(x)
    
    x= Dense(256, activation='mish')(x)
    #model.add(ReLU())
    x = BatchNormalization()(x)
    
    x = Dropout(0.68)(x)

    x= Dense(199, activation='mish')(x)
    #model.add(ReLU())
    x = BatchNormalization()(x)
    
    x = Dropout(0.68)(x)


    
    output = Dense(199, activation='softmax')(x)
    
    model = Model(inputs, output)
  
    model.compile(optimizer=Adam(lr=0.002, clipvalue=0.5), loss='categorical_crossentropy', metrics=[])
    
    
   
    return model


K.clear_session()

In [15]:
from sklearn.model_selection import KFold, GroupKFold

In [16]:
models = []
k = 0

rkf = GroupKFold(11)
for fold_id, (tr_idx, vl_idx) in enumerate(rkf.split(train_basetable['GameId'],train_basetable['GameId'],train_basetable['GameId'])):
    print(fold_id)
    print(tr_idx)
    print(vl_idx)
    x_tr, y_tr = X[tr_idx], y[tr_idx]
    x_vl, y_vl = X[vl_idx], y[vl_idx]    
    
    a = train['OffenseFormation'].values[tr_idx]

    a_val = train['OffenseFormation'].values[vl_idx]


    model= eval("get_model" + str((k%1)) + "()")
    es = EarlyStopping(monitor='val_CRPS', 
                   mode='min',
                   restore_best_weights=True, 
                   verbose=1, 
                   patience=5)
    es.set_model(model)
    metric = Metric(model, [es], [([a,x_tr], y_tr), ([ a_val,  x_vl], y_vl)])
    


    model.fit([a,x_tr], y_tr, callbacks=[metric], epochs=250, batch_size=2048, validation_data=[[a_val, x_vl], y_vl])
    models.append(model)
    k+=1

0
[     0      1      2 ... 509759 509760 509761]
[  7172   7173   7174 ... 496713 496714 496715]
Train on 463628 samples, validate on 46134 samples
Epoch 1/250
tr CRPS Grr val CRPS 0.013282
Epoch 2/250
tr CRPS Grr val CRPS 0.013001
Epoch 3/250
tr CRPS Grr val CRPS 0.012884
Epoch 4/250
tr CRPS Grr val CRPS 0.012802
Epoch 5/250
tr CRPS Grr val CRPS 0.012754
Epoch 6/250
tr CRPS Grr val CRPS 0.012712
Epoch 7/250
tr CRPS Grr val CRPS 0.012679
Epoch 8/250
tr CRPS Grr val CRPS 0.012689
Epoch 9/250
tr CRPS Grr val CRPS 0.012683
Epoch 10/250
tr CRPS Grr val CRPS 0.012655
Epoch 11/250
tr CRPS Grr val CRPS 0.012632
Epoch 12/250
tr CRPS Grr val CRPS 0.012659
Epoch 13/250
tr CRPS Grr val CRPS 0.012653
Epoch 14/250
tr CRPS Grr val CRPS 0.012671
Epoch 15/250
tr CRPS Grr val CRPS 0.012669
Epoch 16/250
tr CRPS Grr val CRPS 0.012693
Restoring model weights from the end of the best epoch
Epoch 00016: early stopping
1
[     0      1      2 ... 509759 509760 509761]
[  2112   2113   2114 ... 485317 485318

In [17]:
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingClassifier
from sklearn.linear_model import LinearRegression,LogisticRegression
import datetime

In [18]:
X = train_basetable1.copy()
yards = X.Yards

y = np.zeros((yards.shape[0], 199))
for idx, target in enumerate(list(yards)):

    y[int(idx)][99 + int(target)] = 1

In [19]:
X.drop(['Yards', 'GameId', 'PlayId'], axis=1, inplace=True)
scaler1 = StandardScaler()
X = scaler1.fit_transform(X)

In [20]:
def crps(y_true, y_pred):
    y_true = np.clip(np.cumsum(y_true, axis=1), 0, 1)
    y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
    return ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * y_true.shape[0]) 

In [21]:
treemodels = []
kf = KFold(n_splits=3, random_state=42)
score = []
for i, (tdx, vdx) in enumerate(kf.split(X, y)):
    print(f'Fold : {i}')
    X_train, X_val, y_train, y_val = X[tdx], X[vdx], y[tdx], y[vdx]

    model = RandomForestRegressor(bootstrap=False, max_features=0.2, min_samples_leaf=260, min_samples_split=7, n_estimators=70, n_jobs=-1, random_state=42)
    model.fit(X_train, y_train)
    score_ = crps(y_val, model.predict(X_val))

    print(score_)
    score.append(score_)
    treemodels.append(model)

print(np.mean(score))

Fold : 0
0.01251392004466005
Fold : 1
0.012557834040438047
Fold : 2
0.014282314572460963
0.013118022885853021


In [22]:
import warnings
warnings.filterwarnings("ignore")

from kaggle.competitions import nflrush
env = nflrush.make_env()
iter_test = env.iter_test()


for (test_df, sample_prediction_df) in iter_test:

    basetable1 = create_features_orig(test_df, deploy=True)
    basetable1.fillna(0, inplace=True)
    basetable = add_logs(basetable1)
    basetable.drop(['GameId', 'PlayId', 'LOG_GameId', 'LOG_PlayId'], axis=1, inplace=True)

    scaled_basetable = scaler.transform(basetable)
    try:
        a = test_df['OffenseFormation'] = test_df['OffenseFormation'].apply(lambda x: of_index[x])
    except:
        test_df['OffenseFormation'] = np.nan
        test_df['OffenseFormation'].fillna(-1, inplace=True)
        a = test_df['OffenseFormation']

  
    p = [m.predict([a,scaled_basetable]) for m in models]
    
    y_pred_nn = (np.array(p)).mean(0)
    #np.expm1(np.log1p(np.array(p)).mean(0))
    
    basetable1.drop(['GameId', 'PlayId'], axis=1, inplace=True)
    scaled_basetable1 = scaler1.transform(basetable1)
    y_pred_tree = np.array([m.predict(scaled_basetable1) for m in treemodels]).mean(0)
    
   
    y_pred = (.85*y_pred_nn+.15*y_pred_tree)
    y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
    y_pred[:,:90] = 0
    #y_pred[:,122:] = 1
    y_pred = y_pred.tolist()[0]

    preds_df = pd.DataFrame(data=[y_pred], columns=sample_prediction_df.columns)
    env.predict(preds_df)


    
env.write_submission_file()

Your submission file has been saved!  Once you `Commit` your Notebook and it finishes running, you can submit the file to the competition from the Notebook Viewer `Output` tab.
