https://www.kaggle.com/bestpredict/location-eda-8eb410

In [1]:
import os
# os.environ['CUDA_VISIBLE_DEVICES'] = "1"


import numpy as np
import pandas as pd
import sklearn.metrics as mtr
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from keras.layers import Dense,Input,Flatten,concatenate,Dropout,Lambda,BatchNormalization
from keras.models import Model
import keras.backend as K
from keras.callbacks import Callback
from  keras.callbacks import EarlyStopping,ModelCheckpoint
import datetime

TRAIN_OFFLINE = False


pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 150)

Using TensorFlow backend.


In [2]:
train = pd.read_csv('../input/train.csv', dtype={'WindSpeed': 'object'})

In [3]:
outcomes = train[['GameId','PlayId','Yards']].drop_duplicates()

In [4]:
def strtoseconds(txt):
    txt = txt.split(':')
    ans = int(txt[0])*60 + int(txt[1]) + int(txt[2])/60
    return ans

def strtofloat(x):
    try:
        return float(x)
    except:
        return -1

def map_weather(txt):
    ans = 1
    if pd.isna(txt):
        return 0
    if 'partly' in txt:
        ans*=0.5
    if 'climate controlled' in txt or 'indoor' in txt:
        return ans*3
    if 'sunny' in txt or 'sun' in txt:
        return ans*2
    if 'clear' in txt:
        return ans
    if 'cloudy' in txt:
        return -ans
    if 'rain' in txt or 'rainy' in txt:
        return -2*ans
    if 'snow' in txt:
        return -3*ans
    return 0

def OffensePersonnelSplit(x):
    dic = {'DB' : 0, 'DL' : 0, 'LB' : 0, 'OL' : 0, 'QB' : 0, 'RB' : 0, 'TE' : 0, 'WR' : 0}
    for xx in x.split(","):
        xxs = xx.split(" ")
        dic[xxs[-1]] = int(xxs[-2])
    return dic

def DefensePersonnelSplit(x):
    dic = {'DB' : 0, 'DL' : 0, 'LB' : 0, 'OL' : 0}
    for xx in x.split(","):
        xxs = xx.split(" ")
        dic[xxs[-1]] = int(xxs[-2])
    return dic

def orientation_to_cat(x):
    x = np.clip(x, 0, 360 - 1)
    try:
        return str(int(x/15))
    except:
        return "nan"
def preprocess(train):
    ## GameClock
    train['GameClock_sec'] = train['GameClock'].apply(strtoseconds)
    train["GameClock_minute"] = train["GameClock"].apply(lambda x : x.split(":")[0]).astype("object")

    ## Height
    train['PlayerHeight_dense'] = train['PlayerHeight'].apply(lambda x: 12*int(x.split('-')[0])+int(x.split('-')[1]))

    ## Time
    train['TimeHandoff'] = train['TimeHandoff'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%dT%H:%M:%S.%fZ"))
    train['TimeSnap'] = train['TimeSnap'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%dT%H:%M:%S.%fZ"))

    train['TimeDelta'] = train.apply(lambda row: (row['TimeHandoff'] - row['TimeSnap']).total_seconds(), axis=1)
    train['PlayerBirthDate'] = train['PlayerBirthDate'].apply(lambda x: datetime.datetime.strptime(x, "%m/%d/%Y"))

    ## Age
    seconds_in_year = 60*60*24*365.25
    train['PlayerAge'] = train.apply(lambda row: (row['TimeHandoff']-row['PlayerBirthDate']).total_seconds()/seconds_in_year, axis=1)
    train["PlayerAge_ob"] = train['PlayerAge'].astype(np.int).astype("object")

    ## WindSpeed
    train['WindSpeed_ob'] = train['WindSpeed'].apply(lambda x: x.lower().replace('mph', '').strip() if not pd.isna(x) else x)
    train['WindSpeed_ob'] = train['WindSpeed_ob'].apply(lambda x: (int(x.split('-')[0])+int(x.split('-')[1]))/2 if not pd.isna(x) and '-' in x else x)
    train['WindSpeed_ob'] = train['WindSpeed_ob'].apply(lambda x: (int(x.split()[0])+int(x.split()[-1]))/2 if not pd.isna(x) and type(x)!=float and 'gusts up to' in x else x)
    train['WindSpeed_dense'] = train['WindSpeed_ob'].apply(strtofloat)

    ## Weather
    train['GameWeather_process'] = train['GameWeather'].str.lower()
    train['GameWeather_process'] = train['GameWeather_process'].apply(lambda x: "indoor" if not pd.isna(x) and "indoor" in x else x)
    train['GameWeather_process'] = train['GameWeather_process'].apply(lambda x: x.replace('coudy', 'cloudy').replace('clouidy', 'cloudy').replace('party', 'partly') if not pd.isna(x) else x)
    train['GameWeather_process'] = train['GameWeather_process'].apply(lambda x: x.replace('clear and sunny', 'sunny and clear') if not pd.isna(x) else x)
    train['GameWeather_process'] = train['GameWeather_process'].apply(lambda x: x.replace('skies', '').replace("mostly", "").strip() if not pd.isna(x) else x)
    train['GameWeather_dense'] = train['GameWeather_process'].apply(map_weather)

    ## Rusher
    train['IsRusher'] = (train['NflId'] == train['NflIdRusher'])
    train['IsRusher_ob'] = (train['NflId'] == train['NflIdRusher']).astype("object")
    temp = train[train["IsRusher"]][["Team", "PlayId"]].rename(columns={"Team":"RusherTeam"})
    train = train.merge(temp, on = "PlayId")
    train["IsRusherTeam"] = train["Team"] == train["RusherTeam"]

    ## dense -> categorical
    train["Quarter_ob"] = train["Quarter"].astype("object")
    train["Down_ob"] = train["Down"].astype("object")
    train["JerseyNumber_ob"] = train["JerseyNumber"].astype("object")
    train["YardLine_ob"] = train["YardLine"].astype("object")
    # train["DefendersInTheBox_ob"] = train["DefendersInTheBox"].astype("object")
    # train["Week_ob"] = train["Week"].astype("object")
    # train["TimeDelta_ob"] = train["TimeDelta"].astype("object")


    ## Orientation and Dir
    train["Orientation_ob"] = train["Orientation"].apply(lambda x : orientation_to_cat(x)).astype("object")
    train["Dir_ob"] = train["Dir"].apply(lambda x : orientation_to_cat(x)).astype("object")

    train["Orientation_sin"] = train["Orientation"].apply(lambda x : np.sin(x/360 * 2 * np.pi))
    train["Orientation_cos"] = train["Orientation"].apply(lambda x : np.cos(x/360 * 2 * np.pi))
    train["Dir_sin"] = train["Dir"].apply(lambda x : np.sin(x/360 * 2 * np.pi))
    train["Dir_cos"] = train["Dir"].apply(lambda x : np.cos(x/360 * 2 * np.pi))

    ## diff Score
    train["diffScoreBeforePlay"] = train["HomeScoreBeforePlay"] - train["VisitorScoreBeforePlay"]
    train["diffScoreBeforePlay_binary_ob"] = (train["HomeScoreBeforePlay"] > train["VisitorScoreBeforePlay"]).astype("object")

    ## Turf
    Turf = {'Field Turf':'Artificial', 'A-Turf Titan':'Artificial', 'Grass':'Natural', 'UBU Sports Speed S5-M':'Artificial', 'Artificial':'Artificial', 'DD GrassMaster':'Artificial', 'Natural Grass':'Natural', 'UBU Speed Series-S5-M':'Artificial', 'FieldTurf':'Artificial', 'FieldTurf 360':'Artificial', 'Natural grass':'Natural', 'grass':'Natural', 'Natural':'Natural', 'Artifical':'Artificial', 'FieldTurf360':'Artificial', 'Naturall Grass':'Natural', 'Field turf':'Artificial', 'SISGrass':'Artificial', 'Twenty-Four/Seven Turf':'Artificial', 'natural grass':'Natural'} 
    train['Turf'] = train['Turf'].map(Turf)

    ## OffensePersonnel
    temp = train["OffensePersonnel"].iloc[np.arange(0, len(train), 22)].apply(lambda x : pd.Series(OffensePersonnelSplit(x)))
    temp.columns = ["Offense" + c for c in temp.columns]
    temp["PlayId"] = train["PlayId"].iloc[np.arange(0, len(train), 22)]
    train = train.merge(temp, on = "PlayId")

    ## DefensePersonnel
    temp = train["DefensePersonnel"].iloc[np.arange(0, len(train), 22)].apply(lambda x : pd.Series(DefensePersonnelSplit(x)))
    temp.columns = ["Defense" + c for c in temp.columns]
    temp["PlayId"] = train["PlayId"].iloc[np.arange(0, len(train), 22)]
    train = train.merge(temp, on = "PlayId")

    ## sort
#     train = train.sort_values(by = ['X']).sort_values(by = ['Dis']).sort_values(by=['PlayId', 'Team', 'IsRusher']).reset_index(drop = True)
    train = train.sort_values(by = ['X']).sort_values(by = ['Dis']).sort_values(by=['PlayId', 'IsRusherTeam', 'IsRusher']).reset_index(drop = True)
    return train

In [5]:
def create_features(df, deploy=False):
    def new_X(x_coordinate, play_direction):
        if play_direction == 'left':
            return 120.0 - x_coordinate
        else:
            return x_coordinate

    def new_line(rush_team, field_position, yardline):
        if rush_team == field_position:
            # offense starting at X = 0 plus the 10 yard endzone plus the line of scrimmage
            return 10.0 + yardline
        else:
            # half the field plus the yards between midfield and the line of scrimmage
            return 60.0 + (50 - yardline)

    def new_orientation(angle, play_direction):
        if play_direction == 'left':
            new_angle = 360.0 - angle
            if new_angle == 360.0:
                new_angle = 0.0
            return new_angle
        else:
            return angle

    def euclidean_distance(x1,y1,x2,y2):
        x_diff = (x1-x2)**2
        y_diff = (y1-y2)**2

        return np.sqrt(x_diff + y_diff)

    def back_direction(orientation):
        if orientation > 180.0:
            return 1
        else:
            return 0

    def update_yardline(df):
        new_yardline = df[df['NflId'] == df['NflIdRusher']]
        new_yardline['YardLine'] = new_yardline[['PossessionTeam','FieldPosition','YardLine']].apply(lambda x: new_line(x[0],x[1],x[2]), axis=1)
        new_yardline = new_yardline[['GameId','PlayId','YardLine']]

        return new_yardline

    def update_orientation(df, yardline):
        df['X'] = df[['X','PlayDirection']].apply(lambda x: new_X(x[0],x[1]), axis=1)
        df['Orientation'] = df[['Orientation','PlayDirection']].apply(lambda x: new_orientation(x[0],x[1]), axis=1)
        df['Dir'] = df[['Dir','PlayDirection']].apply(lambda x: new_orientation(x[0],x[1]), axis=1)

        df = df.drop('YardLine', axis=1)
        df = pd.merge(df, yardline, on=['GameId','PlayId'], how='inner')

        return df

    def back_features(df):
        carriers = df[df['NflId'] == df['NflIdRusher']][['GameId','PlayId','NflIdRusher','X','Y','Orientation','Dir','YardLine']]
        carriers['back_from_scrimmage'] = carriers['YardLine'] - carriers['X']
        carriers['back_oriented_down_field'] = carriers['Orientation'].apply(lambda x: back_direction(x))
        carriers['back_moving_down_field'] = carriers['Dir'].apply(lambda x: back_direction(x))
        carriers = carriers.rename(columns={'X':'back_X',
                                            'Y':'back_Y'})
        carriers = carriers[['GameId','PlayId','NflIdRusher','back_X','back_Y','back_from_scrimmage','back_oriented_down_field','back_moving_down_field']]

        return carriers

    def features_relative_to_back(df, carriers):
        player_distance = df[['GameId','PlayId','NflId','X','Y']]
        player_distance = pd.merge(player_distance, carriers, on=['GameId','PlayId'], how='inner')
        player_distance = player_distance[player_distance['NflId'] != player_distance['NflIdRusher']]
        player_distance['dist_to_back'] = player_distance[['X','Y','back_X','back_Y']].apply(lambda x: euclidean_distance(x[0],x[1],x[2],x[3]), axis=1)

        player_distance = player_distance.groupby(['GameId','PlayId','back_from_scrimmage','back_oriented_down_field','back_moving_down_field'])\
                                         .agg({'dist_to_back':['min','max','mean','std']})\
                                         .reset_index()
        player_distance.columns = ['GameId','PlayId','back_from_scrimmage','back_oriented_down_field','back_moving_down_field',
                                   'min_dist','max_dist','mean_dist','std_dist']

        return player_distance

    def defense_features(df):
        rusher = df[df['NflId'] == df['NflIdRusher']][['GameId','PlayId','Team','X','Y']]
        rusher.columns = ['GameId','PlayId','RusherTeam','RusherX','RusherY']

        defense = pd.merge(df,rusher,on=['GameId','PlayId'],how='inner')
        defense = defense[defense['Team'] != defense['RusherTeam']][['GameId','PlayId','X','Y','RusherX','RusherY']]
        defense['def_dist_to_back'] = defense[['X','Y','RusherX','RusherY']].apply(lambda x: euclidean_distance(x[0],x[1],x[2],x[3]), axis=1)

        defense = defense.groupby(['GameId','PlayId'])\
                         .agg({'def_dist_to_back':['min','max','mean','std']})\
                         .reset_index()
        defense.columns = ['GameId','PlayId','def_min_dist','def_max_dist','def_mean_dist','def_std_dist']

        return defense

    def static_features(df):
        
        
        add_new_feas = []

        ## Height
        df['PlayerHeight_dense'] = df['PlayerHeight'].apply(lambda x: 12*int(x.split('-')[0])+int(x.split('-')[1]))
        
        add_new_feas.append('PlayerHeight_dense')

        ## Time
        df['TimeHandoff'] = df['TimeHandoff'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%dT%H:%M:%S.%fZ"))
        df['TimeSnap'] = df['TimeSnap'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%dT%H:%M:%S.%fZ"))

        df['TimeDelta'] = df.apply(lambda row: (row['TimeHandoff'] - row['TimeSnap']).total_seconds(), axis=1)
        df['PlayerBirthDate'] =df['PlayerBirthDate'].apply(lambda x: datetime.datetime.strptime(x, "%m/%d/%Y"))

        ## Age
        seconds_in_year = 60*60*24*365.25
        df['PlayerAge'] = df.apply(lambda row: (row['TimeHandoff']-row['PlayerBirthDate']).total_seconds()/seconds_in_year, axis=1)
        add_new_feas.append('PlayerAge')

        ## WindSpeed
        df['WindSpeed_ob'] = df['WindSpeed'].apply(lambda x: x.lower().replace('mph', '').strip() if not pd.isna(x) else x)
        df['WindSpeed_ob'] = df['WindSpeed_ob'].apply(lambda x: (int(x.split('-')[0])+int(x.split('-')[1]))/2 if not pd.isna(x) and '-' in x else x)
        df['WindSpeed_ob'] = df['WindSpeed_ob'].apply(lambda x: (int(x.split()[0])+int(x.split()[-1]))/2 if not pd.isna(x) and type(x)!=float and 'gusts up to' in x else x)
        df['WindSpeed_dense'] = df['WindSpeed_ob'].apply(strtofloat)
        add_new_feas.append('WindSpeed_dense')

        ## Weather
        df['GameWeather_process'] = df['GameWeather'].str.lower()
        df['GameWeather_process'] = df['GameWeather_process'].apply(lambda x: "indoor" if not pd.isna(x) and "indoor" in x else x)
        df['GameWeather_process'] = df['GameWeather_process'].apply(lambda x: x.replace('coudy', 'cloudy').replace('clouidy', 'cloudy').replace('party', 'partly') if not pd.isna(x) else x)
        df['GameWeather_process'] = df['GameWeather_process'].apply(lambda x: x.replace('clear and sunny', 'sunny and clear') if not pd.isna(x) else x)
        df['GameWeather_process'] = df['GameWeather_process'].apply(lambda x: x.replace('skies', '').replace("mostly", "").strip() if not pd.isna(x) else x)
        df['GameWeather_dense'] = df['GameWeather_process'].apply(map_weather)
        add_new_feas.append('GameWeather_dense')
#         ## Rusher
#         train['IsRusher'] = (train['NflId'] == train['NflIdRusher'])
#         train['IsRusher_ob'] = (train['NflId'] == train['NflIdRusher']).astype("object")
#         temp = train[train["IsRusher"]][["Team", "PlayId"]].rename(columns={"Team":"RusherTeam"})
#         train = train.merge(temp, on = "PlayId")
#         train["IsRusherTeam"] = train["Team"] == train["RusherTeam"]

        ## dense -> categorical
#         train["Quarter_ob"] = train["Quarter"].astype("object")
#         train["Down_ob"] = train["Down"].astype("object")
#         train["JerseyNumber_ob"] = train["JerseyNumber"].astype("object")
#         train["YardLine_ob"] = train["YardLine"].astype("object")
        # train["DefendersInTheBox_ob"] = train["DefendersInTheBox"].astype("object")
        # train["Week_ob"] = train["Week"].astype("object")
        # train["TimeDelta_ob"] = train["TimeDelta"].astype("object")


        ## Orientation and Dir
        df["Orientation_ob"] = df["Orientation"].apply(lambda x : orientation_to_cat(x)).astype("object")
        df["Dir_ob"] = df["Dir"].apply(lambda x : orientation_to_cat(x)).astype("object")

        df["Orientation_sin"] = df["Orientation"].apply(lambda x : np.sin(x/360 * 2 * np.pi))
        df["Orientation_cos"] = df["Orientation"].apply(lambda x : np.cos(x/360 * 2 * np.pi))
        df["Dir_sin"] = df["Dir"].apply(lambda x : np.sin(x/360 * 2 * np.pi))
        df["Dir_cos"] = df["Dir"].apply(lambda x : np.cos(x/360 * 2 * np.pi))
        add_new_feas.append("Dir_sin")
        add_new_feas.append("Dir_cos")

        ## diff Score
        df["diffScoreBeforePlay"] = df["HomeScoreBeforePlay"] - df["VisitorScoreBeforePlay"]
        add_new_feas.append("diffScoreBeforePlay")
    
    
        static_features = df[df['NflId'] == df['NflIdRusher']][add_new_feas+['GameId','PlayId','X','Y','S','A','Dis','Orientation','Dir',
                                                            'YardLine','Quarter','Down','Distance','DefendersInTheBox']].drop_duplicates()
#         static_features['DefendersInTheBox'] = static_features['DefendersInTheBox'].fillna(np.mean(static_features['DefendersInTheBox']))
        static_features.fillna(-999,inplace=True)
#         for i in add_new_feas:
#             static_features[i] = static_features[i].fillna(np.mean(static_features[i]))
            

        return static_features


    def combine_features(relative_to_back, defense, static, deploy=deploy):
        df = pd.merge(relative_to_back,defense,on=['GameId','PlayId'],how='inner')
        df = pd.merge(df,static,on=['GameId','PlayId'],how='inner')

        if not deploy:
            df = pd.merge(df, outcomes, on=['GameId','PlayId'], how='inner')

        return df
    
    yardline = update_yardline(df)
    df = update_orientation(df, yardline)
    back_feats = back_features(df)
    rel_back = features_relative_to_back(df, back_feats)
    def_feats = defense_features(df)
    static_feats = static_features(df)
    basetable = combine_features(rel_back, def_feats, static_feats, deploy=deploy)
    
    return basetable

In [6]:
%time train_basetable = create_features(train, False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Wall time: 2min 30s


In [14]:
train_basetable

Unnamed: 0,GameId,PlayId,back_from_scrimmage,back_oriented_down_field,back_moving_down_field,min_dist,max_dist,mean_dist,std_dist,def_min_dist,def_max_dist,def_mean_dist,def_std_dist,PlayerHeight_dense,PlayerAge,WindSpeed_dense,GameWeather_dense,Dir_sin,Dir_cos,diffScoreBeforePlay,X,Y,S,A,Dis,Orientation,Dir,YardLine,Quarter,Down,Distance,DefendersInTheBox,Yards
0,2017090700,20170907000118,3.75,1,0,1.449724,22.415872,8.046559,4.873845,4.593310,22.415872,9.752491,5.327299,70,25.596251,8.0,1.0,0.911690,-0.410878,0,41.25,30.53,3.63,3.35,0.38,198.02,114.26,45.0,1,3,2,6.0,8
1,2017090700,20170907000139,4.07,0,0,0.792023,23.025872,8.614623,5.598683,4.287773,23.025872,10.297028,5.833217,70,25.596252,8.0,1.0,0.740805,0.671721,0,48.93,27.16,3.06,2.41,0.34,149.30,47.80,53.0,1,1,10,6.0,3
2,2017090700,20170907000189,3.66,1,0,1.646390,20.726285,8.482583,4.642121,4.221670,20.726285,9.903689,5.073290,70,25.596253,8.0,1.0,0.668612,-0.743612,0,71.34,19.11,5.77,2.42,0.60,219.18,138.04,75.0,1,1,10,7.0,5
3,2017090700,20170907000345,3.53,0,0,0.918096,9.791231,5.549379,1.983128,4.528002,9.791231,6.309354,1.834174,71,26.852933,8.0,1.0,0.995496,0.094803,0,104.47,25.36,4.45,3.20,0.46,173.78,84.56,108.0,1,2,2,9.0,2
4,2017090700,20170907000395,5.01,0,0,0.502892,21.214806,9.168819,5.611232,4.288088,21.214806,11.056456,5.900009,71,22.091819,8.0,1.0,0.375901,-0.926660,7,29.99,27.12,3.90,2.53,0.44,34.27,157.92,35.0,1,1,10,7.0,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23166,2018123015,20181230153910,4.57,0,0,0.575847,19.341419,6.736412,4.514974,2.942329,19.341419,8.243270,5.075063,73,27.041779,5.0,-1.0,0.938312,0.345790,3,70.43,32.27,3.99,3.38,0.39,72.56,69.77,75.0,4,1,10,8.0,1
23167,2018123015,20181230154035,4.00,0,0,1.151217,25.500196,9.827962,7.474030,2.808932,25.500196,11.779213,7.739310,69,25.861781,5.0,-1.0,0.485115,-0.874450,0,31.00,30.99,4.18,2.95,0.40,122.97,150.98,35.0,4,1,10,6.0,4
23168,2018123015,20181230154082,4.07,0,0,0.776209,19.454503,8.272970,5.548527,3.595998,19.454503,8.825731,5.317643,69,25.861783,5.0,-1.0,0.659346,0.751840,0,39.93,29.20,4.67,3.68,0.45,63.11,41.25,44.0,4,3,1,7.0,4
23169,2018123015,20181230154135,3.81,0,0,0.584637,19.113474,8.202171,5.833721,4.090880,19.113474,9.062557,5.568974,71,24.290258,5.0,-1.0,0.659215,0.751955,0,81.19,23.75,4.23,2.43,0.41,66.34,41.24,85.0,4,1,10,7.0,2


In [7]:
X = train_basetable.copy()
yards = X.Yards

y = np.zeros((yards.shape[0], 199))
for idx, target in enumerate(list(yards)):
    y[idx][99 + target] = 1

X.drop(['GameId','PlayId','Yards'], axis=1, inplace=True)

In [8]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [9]:
from keras.layers import Dense,Input,Flatten,concatenate,Dropout,Lambda
from keras.models import Model
import keras.backend as K
import re
from keras.losses import binary_crossentropy
from  keras.callbacks import EarlyStopping,ModelCheckpoint
import codecs

from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
from sklearn.metrics import f1_score




class CRPSCallback(Callback):
    
    def __init__(self,validation, predict_batch_size=20, include_on_batch=False):
        super(CRPSCallback, self).__init__()
        self.validation = validation
        self.predict_batch_size = predict_batch_size
        self.include_on_batch = include_on_batch
        
        print('validation shape',len(self.validation))

    def on_batch_begin(self, batch, logs={}):
        pass

    def on_train_begin(self, logs={}):
        if not ('CRPS_score_val' in self.params['metrics']):
            self.params['metrics'].append('CRPS_score_val')

    def on_batch_end(self, batch, logs={}):
        if (self.include_on_batch):
            logs['CRPS_score_val'] = float('-inf')

    def on_epoch_end(self, epoch, logs={}):
        logs['CRPS_score_val'] = float('-inf')
            
        if (self.validation):
            X_valid, y_valid = self.validation[0], self.validation[1]
            y_pred = self.model.predict(X_valid)
            y_true = np.clip(np.cumsum(y_valid, axis=1), 0, 1)
            y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
            val_s = ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * X_valid.shape[0])
            val_s = np.round(val_s, 6)
            logs['CRPS_score_val'] = val_s

In [10]:
def get_model(x_tr,y_tr,x_val,y_val):
    inp = Input(shape = (x_tr.shape[1],))
    x = Dense(1024, input_dim=X.shape[1], activation='relu')(inp)
    x = Dropout(0.5)(x)
    x = BatchNormalization()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = BatchNormalization()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = BatchNormalization()(x)
    
    out = Dense(199, activation='softmax')(x)
    model = Model(inp,out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=[])
    #add lookahead
#     lookahead = Lookahead(k=5, alpha=0.5) # Initialize Lookahead
#     lookahead.inject(model) # add into model

    
    es = EarlyStopping(monitor='CRPS_score_val', 
                       mode='min',
                       restore_best_weights=True, 
                       verbose=1, 
                       patience=10)

    mc = ModelCheckpoint('best_model.h5',monitor='CRPS_score_val',mode='min',
                                   save_best_only=True, verbose=1, save_weights_only=True)
    
    bsz = 1024
    steps = x_tr.shape[0]/bsz
    


    model.fit(x_tr, y_tr,callbacks=[CRPSCallback(validation = (x_val,y_val)),es,mc], epochs=100, batch_size=bsz,verbose=1)
    model.load_weights("best_model.h5")
    
    y_pred = model.predict(x_val)
    y_valid = y_val
    y_true = np.clip(np.cumsum(y_valid, axis=1), 0, 1)
    y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
    val_s = ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * x_val.shape[0])
    crps = np.round(val_s, 6)

    return model,crps

In [12]:
X

array([[-1.25301814,  1.67362393, -0.18639474, ...,  2.19167184,
        -1.6676964 , -0.07074881],
       [-0.91178801, -0.5975058 , -0.18639474, ..., -0.77134982,
         0.44022727, -0.07074881],
       [-1.34898912,  1.67362393, -0.18639474, ..., -0.77134982,
         0.44022727,  0.01629631],
       ...,
       [-0.91178801, -0.5975058 , -0.18639474, ...,  2.19167184,
        -1.93118686,  0.01629631],
       [-1.18903749, -0.5975058 , -0.18639474, ..., -0.77134982,
         0.44022727,  0.01629631],
       [ 1.35952508, -0.5975058 , -0.18639474, ...,  0.71016101,
        -0.08675365,  0.19038655]])

In [11]:
from sklearn.model_selection import train_test_split, KFold
import time

losses = []
models = []
crps_csv = []

s_time = time.time()


for k in range(2):
    kfold = KFold(10, random_state = 42 + k, shuffle = True)
    for k_fold, (tr_inds, val_inds) in enumerate(kfold.split(yards)):
        print("-----------")
        print("-----------")
        tr_x,tr_y = X[tr_inds],y[tr_inds]
        val_x,val_y = X[val_inds],y[val_inds]
        model,crps = get_model(tr_x,tr_y,val_x,val_y)
        models.append(model)
        print("the %d fold crps is %f"%((k_fold+1),crps))
        crps_csv.append(crps)
 
print("mean crps is %f"%np.mean(crps_csv))


def predict(x_te):
    model_num = len(models)
    for k,m in enumerate(models):
        if k==0:
            y_pred = m.predict(x_te,batch_size=1024)
        else:
            y_pred+=m.predict(x_te,batch_size=1024)
            
    y_pred = y_pred / model_num
    
    return y_pred

-----------
-----------
validation shape 2
Epoch 1/100

Epoch 00001: CRPS_score_val improved from inf to 0.08193, saving model to best_model.h5
Epoch 2/100

Epoch 00002: CRPS_score_val improved from 0.08193 to 0.07675, saving model to best_model.h5
Epoch 3/100

Epoch 00003: CRPS_score_val improved from 0.07675 to 0.05974, saving model to best_model.h5
Epoch 4/100

Epoch 00004: CRPS_score_val improved from 0.05974 to 0.03558, saving model to best_model.h5
Epoch 5/100

Epoch 00005: CRPS_score_val improved from 0.03558 to 0.02150, saving model to best_model.h5
Epoch 6/100

Epoch 00006: CRPS_score_val improved from 0.02150 to 0.01625, saving model to best_model.h5
Epoch 7/100

Epoch 00007: CRPS_score_val improved from 0.01625 to 0.01445, saving model to best_model.h5
Epoch 8/100

Epoch 00008: CRPS_score_val improved from 0.01445 to 0.01381, saving model to best_model.h5
Epoch 9/100

Epoch 00009: CRPS_score_val improved from 0.01381 to 0.01354, saving model to best_model.h5
Epoch 10/100

Ep

Epoch 00047: CRPS_score_val did not improve from 0.01289
Epoch 48/100

Epoch 00048: CRPS_score_val improved from 0.01289 to 0.01287, saving model to best_model.h5
Epoch 49/100

Epoch 00049: CRPS_score_val did not improve from 0.01287
Epoch 50/100

Epoch 00050: CRPS_score_val did not improve from 0.01287
Epoch 51/100

Epoch 00051: CRPS_score_val did not improve from 0.01287
Epoch 52/100

Epoch 00052: CRPS_score_val did not improve from 0.01287
Epoch 53/100

Epoch 00053: CRPS_score_val did not improve from 0.01287
Epoch 54/100

Epoch 00054: CRPS_score_val did not improve from 0.01287
Epoch 55/100

Epoch 00055: CRPS_score_val did not improve from 0.01287
Epoch 56/100

Epoch 00056: CRPS_score_val did not improve from 0.01287
Epoch 57/100

Epoch 00057: CRPS_score_val did not improve from 0.01287
Epoch 58/100
Restoring model weights from the end of the best epoch

Epoch 00058: CRPS_score_val did not improve from 0.01287
Epoch 00058: early stopping
the 1 fold crps is 0.012875
-----------
----

Epoch 00035: CRPS_score_val improved from 0.01279 to 0.01278, saving model to best_model.h5
Epoch 36/100

Epoch 00036: CRPS_score_val did not improve from 0.01278
Epoch 37/100

Epoch 00037: CRPS_score_val did not improve from 0.01278
Epoch 38/100

Epoch 00038: CRPS_score_val did not improve from 0.01278
Epoch 39/100

Epoch 00039: CRPS_score_val improved from 0.01278 to 0.01277, saving model to best_model.h5
Epoch 40/100

Epoch 00040: CRPS_score_val improved from 0.01277 to 0.01276, saving model to best_model.h5
Epoch 41/100

Epoch 00041: CRPS_score_val did not improve from 0.01276
Epoch 42/100

Epoch 00042: CRPS_score_val improved from 0.01276 to 0.01276, saving model to best_model.h5
Epoch 43/100

Epoch 00043: CRPS_score_val did not improve from 0.01276
Epoch 44/100

Epoch 00044: CRPS_score_val did not improve from 0.01276
Epoch 45/100

Epoch 00045: CRPS_score_val improved from 0.01276 to 0.01275, saving model to best_model.h5
Epoch 46/100

Epoch 00046: CRPS_score_val improved from 0.


Epoch 00087: CRPS_score_val did not improve from 0.01266
Epoch 88/100
Restoring model weights from the end of the best epoch

Epoch 00088: CRPS_score_val did not improve from 0.01266
Epoch 00088: early stopping
the 2 fold crps is 0.012661
-----------
-----------
validation shape 2
Epoch 1/100

Epoch 00001: CRPS_score_val improved from inf to 0.08190, saving model to best_model.h5
Epoch 2/100

Epoch 00002: CRPS_score_val improved from 0.08190 to 0.07546, saving model to best_model.h5
Epoch 3/100

Epoch 00003: CRPS_score_val improved from 0.07546 to 0.05609, saving model to best_model.h5
Epoch 4/100

Epoch 00004: CRPS_score_val improved from 0.05609 to 0.03453, saving model to best_model.h5
Epoch 5/100

Epoch 00005: CRPS_score_val improved from 0.03453 to 0.02232, saving model to best_model.h5
Epoch 6/100

Epoch 00006: CRPS_score_val improved from 0.02232 to 0.01686, saving model to best_model.h5
Epoch 7/100

Epoch 00007: CRPS_score_val improved from 0.01686 to 0.01449, saving model to 


Epoch 00046: CRPS_score_val did not improve from 0.01273
Epoch 47/100

Epoch 00047: CRPS_score_val did not improve from 0.01273
Epoch 48/100

Epoch 00048: CRPS_score_val improved from 0.01273 to 0.01272, saving model to best_model.h5
Epoch 49/100

Epoch 00049: CRPS_score_val did not improve from 0.01272
Epoch 50/100

Epoch 00050: CRPS_score_val did not improve from 0.01272
Epoch 51/100

Epoch 00051: CRPS_score_val did not improve from 0.01272
Epoch 52/100

Epoch 00052: CRPS_score_val did not improve from 0.01272
Epoch 53/100

Epoch 00053: CRPS_score_val did not improve from 0.01272
Epoch 54/100

Epoch 00054: CRPS_score_val did not improve from 0.01272
Epoch 55/100

Epoch 00055: CRPS_score_val did not improve from 0.01272
Epoch 56/100

Epoch 00056: CRPS_score_val did not improve from 0.01272
Epoch 57/100

Epoch 00057: CRPS_score_val did not improve from 0.01272
Epoch 58/100
Restoring model weights from the end of the best epoch

Epoch 00058: CRPS_score_val did not improve from 0.01272


Epoch 00035: CRPS_score_val did not improve from 0.01284
Epoch 36/100

Epoch 00036: CRPS_score_val did not improve from 0.01284
Epoch 37/100

Epoch 00037: CRPS_score_val improved from 0.01284 to 0.01284, saving model to best_model.h5
Epoch 38/100

Epoch 00038: CRPS_score_val did not improve from 0.01284
Epoch 39/100

Epoch 00039: CRPS_score_val improved from 0.01284 to 0.01283, saving model to best_model.h5
Epoch 40/100

Epoch 00040: CRPS_score_val did not improve from 0.01283
Epoch 41/100

Epoch 00041: CRPS_score_val did not improve from 0.01283
Epoch 42/100

Epoch 00042: CRPS_score_val did not improve from 0.01283
Epoch 43/100

Epoch 00043: CRPS_score_val improved from 0.01283 to 0.01282, saving model to best_model.h5
Epoch 44/100

Epoch 00044: CRPS_score_val did not improve from 0.01282
Epoch 45/100

Epoch 00045: CRPS_score_val did not improve from 0.01282
Epoch 46/100

Epoch 00046: CRPS_score_val improved from 0.01282 to 0.01282, saving model to best_model.h5
Epoch 47/100

Epoch 00


Epoch 00018: CRPS_score_val improved from 0.01319 to 0.01316, saving model to best_model.h5
Epoch 19/100

Epoch 00019: CRPS_score_val improved from 0.01316 to 0.01313, saving model to best_model.h5
Epoch 20/100

Epoch 00020: CRPS_score_val improved from 0.01313 to 0.01311, saving model to best_model.h5
Epoch 21/100

Epoch 00021: CRPS_score_val improved from 0.01311 to 0.01307, saving model to best_model.h5
Epoch 22/100

Epoch 00022: CRPS_score_val improved from 0.01307 to 0.01307, saving model to best_model.h5
Epoch 23/100

Epoch 00023: CRPS_score_val improved from 0.01307 to 0.01307, saving model to best_model.h5
Epoch 24/100

Epoch 00024: CRPS_score_val improved from 0.01307 to 0.01304, saving model to best_model.h5
Epoch 25/100

Epoch 00025: CRPS_score_val improved from 0.01304 to 0.01303, saving model to best_model.h5
Epoch 26/100

Epoch 00026: CRPS_score_val improved from 0.01303 to 0.01303, saving model to best_model.h5
Epoch 27/100

Epoch 00027: CRPS_score_val did not improve f


Epoch 00069: CRPS_score_val did not improve from 0.01294
Epoch 70/100
Restoring model weights from the end of the best epoch

Epoch 00070: CRPS_score_val did not improve from 0.01294
Epoch 00070: early stopping
the 5 fold crps is 0.012941
-----------
-----------
validation shape 2
Epoch 1/100

Epoch 00001: CRPS_score_val improved from inf to 0.08173, saving model to best_model.h5
Epoch 2/100

Epoch 00002: CRPS_score_val improved from 0.08173 to 0.07575, saving model to best_model.h5
Epoch 3/100

Epoch 00003: CRPS_score_val improved from 0.07575 to 0.05629, saving model to best_model.h5
Epoch 4/100

Epoch 00004: CRPS_score_val improved from 0.05629 to 0.03438, saving model to best_model.h5
Epoch 5/100

Epoch 00005: CRPS_score_val improved from 0.03438 to 0.02175, saving model to best_model.h5
Epoch 6/100

Epoch 00006: CRPS_score_val improved from 0.02175 to 0.01662, saving model to best_model.h5
Epoch 7/100

Epoch 00007: CRPS_score_val improved from 0.01662 to 0.01438, saving model to 

Restoring model weights from the end of the best epoch

Epoch 00046: CRPS_score_val did not improve from 0.01244
Epoch 00046: early stopping
the 6 fold crps is 0.012442
-----------
-----------
validation shape 2
Epoch 1/100

Epoch 00001: CRPS_score_val improved from inf to 0.08228, saving model to best_model.h5
Epoch 2/100

Epoch 00002: CRPS_score_val improved from 0.08228 to 0.07579, saving model to best_model.h5
Epoch 3/100

Epoch 00003: CRPS_score_val improved from 0.07579 to 0.05634, saving model to best_model.h5
Epoch 4/100

Epoch 00004: CRPS_score_val improved from 0.05634 to 0.03307, saving model to best_model.h5
Epoch 5/100

Epoch 00005: CRPS_score_val improved from 0.03307 to 0.02066, saving model to best_model.h5
Epoch 6/100

Epoch 00006: CRPS_score_val improved from 0.02066 to 0.01554, saving model to best_model.h5
Epoch 7/100

Epoch 00007: CRPS_score_val improved from 0.01554 to 0.01364, saving model to best_model.h5
Epoch 8/100

Epoch 00008: CRPS_score_val improved from 0.


Epoch 00048: CRPS_score_val improved from 0.01219 to 0.01219, saving model to best_model.h5
Epoch 49/100

Epoch 00049: CRPS_score_val improved from 0.01219 to 0.01218, saving model to best_model.h5
Epoch 50/100

Epoch 00050: CRPS_score_val did not improve from 0.01218
Epoch 51/100

Epoch 00051: CRPS_score_val did not improve from 0.01218
Epoch 52/100

Epoch 00052: CRPS_score_val did not improve from 0.01218
Epoch 53/100

Epoch 00053: CRPS_score_val did not improve from 0.01218
Epoch 54/100

Epoch 00054: CRPS_score_val did not improve from 0.01218
Epoch 55/100

Epoch 00055: CRPS_score_val improved from 0.01218 to 0.01217, saving model to best_model.h5
Epoch 56/100

Epoch 00056: CRPS_score_val improved from 0.01217 to 0.01217, saving model to best_model.h5
Epoch 57/100

Epoch 00057: CRPS_score_val did not improve from 0.01217
Epoch 58/100

Epoch 00058: CRPS_score_val did not improve from 0.01217
Epoch 59/100

Epoch 00059: CRPS_score_val did not improve from 0.01217
Epoch 60/100

Epoch 0


Epoch 00029: CRPS_score_val did not improve from 0.01311
Epoch 30/100

Epoch 00030: CRPS_score_val did not improve from 0.01311
Epoch 31/100

Epoch 00031: CRPS_score_val did not improve from 0.01311
Epoch 32/100

Epoch 00032: CRPS_score_val did not improve from 0.01311
Epoch 33/100

Epoch 00033: CRPS_score_val did not improve from 0.01311
Epoch 34/100

Epoch 00034: CRPS_score_val did not improve from 0.01311
Epoch 35/100

Epoch 00035: CRPS_score_val did not improve from 0.01311
Epoch 36/100

Epoch 00036: CRPS_score_val improved from 0.01311 to 0.01310, saving model to best_model.h5
Epoch 37/100

Epoch 00037: CRPS_score_val improved from 0.01310 to 0.01306, saving model to best_model.h5
Epoch 38/100

Epoch 00038: CRPS_score_val did not improve from 0.01306
Epoch 39/100

Epoch 00039: CRPS_score_val did not improve from 0.01306
Epoch 40/100

Epoch 00040: CRPS_score_val improved from 0.01306 to 0.01303, saving model to best_model.h5
Epoch 41/100

Epoch 00041: CRPS_score_val did not improv

Epoch 00020: CRPS_score_val improved from 0.01283 to 0.01282, saving model to best_model.h5
Epoch 21/100

Epoch 00021: CRPS_score_val improved from 0.01282 to 0.01279, saving model to best_model.h5
Epoch 22/100

Epoch 00022: CRPS_score_val improved from 0.01279 to 0.01276, saving model to best_model.h5
Epoch 23/100

Epoch 00023: CRPS_score_val did not improve from 0.01276
Epoch 24/100

Epoch 00024: CRPS_score_val improved from 0.01276 to 0.01273, saving model to best_model.h5
Epoch 25/100

Epoch 00025: CRPS_score_val improved from 0.01273 to 0.01272, saving model to best_model.h5
Epoch 26/100

Epoch 00026: CRPS_score_val improved from 0.01272 to 0.01271, saving model to best_model.h5
Epoch 27/100

Epoch 00027: CRPS_score_val improved from 0.01271 to 0.01268, saving model to best_model.h5
Epoch 28/100

Epoch 00028: CRPS_score_val did not improve from 0.01268
Epoch 29/100

Epoch 00029: CRPS_score_val did not improve from 0.01268
Epoch 30/100

Epoch 00030: CRPS_score_val improved from 0.0


Epoch 00019: CRPS_score_val improved from 0.01374 to 0.01372, saving model to best_model.h5
Epoch 20/100

Epoch 00020: CRPS_score_val did not improve from 0.01372
Epoch 21/100

Epoch 00021: CRPS_score_val improved from 0.01372 to 0.01369, saving model to best_model.h5
Epoch 22/100

Epoch 00022: CRPS_score_val improved from 0.01369 to 0.01367, saving model to best_model.h5
Epoch 23/100

Epoch 00023: CRPS_score_val improved from 0.01367 to 0.01366, saving model to best_model.h5
Epoch 24/100

Epoch 00024: CRPS_score_val did not improve from 0.01366
Epoch 25/100

Epoch 00025: CRPS_score_val improved from 0.01366 to 0.01363, saving model to best_model.h5
Epoch 26/100

Epoch 00026: CRPS_score_val did not improve from 0.01363
Epoch 27/100

Epoch 00027: CRPS_score_val improved from 0.01363 to 0.01362, saving model to best_model.h5
Epoch 28/100

Epoch 00028: CRPS_score_val improved from 0.01362 to 0.01361, saving model to best_model.h5
Epoch 29/100

Epoch 00029: CRPS_score_val did not improve 


Epoch 00004: CRPS_score_val improved from 0.05602 to 0.03253, saving model to best_model.h5
Epoch 5/100

Epoch 00005: CRPS_score_val improved from 0.03253 to 0.02013, saving model to best_model.h5
Epoch 6/100

Epoch 00006: CRPS_score_val improved from 0.02013 to 0.01553, saving model to best_model.h5
Epoch 7/100

Epoch 00007: CRPS_score_val improved from 0.01553 to 0.01395, saving model to best_model.h5
Epoch 8/100

Epoch 00008: CRPS_score_val improved from 0.01395 to 0.01334, saving model to best_model.h5
Epoch 9/100

Epoch 00009: CRPS_score_val improved from 0.01334 to 0.01307, saving model to best_model.h5
Epoch 10/100

Epoch 00010: CRPS_score_val improved from 0.01307 to 0.01296, saving model to best_model.h5
Epoch 11/100

Epoch 00011: CRPS_score_val improved from 0.01296 to 0.01289, saving model to best_model.h5
Epoch 12/100

Epoch 00012: CRPS_score_val improved from 0.01289 to 0.01283, saving model to best_model.h5
Epoch 13/100

Epoch 00013: CRPS_score_val improved from 0.01283 


Epoch 00052: CRPS_score_val did not improve from 0.01241
Epoch 53/100

Epoch 00053: CRPS_score_val did not improve from 0.01241
Epoch 54/100

Epoch 00054: CRPS_score_val did not improve from 0.01241
Epoch 55/100

Epoch 00055: CRPS_score_val did not improve from 0.01241
Epoch 56/100

Epoch 00056: CRPS_score_val did not improve from 0.01241
Epoch 57/100

Epoch 00057: CRPS_score_val did not improve from 0.01241
Epoch 58/100

Epoch 00058: CRPS_score_val did not improve from 0.01241
Epoch 59/100

Epoch 00059: CRPS_score_val did not improve from 0.01241
Epoch 60/100

Epoch 00060: CRPS_score_val did not improve from 0.01241
Epoch 61/100

Epoch 00061: CRPS_score_val improved from 0.01241 to 0.01239, saving model to best_model.h5
Epoch 62/100

Epoch 00062: CRPS_score_val did not improve from 0.01239
Epoch 63/100

Epoch 00063: CRPS_score_val did not improve from 0.01239
Epoch 64/100

Epoch 00064: CRPS_score_val did not improve from 0.01239
Epoch 65/100

Epoch 00065: CRPS_score_val did not impro

Epoch 30/100

Epoch 00030: CRPS_score_val improved from 0.01259 to 0.01258, saving model to best_model.h5
Epoch 31/100

Epoch 00031: CRPS_score_val did not improve from 0.01258
Epoch 32/100

Epoch 00032: CRPS_score_val did not improve from 0.01258
Epoch 33/100

Epoch 00033: CRPS_score_val did not improve from 0.01258
Epoch 34/100

Epoch 00034: CRPS_score_val improved from 0.01258 to 0.01256, saving model to best_model.h5
Epoch 35/100

Epoch 00035: CRPS_score_val improved from 0.01256 to 0.01256, saving model to best_model.h5
Epoch 36/100

Epoch 00036: CRPS_score_val did not improve from 0.01256
Epoch 37/100

Epoch 00037: CRPS_score_val did not improve from 0.01256
Epoch 38/100

Epoch 00038: CRPS_score_val did not improve from 0.01256
Epoch 39/100

Epoch 00039: CRPS_score_val did not improve from 0.01256
Epoch 40/100

Epoch 00040: CRPS_score_val did not improve from 0.01256
Epoch 41/100

Epoch 00041: CRPS_score_val did not improve from 0.01256
Epoch 42/100

Epoch 00042: CRPS_score_val d

KeyboardInterrupt: 

In [None]:
print("mean crps is %f"%np.mean(crps_csv))

In [None]:
%%time
if  TRAIN_OFFLINE==False:
    from kaggle.competitions import nflrush
    env = nflrush.make_env()
    iter_test = env.iter_test()

    for (test_df, sample_prediction_df) in iter_test:
        basetable = create_features(test_df, deploy=True)
        basetable.drop(['GameId','PlayId'], axis=1, inplace=True)
        scaled_basetable = scaler.transform(basetable)

        y_pred = predict(scaled_basetable)
        y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1).tolist()[0]

        preds_df = pd.DataFrame(data=[y_pred], columns=sample_prediction_df.columns)
        env.predict(preds_df)

    env.write_submission_file()