In [7]:
import numpy as np 
import pandas as pd 

import math

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

import sys
sys.path.append("..")
from source.df_pipeline import df_imputer, df_scaler, dummify

pd.set_option('max_columns', 200)
pd.set_option('max_rows', 80)

In [8]:
df_train = pd.read_csv('../data_processed/train_processed.csv', dtype={'WindSpeed': 'object'})

df_train.head()

Unnamed: 0,GameId,PlayId,Team,X,Y,S,A,Dis,Orientation,Dir,NflId,YardLine,Quarter,GameClock,PossessionTeam,Down,Distance,FieldPosition,HomeScoreBeforePlay,VisitorScoreBeforePlay,NflIdRusher,OffenseFormation,OffensePersonnel,DefendersInTheBox,DefensePersonnel,PlayDirection,Yards,PlayerHeight,PlayerWeight,Position,HomeTeamAbbr,VisitorTeamAbbr,Location,StadiumType,Turf,GameWeather,Temperature,Humidity,WindSpeed,WindDirection,to_left,has_ball,offense_team,from_yardline,X_speed,Y_speed,X_acceleration,Y_acceleration,age,distance_from_ball,closest_opponent,opponents_in_6,teammates_in_6
0,2017090700,20170907000118,away,46.09,18.493333,1.69,1.13,0.4,81.99,1.620015,496723,45,1,14:14:00,NE,3,2,NE,0,0,2543773,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"2 DL, 3 LB, 6 DB",left,8,72,212,SS,NE,KC,"Foxborough, MA",Outdoor,0,Clear and warm,63.0,77.0,8,SW,True,False,home,1.09,1.687953,-0.083145,1.128632,-0.055594,10480,6.480872,4.59331,3.0,7.0
1,2017090700,20170907000118,away,45.33,20.693333,0.42,1.35,0.01,27.61,1.24442,2495116,45,1,14:14:00,NE,3,2,NE,0,0,2543773,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"2 DL, 3 LB, 6 DB",left,8,75,288,DE,NE,KC,"Foxborough, MA",Outdoor,0,Clear and warm,63.0,77.0,8,SW,True,False,home,0.33,0.397828,0.134657,1.278734,0.432828,10394,4.59331,4.59331,3.0,7.0
2,2017090700,20170907000118,away,46.0,20.133333,1.22,0.59,0.31,3.01,1.174083,2495493,45,1,14:14:00,NE,3,2,NE,0,0,2543773,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"2 DL, 3 LB, 6 DB",left,8,75,270,DE,NE,KC,"Foxborough, MA",Outdoor,0,Clear and warm,63.0,77.0,8,SW,True,False,home,1.0,1.12525,0.471395,0.544178,0.22797,10457,5.448982,4.59331,3.0,7.0
3,2017090700,20170907000118,away,48.54,25.633333,0.42,0.54,0.02,359.77,2.868623,2506353,45,1,14:14:00,NE,3,2,NE,0,0,2543773,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"2 DL, 3 LB, 6 DB",left,8,75,245,ILB,NE,KC,"Foxborough, MA",Outdoor,0,Clear and warm,63.0,77.0,8,SW,True,False,home,3.54,0.113229,-0.404449,0.14558,-0.520006,12709,7.820038,4.59331,3.0,7.0
4,2017090700,20170907000118,away,50.68,17.913333,1.82,2.43,0.16,12.63,1.844638,2530794,45,1,14:14:00,NE,3,2,NE,0,0,2543773,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"2 DL, 3 LB, 6 DB",left,8,72,206,FS,NE,KC,"Foxborough, MA",Outdoor,0,Clear and warm,63.0,77.0,8,SW,True,False,home,5.68,1.752185,-0.492187,2.339456,-0.657151,10980,10.622476,4.59331,3.0,7.0


In [9]:
class transformation(TransformerMixin, BaseEstimator):
    def __init__(self, mean_weight=10):
        self.columns = None
        self.mean_weight = mean_weight
        self.smooth_team = {}
        
    def fit(self, X, y=None):
        return self
    
    
    def stats_by_play(self, data):
        avg_by_play = data.groupby(['PlayId', 
                                    'Team', 
                                    'offense_team'], as_index=False)[['PlayerHeight', 
                                                                      'PlayerWeight',
                                                                      'age',
                                                                      'S', 'A']].mean()
        spread = data.groupby(['PlayId', 
                               'Team', 
                               'offense_team'])[['X', 'Y']].std().reset_index()
        tot_momentum = data.groupby(['PlayId', 
                                     'Team', 
                                     'offense_team'], as_index=False)[['X_speed', 'Y_speed',
                                                                       'PlayerWeight',
                                                                       'X_acceleration','Y_acceleration']].sum()
        
        tot_momentum['x_momentum'] = tot_momentum['X_speed'] * tot_momentum['PlayerWeight']
        tot_momentum['y_momentum'] = tot_momentum['Y_speed'] * tot_momentum['PlayerWeight']
        tot_momentum['x_force'] = tot_momentum['X_acceleration'] * tot_momentum['PlayerWeight']
        tot_momentum['y_force'] = tot_momentum['Y_acceleration'] * tot_momentum['PlayerWeight']
        tot_momentum.drop(['X_speed', 'Y_speed',
                           'PlayerWeight',  
                           'X_acceleration','Y_acceleration'], axis=1, inplace=True)

        avg_by_play = pd.merge(avg_by_play, tot_momentum, on=['PlayId', 'Team', 'offense_team'])
        avg_by_play = pd.merge(avg_by_play, spread, on=['PlayId', 'Team', 'offense_team'])

        poss_team = avg_by_play[avg_by_play.Team == avg_by_play.offense_team].copy()
        def_team = avg_by_play[avg_by_play.Team != avg_by_play.offense_team].copy()

        poss_team.rename(columns={'PlayerHeight': 'poss_avg_height', 
                                  'PlayerWeight': 'poss_avg_weight',
                                  'age': 'poss_avg_age',
                                  'X': 'poss_std_X',
                                  'Y': 'poss_std_Y',
                                  'S': 'poss_avg_S', 
                                  'A': 'poss_avg_A', 
                                  'x_momentum': 'poss_x_momentum', 
                                  'y_momentum': 'poss_y_momentum', 
                                  'x_force': 'poss_x_force', 
                                  'y_force': 'poss_y_force'}, inplace=True)
        def_team.rename(columns={'PlayerHeight': 'def_avg_height', 
                                  'PlayerWeight': 'def_avg_weight', 
                                  'age': 'def_avg_age',
                                  'X': 'def_std_X',
                                  'Y': 'def_std_Y',
                                  'S': 'def_avg_S', 
                                  'A': 'def_avg_A',
                                  'x_momentum': 'def_x_momentum', 
                                  'y_momentum': 'def_y_momentum', 
                                  'x_force': 'def_x_force', 
                                  'y_force': 'def_y_force'}, inplace=True)

        avg_by_play = pd.merge(poss_team.drop('Team', axis=1), 
                               def_team.drop('Team', axis=1), on=['PlayId', 'offense_team'])
        
        avg_by_play['tot_x_momenumt'] = avg_by_play['poss_x_momentum'] - avg_by_play['def_x_momentum']
        avg_by_play['tot_x_force'] = avg_by_play['poss_x_force'] - avg_by_play['def_x_force']
        avg_by_play['height_diff'] = avg_by_play['poss_avg_height'] - avg_by_play['def_avg_height']
        avg_by_play['weight_diff'] = avg_by_play['poss_avg_weight'] - avg_by_play['def_avg_weight']
        avg_by_play['age_diff'] = avg_by_play['poss_avg_age'] - avg_by_play['def_avg_age']
        avg_by_play['X_diff'] = avg_by_play['poss_std_X'] - avg_by_play['def_std_X']
        avg_by_play['Y_diff'] = avg_by_play['poss_std_Y'] - avg_by_play['def_std_Y']

        return avg_by_play
    
    
    def process_play(self, X):
        cols_by_play = ['GameId', 'PlayId', 'YardLine', 
                'Quarter', 'GameClock', 'Down', 'Distance',
                'OffenseFormation', 'DefendersInTheBox',  
                'Location', 'StadiumType', 'Turf', 
                'GameWeather','Temperature', 'Humidity', 'WindSpeed', 'WindDirection', 
                'PlayDirection', 'HomeScoreBeforePlay', 'VisitorScoreBeforePlay']
        train_play = X[cols_by_play].drop_duplicates()
        avg_by_play = self.stats_by_play(X)
        train_play = pd.merge(train_play, avg_by_play.drop('offense_team', axis=1), on=['PlayId'])

        return train_play
    
    
    def transform(self, X, y=None):
        train_play = self.process_play(X)
        carriers = X[X.has_ball].copy()

        to_drop = ['GameId', 'NflId', 'Team', 'Orientation','YardLine', 'Quarter', 'GameClock', 'PossessionTeam',
           'Down', 'FieldPosition', 'HomeScoreBeforePlay',
           'VisitorScoreBeforePlay', 'NflIdRusher', 'OffensePersonnel','DefensePersonnel',
               'PlayDirection', 'Position', 'HomeTeamAbbr',
           'VisitorTeamAbbr', 'Location', 'StadiumType', 'GameWeather',
           'Temperature', 'Humidity', 'WindSpeed', 'WindDirection', 'to_left',
           'has_ball', 'offense_team', 'Distance',
           'OffenseFormation', 'DefendersInTheBox', 'Turf']

        carriers.drop(to_drop, axis=1, inplace=True)

        full_train = pd.merge(carriers, train_play, on='PlayId')

        full_train.drop(['GameId', 'WindDirection', 'WindSpeed', 'GameWeather', 
                         'PlayDirection', 'StadiumType', 'Turf', 'Location', 'GameClock'], axis=1, inplace=True)
        
        self.columns = full_train.columns

        return full_train
    
    
    def get_features_name(self):
        return self.columns

In [10]:
tmp = transformation().transform(df_train)

tmp.head()

Unnamed: 0,PlayId,X,Y,S,A,Dis,Dir,Yards,PlayerHeight,PlayerWeight,from_yardline,X_speed,Y_speed,X_acceleration,Y_acceleration,age,distance_from_ball,closest_opponent,opponents_in_6,teammates_in_6,YardLine,Quarter,Down,Distance,OffenseFormation,DefendersInTheBox,Temperature,Humidity,HomeScoreBeforePlay,VisitorScoreBeforePlay,poss_avg_height,poss_avg_weight,poss_avg_age,poss_avg_S,poss_avg_A,poss_x_momentum,poss_y_momentum,poss_x_force,poss_y_force,poss_std_X,poss_std_Y,def_avg_height,def_avg_weight,def_avg_age,def_avg_S,def_avg_A,def_x_momentum,def_y_momentum,def_x_force,def_y_force,def_std_X,def_std_Y,tot_x_momenumt,tot_x_force,height_diff,weight_diff,age_diff,X_diff,Y_diff
0,20170907000118,41.25,22.803333,3.63,3.35,0.38,0.423417,8,70,205,3.75,1.491487,3.309436,1.376441,3.054163,9349,0.0,4.59331,3.0,7.0,45,1,3,2,SHOTGUN,6.0,63.0,77.0,0,0,74.727273,259.181818,10374.454545,2.106364,1.358182,30394.065885,42824.323203,14808.310917,29092.727988,1.772665,5.855606,73.636364,233.545455,10101.272727,1.314545,1.025455,21967.405111,4867.00364,11406.041625,-783.13754,5.294079,7.014714,8426.660774,3402.269293,1.090909,25.636364,273.181818,-3.521414,-1.159107
1,20170907000139,48.93,26.173333,3.06,2.41,0.34,5.546656,3,70,205,4.07,-2.055465,2.266862,-1.618847,1.785339,9349,0.0,4.287773,3.0,7.0,53,1,1,10,SHOTGUN,6.0,63.0,77.0,0,0,74.727273,259.181818,10374.454545,2.094545,1.541818,5361.669172,36553.979512,1503.78838,27039.089576,2.00138,6.932502,73.636364,233.545455,10101.272727,1.639091,1.592727,-3627.89183,6256.141431,-2706.292685,4533.856018,5.406292,7.190716,8989.561002,4210.081065,1.090909,25.636364,273.181818,-3.404912,-0.258215
2,20170907000189,71.34,34.223333,5.77,2.42,0.6,0.838456,5,70,205,3.66,4.29064,3.857889,1.799541,1.61804,9349,0.0,4.22167,2.0,6.0,75,1,1,10,SINGLEBACK,7.0,63.0,77.0,0,0,74.727273,259.181818,10374.454545,3.682727,1.419091,86277.195695,48107.583525,33664.932926,23232.912519,2.115286,6.044208,73.636364,233.545455,10101.272727,3.244545,2.092727,82977.152392,12860.033647,52837.531067,8496.045083,4.720893,6.57762,3300.043303,-19172.598141,1.090909,25.636364,273.181818,-2.605606,-0.533412
3,20170907000345,104.47,27.973333,4.45,3.2,0.46,6.188239,2,71,210,3.53,-0.421875,4.429957,-0.303371,3.185587,9808,0.0,4.528002,7.0,9.0,108,1,2,2,JUMBO,9.0,63.0,77.0,0,0,76.181818,282.545455,10320.636364,2.141818,0.880909,-39812.029305,39225.794933,-11636.814745,16186.365888,1.898686,3.154869,73.727273,257.454545,10390.181818,1.555455,1.293636,-37637.593645,-4639.830146,-26960.759277,-9328.539063,0.962418,4.505029,-2174.43566,15323.944532,2.454545,25.090909,-69.545455,0.936268,-1.35016
4,20170907000395,29.99,27.12,3.9,2.53,0.44,5.097758,7,71,216,5.01,-3.613974,1.466013,-2.34445,0.951029,8069,0.0,4.288088,3.0,6.0,35,1,1,10,SHOTGUN,7.0,63.0,77.0,7,0,76.909091,268.454545,9732.818182,2.644545,1.62,-31427.161922,20004.068984,-13921.201825,13577.076191,2.056883,7.423977,73.181818,242.454545,9967.181818,2.322727,2.121818,-54049.085285,-11151.98538,-50459.233839,-5737.78304,5.391251,7.864325,22621.923362,36538.032014,3.727273,26.0,-234.363636,-3.334368,-0.440348


In [11]:
df_train.head()

Unnamed: 0,GameId,PlayId,Team,X,Y,S,A,Dis,Orientation,Dir,NflId,YardLine,Quarter,GameClock,PossessionTeam,Down,Distance,FieldPosition,HomeScoreBeforePlay,VisitorScoreBeforePlay,NflIdRusher,OffenseFormation,OffensePersonnel,DefendersInTheBox,DefensePersonnel,PlayDirection,Yards,PlayerHeight,PlayerWeight,Position,HomeTeamAbbr,VisitorTeamAbbr,Location,StadiumType,Turf,GameWeather,Temperature,Humidity,WindSpeed,WindDirection,to_left,has_ball,offense_team,from_yardline,X_speed,Y_speed,X_acceleration,Y_acceleration,age,distance_from_ball,closest_opponent,opponents_in_6,teammates_in_6
0,2017090700,20170907000118,away,46.09,18.493333,1.69,1.13,0.4,81.99,1.620015,496723,45,1,14:14:00,NE,3,2,NE,0,0,2543773,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"2 DL, 3 LB, 6 DB",left,8,72,212,SS,NE,KC,"Foxborough, MA",Outdoor,0,Clear and warm,63.0,77.0,8,SW,True,False,home,1.09,1.687953,-0.083145,1.128632,-0.055594,10480,6.480872,4.59331,3.0,7.0
1,2017090700,20170907000118,away,45.33,20.693333,0.42,1.35,0.01,27.61,1.24442,2495116,45,1,14:14:00,NE,3,2,NE,0,0,2543773,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"2 DL, 3 LB, 6 DB",left,8,75,288,DE,NE,KC,"Foxborough, MA",Outdoor,0,Clear and warm,63.0,77.0,8,SW,True,False,home,0.33,0.397828,0.134657,1.278734,0.432828,10394,4.59331,4.59331,3.0,7.0
2,2017090700,20170907000118,away,46.0,20.133333,1.22,0.59,0.31,3.01,1.174083,2495493,45,1,14:14:00,NE,3,2,NE,0,0,2543773,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"2 DL, 3 LB, 6 DB",left,8,75,270,DE,NE,KC,"Foxborough, MA",Outdoor,0,Clear and warm,63.0,77.0,8,SW,True,False,home,1.0,1.12525,0.471395,0.544178,0.22797,10457,5.448982,4.59331,3.0,7.0
3,2017090700,20170907000118,away,48.54,25.633333,0.42,0.54,0.02,359.77,2.868623,2506353,45,1,14:14:00,NE,3,2,NE,0,0,2543773,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"2 DL, 3 LB, 6 DB",left,8,75,245,ILB,NE,KC,"Foxborough, MA",Outdoor,0,Clear and warm,63.0,77.0,8,SW,True,False,home,3.54,0.113229,-0.404449,0.14558,-0.520006,12709,7.820038,4.59331,3.0,7.0
4,2017090700,20170907000118,away,50.68,17.913333,1.82,2.43,0.16,12.63,1.844638,2530794,45,1,14:14:00,NE,3,2,NE,0,0,2543773,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"2 DL, 3 LB, 6 DB",left,8,72,206,FS,NE,KC,"Foxborough, MA",Outdoor,0,Clear and warm,63.0,77.0,8,SW,True,False,home,5.68,1.752185,-0.492187,2.339456,-0.657151,10980,10.622476,4.59331,3.0,7.0


In [12]:
transf_pipe = Pipeline([('trsf', transformation()), 
                        ('dummifier', dummify(drop_first=True)),
                        ('Imputer', df_imputer()),
                        ('scl', df_scaler(method='standard'))])

tmp = transf_pipe.fit_transform(df_train)

tmp.head()

Unnamed: 0,PlayId,X,Y,S,A,Dis,Dir,Yards,PlayerHeight,PlayerWeight,from_yardline,X_speed,Y_speed,X_acceleration,Y_acceleration,age,distance_from_ball,closest_opponent,opponents_in_6,teammates_in_6,YardLine,Quarter,Down,Distance,DefendersInTheBox,Temperature,Humidity,HomeScoreBeforePlay,VisitorScoreBeforePlay,poss_avg_height,poss_avg_weight,poss_avg_age,poss_avg_S,poss_avg_A,poss_x_momentum,poss_y_momentum,poss_x_force,poss_y_force,poss_std_X,poss_std_Y,def_avg_height,def_avg_weight,def_avg_age,def_avg_S,def_avg_A,def_x_momentum,def_y_momentum,def_x_force,def_y_force,def_std_X,def_std_Y,tot_x_momenumt,tot_x_force,height_diff,weight_diff,age_diff,X_diff,Y_diff,OffenseFormation_EMPTY,OffenseFormation_I_FORM,OffenseFormation_JUMBO,OffenseFormation_PISTOL,OffenseFormation_SHOTGUN,OffenseFormation_SINGLEBACK,OffenseFormation_WILDCAT
0,-1.008048,-0.466483,-1.131131,-0.535149,0.7803,-0.631894,-1.117867,0.588498,-0.605427,-0.901023,-1.253744,0.453306,0.393116,0.713108,1.238745,-0.038367,0.0,0.04393,0.046728,-0.058689,-0.51517,-1.315124,2.191672,-1.667696,-0.951527,0.153387,0.844803,-1.08475,-1.014376,-0.920559,-0.970356,1.214068,-1.048066,-0.774981,0.529482,0.822946,0.52684,0.913043,-1.061916,-0.365969,0.184076,-1.305392,0.963263,-1.404836,-1.689791,0.448414,1.293395,0.353257,0.622035,0.465031,-0.194409,0.624157,0.269215,-0.85848,0.383551,0.13801,-0.874806,-0.615776,-0.031522,-0.512567,-0.151665,-0.164432,1.541659,-0.89043,-0.059228
1,-1.008048,-0.162534,-0.16688,-1.032635,-0.213714,-1.002146,0.959133,-0.188363,-0.605427,-0.901023,-0.912343,-0.681514,-0.380761,-0.892431,0.000925,-0.038367,0.0,-0.268025,0.046728,-0.058689,-0.197209,-1.315124,-0.77135,0.440227,-0.951527,0.153387,0.844803,-1.08475,-1.014376,-0.920559,-0.970356,1.214068,-1.064274,-0.290668,0.049887,0.361537,0.00551,0.717687,-0.50696,0.254722,0.184076,-1.305392,0.963263,-0.948316,-0.298269,-0.111876,1.404586,-0.123796,1.132223,0.557264,-0.07442,0.673283,0.341828,-0.85848,0.383551,0.13801,-0.773238,0.970161,-0.031522,-0.512567,-0.151665,-0.164432,1.541659,-0.89043,-0.059228
2,-1.008048,0.724382,2.136448,1.332603,-0.203139,1.404492,-0.949607,0.122381,-0.605427,-0.901023,-1.349764,1.348874,0.800219,0.939898,-0.162286,-0.038367,0.0,-0.335517,-0.620799,-0.690894,0.677184,-1.315124,-0.77135,0.440227,0.057489,0.153387,0.844803,-1.08475,-1.014376,-0.920559,-0.970356,1.214068,1.113848,-0.614343,1.600146,1.21172,1.265726,0.355617,-0.230577,-0.257264,0.184076,-1.305392,0.963263,1.309985,0.928233,1.783938,1.933179,1.753806,1.512413,-0.00609,-0.492398,0.176735,-1.760018,-0.85848,0.383551,0.13801,-0.076388,0.485702,-0.031522,-0.512567,-0.151665,-0.164432,-0.648652,1.123053,-0.059228
3,-1.008048,2.035562,0.34815,0.180531,0.621681,0.10861,1.219236,-0.343735,-0.067613,-0.530013,-1.488458,-0.158859,1.224851,-0.187307,1.366958,0.367661,0.0,-0.02275,2.716836,1.205722,1.988773,-1.315124,0.710161,-1.667696,2.07552,0.153387,0.844803,-1.08475,-1.014376,1.653346,2.847927,1.06395,-0.999442,-2.033715,-0.815595,0.558145,-0.509398,-0.3147,-0.756137,-1.922593,0.354311,2.390808,1.723931,-1.065962,-1.03194,-0.85636,0.532443,-0.943693,-0.197934,-3.09531,-1.905388,-0.301047,1.340842,1.034191,0.300084,-0.543631,3.011487,-0.952106,-0.031522,-0.512567,6.593488,-0.164432,-0.648652,-0.89043,-0.059228
4,-1.008048,-0.912118,0.103987,-0.299498,-0.086819,-0.076516,0.777146,0.433126,-0.067613,-0.084802,0.090525,-1.180147,-0.975211,-1.28137,-0.812999,-1.170645,0.0,-0.267704,0.046728,-0.690894,-0.912622,-1.315124,-0.77135,0.440227,0.057489,0.153387,0.844803,-0.403623,-1.014376,2.940298,0.545072,-0.575693,-0.309973,-0.084476,-0.65495,-0.856303,-0.59891,-0.562914,-0.372287,0.537993,-0.667097,0.071899,0.610216,0.013316,0.999593,-1.215613,0.011192,-1.738035,0.146615,0.5449,0.384813,1.863039,3.247756,2.800684,0.439195,-0.871433,-0.711736,0.649533,-0.031522,-0.512567,-0.151665,-0.164432,1.541659,-0.89043,-0.059228


# Targets preparation

These are targets that ultimatively were not used but might have led to richer models.

In [31]:
def create_targets(data):
    unique_plays = data[['PlayId', 'Yards']].drop_duplicates()
    simple = unique_plays['Yards'].reset_index(drop=True)
    # As total distance of the rusher
    rushers = data[data.has_ball].copy().reset_index(drop=True)
    tot_dist = rushers['Yards'] + rushers['from_yardline']
    # As percentage of Yards remaining to be gained
    unique_plays = data[['PlayId', 'YardLine', 'Yards']].drop_duplicates().reset_index(drop=True)
    perc_gained = unique_plays['Yards'] / (110 - unique_plays['YardLine'])
    # As both
    perc_dist = tot_dist / (110 - rushers['X'])
    
    return simple, tot_dist, perc_gained, perc_dist

In [32]:
y1, y2, y3, y4 = create_targets(df_train)

In [33]:
targets = pd.DataFrame({'simple': y1, 'total_distance': y2, 'perc_gained': y3, 'perc_dist': y4})

targets.head()

Unnamed: 0,simple,total_distance,perc_gained,perc_dist
0,8,11.75,0.123077,0.170909
1,3,7.07,0.052632,0.115769
2,5,8.66,0.142857,0.224004
3,2,5.53,1.0,1.0
4,7,12.01,0.093333,0.150106


In [35]:
targets.describe()

Unnamed: 0,simple,total_distance,perc_gained,perc_dist
count,23171.0,23171.0,23171.0,23171.0
mean,4.212334,9.137484,0.117923,0.224099
std,6.4363,6.504946,0.246709,0.217772
min,-14.0,-8.4,-5.0,-0.775148
25%,1.0,5.71,0.016393,0.094567
50%,3.0,7.81,0.059701,0.144673
75%,6.0,10.635,0.139535,0.257518
max,99.0,104.0,1.0,1.0
