In [1]:
import numpy as np 
import pandas as pd 

from scipy import stats
import math

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error

from sklearn.linear_model import Lasso, Ridge, SGDRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
import xgboost as xgb
import lightgbm as lgb

import warnings

import sys
sys.path.append("..")
import source.df_pipeline as dfp
from source.nfl_transf import transformation

# Use fully-qualified option names: the bare 'max_columns' / 'max_rows' patterns
# match more than one option on pandas versions that also define
# 'styler.render.max_columns' / 'styler.render.max_rows', which makes
# pd.set_option raise an OptionError there.
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 80)

In [2]:
def cv_score(df_train, y_train, kfolds, pipeline, imp_coef=False):
    """Return out-of-fold predictions of ``pipeline`` over the splits of ``kfolds``.

    For every split the pipeline is refitted on the training rows and used to
    predict the held-out rows, so each row of ``df_train`` receives exactly one
    prediction made by a model that did not see it (provided the splitter
    covers every row once).

    Parameters
    ----------
    df_train : pandas.DataFrame
        Feature frame; it is copied and never modified here.
    y_train : pandas.Series or pandas.DataFrame
        Target aligned positionally with ``df_train`` (accessed through ``.iloc``).
    kfolds : splitter
        Object exposing ``split(X)`` that yields ``(train_index, test_index)``
        positional index arrays.
    pipeline : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``.
    imp_coef : bool, default False
        When True, also collect per-fold coefficients (or, if the final step
        has no ``coef_``, feature importances) and summarise them.

    Returns
    -------
    numpy.ndarray
        Out-of-fold predictions, one per row of ``df_train``.
    (numpy.ndarray, pandas.DataFrame)
        Only when ``imp_coef`` is True: the predictions plus a frame indexed by
        feature name with the ``mean`` and ``std`` of the score across folds,
        sorted by decreasing absolute mean.
    """
    oof = np.zeros(len(df_train))
    train = df_train.copy()

    # Per-fold tables are gathered in a list and concatenated once at the end;
    # growing a DataFrame with pd.concat inside the loop re-copies all earlier
    # folds on every iteration and starts from an empty frame.
    fold_tables = []

    for n_fold, (train_index, test_index) in enumerate(kfolds.split(train.values)):

        trn_data = train.iloc[train_index][:]
        val_data = train.iloc[test_index][:]

        trn_target = y_train.iloc[train_index].values.ravel()
        val_target = y_train.iloc[test_index].values.ravel()

        pipeline.fit(trn_data, trn_target)

        oof[test_index] = pipeline.predict(val_data).ravel()

        if imp_coef:
            # Linear models expose coef_; fall back to feature_importances_
            # when the final step has no such attribute.
            try:
                fold_df = get_coef(pipeline)
            except AttributeError:
                fold_df = get_feature_importance(pipeline)

            fold_df['fold'] = n_fold + 1
            fold_tables.append(fold_df)

    if imp_coef:
        feat_df = pd.concat(fold_tables, axis=0)
        feat_df = feat_df.groupby('feat')['score'].agg(['mean', 'std'])
        # Temporary helper column: rank by magnitude of the mean score.
        feat_df['abs_sco'] = (abs(feat_df['mean']))
        feat_df = feat_df.sort_values(by=['abs_sco'], ascending=False)
        del feat_df['abs_sco']
        return oof, feat_df
    else:
        return oof
    

def get_coef(pipe):
    """Tabulate the coefficients of a fitted pipeline's final estimator.

    The coefficients are read from ``coef_`` on the last step and paired with
    the names returned by ``get_feature_names()`` on the second-to-last step.

    Returns a DataFrame with columns ``feat`` and ``score``, ordered by
    decreasing absolute coefficient. An AttributeError propagates when the
    final step has no ``coef_``.
    """
    # Read the coefficients first so a missing coef_ fails before anything else.
    scores = pipe.steps[-1][1].coef_.tolist()
    names = pipe.steps[-2][1].get_feature_names()

    table = pd.DataFrame({'feat': names, 'score': scores})
    ranked = (
        table
        .assign(abs_res=abs(table['score']))
        .sort_values(by=['abs_res'], ascending=False)
        .drop(columns='abs_res')
    )
    return ranked


def get_feature_importance(pipe):
    """Tabulate the feature importances of a fitted pipeline's final estimator.

    Importances come from ``feature_importances_`` on the last step; feature
    names come from ``get_feature_names()`` on the second-to-last step.

    Returns a DataFrame with columns ``feat`` and ``score``, ordered by
    decreasing importance.
    """
    # `pipe` is a pipeline: steps are (name, object) pairs, hence the [1].
    importances = pipe.steps[-1][1].feature_importances_.tolist()
    names = pipe.steps[-2][1].get_feature_names()

    table = pd.DataFrame({'feat': names, 'score': importances})
    return table.sort_values(by=['score'], ascending=False)

In [3]:
def create_targets(data):
    """Build four alternative regression targets from the player-level frame.

    Returns, in order:
      * ``simple``      - ``Yards`` for each distinct (PlayId, Yards) pair;
      * ``tot_dist``    - ``Yards + from_yardline`` of the rows flagged ``has_ball``;
      * ``perc_gained`` - ``Yards / (110 - YardLine)`` per distinct
                          (PlayId, YardLine, Yards) triple;
      * ``perc_dist``   - ``tot_dist / (110 - X)`` of the ``has_ball`` rows.

    Every returned Series carries a fresh 0..n-1 index.
    """
    field_end = 110

    # Plain yards gained.
    simple = (
        data[['PlayId', 'Yards']]
        .drop_duplicates()['Yards']
        .reset_index(drop=True)
    )

    # Total distance covered by the ball carrier.
    ball_carriers = data.loc[data['has_ball']].reset_index(drop=True)
    tot_dist = ball_carriers['Yards'] + ball_carriers['from_yardline']

    # Yards gained as a share of the yards still available.
    per_play = (
        data[['PlayId', 'YardLine', 'Yards']]
        .drop_duplicates()
        .reset_index(drop=True)
    )
    perc_gained = per_play['Yards'] / (field_end - per_play['YardLine'])

    # Total distance as a share of the distance still available to the carrier.
    perc_dist = tot_dist / (field_end - ball_carriers['X'])

    return simple, tot_dist, perc_gained, perc_dist

In [4]:
# Load the pre-processed training table; WindSpeed is forced to a string
# (object) column instead of letting pandas infer its dtype.
df_train = pd.read_csv('../data_processed/train_processed.csv', dtype={'WindSpeed': 'object'})

# Preview the first rows.
df_train.head()

Unnamed: 0,GameId,PlayId,Team,X,Y,S,A,Dis,Orientation,Dir,NflId,YardLine,Quarter,GameClock,PossessionTeam,Down,Distance,FieldPosition,HomeScoreBeforePlay,VisitorScoreBeforePlay,NflIdRusher,OffenseFormation,OffensePersonnel,DefendersInTheBox,DefensePersonnel,PlayDirection,Yards,PlayerHeight,PlayerWeight,Position,HomeTeamAbbr,VisitorTeamAbbr,Location,StadiumType,Turf,GameWeather,Temperature,Humidity,WindSpeed,WindDirection,to_left,has_ball,offense_team,from_yardline,X_speed,Y_speed,X_acceleration,Y_acceleration,age,distance_from_ball,closest_opponent,opponents_in_6,teammates_in_6
0,2017090700,20170907000118,away,46.09,18.493333,1.69,1.13,0.4,81.99,1.620015,496723,45,1,14:14:00,NE,3,2,NE,0,0,2543773,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"2 DL, 3 LB, 6 DB",left,8,72,212,SS,NE,KC,"Foxborough, MA",Outdoor,0,Clear and warm,63.0,77.0,8,SW,True,False,home,1.09,1.687953,-0.083145,1.128632,-0.055594,10480,6.480872,4.59331,3.0,7.0
1,2017090700,20170907000118,away,45.33,20.693333,0.42,1.35,0.01,27.61,1.24442,2495116,45,1,14:14:00,NE,3,2,NE,0,0,2543773,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"2 DL, 3 LB, 6 DB",left,8,75,288,DE,NE,KC,"Foxborough, MA",Outdoor,0,Clear and warm,63.0,77.0,8,SW,True,False,home,0.33,0.397828,0.134657,1.278734,0.432828,10394,4.59331,4.59331,3.0,7.0
2,2017090700,20170907000118,away,46.0,20.133333,1.22,0.59,0.31,3.01,1.174083,2495493,45,1,14:14:00,NE,3,2,NE,0,0,2543773,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"2 DL, 3 LB, 6 DB",left,8,75,270,DE,NE,KC,"Foxborough, MA",Outdoor,0,Clear and warm,63.0,77.0,8,SW,True,False,home,1.0,1.12525,0.471395,0.544178,0.22797,10457,5.448982,4.59331,3.0,7.0
3,2017090700,20170907000118,away,48.54,25.633333,0.42,0.54,0.02,359.77,2.868623,2506353,45,1,14:14:00,NE,3,2,NE,0,0,2543773,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"2 DL, 3 LB, 6 DB",left,8,75,245,ILB,NE,KC,"Foxborough, MA",Outdoor,0,Clear and warm,63.0,77.0,8,SW,True,False,home,3.54,0.113229,-0.404449,0.14558,-0.520006,12709,7.820038,4.59331,3.0,7.0
4,2017090700,20170907000118,away,50.68,17.913333,1.82,2.43,0.16,12.63,1.844638,2530794,45,1,14:14:00,NE,3,2,NE,0,0,2543773,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"2 DL, 3 LB, 6 DB",left,8,72,206,FS,NE,KC,"Foxborough, MA",Outdoor,0,Clear and warm,63.0,77.0,8,SW,True,False,home,5.68,1.752185,-0.492187,2.339456,-0.657151,10980,10.622476,4.59331,3.0,7.0


In [5]:
# Shuffled 5-fold splitter with a fixed seed, so the folds are reproducible
# and shared by every model evaluated later on.
folds = KFold(5, shuffle=True, random_state=541)

# Per ball-carrier row: the fields needed later to convert predictions of the
# alternative targets back to yards gained.
plays = df_train[df_train.has_ball][['PlayId', 
                                     'YardLine', 
                                     'from_yardline', 
                                     'X']].drop_duplicates().reset_index(drop=True)


# y1: yards, y2: total distance, y3: share of yards gained, y4: share of total
# distance (see create_targets for the exact formulas).
y1, y2, y3, y4 = create_targets(df_train)

In [6]:
# Single-step pipeline around the project's `transformation` transformer.
# NOTE(review): judging by the previews it appears to reduce the player-level
# frame to one feature row per play — confirm in source/nfl_transf.py.
transf_pipe = Pipeline([('trsf', transformation())])

full_train = transf_pipe.fit_transform(df_train)

In [7]:
# Preview the transformed feature frame.
full_train.head()

Unnamed: 0,S,A,PlayerHeight,PlayerWeight,from_yardline,X_speed,Y_speed,X_acceleration,Y_acceleration,age,closest_opponent,opponents_in_6,teammates_in_6,YardLine,DefendersInTheBox,Turf,poss_avg_height,poss_avg_weight,poss_avg_age,poss_avg_S,poss_avg_A,poss_x_momentum,poss_y_momentum,poss_x_force,poss_y_force,poss_std_X,poss_std_Y,def_avg_height,def_avg_weight,def_avg_age,def_avg_S,def_avg_A,def_x_momentum,def_y_momentum,def_x_force,def_y_force,def_std_X,def_std_Y,tot_x_momenumt,tot_x_force,height_diff,weight_diff,age_diff,X_diff,Y_diff,Team_offense,Team_defense,is_home
0,3.63,3.35,70,205,3.75,1.491487,3.309436,1.376441,3.054163,9349,4.59331,3.0,7.0,45,6.0,0,74.727273,259.181818,10374.454545,2.106364,1.358182,30394.065885,42824.323203,14808.310917,29092.727988,1.772665,5.855606,73.636364,233.545455,10101.272727,1.314545,1.025455,21967.405111,4867.00364,11406.041625,-783.13754,5.294079,7.014714,8426.660774,3402.269293,1.090909,25.636364,273.181818,-3.521414,-1.159107,NE,KC,1.0
1,3.06,2.41,70,205,4.07,-2.055465,2.266862,-1.618847,1.785339,9349,4.287773,3.0,7.0,53,6.0,0,74.727273,259.181818,10374.454545,2.094545,1.541818,5361.669172,36553.979512,1503.78838,27039.089576,2.00138,6.932502,73.636364,233.545455,10101.272727,1.639091,1.592727,-3627.89183,6256.141431,-2706.292685,4533.856018,5.406292,7.190716,8989.561002,4210.081065,1.090909,25.636364,273.181818,-3.404912,-0.258215,NE,KC,1.0
2,5.77,2.42,70,205,3.66,4.29064,3.857889,1.799541,1.61804,9349,4.22167,2.0,6.0,75,7.0,0,74.727273,259.181818,10374.454545,3.682727,1.419091,86277.195695,48107.583525,33664.932926,23232.912519,2.115286,6.044208,73.636364,233.545455,10101.272727,3.244545,2.092727,82977.152392,12860.033647,52837.531067,8496.045083,4.720893,6.57762,3300.043303,-19172.598141,1.090909,25.636364,273.181818,-2.605606,-0.533412,NE,KC,1.0
3,4.45,3.2,71,210,3.53,-0.421875,4.429957,-0.303371,3.185587,9808,4.528002,7.0,9.0,108,9.0,0,76.181818,282.545455,10320.636364,2.141818,0.880909,-39812.029305,39225.794933,-11636.814745,16186.365888,1.898686,3.154869,73.727273,257.454545,10390.181818,1.555455,1.293636,-37637.593645,-4639.830146,-26960.759277,-9328.539063,0.962418,4.505029,-2174.43566,15323.944532,2.454545,25.090909,-69.545455,0.936268,-1.35016,NE,KC,1.0
4,3.9,2.53,71,216,5.01,-3.613974,1.466013,-2.34445,0.951029,8069,4.288088,3.0,6.0,35,7.0,0,76.909091,268.454545,9732.818182,2.644545,1.62,-31427.161922,20004.068984,-13921.201825,13577.076191,2.056883,7.423977,73.181818,242.454545,9967.181818,2.322727,2.121818,-54049.085285,-11151.98538,-50459.233839,-5737.78304,5.391251,7.864325,22621.923362,36538.032014,3.727273,26.0,-234.363636,-3.334368,-0.440348,KC,NE,0.0


In [8]:
def get_crps(train, inf_pred, y):
    """Score point predictions with a CRPS-style metric over 199 yard bins.

    Bin ``j`` (0..198) stands for ``j - 99`` yards. Each prediction is turned
    into a cumulative curve that is 0 below ``pred - 10``, rises linearly by
    0.05 per bin up to ``pred + 10`` and is 1 afterwards; each true value is
    turned into a step that jumps from 0 to 1 at its own bin. The score is the
    sum of squared differences between the two curves divided by
    ``199 * len(train)``.

    Parameters
    ----------
    train : sized
        Only ``len(train)`` is used, as the number of rows to normalise by.
    inf_pred : array-like of float
        Point predictions, in yards.
    y : array-like
        True values in yards, in the same order as ``inf_pred``.

    Returns
    -------
    float
        The score (lower is better).

    Raises
    ------
    ValueError
        If ``inf_pred`` and ``y`` do not have the same number of elements.
    """
    n_bins = 199
    offset = 99        # shifts yards so that -99 lands on bin 0
    half_width = 10    # the predicted curve ramps over +/- 10 bins
    step = 0.05        # ramp increment per bin (1 / (2 * half_width))

    # Use the argument itself: reading a notebook-level `inf_preds` instead
    # would silently score whatever that global currently holds.
    preds = np.asarray(inf_pred, dtype=float).ravel() + offset
    truth = np.asarray(y, dtype=float).ravel() + offset
    if len(preds) != len(truth):
        raise ValueError(
            f'inf_pred and y must have the same length, got {len(preds)} and {len(truth)}'
        )

    bins = np.arange(n_bins)

    # Clipping the linear ramp to [0, 1] reproduces the three cases:
    # 0 below pred - 10, linear inside the window, 1 from pred + 10 onwards.
    y_pred = np.clip((bins + half_width - preds[:, None]) * step, 0.0, 1.0)
    y_true = (bins >= truth[:, None]).astype(float)

    return np.sum(np.power(y_pred - y_true, 2)) / (n_bins * (len(train)))

In [9]:
# Silence one specific, known warning (matched by its message text) so it does
# not flood the output of the cross-validation loops below.
# NOTE(review): presumably emitted by dfp.dummify when a fold's dummy columns
# differ from the training ones — confirm in source/df_pipeline.py.
warnings.filterwarnings("ignore", 
                        message="The dummies in this set do not match the ones in the train set, we corrected the issue.")

In [11]:
models = [('lasso', Lasso(alpha=0.01)), ('ridge', Ridge()), ('sgd', SGDRegressor()), 
          ('forest', RandomForestRegressor(n_estimators=100, n_jobs=-1, max_depth=10, 
                                           criterion='mae', max_features='sqrt')), 
          ('xtree', ExtraTreesRegressor(n_estimators=200, n_jobs=-1, max_depth=10,
                                        criterion='mae', max_features='sqrt')), 
          ('svr', SVR(gamma='auto')),
          ('xgb', xgb.XGBRegressor(n_estimators=600, objective='reg:squarederror', n_jobs=-1)), 
          ('lgb', lgb.LGBMRegressor(n_estimators=600, n_jobs=-1))]

mod_name = []
rmse_train_simple = []
mae_train_simple = []
crps_train_simple = []

rmse_train_tot = []
mae_train_tot = []
crps_train_tot = []

mae_train_prcgained = []
rmse_train_prcgained = []
crps_train_prcgained = []

rmse_train_prctot = []
mae_train_prctot = []
crps_train_prctot = []

for model in models:
    
    train = full_train.copy()
    mod_play = plays.copy()
    print(model[0])
    mod_name.append(model[0])
    
    model_pipe = Pipeline([('dummifier', dfp.dummify(drop_first=True)),
                           ('Imputer', dfp.df_imputer()),
                           ('scl', dfp.df_scaler(method='standard'))] + [model])
            
    %time inf_preds = cv_score(train, y1, folds, model_pipe)
    
    mod_play['simple'] = inf_preds
    mod_play.loc[mod_play.simple > (110 - mod_play.YardLine), 'simple'] = (110 - mod_play.YardLine)
    inf_preds = mod_play['simple']
    
    avg_preds = inf_preds / 4
    
    crps = get_crps(train, inf_preds, y1)

    rmse_train_simple.append(np.sqrt(mean_squared_error(y1, inf_preds)))
    mae_train_simple.append(mean_absolute_error(y1, inf_preds))
    crps_train_simple.append(crps)
    
    print(f'\tTrain set RMSE: {round(np.sqrt(mean_squared_error(y1, inf_preds)), 4)}')
    print(f'\tTrain set MAE: {round(mean_absolute_error(y1, inf_preds), 4)}')
    print(f'\tTrain set CRPS: {round(crps, 4)}')
    
    print('- -'*20)
    
    %time inf_preds = cv_score(train, y2, folds, model_pipe)
    
    mod_play['total'] = inf_preds
    mod_play['total'] = mod_play['total'] - mod_play['from_yardline']
    inf_preds = mod_play['total']
    
    avg_preds += inf_preds / 4
    
    crps = get_crps(train, inf_preds, y1)
    
    rmse_train_tot.append(np.sqrt(mean_squared_error(y1, inf_preds)))
    mae_train_tot.append(mean_absolute_error(y1, inf_preds))
    crps_train_tot.append(crps)
    
    print(f'\tTrain set RMSE full distance: {round(np.sqrt(mean_squared_error(y1, inf_preds)), 4)}')
    print(f'\tTrain set MAE full distance: {round(mean_absolute_error(y1, inf_preds), 4)}')
    print(f'\tTrain set CRPS full distance: {round(crps, 4)}')
    
    print('- -'*20)
    
    %time inf_preds = cv_score(train, y3, folds, model_pipe)
    
    mod_play['per_gain'] = inf_preds
    mod_play['per_gain'] = mod_play['per_gain'] * (110 - mod_play['YardLine'])
    inf_preds = mod_play['per_gain']
    
    avg_preds += inf_preds / 4
    
    crps = get_crps(train, inf_preds, y1)
    
    rmse_train_prcgained.append(np.sqrt(mean_squared_error(y1, inf_preds)))
    mae_train_prcgained.append(mean_absolute_error(y1, inf_preds))
    crps_train_prcgained.append(crps)
    
    print(f'\tTrain set RMSE percentage gained: {round(np.sqrt(mean_squared_error(y1, inf_preds)), 4)}')
    print(f'\tTrain set MAE percentage gained: {round(mean_absolute_error(y1, inf_preds), 4)}')
    print(f'\tTrain set CRPS percentage gained: {round(crps, 4)}')
    
    print('- -'*20)
    
    %time inf_preds = cv_score(train, y4, folds, model_pipe)
    
    mod_play['per_tot'] = inf_preds
    mod_play['per_tot'] = mod_play['per_tot'] * (110 - mod_play['X']) - mod_play['from_yardline']
    inf_preds = mod_play['per_tot']
    
    avg_preds += inf_preds / 4
    
    crps = get_crps(train, inf_preds, y1)
    
    rmse_train_prctot.append(np.sqrt(mean_squared_error(y1, inf_preds)))
    mae_train_prctot.append(mean_absolute_error(y1, inf_preds))
    crps_train_prctot.append(crps)
    
    print(f'\tTrain set RMSE full distance percentage: {round(np.sqrt(mean_squared_error(y1, inf_preds)), 4)}')
    print(f'\tTrain set MAE full distance percentage: {round(mean_absolute_error(y1, inf_preds), 4)}')
    print(f'\tTrain set CRPS full distance percentage: {round(crps, 4)}')
    
    print('- -'*20)
    
    print(f'\tTrain set RMSE average: {round(np.sqrt(mean_squared_error(y1, avg_preds)), 4)}')
    print(f'\tTrain set MAE average: {round(mean_absolute_error(y1, avg_preds), 4)}')
    print(f'\tTrain set CRPS average: {round(crps, 4)}')

    print('_'*60)
    print('\n')
    
    
results = pd.DataFrame({'model_name': mod_name, 
                        'rmse_train': rmse_train_simple, 
                        'mae_train': mae_train_simple, 
                        'crps_train': crps_train_simple, 
                        'rmse_tot_dist': rmse_train_tot, 
                        'mae_tot_dist': mae_train_tot, 
                        'crps_tot_dist': crps_train_tot,
                        'rmse_perc_gained': rmse_train_prcgained, 
                        'mae_perc_gained': mae_train_prcgained, 
                        'crps_perc_gained': crps_train_prcgained,
                        'rmse_perc_tot': rmse_train_prctot, 
                        'mae_perc_tot': mae_train_prctot, 
                        'crps_perc_tot': crps_train_prctot,})

results

lasso
CPU times: user 2.79 s, sys: 1.62 s, total: 4.41 s
Wall time: 851 ms
	Train set RMSE: 6.1954
	Train set MAE: 3.6104
	Train set CRPS: 0.0151
- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -
CPU times: user 2.94 s, sys: 1.46 s, total: 4.4 s
Wall time: 827 ms
	Train set RMSE full distance: 6.2017
	Train set MAE full distance: 3.63
	Train set CRPS full distance: 0.0151
- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -
CPU times: user 1.87 s, sys: 1.28 s, total: 3.15 s
Wall time: 618 ms
	Train set RMSE percentage gained: 6.4243
	Train set MAE percentage gained: 4.0092
	Train set CRPS percentage gained: 0.0159
- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -
CPU times: user 2.27 s, sys: 1.2 s, total: 3.47 s
Wall time: 670 ms
	Train set RMSE full distance percentage: 7.1448
	Train set MAE full distance percentage: 4.8095
	Train set CRPS full distance percentage: 0.0181
- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -
	Train set RMSE average

CPU times: user 51.6 s, sys: 184 ms, total: 51.8 s
Wall time: 9.49 s
	Train set RMSE full distance: 6.3046
	Train set MAE full distance: 3.7594
	Train set CRPS full distance: 0.0155
- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -
CPU times: user 44.9 s, sys: 156 ms, total: 45 s
Wall time: 8.08 s
	Train set RMSE percentage gained: 6.3527
	Train set MAE percentage gained: 3.831
	Train set CRPS percentage gained: 0.0157
- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -
CPU times: user 47.4 s, sys: 164 ms, total: 47.6 s
Wall time: 8.57 s
	Train set RMSE full distance percentage: 6.3165
	Train set MAE full distance percentage: 3.7924
	Train set CRPS full distance percentage: 0.0156
- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -
	Train set RMSE average: 6.1986
	Train set MAE average: 3.6604
	Train set CRPS average: 0.0156
____________________________________________________________




Unnamed: 0,model_name,rmse_train,mae_train,crps_train,rmse_tot_dist,mae_tot_dist,crps_tot_dist,rmse_perc_gained,mae_perc_gained,crps_perc_gained,rmse_perc_tot,mae_perc_tot,crps_perc_tot
0,lasso,6.195401,3.610358,0.015083,6.201707,3.629998,0.015107,6.424297,4.009212,0.015904,7.144809,4.809547,0.018107
1,ridge,6.198742,3.617378,0.015097,6.205318,3.637318,0.015123,6.69084,4.263648,0.016627,7.362668,4.964857,0.0187
2,sgd,6.253977,3.683091,0.015259,6.25112,3.687956,0.015251,7.02717,4.606883,0.01772,7.664589,5.227347,0.01963
3,forest,6.345162,3.441203,0.014934,6.325255,3.435232,0.014903,6.353085,3.577263,0.015181,6.635452,3.98355,0.016177
4,xtree,6.461531,3.547512,0.015183,6.467255,3.550303,0.015177,6.42892,3.639561,0.015285,6.689683,4.141738,0.016323
5,svr,6.328843,3.393583,0.014828,6.335183,3.403891,0.014844,7.233881,4.814618,0.018605,7.297453,4.933223,0.018936
6,xgb,6.234974,3.64077,0.015219,6.258257,3.658956,0.015265,6.418516,3.756062,0.015531,6.246161,3.695466,0.015327
7,lgb,6.290481,3.740162,0.015473,6.30457,3.759431,0.015496,6.352664,3.830991,0.015686,6.316501,3.792406,0.015593


In [12]:
# Correlation between the metric columns across models. `results` also holds
# the string column `model_name`; select the numeric columns explicitly instead
# of relying on corr() to drop non-numeric data, which recent pandas versions
# no longer do silently.
results.select_dtypes(include='number').corr()

Unnamed: 0,rmse_train,mae_train,crps_train,rmse_tot_dist,mae_tot_dist,crps_tot_dist,rmse_perc_gained,mae_perc_gained,crps_perc_gained,rmse_perc_tot,mae_perc_tot,crps_perc_tot
rmse_train,1.0,-0.45835,-0.122581,0.990204,-0.491415,-0.18746,-0.072582,-0.302176,-0.190163,-0.284222,-0.339526,-0.330228
mae_train,-0.45835,1.0,0.928362,-0.401737,0.997834,0.941869,-0.312408,-0.139778,-0.234155,-0.141748,-0.112521,-0.14117
crps_train,-0.122581,0.928362,1.0,-0.058792,0.915099,0.993511,-0.408186,-0.305192,-0.356301,-0.367425,-0.353206,-0.372843
rmse_tot_dist,0.990204,-0.401737,-0.058792,1.0,-0.429761,-0.112567,-0.08517,-0.307668,-0.196765,-0.341351,-0.384376,-0.376975
mae_tot_dist,-0.491415,0.997834,0.915099,-0.429761,1.0,0.935542,-0.303259,-0.120539,-0.217901,-0.139758,-0.103121,-0.132748
crps_tot_dist,-0.18746,0.941869,0.993511,-0.112567,0.935542,1.0,-0.397313,-0.279522,-0.335626,-0.376931,-0.350657,-0.371346
rmse_perc_gained,-0.072582,-0.312408,-0.408186,-0.08517,-0.303259,-0.397313,1.0,0.958417,0.983311,0.760063,0.761249,0.81188
mae_perc_gained,-0.302176,-0.139778,-0.305192,-0.307668,-0.120539,-0.279522,0.958417,1.0,0.98879,0.808284,0.833609,0.872928
crps_perc_gained,-0.190163,-0.234155,-0.356301,-0.196765,-0.217901,-0.335626,0.983311,0.98879,1.0,0.755221,0.773309,0.8223
rmse_perc_tot,-0.284222,-0.141748,-0.367425,-0.341351,-0.139758,-0.376931,0.760063,0.808284,0.755221,1.0,0.991798,0.990829


In [13]:
# Rank the models by RMSE on the plain Yards target (lowest first).
results.sort_values(by='rmse_train')

Unnamed: 0,model_name,rmse_train,mae_train,crps_train,rmse_tot_dist,mae_tot_dist,crps_tot_dist,rmse_perc_gained,mae_perc_gained,crps_perc_gained,rmse_perc_tot,mae_perc_tot,crps_perc_tot
0,lasso,6.195401,3.610358,0.015083,6.201707,3.629998,0.015107,6.424297,4.009212,0.015904,7.144809,4.809547,0.018107
1,ridge,6.198742,3.617378,0.015097,6.205318,3.637318,0.015123,6.69084,4.263648,0.016627,7.362668,4.964857,0.0187
6,xgb,6.234974,3.64077,0.015219,6.258257,3.658956,0.015265,6.418516,3.756062,0.015531,6.246161,3.695466,0.015327
2,sgd,6.253977,3.683091,0.015259,6.25112,3.687956,0.015251,7.02717,4.606883,0.01772,7.664589,5.227347,0.01963
7,lgb,6.290481,3.740162,0.015473,6.30457,3.759431,0.015496,6.352664,3.830991,0.015686,6.316501,3.792406,0.015593
5,svr,6.328843,3.393583,0.014828,6.335183,3.403891,0.014844,7.233881,4.814618,0.018605,7.297453,4.933223,0.018936
3,forest,6.345162,3.441203,0.014934,6.325255,3.435232,0.014903,6.353085,3.577263,0.015181,6.635452,3.98355,0.016177
4,xtree,6.461531,3.547512,0.015183,6.467255,3.550303,0.015177,6.42892,3.639561,0.015285,6.689683,4.141738,0.016323


In [14]:
# Rank the models by MAE on the plain Yards target (lowest first).
results.sort_values(by='mae_train')

Unnamed: 0,model_name,rmse_train,mae_train,crps_train,rmse_tot_dist,mae_tot_dist,crps_tot_dist,rmse_perc_gained,mae_perc_gained,crps_perc_gained,rmse_perc_tot,mae_perc_tot,crps_perc_tot
5,svr,6.328843,3.393583,0.014828,6.335183,3.403891,0.014844,7.233881,4.814618,0.018605,7.297453,4.933223,0.018936
3,forest,6.345162,3.441203,0.014934,6.325255,3.435232,0.014903,6.353085,3.577263,0.015181,6.635452,3.98355,0.016177
4,xtree,6.461531,3.547512,0.015183,6.467255,3.550303,0.015177,6.42892,3.639561,0.015285,6.689683,4.141738,0.016323
0,lasso,6.195401,3.610358,0.015083,6.201707,3.629998,0.015107,6.424297,4.009212,0.015904,7.144809,4.809547,0.018107
1,ridge,6.198742,3.617378,0.015097,6.205318,3.637318,0.015123,6.69084,4.263648,0.016627,7.362668,4.964857,0.0187
6,xgb,6.234974,3.64077,0.015219,6.258257,3.658956,0.015265,6.418516,3.756062,0.015531,6.246161,3.695466,0.015327
2,sgd,6.253977,3.683091,0.015259,6.25112,3.687956,0.015251,7.02717,4.606883,0.01772,7.664589,5.227347,0.01963
7,lgb,6.290481,3.740162,0.015473,6.30457,3.759431,0.015496,6.352664,3.830991,0.015686,6.316501,3.792406,0.015593


In [15]:
# Rank the models by the get_crps score on the plain Yards target (lowest first).
results.sort_values(by='crps_train')

Unnamed: 0,model_name,rmse_train,mae_train,crps_train,rmse_tot_dist,mae_tot_dist,crps_tot_dist,rmse_perc_gained,mae_perc_gained,crps_perc_gained,rmse_perc_tot,mae_perc_tot,crps_perc_tot
5,svr,6.328843,3.393583,0.014828,6.335183,3.403891,0.014844,7.233881,4.814618,0.018605,7.297453,4.933223,0.018936
3,forest,6.345162,3.441203,0.014934,6.325255,3.435232,0.014903,6.353085,3.577263,0.015181,6.635452,3.98355,0.016177
0,lasso,6.195401,3.610358,0.015083,6.201707,3.629998,0.015107,6.424297,4.009212,0.015904,7.144809,4.809547,0.018107
1,ridge,6.198742,3.617378,0.015097,6.205318,3.637318,0.015123,6.69084,4.263648,0.016627,7.362668,4.964857,0.0187
4,xtree,6.461531,3.547512,0.015183,6.467255,3.550303,0.015177,6.42892,3.639561,0.015285,6.689683,4.141738,0.016323
6,xgb,6.234974,3.64077,0.015219,6.258257,3.658956,0.015265,6.418516,3.756062,0.015531,6.246161,3.695466,0.015327
2,sgd,6.253977,3.683091,0.015259,6.25112,3.687956,0.015251,7.02717,4.606883,0.01772,7.664589,5.227347,0.01963
7,lgb,6.290481,3.740162,0.015473,6.30457,3.759431,0.015496,6.352664,3.830991,0.015686,6.316501,3.792406,0.015593


In [16]:
# Rank the models by MAE when trained on the total-distance target (lowest first).
results.sort_values(by='mae_tot_dist')

Unnamed: 0,model_name,rmse_train,mae_train,crps_train,rmse_tot_dist,mae_tot_dist,crps_tot_dist,rmse_perc_gained,mae_perc_gained,crps_perc_gained,rmse_perc_tot,mae_perc_tot,crps_perc_tot
5,svr,6.328843,3.393583,0.014828,6.335183,3.403891,0.014844,7.233881,4.814618,0.018605,7.297453,4.933223,0.018936
3,forest,6.345162,3.441203,0.014934,6.325255,3.435232,0.014903,6.353085,3.577263,0.015181,6.635452,3.98355,0.016177
4,xtree,6.461531,3.547512,0.015183,6.467255,3.550303,0.015177,6.42892,3.639561,0.015285,6.689683,4.141738,0.016323
0,lasso,6.195401,3.610358,0.015083,6.201707,3.629998,0.015107,6.424297,4.009212,0.015904,7.144809,4.809547,0.018107
1,ridge,6.198742,3.617378,0.015097,6.205318,3.637318,0.015123,6.69084,4.263648,0.016627,7.362668,4.964857,0.0187
6,xgb,6.234974,3.64077,0.015219,6.258257,3.658956,0.015265,6.418516,3.756062,0.015531,6.246161,3.695466,0.015327
2,sgd,6.253977,3.683091,0.015259,6.25112,3.687956,0.015251,7.02717,4.606883,0.01772,7.664589,5.227347,0.01963
7,lgb,6.290481,3.740162,0.015473,6.30457,3.759431,0.015496,6.352664,3.830991,0.015686,6.316501,3.792406,0.015593


In [17]:
# Rank the models by MAE when trained on the percentage-gained target (lowest first).
results.sort_values(by='mae_perc_gained')

Unnamed: 0,model_name,rmse_train,mae_train,crps_train,rmse_tot_dist,mae_tot_dist,crps_tot_dist,rmse_perc_gained,mae_perc_gained,crps_perc_gained,rmse_perc_tot,mae_perc_tot,crps_perc_tot
3,forest,6.345162,3.441203,0.014934,6.325255,3.435232,0.014903,6.353085,3.577263,0.015181,6.635452,3.98355,0.016177
4,xtree,6.461531,3.547512,0.015183,6.467255,3.550303,0.015177,6.42892,3.639561,0.015285,6.689683,4.141738,0.016323
6,xgb,6.234974,3.64077,0.015219,6.258257,3.658956,0.015265,6.418516,3.756062,0.015531,6.246161,3.695466,0.015327
7,lgb,6.290481,3.740162,0.015473,6.30457,3.759431,0.015496,6.352664,3.830991,0.015686,6.316501,3.792406,0.015593
0,lasso,6.195401,3.610358,0.015083,6.201707,3.629998,0.015107,6.424297,4.009212,0.015904,7.144809,4.809547,0.018107
1,ridge,6.198742,3.617378,0.015097,6.205318,3.637318,0.015123,6.69084,4.263648,0.016627,7.362668,4.964857,0.0187
2,sgd,6.253977,3.683091,0.015259,6.25112,3.687956,0.015251,7.02717,4.606883,0.01772,7.664589,5.227347,0.01963
5,svr,6.328843,3.393583,0.014828,6.335183,3.403891,0.014844,7.233881,4.814618,0.018605,7.297453,4.933223,0.018936


In [18]:
# Rank the models by MAE when trained on the percentage-of-total-distance target (lowest first).
results.sort_values(by='mae_perc_tot')

Unnamed: 0,model_name,rmse_train,mae_train,crps_train,rmse_tot_dist,mae_tot_dist,crps_tot_dist,rmse_perc_gained,mae_perc_gained,crps_perc_gained,rmse_perc_tot,mae_perc_tot,crps_perc_tot
6,xgb,6.234974,3.64077,0.015219,6.258257,3.658956,0.015265,6.418516,3.756062,0.015531,6.246161,3.695466,0.015327
7,lgb,6.290481,3.740162,0.015473,6.30457,3.759431,0.015496,6.352664,3.830991,0.015686,6.316501,3.792406,0.015593
3,forest,6.345162,3.441203,0.014934,6.325255,3.435232,0.014903,6.353085,3.577263,0.015181,6.635452,3.98355,0.016177
4,xtree,6.461531,3.547512,0.015183,6.467255,3.550303,0.015177,6.42892,3.639561,0.015285,6.689683,4.141738,0.016323
0,lasso,6.195401,3.610358,0.015083,6.201707,3.629998,0.015107,6.424297,4.009212,0.015904,7.144809,4.809547,0.018107
5,svr,6.328843,3.393583,0.014828,6.335183,3.403891,0.014844,7.233881,4.814618,0.018605,7.297453,4.933223,0.018936
1,ridge,6.198742,3.617378,0.015097,6.205318,3.637318,0.015123,6.69084,4.263648,0.016627,7.362668,4.964857,0.0187
2,sgd,6.253977,3.683091,0.015259,6.25112,3.687956,0.015251,7.02717,4.606883,0.01772,7.664589,5.227347,0.01963
