# NHL Goal Prediction Part 2: Electric Boogaloo

Going to look at trying to predict future NHL goals again by trying a couple different statistical techniques that may or may not work!

In [31]:
import os
import math
import itertools
import time
from typing import Tuple

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, RidgeCV, ElasticNetCV, LassoCV
from sklearn.ensemble import BaggingRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import KFold, train_test_split
from sklearn.feature_selection import SelectKBest, f_classif
import seaborn as sns

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [44]:
def prepare_data(dataframe: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    stat_columns = ['g', 'gp', 'toi', 'a1', 'a2', 'isf', 'iff', 'icf']
    dataframe = dataframe.merge(dataframe[stat_columns + ['api_id', 'prev_season']], how='left', left_on=['api_id', 'season'], 
                                right_on=['api_id', 'prev_season'] ,suffixes=[None, '_target'])
    dataframe['n_1'] = dataframe['season'] - 10001
    dataframe['n_2'] = dataframe['season'] - 20002
    dataframe['n_3'] = dataframe['season'] - 30003
    dataframe = dataframe.merge(dataframe[stat_columns + ['api_id', 'n_1']], 
                                how='left', left_on=['api_id', 'season'], 
                                right_on=['api_id', 'n_1'] ,suffixes=[None, '_n_1'])
    dataframe = dataframe.merge(dataframe[stat_columns + ['api_id', 'n_2']], 
                                how='left', left_on=['api_id', 'season'], 
                                right_on=['api_id', 'n_2'] ,suffixes=[None, '_n_2'])
    dataframe = dataframe.merge(dataframe[stat_columns + ['api_id', 'n_3']], 
                                how='left', left_on=['api_id', 'season'], 
                                right_on=['api_id', 'n_3'] ,suffixes=[None, '_n_3'])
    
    mask = (dataframe.season != 20192020)&(dataframe.g_target.isna())
    dataframe = dataframe[~mask]
    test_x = dataframe[dataframe.season == 20192020]
    dataframe = dataframe[dataframe.season != 20192020]
    
    drop_columns = ['n_1_n_1', 'n_2_n_2', 'n_3_n_3', 'prev_season_target', 'n_1', 'n_2', 'n_3']
    dataframe = dataframe.drop(drop_columns, axis=1)
    test_x = test_x.drop(drop_columns, axis=1)
    
    return dataframe, test_x

In [45]:
test_data_y = pd.read_csv('csv_files/20202021stats.csv')

#reading in csv files to dataframes
ev_df = pd.read_csv('ev_player_goal_data.csv')
pp_df = pd.read_csv('pp_player_goal_data.csv')

ev_df['prev_season'] = ev_df.season - 10001
pp_df['prev_season'] = pp_df.season = 10001
#combine player stats into totals
ev_df.drop(['Unnamed: 0'], axis=1, inplace=True)
pp_df.drop(['Unnamed: 0'], axis=1, inplace=True)

ev_df = ev_df.groupby(['player', 'api_id', 'season_age', 'position', 'season', 'prev_season'])\
        [['gp', 'toi', 'g', 'a1', 'a2', 'points', 'isf', 'iff', 'icf']].sum().reset_index()
pp_df = ev_df.groupby(['player', 'api_id', 'season_age', 'position', 'season', 'prev_season'])\
        [['gp', 'toi', 'g', 'a1', 'a2', 'points', 'isf', 'iff', 'icf']].sum().reset_index()

ev_f_df = ev_df[ev_df.position != 'D']
ev_d_df = ev_df[ev_df.position == 'D']

pp_f_df = pp_df[pp_df.position != 'D']
pp_d_df = pp_df[pp_df.position == 'D']

In [46]:
ev_f_df_train, ev_f_df_test_data_x = prepare_data(ev_f_df)

In [47]:
ev_f_df_train[ev_f_df_train.api_id == 8478421].head()

Unnamed: 0,player,api_id,season_age,position,season,prev_season,gp,toi,g,a1,a2,points,isf,iff,icf,g_target,gp_target,toi_target,a1_target,a2_target,isf_target,iff_target,icf_target,g_n_1,gp_n_1,toi_n_1,a1_n_1,a2_n_1,isf_n_1,iff_n_1,icf_n_1,g_n_2,gp_n_2,toi_n_2,a1_n_2,a2_n_2,isf_n_2,iff_n_2,icf_n_2,g_n_3,gp_n_3,toi_n_3,a1_n_3,a2_n_3,isf_n_3,iff_n_3,icf_n_3
0,A.J. Greer,8478421,19.0,L,20162017,20152016,5.0,63.8,0.0,0.0,1.0,1.0,6.0,7.0,8.0,0.0,17.0,122.27,2.0,1.0,13.0,15.0,20.0,0.0,17.0,122.27,2.0,1.0,13.0,15.0,20.0,1.0,15.0,93.8,1.0,0.0,9.0,11.0,14.0,,,,,,,,
1,A.J. Greer,8478421,20.0,L,20172018,20162017,17.0,122.27,0.0,2.0,1.0,3.0,13.0,15.0,20.0,1.0,15.0,93.8,1.0,0.0,9.0,11.0,14.0,1.0,15.0,93.8,1.0,0.0,9.0,11.0,14.0,,,,,,,,,,,,,,,,


In [61]:
model.fit(ev_f_df[['g', 'isf', 'gp', 'toi', 'a1', 'a2']], ev_f_df[['g_target']])
model.score(ev_f_df[['g', 'isf', 'gp', 'toi', 'a1', 'a2']], ev_f_df[['g_target']])

0.4312267720735582