<a href="https://colab.research.google.com/github/mikeytw11/Hockey-Model/blob/main/Pre_Season_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#Load the raw skater EV on-ice rates table (CSV hosted in the project's GitHub repository)
skater_EV_rates_raw = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/mikeytw11/Hockey-Model/main/Files/EV%20On-Ice%20Rates.csv'
)

In [3]:
#Keep only the identifying columns, GP, TOI and the four per-60 differential rates (G±, F±, C±, xG±)
skater_EV_rates = skater_EV_rates_raw.loc[
    :,
    ['Player', 'Season', 'Team', 'Position', 'GP', 'TOI']
    + ['G±/60', 'F±/60', 'C±/60', 'xG±/60'],
]

In [4]:
#Store time on ice in seconds as TOI_EV (TOI * 60; presumably TOI is recorded in minutes - confirm)
skater_EV_rates['TOI_EV'] = skater_EV_rates['TOI'].mul(60)

In [5]:
#Split skaters by position so each group can be compared against its own average.
#Every Position value other than exactly "D" is treated as a forward.
#.copy() makes each split an independent frame, so the per-second and impact columns
#added to it afterwards do not depend on SettingWithCopyWarning being silenced.
forwards_EV = skater_EV_rates.loc[skater_EV_rates['Position'] != "D"].copy()
defense_EV = skater_EV_rates.loc[skater_EV_rates['Position'] == "D"].copy()

In [6]:
#Convert the forwards' per-60-minute differential rates to per-second rates (60 min = 3600 s)
#The option below switches off pandas' chained-assignment warning for the rest of the session
pd.options.mode.chained_assignment = None
forwards_EV = forwards_EV.assign(**{
    per_second: forwards_EV[per_60] / 3600
    for per_second, per_60 in (
        ('G/s', 'G±/60'),
        ('F/s', 'F±/60'),
        ('C/s', 'C±/60'),
        ('xG/s', 'xG±/60'),
    )
})

In [7]:
#Convert the defensemen's per-60-minute differential rates to per-second rates (60 min = 3600 s)
defense_EV = defense_EV.assign(**{
    per_second: defense_EV[per_60] / 3600
    for per_second, per_60 in (
        ('G/s', 'G±/60'),
        ('F/s', 'F±/60'),
        ('C/s', 'C±/60'),
        ('xG/s', 'xG±/60'),
    )
})

In [8]:
#Average per-second rate of each stat, computed separately for forwards and defensemen
#NOTE(review): .mean() weights every row equally regardless of TOI - confirm that an
#unweighted (rather than TOI-weighted) average is what is intended here
avg_forward_G, avg_forward_F, avg_forward_C, avg_forward_xG = (
    forwards_EV[rate].mean() for rate in ('G/s', 'F/s', 'C/s', 'xG/s')
)

avg_defense_G, avg_defense_F, avg_defense_C, avg_defense_xG = (
    defense_EV[rate].mean() for rate in ('G/s', 'F/s', 'C/s', 'xG/s')
)


In [9]:
#Impact = (player's per-second rate - forward average rate) * player's EV time on ice in seconds
forwards_EV = forwards_EV.assign(**{
    impact: (forwards_EV[rate] - position_avg) * forwards_EV['TOI_EV']
    for impact, rate, position_avg in (
        ('G Impact', 'G/s', avg_forward_G),
        ('F Impact', 'F/s', avg_forward_F),
        ('C Impact', 'C/s', avg_forward_C),
        ('xG Impact', 'xG/s', avg_forward_xG),
    )
})

In [10]:
#Trim the forwards table down to the identifying columns, GP and the four impact columns
forwards_EV = forwards_EV.loc[
    :,
    ['Player', 'Season', 'Team', 'Position', 'GP']
    + ['G Impact', 'F Impact', 'C Impact', 'xG Impact'],
]

In [11]:
#Impact = (player's per-second rate - defense average rate) * player's EV time on ice in seconds
defense_EV = defense_EV.assign(**{
    impact: (defense_EV[rate] - position_avg) * defense_EV['TOI_EV']
    for impact, rate, position_avg in (
        ('G Impact', 'G/s', avg_defense_G),
        ('F Impact', 'F/s', avg_defense_F),
        ('C Impact', 'C/s', avg_defense_C),
        ('xG Impact', 'xG/s', avg_defense_xG),
    )
})

In [12]:
#Trim the defense table down to the identifying columns, GP and the four impact columns
defense_EV = defense_EV.loc[
    :,
    ['Player', 'Season', 'Team', 'Position', 'GP']
    + ['G Impact', 'F Impact', 'C Impact', 'xG Impact'],
]

In [13]:
#Stack the forwards and defense tables (forwards first) into a single EV skater table.
#Rows keep the index labels they had in the source tables. The last line displays the result.
concat_frames = [forwards_EV, defense_EV]
skater_EV = pd.concat(objs=concat_frames, axis=0)
skater_EV

Unnamed: 0,Player,Season,Team,Position,GP,G Impact,F Impact,C Impact,xG Impact
0,A.J. Greer,21-22,N.J,L,9,0.223663,15.804349,19.006918,1.023018
8,Adam Brooks,19-20,TOR,C,7,2.073018,0.283848,-1.472680,0.413894
9,Adam Brooks,20-21,TOR,C,11,3.385881,1.177490,6.324003,0.627826
10,Adam Brooks,21-22,MTL/VGK/WPG,C,25,2.741775,-19.820645,-24.740183,-1.169779
11,Adam Erne,19-20,DET,L,56,-19.885853,-93.055992,-124.865892,-5.684628
...,...,...,...,...,...,...,...,...,...
2466,Zach Whitecloud,20-21,VGK,D,51,10.242078,101.286169,117.323810,5.590053
2467,Zach Whitecloud,21-22,VGK,D,59,17.933012,158.044676,147.294129,10.132147
2475,Zdeno Chara,19-20,BOS,D,68,25.828124,-18.622659,-39.886535,1.990773
2476,Zdeno Chara,20-21,WSH,D,55,4.402653,-4.141999,34.558235,3.024267


In [14]:
#Load the raw skater PP on-ice rates table (CSV hosted in the project's GitHub repository)
skater_PP_rates_raw = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/mikeytw11/Hockey-Model/main/Files/PP%20On-Ice%20Rates.csv'
)

In [15]:
#Keep only the identifying columns, GP, TOI and the four per-60 differential rates (G±, F±, C±, xG±)
skater_PP_rates = skater_PP_rates_raw.loc[
    :,
    ['Player', 'Season', 'Team', 'Position', 'GP', 'TOI']
    + ['G±/60', 'F±/60', 'C±/60', 'xG±/60'],
]

In [16]:
#Store time on ice in seconds as TOI_PP (TOI * 60; presumably TOI is recorded in minutes - confirm)
skater_PP_rates['TOI_PP'] = skater_PP_rates['TOI'].mul(60)

In [17]:
#Split skaters by position so each group can be compared against its own average.
#Every Position value other than exactly "D" is treated as a forward.
#.copy() makes each split an independent frame, so the per-second and impact columns
#added to it afterwards do not depend on SettingWithCopyWarning being silenced.
forwards_PP = skater_PP_rates.loc[skater_PP_rates['Position'] != "D"].copy()
defense_PP = skater_PP_rates.loc[skater_PP_rates['Position'] == "D"].copy()

In [18]:
#Convert the forwards' per-60-minute differential rates to per-second rates (60 min = 3600 s)
#The option below switches off pandas' chained-assignment warning for the rest of the session
pd.options.mode.chained_assignment = None
forwards_PP = forwards_PP.assign(**{
    per_second: forwards_PP[per_60] / 3600
    for per_second, per_60 in (
        ('G/s', 'G±/60'),
        ('F/s', 'F±/60'),
        ('C/s', 'C±/60'),
        ('xG/s', 'xG±/60'),
    )
})

In [19]:
#Convert the defensemen's per-60-minute differential rates to per-second rates (60 min = 3600 s)
defense_PP = defense_PP.assign(**{
    per_second: defense_PP[per_60] / 3600
    for per_second, per_60 in (
        ('G/s', 'G±/60'),
        ('F/s', 'F±/60'),
        ('C/s', 'C±/60'),
        ('xG/s', 'xG±/60'),
    )
})

In [20]:
#Average per-second PP rate of each stat, computed separately for forwards and defensemen
#NOTE(review): .mean() weights every row equally regardless of TOI - confirm that an
#unweighted (rather than TOI-weighted) average is what is intended here
avg_forward_G_PP, avg_forward_F_PP, avg_forward_C_PP, avg_forward_xG_PP = (
    forwards_PP[rate].mean() for rate in ('G/s', 'F/s', 'C/s', 'xG/s')
)

avg_defense_G_PP, avg_defense_F_PP, avg_defense_C_PP, avg_defense_xG_PP = (
    defense_PP[rate].mean() for rate in ('G/s', 'F/s', 'C/s', 'xG/s')
)

In [21]:
#Impact = (player's per-second rate - forward average rate) * player's PP time on ice in seconds
forwards_PP = forwards_PP.assign(**{
    impact: (forwards_PP[rate] - position_avg) * forwards_PP['TOI_PP']
    for impact, rate, position_avg in (
        ('G Impact', 'G/s', avg_forward_G_PP),
        ('F Impact', 'F/s', avg_forward_F_PP),
        ('C Impact', 'C/s', avg_forward_C_PP),
        ('xG Impact', 'xG/s', avg_forward_xG_PP),
    )
})

In [22]:
#Trim the forwards table down to the identifying columns, GP and the four impact columns
forwards_PP = forwards_PP.loc[
    :,
    ['Player', 'Season', 'Team', 'Position', 'GP']
    + ['G Impact', 'F Impact', 'C Impact', 'xG Impact'],
]

In [23]:
#Impact = (player's per-second rate - defense average rate) * player's PP time on ice in seconds
defense_PP = defense_PP.assign(**{
    impact: (defense_PP[rate] - position_avg) * defense_PP['TOI_PP']
    for impact, rate, position_avg in (
        ('G Impact', 'G/s', avg_defense_G_PP),
        ('F Impact', 'F/s', avg_defense_F_PP),
        ('C Impact', 'C/s', avg_defense_C_PP),
        ('xG Impact', 'xG/s', avg_defense_xG_PP),
    )
})

In [24]:
#Trim the defense table down to the identifying columns, GP and the four impact columns
defense_PP = defense_PP.loc[
    :,
    ['Player', 'Season', 'Team', 'Position', 'GP']
    + ['G Impact', 'F Impact', 'C Impact', 'xG Impact'],
]

In [25]:
#Stack the forwards and defense tables (forwards first) into a single PP skater table.
#Rows keep the index labels they had in the source tables. The last line displays the result.
concat_frames = [forwards_PP, defense_PP]
skater_PP = pd.concat(objs=concat_frames, axis=0)
skater_PP

Unnamed: 0,Player,Season,Team,Position,GP,G Impact,F Impact,C Impact,xG Impact
6,Adam Erne,19-20,DET,L,55,-5.673323,-35.557240,-42.736553,-3.039736
7,Adam Erne,20-21,DET,L,44,-0.768159,-25.651894,-21.354477,-2.398876
8,Adam Erne,21-22,DET,L,77,-6.042810,-17.273673,-27.840542,-1.660783
12,Adam Gaudette,19-20,VAN,C,58,8.187853,0.013922,1.828173,-0.460368
13,Adam Gaudette,20-21,CHI/VAN,C,40,-2.091285,-13.279698,-14.400660,-1.034855
...,...,...,...,...,...,...,...,...,...
1254,Will Butcher,21-22,BUF,D,37,1.649754,-2.538496,-6.541114,-0.133780
1262,Wyatt Kalynuk,20-21,CHI,D,21,-0.241431,5.724222,9.543318,0.435168
1279,Zach Werenski,19-20,CBJ,D,63,-2.311842,5.722068,3.301522,-0.495800
1280,Zach Werenski,20-21,CBJ,D,33,-1.742317,-11.339614,-16.808672,-2.139052


In [26]:
#Load the raw skater SH on-ice rates table (CSV hosted in the project's GitHub repository)
skater_SH_rates_raw = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/mikeytw11/Hockey-Model/main/Files/SH%20On-Ice%20Rates.csv'
)

In [27]:
#Keep only the identifying columns, GP, TOI and the four per-60 differential rates (G±, F±, C±, xG±)
skater_SH_rates = skater_SH_rates_raw.loc[
    :,
    ['Player', 'Season', 'Team', 'Position', 'GP', 'TOI']
    + ['G±/60', 'F±/60', 'C±/60', 'xG±/60'],
]

In [28]:
#Store time on ice in seconds as TOI_SH (TOI * 60; presumably TOI is recorded in minutes - confirm)
skater_SH_rates['TOI_SH'] = skater_SH_rates['TOI'].mul(60)

In [29]:
#Split skaters by position so each group can be compared against its own average.
#Every Position value other than exactly "D" is treated as a forward.
#.copy() makes each split an independent frame, so the per-second and impact columns
#added to it afterwards do not depend on SettingWithCopyWarning being silenced.
forwards_SH = skater_SH_rates.loc[skater_SH_rates['Position'] != "D"].copy()
defense_SH = skater_SH_rates.loc[skater_SH_rates['Position'] == "D"].copy()

In [30]:
#Convert the forwards' per-60-minute differential rates to per-second rates (60 min = 3600 s)
#The option below switches off pandas' chained-assignment warning for the rest of the session
pd.options.mode.chained_assignment = None
forwards_SH = forwards_SH.assign(**{
    per_second: forwards_SH[per_60] / 3600
    for per_second, per_60 in (
        ('G/s', 'G±/60'),
        ('F/s', 'F±/60'),
        ('C/s', 'C±/60'),
        ('xG/s', 'xG±/60'),
    )
})

In [31]:
#Convert the defensemen's per-60-minute differential rates to per-second rates (60 min = 3600 s)
defense_SH = defense_SH.assign(**{
    per_second: defense_SH[per_60] / 3600
    for per_second, per_60 in (
        ('G/s', 'G±/60'),
        ('F/s', 'F±/60'),
        ('C/s', 'C±/60'),
        ('xG/s', 'xG±/60'),
    )
})

In [32]:
#Average per-second SH rate of each stat, computed separately for forwards and defensemen
#NOTE(review): .mean() weights every row equally regardless of TOI - confirm that an
#unweighted (rather than TOI-weighted) average is what is intended here
avg_forward_G_SH, avg_forward_F_SH, avg_forward_C_SH, avg_forward_xG_SH = (
    forwards_SH[rate].mean() for rate in ('G/s', 'F/s', 'C/s', 'xG/s')
)

avg_defense_G_SH, avg_defense_F_SH, avg_defense_C_SH, avg_defense_xG_SH = (
    defense_SH[rate].mean() for rate in ('G/s', 'F/s', 'C/s', 'xG/s')
)

In [33]:
#Impact = (player's per-second rate - forward average rate) * player's SH time on ice in seconds
forwards_SH = forwards_SH.assign(**{
    impact: (forwards_SH[rate] - position_avg) * forwards_SH['TOI_SH']
    for impact, rate, position_avg in (
        ('G Impact', 'G/s', avg_forward_G_SH),
        ('F Impact', 'F/s', avg_forward_F_SH),
        ('C Impact', 'C/s', avg_forward_C_SH),
        ('xG Impact', 'xG/s', avg_forward_xG_SH),
    )
})

In [34]:
#Trim the forwards table down to the identifying columns, GP and the four impact columns
forwards_SH = forwards_SH.loc[
    :,
    ['Player', 'Season', 'Team', 'Position', 'GP']
    + ['G Impact', 'F Impact', 'C Impact', 'xG Impact'],
]

In [35]:
#Impact = (player's per-second rate - defense average rate) * player's SH time on ice in seconds
defense_SH = defense_SH.assign(**{
    impact: (defense_SH[rate] - position_avg) * defense_SH['TOI_SH']
    for impact, rate, position_avg in (
        ('G Impact', 'G/s', avg_defense_G_SH),
        ('F Impact', 'F/s', avg_defense_F_SH),
        ('C Impact', 'C/s', avg_defense_C_SH),
        ('xG Impact', 'xG/s', avg_defense_xG_SH),
    )
})

In [36]:
#Trim the defense table down to the identifying columns, GP and the four impact columns
defense_SH = defense_SH.loc[
    :,
    ['Player', 'Season', 'Team', 'Position', 'GP']
    + ['G Impact', 'F Impact', 'C Impact', 'xG Impact'],
]

In [37]:
#Stack the forwards and defense tables (forwards first) into a single SH skater table.
#Rows keep the index labels they had in the source tables. The last line displays the result.
concat_frames = [forwards_SH, defense_SH]
skater_SH = pd.concat(objs=concat_frames, axis=0)
skater_SH

Unnamed: 0,Player,Season,Team,Position,GP,G Impact,F Impact,C Impact,xG Impact
5,Adam Henrique,19-20,ANA,C,70,2.025321,11.496054,15.510498,0.463671
6,Adam Henrique,20-21,ANA,C,44,-3.209840,-6.464298,-7.016718,-0.562624
10,Adam Lowry,19-20,WPG,L,48,-0.025562,-12.486435,-19.316160,1.092411
11,Adam Lowry,20-21,WPG,L,52,2.821355,6.395069,0.538890,1.053930
12,Adam Lowry,21-22,WPG,C/L,76,-5.175118,-37.959907,-23.785815,-3.018315
...,...,...,...,...,...,...,...,...,...
1276,Zach Whitecloud,20-21,VGK,D,49,3.135511,42.931355,39.860350,4.566373
1277,Zach Whitecloud,21-22,VGK,D,58,-6.367926,-17.419022,-38.219136,-3.826934
1279,Zdeno Chara,19-20,BOS,D,67,1.347726,14.084769,21.583318,1.044979
1280,Zdeno Chara,20-21,WSH,D,55,-0.782653,10.340557,19.695075,-0.721584


In [38]:
#Tag each table's impact columns with its situation suffix (_EV, _PP, _SH) so the names stay distinct
#NOTE: 'GP' is not renamed, so it keeps the same name in all three tables
skater_EV = skater_EV.rename(
    {
        'G Impact': 'G_EV',
        'F Impact': 'F_EV',
        'C Impact': 'C_EV',
        'xG Impact': 'xG_EV',
    },
    axis='columns',
)
skater_PP = skater_PP.rename(
    {
        'G Impact': 'G_PP',
        'F Impact': 'F_PP',
        'C Impact': 'C_PP',
        'xG Impact': 'xG_PP',
    },
    axis='columns',
)
skater_SH = skater_SH.rename(
    {
        'G Impact': 'G_SH',
        'F Impact': 'F_SH',
        'C Impact': 'C_SH',
        'xG Impact': 'xG_SH',
    },
    axis='columns',
)

In [39]:
#Merge player data for EV and PP
#The original assignment had no right-hand side, which is a SyntaxError.
#Each EV row is matched to its PP row on player, season, team and position
#(assumes that key combination is unique in the PP table - TODO confirm).
#how='left' keeps every EV row; skaters with no PP row get NaN in the *_PP columns.
#The PP table's own 'GP' column is dropped so it does not collide with the EV 'GP' column.
full_skater_data = skater_EV.merge(
    skater_PP.drop(columns='GP'),
    on=['Player', 'Season', 'Team', 'Position'],
    how='left',
)

**TODO:** The PP and SH data tables don't need all of the impact columns. Change those tables so they reflect only PP G+/- and SH G+/-.