<a href="https://colab.research.google.com/github/mikeytw11/Hockey-Model/blob/main/Pre_Season_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#Import skater EV on-ice rate data (CSV hosted in the project's GitHub repository)
ev_rates_url = 'https://raw.githubusercontent.com/mikeytw11/Hockey-Model/main/Files/EV%20On-Ice%20Rates.csv'
skater_EV_rates_raw = pd.read_csv(ev_rates_url)

In [3]:
#Keep only the identifying columns, GP, TOI and the four ±/60 rate columns (G, F, C, xG)
ev_keep_cols = ['Player','Season','Team','Position','GP','TOI','G±/60','F±/60','C±/60','xG±/60']
skater_EV_rates = skater_EV_rates_raw.loc[:, ev_keep_cols]

In [4]:
#Add TOI_EV = TOI * 60, i.e. time on ice in seconds (presumes TOI is recorded in minutes)
skater_EV_rates['TOI_EV'] = skater_EV_rates['TOI'].mul(60)

In [5]:
#Separate Forwards and Defense in order to get averages for each position
#Every row whose Position is not exactly "D" is treated as a forward.
#.copy() makes each subset an independent frame, so the columns added to it in
#later cells are written to its own data rather than to a slice of skater_EV_rates
#(which is what triggers pandas' SettingWithCopyWarning).
forwards_EV = skater_EV_rates.loc[skater_EV_rates.Position!="D"].copy()
defense_EV = skater_EV_rates.loc[skater_EV_rates.Position=="D"].copy()
forwards_EV.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_EV
0,A.J. Greer,21-22,N.J,L,9,70.45,0.05,12.5,14.78,0.8,4227.0
8,Adam Brooks,19-20,TOR,C,7,51.6,2.27,-0.63,-3.12,0.41,3096.0
9,Adam Brooks,20-21,TOR,C,11,110.38,1.7,-0.32,2.03,0.27,6622.8
10,Adam Brooks,21-22,MTL/VGK/WPG,C,25,213.51,0.63,-6.53,-8.36,-0.4,12810.6
11,Adam Erne,19-20,DET,L,56,599.72,-2.13,-10.27,-13.9,-0.64,35983.2


In [6]:
#Convert each forward ±/60 rate into a per-second rate (value / 3600)
#NOTE: the option below switches off pandas' chained-assignment warning for the whole session
pd.set_option('mode.chained_assignment', None)
for metric in ('G', 'F', 'C', 'xG'):
    forwards_EV[metric + '/s'] = forwards_EV[metric + '±/60'] / 3600
forwards_EV.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_EV,G/s,F/s,C/s,xG/s
0,A.J. Greer,21-22,N.J,L,9,70.45,0.05,12.5,14.78,0.8,4227.0,1.4e-05,0.003472,0.004106,0.000222
8,Adam Brooks,19-20,TOR,C,7,51.6,2.27,-0.63,-3.12,0.41,3096.0,0.000631,-0.000175,-0.000867,0.000114
9,Adam Brooks,20-21,TOR,C,11,110.38,1.7,-0.32,2.03,0.27,6622.8,0.000472,-8.9e-05,0.000564,7.5e-05
10,Adam Brooks,21-22,MTL/VGK/WPG,C,25,213.51,0.63,-6.53,-8.36,-0.4,12810.6,0.000175,-0.001814,-0.002322,-0.000111
11,Adam Erne,19-20,DET,L,56,599.72,-2.13,-10.27,-13.9,-0.64,35983.2,-0.000592,-0.002853,-0.003861,-0.000178


In [7]:
#Convert each defense ±/60 rate into a per-second rate (value / 3600)
defense_EV['G/s'] = defense_EV['G±/60'].div(3600)
defense_EV['F/s'] = defense_EV['F±/60'].div(3600)
defense_EV['C/s'] = defense_EV['C±/60'].div(3600)
defense_EV['xG/s'] = defense_EV['xG±/60'].div(3600)
defense_EV.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_EV,G/s,F/s,C/s,xG/s
1,Aaron Ekblad,19-20,FLA,D,67,1268.28,0.59,5.43,6.79,-0.01,76096.8,0.000164,0.001508,0.001886,-3e-06
2,Aaron Ekblad,20-21,FLA,D,35,630.15,0.64,5.64,7.91,0.47,37809.0,0.000178,0.001567,0.002197,0.000131
3,Aaron Ekblad,21-22,FLA,D,61,1092.75,1.6,16.84,21.62,1.08,65565.0,0.000444,0.004678,0.006006,0.0003
4,Aaron Ness,19-20,ARI,D,24,292.63,-0.06,-0.54,-1.01,0.08,17557.8,-1.7e-05,-0.00015,-0.000281,2.2e-05
5,Adam Boqvist,19-20,CHI,D,41,563.95,0.34,-5.33,-4.23,-0.2,33837.0,9.4e-05,-0.001481,-0.001175,-5.6e-05


In [8]:
#Position baselines: plain mean of each per-second rate over all rows of the table
#NOTE(review): rows are not weighted by TOI_EV - confirm an unweighted mean is intended
ev_rate_cols = ['G/s', 'F/s', 'C/s', 'xG/s']

avg_forward_G, avg_forward_F, avg_forward_C, avg_forward_xG = (
    forwards_EV[col].mean() for col in ev_rate_cols
)

avg_defense_G, avg_defense_F, avg_defense_C, avg_defense_xG = (
    defense_EV[col].mean() for col in ev_rate_cols
)


In [9]:
#Forward impact = (player per-second rate - forward average) * seconds of EV ice time
forward_EV_baselines = {
    'G': avg_forward_G,
    'F': avg_forward_F,
    'C': avg_forward_C,
    'xG': avg_forward_xG,
}
for metric, baseline in forward_EV_baselines.items():
    forwards_EV[f'{metric} Impact'] = (forwards_EV[f'{metric}/s'] - baseline) * forwards_EV['TOI_EV']
forwards_EV

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_EV,G/s,F/s,C/s,xG/s,G Impact,F Impact,C Impact,xG Impact
0,A.J. Greer,21-22,N.J,L,9,70.45,0.05,12.50,14.78,0.80,4227.0,0.000014,0.003472,0.004106,0.000222,0.223663,15.804349,19.006918,1.023018
8,Adam Brooks,19-20,TOR,C,7,51.60,2.27,-0.63,-3.12,0.41,3096.0,0.000631,-0.000175,-0.000867,0.000114,2.073018,0.283848,-1.472680,0.413894
9,Adam Brooks,20-21,TOR,C,11,110.38,1.70,-0.32,2.03,0.27,6622.8,0.000472,-0.000089,0.000564,0.000075,3.385881,1.177490,6.324003,0.627826
10,Adam Brooks,21-22,MTL/VGK/WPG,C,25,213.51,0.63,-6.53,-8.36,-0.40,12810.6,0.000175,-0.001814,-0.002322,-0.000111,2.741775,-19.820645,-24.740183,-1.169779
11,Adam Erne,19-20,DET,L,56,599.72,-2.13,-10.27,-13.90,-0.64,35983.2,-0.000592,-0.002853,-0.003861,-0.000178,-19.885853,-93.055992,-124.865892,-5.684628
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2472,Zack MacEwen,20-21,VAN,C,34,288.65,-0.14,-4.98,-5.31,-0.21,17319.0,-0.000039,-0.001383,-0.001475,-0.000058,0.002339,-19.339280,-18.773887,-0.667398
2473,Zack MacEwen,21-22,PHI,C,75,707.83,-1.09,-12.07,-18.68,-1.06,42469.8,-0.000303,-0.003353,-0.005189,-0.000294,-11.201572,-131.065860,-203.765605,-11.664191
2474,Zack Smith,19-20,CHI,C,50,514.12,-0.36,-9.73,-9.24,-0.95,30847.2,-0.000100,-0.002703,-0.002567,-0.000264,-1.880940,-75.146726,-67.113387,-7.529529
2478,Zemgus Girgensons,19-20,BUF,C,69,785.45,-0.33,-1.16,-1.24,-0.03,47127.0,-0.000092,-0.000322,-0.000344,-0.000008,-2.480892,-2.617432,2.193776,0.540283


In [10]:
#Drop the intermediate columns; keep the player identifiers, GP and the four impact columns
forward_EV_keep = ['Player', 'Season', 'Team', 'Position', 'GP'] + ['G Impact', 'F Impact', 'C Impact', 'xG Impact']
forwards_EV = forwards_EV.loc[:, forward_EV_keep]
forwards_EV.head()

Unnamed: 0,Player,Season,Team,Position,GP,G Impact,F Impact,C Impact,xG Impact
0,A.J. Greer,21-22,N.J,L,9,0.223663,15.804349,19.006918,1.023018
8,Adam Brooks,19-20,TOR,C,7,2.073018,0.283848,-1.47268,0.413894
9,Adam Brooks,20-21,TOR,C,11,3.385881,1.17749,6.324003,0.627826
10,Adam Brooks,21-22,MTL/VGK/WPG,C,25,2.741775,-19.820645,-24.740183,-1.169779
11,Adam Erne,19-20,DET,L,56,-19.885853,-93.055992,-124.865892,-5.684628


In [11]:
#Defense impact = (player per-second rate - defense average) * seconds of EV ice time
defense_EV['G Impact'] = defense_EV['G/s'].sub(avg_defense_G).mul(defense_EV['TOI_EV'])
defense_EV['F Impact'] = defense_EV['F/s'].sub(avg_defense_F).mul(defense_EV['TOI_EV'])
defense_EV['C Impact'] = defense_EV['C/s'].sub(avg_defense_C).mul(defense_EV['TOI_EV'])
defense_EV['xG Impact'] = defense_EV['xG/s'].sub(avg_defense_xG).mul(defense_EV['TOI_EV'])
defense_EV

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_EV,G/s,F/s,C/s,xG/s,G Impact,F Impact,C Impact,xG Impact
1,Aaron Ekblad,19-20,FLA,D,67,1268.28,0.59,5.43,6.79,-0.01,76096.8,0.000164,0.001508,0.001886,-0.000003,16.131720,141.116661,179.601816,1.310266
2,Aaron Ekblad,20-21,FLA,D,35,630.15,0.64,5.64,7.91,0.47,37809.0,0.000178,0.001567,0.002197,0.000131,8.540235,72.319903,100.998682,5.692211
3,Aaron Ekblad,21-22,FLA,D,61,1092.75,1.60,16.84,21.62,1.08,65565.0,0.000444,0.004678,0.006006,0.000300,32.293715,329.390734,424.836293,20.980551
4,Aaron Ness,19-20,ARI,D,24,292.63,-0.06,-0.54,-1.01,0.08,17557.8,-0.000017,-0.000150,-0.000281,0.000022,0.551910,3.443135,3.397592,0.741263
5,Adam Boqvist,19-20,CHI,D,41,563.95,0.34,-5.33,-4.23,-0.20,33837.0,0.000094,-0.001481,-0.001175,-0.000056,4.823296,-38.386476,-23.717552,-1.203222
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2466,Zach Whitecloud,20-21,VGK,D,51,794.82,0.60,6.40,7.15,0.35,47689.2,0.000167,0.001778,0.001986,0.000097,10.242078,101.286169,117.323810,5.590053
2467,Zach Whitecloud,21-22,VGK,D,59,993.37,0.91,8.30,7.19,0.54,59602.2,0.000253,0.002306,0.001997,0.000150,17.933012,158.044676,147.294129,10.132147
2475,Zdeno Chara,19-20,BOS,D,68,1171.20,1.15,-2.20,-3.75,0.03,70272.0,0.000319,-0.000611,-0.001042,0.000008,25.828124,-18.622659,-39.886535,1.990773
2476,Zdeno Chara,20-21,WSH,D,55,817.42,0.15,-1.55,0.83,0.15,49045.2,0.000042,-0.000431,0.000231,0.000042,4.402653,-4.141999,34.558235,3.024267


In [12]:
#Drop the intermediate columns; keep the player identifiers, GP and the four impact columns
defense_EV_keep = ['Player', 'Season', 'Team', 'Position', 'GP'] + ['G Impact', 'F Impact', 'C Impact', 'xG Impact']
defense_EV = defense_EV.loc[:, defense_EV_keep]
defense_EV.head()

Unnamed: 0,Player,Season,Team,Position,GP,G Impact,F Impact,C Impact,xG Impact
1,Aaron Ekblad,19-20,FLA,D,67,16.13172,141.116661,179.601816,1.310266
2,Aaron Ekblad,20-21,FLA,D,35,8.540235,72.319903,100.998682,5.692211
3,Aaron Ekblad,21-22,FLA,D,61,32.293715,329.390734,424.836293,20.980551
4,Aaron Ness,19-20,ARI,D,24,0.55191,3.443135,3.397592,0.741263
5,Adam Boqvist,19-20,CHI,D,41,4.823296,-38.386476,-23.717552,-1.203222


In [13]:
#Stack the forward and defense EV impact tables row-wise into one skater table
#(each row keeps the index label it had in its source table)
concat_frames = [forwards_EV, defense_EV]
skater_EV = pd.concat(concat_frames, axis=0)
skater_EV

Unnamed: 0,Player,Season,Team,Position,GP,G Impact,F Impact,C Impact,xG Impact
0,A.J. Greer,21-22,N.J,L,9,0.223663,15.804349,19.006918,1.023018
8,Adam Brooks,19-20,TOR,C,7,2.073018,0.283848,-1.472680,0.413894
9,Adam Brooks,20-21,TOR,C,11,3.385881,1.177490,6.324003,0.627826
10,Adam Brooks,21-22,MTL/VGK/WPG,C,25,2.741775,-19.820645,-24.740183,-1.169779
11,Adam Erne,19-20,DET,L,56,-19.885853,-93.055992,-124.865892,-5.684628
...,...,...,...,...,...,...,...,...,...
2466,Zach Whitecloud,20-21,VGK,D,51,10.242078,101.286169,117.323810,5.590053
2467,Zach Whitecloud,21-22,VGK,D,59,17.933012,158.044676,147.294129,10.132147
2475,Zdeno Chara,19-20,BOS,D,68,25.828124,-18.622659,-39.886535,1.990773
2476,Zdeno Chara,20-21,WSH,D,55,4.402653,-4.141999,34.558235,3.024267


In [14]:
#Import skater PP on-ice rate data (CSV hosted in the project's GitHub repository)
pp_rates_url = 'https://raw.githubusercontent.com/mikeytw11/Hockey-Model/main/Files/PP%20On-Ice%20Rates.csv'
skater_PP_rates_raw = pd.read_csv(pp_rates_url)
skater_PP_rates_raw.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,GF%,SF%,FF%,CF%,...,CA/60,xGF/60,xGA/60,G±/60,S±/60,F±/60,C±/60,xG±/60,Sh%,Sv%
0,Aaron Ekblad,19-20,FLA,D,67,95.9,92.22,81.87,84.52,85.63,...,14.08,6.14,0.45,5.91,39.94,54.16,69.8,5.69,12.57,95.21
1,Aaron Ekblad,20-21,FLA,D,35,132.75,91.82,87.33,87.48,87.89,...,15.16,8.24,0.69,8.92,60.02,79.28,94.85,7.55,13.94,91.44
2,Aaron Ekblad,21-22,FLA,D,61,213.88,92.51,86.16,86.78,87.39,...,14.02,8.23,0.74,9.14,48.17,65.0,83.1,7.49,17.33,91.27
3,Adam Boqvist,19-20,CHI,D,41,87.02,88.2,81.06,84.35,86.69,...,13.18,6.14,0.63,4.42,36.83,52.28,72.63,5.52,10.62,93.92
4,Adam Boqvist,20-21,CHI,D,35,118.65,82.03,75.97,79.96,82.55,...,16.41,5.95,1.69,5.32,28.65,42.88,61.26,4.25,16.26,88.74


In [15]:
#Keep only the identifying columns, GP, TOI and the four ±/60 rate columns (G, F, C, xG)
pp_id_cols = ['Player','Season','Team','Position','GP','TOI']
pp_rate_cols_60 = ['G±/60','F±/60','C±/60','xG±/60']
skater_PP_rates = skater_PP_rates_raw.loc[:, pp_id_cols + pp_rate_cols_60]
skater_PP_rates.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60
0,Aaron Ekblad,19-20,FLA,D,67,95.9,5.91,54.16,69.8,5.69
1,Aaron Ekblad,20-21,FLA,D,35,132.75,8.92,79.28,94.85,7.55
2,Aaron Ekblad,21-22,FLA,D,61,213.88,9.14,65.0,83.1,7.49
3,Adam Boqvist,19-20,CHI,D,41,87.02,4.42,52.28,72.63,5.52
4,Adam Boqvist,20-21,CHI,D,35,118.65,5.32,42.88,61.26,4.25


In [16]:
#Add TOI_PP = TOI * 60, i.e. time on ice in seconds (presumes TOI is recorded in minutes)
skater_PP_rates['TOI_PP'] = skater_PP_rates['TOI'].mul(60)

In [17]:
#Separate Forwards and Defense in order to get averages for each position
#Every row whose Position is not exactly "D" is treated as a forward.
#.copy() makes each subset an independent frame, so the columns added to it in
#later cells are written to its own data rather than to a slice of skater_PP_rates
#(which is what triggers pandas' SettingWithCopyWarning).
forwards_PP = skater_PP_rates.loc[skater_PP_rates.Position!="D"].copy()
defense_PP = skater_PP_rates.loc[skater_PP_rates.Position=="D"].copy()
forwards_PP.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_PP
6,Adam Erne,19-20,DET,L,55,48.92,-1.14,13.82,25.2,2.07,2935.2
7,Adam Erne,20-21,DET,L,44,67.95,5.14,34.78,58.76,3.68,4077.0
8,Adam Erne,21-22,DET,L,77,72.83,0.84,43.2,54.68,4.43,4369.8
12,Adam Gaudette,19-20,VAN,C,58,89.62,11.3,57.44,78.84,5.49,5377.2
13,Adam Gaudette,20-21,CHI/VAN,C,40,40.63,2.73,37.82,56.35,4.27,2437.8


In [18]:
#Convert each forward PP ±/60 rate into a per-second rate (value / 3600)
#NOTE: the option below switches off pandas' chained-assignment warning for the whole session
pd.set_option('mode.chained_assignment', None)
pp_forward_rate_map = {
    'G±/60': 'G/s',
    'F±/60': 'F/s',
    'C±/60': 'C/s',
    'xG±/60': 'xG/s',
}
for per_60_col, per_second_col in pp_forward_rate_map.items():
    forwards_PP[per_second_col] = forwards_PP[per_60_col] / 3600
forwards_PP.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_PP,G/s,F/s,C/s,xG/s
6,Adam Erne,19-20,DET,L,55,48.92,-1.14,13.82,25.2,2.07,2935.2,-0.000317,0.003839,0.007,0.000575
7,Adam Erne,20-21,DET,L,44,67.95,5.14,34.78,58.76,3.68,4077.0,0.001428,0.009661,0.016322,0.001022
8,Adam Erne,21-22,DET,L,77,72.83,0.84,43.2,54.68,4.43,4369.8,0.000233,0.012,0.015189,0.001231
12,Adam Gaudette,19-20,VAN,C,58,89.62,11.3,57.44,78.84,5.49,5377.2,0.003139,0.015956,0.0219,0.001525
13,Adam Gaudette,20-21,CHI/VAN,C,40,40.63,2.73,37.82,56.35,4.27,2437.8,0.000758,0.010506,0.015653,0.001186


In [19]:
#Convert each defense PP ±/60 rate into a per-second rate (value / 3600)
for stat in ['G', 'F', 'C', 'xG']:
    defense_PP[f'{stat}/s'] = defense_PP[f'{stat}±/60'] / 3600
defense_PP.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_PP,G/s,F/s,C/s,xG/s
0,Aaron Ekblad,19-20,FLA,D,67,95.9,5.91,54.16,69.8,5.69,5754.0,0.001642,0.015044,0.019389,0.001581
1,Aaron Ekblad,20-21,FLA,D,35,132.75,8.92,79.28,94.85,7.55,7965.0,0.002478,0.022022,0.026347,0.002097
2,Aaron Ekblad,21-22,FLA,D,61,213.88,9.14,65.0,83.1,7.49,12832.8,0.002539,0.018056,0.023083,0.002081
3,Adam Boqvist,19-20,CHI,D,41,87.02,4.42,52.28,72.63,5.52,5221.2,0.001228,0.014522,0.020175,0.001533
4,Adam Boqvist,20-21,CHI,D,35,118.65,5.32,42.88,61.26,4.25,7119.0,0.001478,0.011911,0.017017,0.001181


In [20]:
#Position baselines on the PP: plain mean of each per-second rate over all rows
#NOTE(review): rows are not weighted by TOI_PP - confirm an unweighted mean is intended
pp_rate_cols = ('G/s', 'F/s', 'C/s', 'xG/s')

forward_PP_means = [forwards_PP[col].mean() for col in pp_rate_cols]
avg_forward_G_PP, avg_forward_F_PP, avg_forward_C_PP, avg_forward_xG_PP = forward_PP_means

defense_PP_means = [defense_PP[col].mean() for col in pp_rate_cols]
avg_defense_G_PP, avg_defense_F_PP, avg_defense_C_PP, avg_defense_xG_PP = defense_PP_means

In [21]:
#Forward PP impact = (player per-second rate - forward PP average) * seconds of PP ice time
pp_forward_stats = ('G', 'F', 'C', 'xG')
pp_forward_avgs = (avg_forward_G_PP, avg_forward_F_PP, avg_forward_C_PP, avg_forward_xG_PP)
for stat, stat_avg in zip(pp_forward_stats, pp_forward_avgs):
    forwards_PP[stat + ' Impact'] = (forwards_PP[stat + '/s'] - stat_avg) * forwards_PP['TOI_PP']
forwards_PP

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_PP,G/s,F/s,C/s,xG/s,G Impact,F Impact,C Impact,xG Impact
6,Adam Erne,19-20,DET,L,55,48.92,-1.14,13.82,25.20,2.07,2935.2,-0.000317,0.003839,0.007000,0.000575,-5.673323,-35.557240,-42.736553,-3.039736
7,Adam Erne,20-21,DET,L,44,67.95,5.14,34.78,58.76,3.68,4077.0,0.001428,0.009661,0.016322,0.001022,-0.768159,-25.651894,-21.354477,-2.398876
8,Adam Erne,21-22,DET,L,77,72.83,0.84,43.20,54.68,4.43,4369.8,0.000233,0.012000,0.015189,0.001231,-6.042810,-17.273673,-27.840542,-1.660783
12,Adam Gaudette,19-20,VAN,C,58,89.62,11.30,57.44,78.84,5.49,5377.2,0.003139,0.015956,0.021900,0.001525,8.187853,0.013922,1.828173,-0.460368
13,Adam Gaudette,20-21,CHI/VAN,C,40,40.63,2.73,37.82,56.35,4.27,2437.8,0.000758,0.010506,0.015653,0.001186,-2.091285,-13.279698,-14.400660,-1.034855
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1276,Zach Sanford,19-20,STL,L,58,40.98,5.68,63.57,83.88,6.30,2458.8,0.001578,0.017658,0.023300,0.001750,-0.094450,4.193156,4.278278,0.342720
1277,Zach Sanford,20-21,STL,L,51,47.40,11.72,68.81,83.68,5.61,2844.0,0.003256,0.019114,0.023244,0.001558,4.662354,8.989663,4.790520,-0.148688
1278,Zach Sanford,21-22,OTT/WPG,L,78,103.13,4.57,48.03,63.86,4.79,6187.8,0.001269,0.013342,0.017739,0.001331,-2.145596,-16.158201,-23.644358,-1.732950
1282,Zack Kassian,19-20,EDM,R,58,23.18,0.49,35.51,52.57,3.05,1390.8,0.000136,0.009864,0.014603,0.000847,-2.058495,-8.468689,-9.676124,-1.061726


In [22]:
#Drop the intermediate columns; keep the player identifiers, GP and the four impact columns
forward_PP_keep = ['Player', 'Season', 'Team', 'Position', 'GP'] + ['G Impact', 'F Impact', 'C Impact', 'xG Impact']
forwards_PP = forwards_PP.loc[:, forward_PP_keep]
forwards_PP.head()

Unnamed: 0,Player,Season,Team,Position,GP,G Impact,F Impact,C Impact,xG Impact
6,Adam Erne,19-20,DET,L,55,-5.673323,-35.55724,-42.736553,-3.039736
7,Adam Erne,20-21,DET,L,44,-0.768159,-25.651894,-21.354477,-2.398876
8,Adam Erne,21-22,DET,L,77,-6.04281,-17.273673,-27.840542,-1.660783
12,Adam Gaudette,19-20,VAN,C,58,8.187853,0.013922,1.828173,-0.460368
13,Adam Gaudette,20-21,CHI/VAN,C,40,-2.091285,-13.279698,-14.40066,-1.034855


In [23]:
#Defense PP impact = (player per-second rate - defense PP average) * seconds of PP ice time
defense_PP['G Impact'] = defense_PP['G/s'].sub(avg_defense_G_PP).mul(defense_PP['TOI_PP'])
defense_PP['F Impact'] = defense_PP['F/s'].sub(avg_defense_F_PP).mul(defense_PP['TOI_PP'])
defense_PP['C Impact'] = defense_PP['C/s'].sub(avg_defense_C_PP).mul(defense_PP['TOI_PP'])
defense_PP['xG Impact'] = defense_PP['xG/s'].sub(avg_defense_xG_PP).mul(defense_PP['TOI_PP'])
defense_PP

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_PP,G/s,F/s,C/s,xG/s,G Impact,F Impact,C Impact,xG Impact
0,Aaron Ekblad,19-20,FLA,D,67,95.90,5.91,54.16,69.80,5.69,5754.0,0.001642,0.015044,0.019389,0.001581,0.670713,-1.013744,-6.573500,0.345572
1,Aaron Ekblad,20-21,FLA,D,35,132.75,8.92,79.28,94.85,7.55,7965.0,0.002478,0.022022,0.026347,0.002097,7.588063,54.174720,46.323728,4.593610
2,Aaron Ekblad,21-22,FLA,D,61,213.88,9.14,65.00,83.10,7.49,12832.8,0.002539,0.018056,0.023083,0.002081,13.009725,36.380094,32.749585,7.187109
3,Adam Boqvist,19-20,CHI,D,41,87.02,4.42,52.28,72.63,5.52,5221.2,0.001228,0.014522,0.020175,0.001533,-1.552389,-3.646502,-1.860374,0.067017
4,Adam Boqvist,20-21,CHI,D,35,118.65,5.32,42.88,61.26,4.25,7119.0,0.001478,0.011911,0.017017,0.001181,-0.336901,-23.560431,-25.020757,-2.420049
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1254,Will Butcher,21-22,BUF,D,37,25.58,9.36,48.84,58.57,5.16,1534.8,0.002600,0.013567,0.016269,0.001433,1.649754,-2.538496,-6.541114,-0.133780
1262,Wyatt Kalynuk,20-21,CHI,D,21,22.98,4.86,69.74,98.83,6.61,1378.8,0.001350,0.019372,0.027453,0.001836,-0.241431,5.724222,9.543318,0.435168
1279,Zach Werenski,19-20,CBJ,D,63,171.17,4.68,56.80,75.07,5.30,10270.2,0.001300,0.015778,0.020853,0.001472,-2.311842,5.722068,3.301522,-0.495800
1280,Zach Werenski,20-21,CBJ,D,33,73.60,4.07,45.55,60.21,3.73,4416.0,0.001131,0.012653,0.016725,0.001036,-1.742317,-11.339614,-16.808672,-2.139052


In [24]:
#Drop the intermediate columns; keep the player identifiers, GP and the four impact columns
defense_PP_keep = ['Player', 'Season', 'Team', 'Position', 'GP'] + ['G Impact', 'F Impact', 'C Impact', 'xG Impact']
defense_PP = defense_PP.loc[:, defense_PP_keep]
defense_PP.head()

Unnamed: 0,Player,Season,Team,Position,GP,G Impact,F Impact,C Impact,xG Impact
0,Aaron Ekblad,19-20,FLA,D,67,0.670713,-1.013744,-6.5735,0.345572
1,Aaron Ekblad,20-21,FLA,D,35,7.588063,54.17472,46.323728,4.59361
2,Aaron Ekblad,21-22,FLA,D,61,13.009725,36.380094,32.749585,7.187109
3,Adam Boqvist,19-20,CHI,D,41,-1.552389,-3.646502,-1.860374,0.067017
4,Adam Boqvist,20-21,CHI,D,35,-0.336901,-23.560431,-25.020757,-2.420049


In [25]:
#Stack the forward and defense PP impact tables row-wise into one skater table
#(each row keeps the index label it had in its source table)
concat_frames = [forwards_PP, defense_PP]
skater_PP = pd.concat(concat_frames, axis=0)
skater_PP

Unnamed: 0,Player,Season,Team,Position,GP,G Impact,F Impact,C Impact,xG Impact
6,Adam Erne,19-20,DET,L,55,-5.673323,-35.557240,-42.736553,-3.039736
7,Adam Erne,20-21,DET,L,44,-0.768159,-25.651894,-21.354477,-2.398876
8,Adam Erne,21-22,DET,L,77,-6.042810,-17.273673,-27.840542,-1.660783
12,Adam Gaudette,19-20,VAN,C,58,8.187853,0.013922,1.828173,-0.460368
13,Adam Gaudette,20-21,CHI/VAN,C,40,-2.091285,-13.279698,-14.400660,-1.034855
...,...,...,...,...,...,...,...,...,...
1254,Will Butcher,21-22,BUF,D,37,1.649754,-2.538496,-6.541114,-0.133780
1262,Wyatt Kalynuk,20-21,CHI,D,21,-0.241431,5.724222,9.543318,0.435168
1279,Zach Werenski,19-20,CBJ,D,63,-2.311842,5.722068,3.301522,-0.495800
1280,Zach Werenski,20-21,CBJ,D,33,-1.742317,-11.339614,-16.808672,-2.139052


In [26]:
#Import skater SH on-ice rate data (CSV hosted in the project's GitHub repository)
sh_rates_url = 'https://raw.githubusercontent.com/mikeytw11/Hockey-Model/main/Files/SH%20On-Ice%20Rates.csv'
skater_SH_rates_raw = pd.read_csv(sh_rates_url)
skater_SH_rates_raw.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,GF%,SF%,FF%,CF%,...,CA/60,xGF/60,xGA/60,G±/60,S±/60,F±/60,C±/60,xG±/60,Sh%,Sv%
0,Aaron Ekblad,19-20,FLA,D,65,134.87,7.45,22.51,21.41,19.08,...,101.33,1.72,8.54,-10.17,-43.66,-57.6,-77.44,-6.82,4.98,82.03
1,Aaron Ekblad,20-21,FLA,D,33,86.47,18.24,17.92,14.84,15.32,...,91.36,0.9,6.94,-4.86,-44.75,-65.14,-74.83,-6.04,11.15,89.08
2,Aaron Ekblad,21-22,FLA,D,61,167.0,25.76,19.05,17.45,16.22,...,92.41,1.64,6.87,-4.06,-43.86,-56.93,-74.52,-5.23,15.97,89.17
3,Adam Fox,20-21,NYR,D,54,139.75,18.44,20.18,17.25,15.49,...,100.31,1.06,7.38,-4.47,-37.59,-60.7,-81.93,-6.32,10.26,88.52
4,Adam Fox,21-22,NYR,D,78,163.83,17.42,11.66,11.33,9.96,...,101.1,0.6,7.46,-4.19,-49.91,-68.53,-89.91,-6.85,14.77,90.76


In [27]:
#Keep only the identifying columns, GP, TOI and the four ±/60 rate columns (G, F, C, xG)
sh_id_cols = ['Player','Season','Team','Position','GP','TOI']
sh_rate_cols_60 = ['G±/60','F±/60','C±/60','xG±/60']
skater_SH_rates = skater_SH_rates_raw.loc[:, sh_id_cols + sh_rate_cols_60]
skater_SH_rates.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60
0,Aaron Ekblad,19-20,FLA,D,65,134.87,-10.17,-57.6,-77.44,-6.82
1,Aaron Ekblad,20-21,FLA,D,33,86.47,-4.86,-65.14,-74.83,-6.04
2,Aaron Ekblad,21-22,FLA,D,61,167.0,-4.06,-56.93,-74.52,-5.23
3,Adam Fox,20-21,NYR,D,54,139.75,-4.47,-60.7,-81.93,-6.32
4,Adam Fox,21-22,NYR,D,78,163.83,-4.19,-68.53,-89.91,-6.85


In [28]:
#Add TOI_SH = TOI * 60, i.e. time on ice in seconds (presumes TOI is recorded in minutes)
skater_SH_rates['TOI_SH'] = skater_SH_rates['TOI'].mul(60)

In [29]:
#Separate Forwards and Defense in order to get averages for each position
#Every row whose Position is not exactly "D" is treated as a forward.
#.copy() makes each subset an independent frame, so the columns added to it in
#later cells are written to its own data rather than to a slice of skater_SH_rates
#(which is what triggers pandas' SettingWithCopyWarning).
forwards_SH = skater_SH_rates.loc[skater_SH_rates.Position!="D"].copy()
defense_SH = skater_SH_rates.loc[skater_SH_rates.Position=="D"].copy()
forwards_SH.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_SH
5,Adam Henrique,19-20,ANA,C,70,38.63,-3.0,-41.27,-55.46,-5.22,2317.8
6,Adam Henrique,20-21,ANA,C,44,40.68,-10.88,-68.66,-89.9,-6.77,2440.8
10,Adam Lowry,19-20,WPG,L,48,107.42,-6.16,-66.1,-90.34,-5.33,6445.2
11,Adam Lowry,20-21,WPG,L,52,123.95,-4.78,-56.03,-79.29,-5.43,7437.0
12,Adam Lowry,21-22,WPG,C/L,76,181.13,-7.86,-71.7,-87.43,-6.94,10867.8


In [30]:
#Convert each forward SH ±/60 rate into a per-second rate (value / 3600)
#NOTE: the option below switches off pandas' chained-assignment warning for the whole session
pd.set_option('mode.chained_assignment', None)
forwards_SH = forwards_SH.assign(**{
    'G/s': forwards_SH['G±/60'] / 3600,
    'F/s': forwards_SH['F±/60'] / 3600,
    'C/s': forwards_SH['C±/60'] / 3600,
    'xG/s': forwards_SH['xG±/60'] / 3600,
})
forwards_SH.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_SH,G/s,F/s,C/s,xG/s
5,Adam Henrique,19-20,ANA,C,70,38.63,-3.0,-41.27,-55.46,-5.22,2317.8,-0.000833,-0.011464,-0.015406,-0.00145
6,Adam Henrique,20-21,ANA,C,44,40.68,-10.88,-68.66,-89.9,-6.77,2440.8,-0.003022,-0.019072,-0.024972,-0.001881
10,Adam Lowry,19-20,WPG,L,48,107.42,-6.16,-66.1,-90.34,-5.33,6445.2,-0.001711,-0.018361,-0.025094,-0.001481
11,Adam Lowry,20-21,WPG,L,52,123.95,-4.78,-56.03,-79.29,-5.43,7437.0,-0.001328,-0.015564,-0.022025,-0.001508
12,Adam Lowry,21-22,WPG,C/L,76,181.13,-7.86,-71.7,-87.43,-6.94,10867.8,-0.002183,-0.019917,-0.024286,-0.001928


In [31]:
#Convert each defense SH ±/60 rate into a per-second rate (value / 3600)
defense_SH['G/s'] = defense_SH['G±/60'].div(3600)
defense_SH['F/s'] = defense_SH['F±/60'].div(3600)
defense_SH['C/s'] = defense_SH['C±/60'].div(3600)
defense_SH['xG/s'] = defense_SH['xG±/60'].div(3600)
defense_SH.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_SH,G/s,F/s,C/s,xG/s
0,Aaron Ekblad,19-20,FLA,D,65,134.87,-10.17,-57.6,-77.44,-6.82,8092.2,-0.002825,-0.016,-0.021511,-0.001894
1,Aaron Ekblad,20-21,FLA,D,33,86.47,-4.86,-65.14,-74.83,-6.04,5188.2,-0.00135,-0.018094,-0.020786,-0.001678
2,Aaron Ekblad,21-22,FLA,D,61,167.0,-4.06,-56.93,-74.52,-5.23,10020.0,-0.001128,-0.015814,-0.0207,-0.001453
3,Adam Fox,20-21,NYR,D,54,139.75,-4.47,-60.7,-81.93,-6.32,8385.0,-0.001242,-0.016861,-0.022758,-0.001756
4,Adam Fox,21-22,NYR,D,78,163.83,-4.19,-68.53,-89.91,-6.85,9829.8,-0.001164,-0.019036,-0.024975,-0.001903


In [32]:
#Position baselines on the SH: plain mean of each per-second rate over all rows
#NOTE(review): rows are not weighted by TOI_SH - confirm an unweighted mean is intended
#Goals
avg_forward_G_SH = forwards_SH.loc[:, 'G/s'].mean()
avg_defense_G_SH = defense_SH.loc[:, 'G/s'].mean()
#F
avg_forward_F_SH = forwards_SH.loc[:, 'F/s'].mean()
avg_defense_F_SH = defense_SH.loc[:, 'F/s'].mean()
#C
avg_forward_C_SH = forwards_SH.loc[:, 'C/s'].mean()
avg_defense_C_SH = defense_SH.loc[:, 'C/s'].mean()
#xG
avg_forward_xG_SH = forwards_SH.loc[:, 'xG/s'].mean()
avg_defense_xG_SH = defense_SH.loc[:, 'xG/s'].mean()

In [33]:
#Forward SH impact = (player per-second rate - forward SH average) * seconds of SH ice time
forward_SH_baselines = {
    'G': avg_forward_G_SH,
    'F': avg_forward_F_SH,
    'C': avg_forward_C_SH,
    'xG': avg_forward_xG_SH,
}
for metric, baseline in forward_SH_baselines.items():
    forwards_SH[f'{metric} Impact'] = (forwards_SH[f'{metric}/s'] - baseline) * forwards_SH['TOI_SH']
forwards_SH

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_SH,G/s,F/s,C/s,xG/s,G Impact,F Impact,C Impact,xG Impact
5,Adam Henrique,19-20,ANA,C,70,38.63,-3.00,-41.27,-55.46,-5.22,2317.8,-0.000833,-0.011464,-0.015406,-0.001450,2.025321,11.496054,15.510498,0.463671
6,Adam Henrique,20-21,ANA,C,44,40.68,-10.88,-68.66,-89.90,-6.77,2440.8,-0.003022,-0.019072,-0.024972,-0.001881,-3.209840,-6.464298,-7.016718,-0.562624
10,Adam Lowry,19-20,WPG,L,48,107.42,-6.16,-66.10,-90.34,-5.33,6445.2,-0.001711,-0.018361,-0.025094,-0.001481,-0.025562,-12.486435,-19.316160,1.092411
11,Adam Lowry,20-21,WPG,L,52,123.95,-4.78,-56.03,-79.29,-5.43,7437.0,-0.001328,-0.015564,-0.022025,-0.001508,2.821355,6.395069,0.538890,1.053930
12,Adam Lowry,21-22,WPG,C/L,76,181.13,-7.86,-71.70,-87.43,-6.94,10867.8,-0.002183,-0.019917,-0.024286,-0.001928,-5.175118,-37.959907,-23.785815,-3.018315
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1271,Zach Sanford,20-21,STL,L,51,81.05,-5.11,-70.41,-92.44,-7.25,4863.0,-0.001419,-0.019558,-0.025678,-0.002014,1.399088,-15.243294,-17.411082,-1.769360
1272,Zach Sanford,21-22,OTT/WPG,L,78,34.95,-8.58,-75.73,-101.06,-7.67,2097.0,-0.002383,-0.021036,-0.028072,-0.002131,-1.417967,-9.672042,-12.529075,-1.007625
1278,Zack Smith,19-20,CHI,C,50,61.80,-1.74,-49.88,-77.20,-4.99,3708.0,-0.000483,-0.013856,-0.021444,-0.001386,4.537894,9.523006,2.421384,0.978677
1282,Zemgus Girgensons,19-20,BUF,C,68,147.87,-9.21,-65.75,-85.04,-5.85,8872.2,-0.002558,-0.018264,-0.023622,-0.001625,-7.551912,-16.325744,-13.527990,0.222228


In [34]:
#Drop the intermediate columns; keep the player identifiers, GP and the four impact columns
forward_SH_keep = ['Player', 'Season', 'Team', 'Position', 'GP'] + ['G Impact', 'F Impact', 'C Impact', 'xG Impact']
forwards_SH = forwards_SH.loc[:, forward_SH_keep]
forwards_SH.head()

Unnamed: 0,Player,Season,Team,Position,GP,G Impact,F Impact,C Impact,xG Impact
5,Adam Henrique,19-20,ANA,C,70,2.025321,11.496054,15.510498,0.463671
6,Adam Henrique,20-21,ANA,C,44,-3.20984,-6.464298,-7.016718,-0.562624
10,Adam Lowry,19-20,WPG,L,48,-0.025562,-12.486435,-19.31616,1.092411
11,Adam Lowry,20-21,WPG,L,52,2.821355,6.395069,0.53889,1.05393
12,Adam Lowry,21-22,WPG,C/L,76,-5.175118,-37.959907,-23.785815,-3.018315


In [35]:
#Defense SH impact = (player per-second rate - defense SH average) * seconds of SH ice time
defense_SH_seconds = defense_SH['TOI_SH']
g_above_avg = defense_SH['G/s'] - avg_defense_G_SH
f_above_avg = defense_SH['F/s'] - avg_defense_F_SH
c_above_avg = defense_SH['C/s'] - avg_defense_C_SH
xg_above_avg = defense_SH['xG/s'] - avg_defense_xG_SH
defense_SH['G Impact'] = g_above_avg * defense_SH_seconds
defense_SH['F Impact'] = f_above_avg * defense_SH_seconds
defense_SH['C Impact'] = c_above_avg * defense_SH_seconds
defense_SH['xG Impact'] = xg_above_avg * defense_SH_seconds
defense_SH

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_SH,G/s,F/s,C/s,xG/s,G Impact,F Impact,C Impact,xG Impact
0,Aaron Ekblad,19-20,FLA,D,65,134.87,-10.17,-57.60,-77.44,-6.82,8092.2,-0.002825,-0.016000,-0.021511,-0.001894,-8.636752,4.737779,6.022887,-1.746816
1,Aaron Ekblad,20-21,FLA,D,33,86.47,-4.86,-65.14,-74.83,-6.04,5188.2,-0.001350,-0.018094,-0.020786,-0.001678,2.115263,-7.828836,7.622934,0.004163
2,Aaron Ekblad,21-22,FLA,D,61,167.00,-4.06,-56.93,-74.52,-5.23,10020.0,-0.001128,-0.015814,-0.020700,-0.001453,6.311886,7.731291,15.585049,2.262541
3,Adam Fox,20-21,NYR,D,54,139.75,-4.47,-60.70,-81.93,-6.32,8385.0,-0.001242,-0.016861,-0.022758,-0.001756,4.326994,-2.311210,-4.217145,-0.645438
4,Adam Fox,21-22,NYR,D,78,163.83,-4.19,-68.53,-89.91,-6.85,9829.8,-0.001164,-0.019036,-0.024975,-0.001903,5.837109,-24.089264,-26.733182,-2.203817
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1276,Zach Whitecloud,20-21,VGK,D,49,98.10,-4.41,-33.45,-55.74,-3.25,5886.0,-0.001225,-0.009292,-0.015483,-0.000903,3.135511,42.931355,39.860350,4.566373
1277,Zach Whitecloud,21-22,VGK,D,58,98.67,-10.20,-70.30,-103.36,-8.37,5920.2,-0.002833,-0.019528,-0.028711,-0.002325,-6.367926,-17.419022,-38.219136,-3.826934
1279,Zdeno Chara,19-20,BOS,D,67,214.07,-5.95,-55.76,-74.07,-5.75,12844.2,-0.001653,-0.015489,-0.020575,-0.001597,1.347726,14.084769,21.583318,1.044979
1280,Zdeno Chara,20-21,WSH,D,55,145.72,-6.65,-55.45,-72.01,-6.34,8743.2,-0.001847,-0.015403,-0.020003,-0.001761,-0.782653,10.340557,19.695075,-0.721584


In [36]:
#Drop the intermediate columns; keep the player identifiers, GP and the four impact columns
defense_SH_keep = ['Player', 'Season', 'Team', 'Position', 'GP'] + ['G Impact', 'F Impact', 'C Impact', 'xG Impact']
defense_SH = defense_SH.loc[:, defense_SH_keep]
defense_SH.head()

Unnamed: 0,Player,Season,Team,Position,GP,G Impact,F Impact,C Impact,xG Impact
0,Aaron Ekblad,19-20,FLA,D,65,-8.636752,4.737779,6.022887,-1.746816
1,Aaron Ekblad,20-21,FLA,D,33,2.115263,-7.828836,7.622934,0.004163
2,Aaron Ekblad,21-22,FLA,D,61,6.311886,7.731291,15.585049,2.262541
3,Adam Fox,20-21,NYR,D,54,4.326994,-2.31121,-4.217145,-0.645438
4,Adam Fox,21-22,NYR,D,78,5.837109,-24.089264,-26.733182,-2.203817


In [37]:
#Stack the forward and defense SH impact tables row-wise into one skater table
#(each row keeps the index label it had in its source table)
concat_frames = [forwards_SH, defense_SH]
skater_SH = pd.concat(concat_frames, axis=0)
skater_SH

Unnamed: 0,Player,Season,Team,Position,GP,G Impact,F Impact,C Impact,xG Impact
5,Adam Henrique,19-20,ANA,C,70,2.025321,11.496054,15.510498,0.463671
6,Adam Henrique,20-21,ANA,C,44,-3.209840,-6.464298,-7.016718,-0.562624
10,Adam Lowry,19-20,WPG,L,48,-0.025562,-12.486435,-19.316160,1.092411
11,Adam Lowry,20-21,WPG,L,52,2.821355,6.395069,0.538890,1.053930
12,Adam Lowry,21-22,WPG,C/L,76,-5.175118,-37.959907,-23.785815,-3.018315
...,...,...,...,...,...,...,...,...,...
1276,Zach Whitecloud,20-21,VGK,D,49,3.135511,42.931355,39.860350,4.566373
1277,Zach Whitecloud,21-22,VGK,D,58,-6.367926,-17.419022,-38.219136,-3.826934
1279,Zdeno Chara,19-20,BOS,D,67,1.347726,14.084769,21.583318,1.044979
1280,Zdeno Chara,20-21,WSH,D,55,-0.782653,10.340557,19.695075,-0.721584
