<a href="https://colab.research.google.com/github/mikeytw11/Hockey-Model/blob/main/Pre_Season_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#Import Skater Data
skater_EV_rates_raw = pd.read_csv('https://raw.githubusercontent.com/mikeytw11/Hockey-Model/main/Files/EV%20On-Ice%20Rates.csv')

In [3]:
#Restrict data columns to Player, Season, Team, Position, GP, TOI, G/60, F/60, C/60, xG/60
skater_EV_rates = skater_EV_rates_raw.loc[:,['Player','Season','Team','Position','GP','TOI','G±/60','F±/60','C±/60','xG±/60']]

In [4]:
#Convert TOI to seconds
skater_EV_rates['TOI_EV'] = skater_EV_rates['TOI'] * 60

In [5]:
#Separate Forwards and Defense in order to get averages for each position
forwards_EV = skater_EV_rates.loc[skater_EV_rates.Position!="D"]
defense_EV = skater_EV_rates.loc[skater_EV_rates.Position=="D"]
forwards_EV.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_EV
0,A.J. Greer,21-22,N.J,L,9,70.45,0.05,12.5,14.78,0.8,4227.0
8,Adam Brooks,19-20,TOR,C,7,51.6,2.27,-0.63,-3.12,0.41,3096.0
9,Adam Brooks,20-21,TOR,C,11,110.38,1.7,-0.32,2.03,0.27,6622.8
10,Adam Brooks,21-22,MTL/VGK/WPG,C,25,213.51,0.63,-6.53,-8.36,-0.4,12810.6
11,Adam Erne,19-20,DET,L,56,599.72,-2.13,-10.27,-13.9,-0.64,35983.2


In [6]:
#Calculate Variables/s
pd.options.mode.chained_assignment = None
forwards_EV['G/s'] = forwards_EV['G±/60'] / 3600
forwards_EV['F/s'] = forwards_EV['F±/60'] / 3600
forwards_EV['C/s'] = forwards_EV['C±/60'] / 3600
forwards_EV['xG/s'] = forwards_EV['xG±/60'] / 3600
forwards_EV.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_EV,G/s,F/s,C/s,xG/s
0,A.J. Greer,21-22,N.J,L,9,70.45,0.05,12.5,14.78,0.8,4227.0,1.4e-05,0.003472,0.004106,0.000222
8,Adam Brooks,19-20,TOR,C,7,51.6,2.27,-0.63,-3.12,0.41,3096.0,0.000631,-0.000175,-0.000867,0.000114
9,Adam Brooks,20-21,TOR,C,11,110.38,1.7,-0.32,2.03,0.27,6622.8,0.000472,-8.9e-05,0.000564,7.5e-05
10,Adam Brooks,21-22,MTL/VGK/WPG,C,25,213.51,0.63,-6.53,-8.36,-0.4,12810.6,0.000175,-0.001814,-0.002322,-0.000111
11,Adam Erne,19-20,DET,L,56,599.72,-2.13,-10.27,-13.9,-0.64,35983.2,-0.000592,-0.002853,-0.003861,-0.000178


In [7]:
#Calculate average/s for defense
defense_EV['G/s'] = defense_EV['G±/60'] / 3600
defense_EV['F/s'] = defense_EV['F±/60'] / 3600
defense_EV['C/s'] = defense_EV['C±/60'] / 3600
defense_EV['xG/s'] = defense_EV['xG±/60'] / 3600
defense_EV.head()

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_EV,G/s,F/s,C/s,xG/s
1,Aaron Ekblad,19-20,FLA,D,67,1268.28,0.59,5.43,6.79,-0.01,76096.8,0.000164,0.001508,0.001886,-3e-06
2,Aaron Ekblad,20-21,FLA,D,35,630.15,0.64,5.64,7.91,0.47,37809.0,0.000178,0.001567,0.002197,0.000131
3,Aaron Ekblad,21-22,FLA,D,61,1092.75,1.6,16.84,21.62,1.08,65565.0,0.000444,0.004678,0.006006,0.0003
4,Aaron Ness,19-20,ARI,D,24,292.63,-0.06,-0.54,-1.01,0.08,17557.8,-1.7e-05,-0.00015,-0.000281,2.2e-05
5,Adam Boqvist,19-20,CHI,D,41,563.95,0.34,-5.33,-4.23,-0.2,33837.0,9.4e-05,-0.001481,-0.001175,-5.6e-05


In [8]:
#Calculate averages per second for forwards and defensemen
avg_forward_G = forwards_EV['G/s'].mean()
avg_forward_F = forwards_EV['F/s'].mean()
avg_forward_C = forwards_EV['C/s'].mean()
avg_forward_xG = forwards_EV['xG/s'].mean()

avg_defense_G = defense_EV['G/s'].mean()
avg_defense_F = defense_EV['F/s'].mean()
avg_defense_C = defense_EV['C/s'].mean()
avg_defense_xG = defense_EV['xG/s'].mean()


In [9]:
#Add impact columns to Forward table
forwards_EV['G Impact'] = (forwards_EV['G/s']-avg_forward_G) * forwards_EV['TOI_EV']
forwards_EV['F Impact'] = (forwards_EV['F/s']-avg_forward_F) * forwards_EV['TOI_EV']
forwards_EV['C Impact'] = (forwards_EV['C/s']-avg_forward_C) * forwards_EV['TOI_EV']
forwards_EV['xG Impact'] = (forwards_EV['xG/s']-avg_forward_xG) * forwards_EV['TOI_EV']
forwards_EV

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_EV,G/s,F/s,C/s,xG/s,G Impact,F Impact,C Impact,xG Impact
0,A.J. Greer,21-22,N.J,L,9,70.45,0.05,12.50,14.78,0.80,4227.0,0.000014,0.003472,0.004106,0.000222,0.223663,15.804349,19.006918,1.023018
8,Adam Brooks,19-20,TOR,C,7,51.60,2.27,-0.63,-3.12,0.41,3096.0,0.000631,-0.000175,-0.000867,0.000114,2.073018,0.283848,-1.472680,0.413894
9,Adam Brooks,20-21,TOR,C,11,110.38,1.70,-0.32,2.03,0.27,6622.8,0.000472,-0.000089,0.000564,0.000075,3.385881,1.177490,6.324003,0.627826
10,Adam Brooks,21-22,MTL/VGK/WPG,C,25,213.51,0.63,-6.53,-8.36,-0.40,12810.6,0.000175,-0.001814,-0.002322,-0.000111,2.741775,-19.820645,-24.740183,-1.169779
11,Adam Erne,19-20,DET,L,56,599.72,-2.13,-10.27,-13.90,-0.64,35983.2,-0.000592,-0.002853,-0.003861,-0.000178,-19.885853,-93.055992,-124.865892,-5.684628
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2472,Zack MacEwen,20-21,VAN,C,34,288.65,-0.14,-4.98,-5.31,-0.21,17319.0,-0.000039,-0.001383,-0.001475,-0.000058,0.002339,-19.339280,-18.773887,-0.667398
2473,Zack MacEwen,21-22,PHI,C,75,707.83,-1.09,-12.07,-18.68,-1.06,42469.8,-0.000303,-0.003353,-0.005189,-0.000294,-11.201572,-131.065860,-203.765605,-11.664191
2474,Zack Smith,19-20,CHI,C,50,514.12,-0.36,-9.73,-9.24,-0.95,30847.2,-0.000100,-0.002703,-0.002567,-0.000264,-1.880940,-75.146726,-67.113387,-7.529529
2478,Zemgus Girgensons,19-20,BUF,C,69,785.45,-0.33,-1.16,-1.24,-0.03,47127.0,-0.000092,-0.000322,-0.000344,-0.000008,-2.480892,-2.617432,2.193776,0.540283


In [10]:
#Restrict data for Forwards table
forwards_EV = forwards_EV.loc[:,['Player', 'Season', 'Team', 'Position', 'GP', 'G Impact', 'F Impact', 'C Impact', 'xG Impact']]
forwards_EV.head()

Unnamed: 0,Player,Season,Team,Position,GP,G Impact,F Impact,C Impact,xG Impact
0,A.J. Greer,21-22,N.J,L,9,0.223663,15.804349,19.006918,1.023018
8,Adam Brooks,19-20,TOR,C,7,2.073018,0.283848,-1.47268,0.413894
9,Adam Brooks,20-21,TOR,C,11,3.385881,1.17749,6.324003,0.627826
10,Adam Brooks,21-22,MTL/VGK/WPG,C,25,2.741775,-19.820645,-24.740183,-1.169779
11,Adam Erne,19-20,DET,L,56,-19.885853,-93.055992,-124.865892,-5.684628


In [11]:
#Add impact columns to defense table
defense_EV['G Impact'] = (defense_EV['G/s']-avg_forward_G) * defense_EV['TOI_EV']
defense_EV['F Impact'] = (defense_EV['F/s']-avg_forward_F) * defense_EV['TOI_EV']
defense_EV['C Impact'] = (defense_EV['C/s']-avg_forward_C) * defense_EV['TOI_EV']
defense_EV['xG Impact'] = (defense_EV['xG/s']-avg_forward_xG) * defense_EV['TOI_EV']
defense_EV

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_EV,G/s,F/s,C/s,xG/s,G Impact,F Impact,C Impact,xG Impact
1,Aaron Ekblad,19-20,FLA,D,67,1268.28,0.59,5.43,6.79,-0.01,76096.8,0.000164,0.001508,0.001886,-0.000003,15.441019,135.073006,173.280468,1.295164
2,Aaron Ekblad,20-21,FLA,D,35,630.15,0.64,5.64,7.91,0.47,37809.0,0.000178,0.001567,0.002197,0.000131,8.197057,69.317089,97.857895,5.684707
3,Aaron Ekblad,21-22,FLA,D,61,1092.75,1.60,16.84,21.62,1.08,65565.0,0.000444,0.004678,0.006006,0.000300,31.698606,324.183522,419.389820,20.967538
4,Aaron Ness,19-20,ARI,D,24,292.63,-0.06,-0.54,-1.01,0.08,17557.8,-0.000017,-0.000150,-0.000281,0.000022,0.392545,2.048684,1.939069,0.737778
5,Adam Boqvist,19-20,CHI,D,41,563.95,0.34,-5.33,-4.23,-0.20,33837.0,0.000094,-0.001481,-0.001175,-0.000056,4.516171,-41.073831,-26.528386,-1.209938
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2466,Zach Whitecloud,20-21,VGK,D,51,794.82,0.60,6.40,7.15,0.35,47689.2,0.000167,0.001778,0.001986,0.000097,9.809222,97.498663,113.362276,5.580588
2467,Zach Whitecloud,21-22,VGK,D,59,993.37,0.91,8.30,7.19,0.54,59602.2,0.000253,0.002306,0.001997,0.000150,17.392026,153.311032,142.342985,10.120318
2475,Zdeno Chara,19-20,BOS,D,68,1171.20,1.15,-2.20,-3.75,0.03,70272.0,0.000319,-0.000611,-0.001042,0.000008,25.190292,-24.203705,-45.724018,1.976826
2476,Zdeno Chara,20-21,WSH,D,55,817.42,0.15,-1.55,0.83,0.15,49045.2,0.000042,-0.000431,0.000231,0.000042,3.957488,-8.037199,30.484058,3.014534


In [12]:
#Restrict data for Forwards table
defense_EV = defense_EV.loc[:,['Player', 'Season', 'Team', 'Position', 'GP', 'G Impact', 'F Impact', 'C Impact', 'xG Impact']]
defense_EV.head()

Unnamed: 0,Player,Season,Team,Position,GP,G Impact,F Impact,C Impact,xG Impact
1,Aaron Ekblad,19-20,FLA,D,67,15.441019,135.073006,173.280468,1.295164
2,Aaron Ekblad,20-21,FLA,D,35,8.197057,69.317089,97.857895,5.684707
3,Aaron Ekblad,21-22,FLA,D,61,31.698606,324.183522,419.38982,20.967538
4,Aaron Ness,19-20,ARI,D,24,0.392545,2.048684,1.939069,0.737778
5,Adam Boqvist,19-20,CHI,D,41,4.516171,-41.073831,-26.528386,-1.209938


In [13]:
#Merge forwards and defense tables together
concat_frames = [forwards_EV, defense_EV]
skater_EV = pd.concat(concat_frames)
skater_EV

Unnamed: 0,Player,Season,Team,Position,GP,G Impact,F Impact,C Impact,xG Impact
0,A.J. Greer,21-22,N.J,L,9,0.223663,15.804349,19.006918,1.023018
8,Adam Brooks,19-20,TOR,C,7,2.073018,0.283848,-1.472680,0.413894
9,Adam Brooks,20-21,TOR,C,11,3.385881,1.177490,6.324003,0.627826
10,Adam Brooks,21-22,MTL/VGK/WPG,C,25,2.741775,-19.820645,-24.740183,-1.169779
11,Adam Erne,19-20,DET,L,56,-19.885853,-93.055992,-124.865892,-5.684628
...,...,...,...,...,...,...,...,...,...
2466,Zach Whitecloud,20-21,VGK,D,51,9.809222,97.498663,113.362276,5.580588
2467,Zach Whitecloud,21-22,VGK,D,59,17.392026,153.311032,142.342985,10.120318
2475,Zdeno Chara,19-20,BOS,D,68,25.190292,-24.203705,-45.724018,1.976826
2476,Zdeno Chara,20-21,WSH,D,55,3.957488,-8.037199,30.484058,3.014534


In [9]:
#Convert TOI_EV to seconds, Add column for G/s, F/s, C/s, xG/s
skater_EV_rates['G Impact'] = (forwards_EV['G/s']-avg_forward_G) * forwards_EV['TOI_EV']
skater_EV_rates['F Impact'] = (forwards_EV['F/s']-avg_forward_F) * forwards_EV['TOI_EV']
skater_EV_rates['C Impact'] = (forwards_EV['C/s']-avg_forward_C) * forwards_EV['TOI_EV']
skater_EV_rates['xG Impact'] = (forwards_EV['xG/s']-avg_forward_xG) * forwards_EV['TOI_EV']
skater_EV_rates

Unnamed: 0,Player,Season,Team,Position,GP,TOI,G±/60,F±/60,C±/60,xG±/60,TOI_EV,G Impact,F Impact,C Impact,xG Impact
0,A.J. Greer,21-22,N.J,L,9,70.45,0.05,12.50,14.78,0.80,4227.0,0.223663,15.804349,19.006918,1.023018
1,Aaron Ekblad,19-20,FLA,D,67,1268.28,0.59,5.43,6.79,-0.01,76096.8,,,,
2,Aaron Ekblad,20-21,FLA,D,35,630.15,0.64,5.64,7.91,0.47,37809.0,,,,
3,Aaron Ekblad,21-22,FLA,D,61,1092.75,1.60,16.84,21.62,1.08,65565.0,,,,
4,Aaron Ness,19-20,ARI,D,24,292.63,-0.06,-0.54,-1.01,0.08,17557.8,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2475,Zdeno Chara,19-20,BOS,D,68,1171.20,1.15,-2.20,-3.75,0.03,70272.0,,,,
2476,Zdeno Chara,20-21,WSH,D,55,817.42,0.15,-1.55,0.83,0.15,49045.2,,,,
2477,Zdeno Chara,21-22,NYI,D,72,1196.30,0.30,-9.06,-13.94,-0.41,71778.0,,,,
2478,Zemgus Girgensons,19-20,BUF,C,69,785.45,-0.33,-1.16,-1.24,-0.03,47127.0,-2.480892,-2.617432,2.193776,0.540283


In [None]:
#Reduce table data to Player, Season, Team, Position, GP, TOI_EV, G, F, C, xG Impact
skater_EV_rates = skater_EV_rates.loc[:,['Player','Season', 'Team', 'Position', 'GP', 'TOI_EV','G Impact', 'F Impact', 'C Impact', 'xG Impact']]
skater_EV_rates.head()