# Value of on-ice events

<p>data frames used in this notebook:</p>
<p>&nbsp; &nbsp; 1. all on-ice prior to a goal events.</p>
<p>&nbsp; &nbsp; 2. all even strength on-ice events.</p> 
 

In [51]:
import sys
import os
import pandas as pd
import numpy as np
import datetime, time
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols
from pylab import hist, show
import scipy
import zipfile


pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 200)

In [52]:
pwd

'/Users/stefanostselios/Desktop/nhl_roster_design-master'

In [53]:
da = pd.read_csv('/Users/stefanostselios/Brock University/Kevin Mongeon - StephanosShare/out/pbp_merged.csv')
#da = pd.read_csv('/Users/kevinmongeon/Brock University/Steve Tselios - StephanosShare/out/pbp_merged.csv')
da = da.drop('Unnamed: 0', axis=1)
da = da.rename(columns={'TeamCode': 'EventTeamCode'})

- keep regular season games and relevant on-ice events in **regulation time**. Drop duplicates by season, game number, event number and event team to have one obsrevation per event per game.

In [54]:
da = da[da['GameNumber'] <= 21230]
da = da[da['Period'] <= 3]
da = da[da['Period'] >= 1]
da = da[da['EventType']!='STOP']
da = da[da['EventType']!='EISTR']
da = da[da['EventType']!='EIEND']
da = da[da['EventType'] !='FIGHT']
da = da.dropna(subset=['EventNumber'])

In [55]:
da.head()

Unnamed: 0,Season,GameNumber,EventNumber,Period,AdvantageType,EventTimeFromZero,EventTimeFromTwenty,EventType,EventDetail,VPlayer1,VPosition1,VPlayer2,VPosition2,VPlayer3,VPosition3,VPlayer4,VPosition4,VPlayer5,VPosition5,VPlayer6,VPosition6,HPlayer1,HPosition1,HPlayer2,HPosition2,HPlayer3,HPosition3,HPlayer4,HPosition4,HPlayer5,HPosition5,HPlayer6,HPosition6,GameDate,VTeamCode,HTeamCode,EventTeamCode,PlayerNumber,PlayerName,ShotType,ShotResult,Zone,Length,PenaltyType
0,2010,20001,1,1,,0,1200,FAC,MTL won Neu. Zone - MTL #11 GOMEZ vs TOR #37 B...,11,C,21.0,R,57.0,L,26.0,D,75.0,D,31.0,G,37,C,9.0,R,11.0,L,3.0,D,22.0,D,35.0,G,2010-10-07,MTL,TOR,MTL,11.0,GOMEZ,,,N,,
1,2010,20001,3,1,EV,15,1185,HIT,"TOR #37 BRENT HIT MTL #26 GORGES, Off. Zone",11,C,21.0,R,57.0,L,26.0,D,75.0,D,31.0,G,37,C,9.0,R,11.0,L,3.0,D,22.0,D,35.0,G,2010-10-07,MTL,TOR,TOR,37.0,BRENT,,,O,,
2,2010,20001,4,1,EV,46,1154,HIT,"MTL #14 PLEKANEC HIT TOR #2 SCHENN, Off. Zone",14,C,81.0,C,46.0,L,6.0,D,76.0,D,31.0,G,42,C,81.0,C,32.0,R,2.0,D,15.0,D,35.0,G,2010-10-07,MTL,TOR,MTL,14.0,PLEKANEC,,,O,,
3,2010,20001,5,1,EV,57,1143,HIT,"MTL #76 SUBBAN HIT TOR #15 KABERLE, Neu. Zone",14,C,81.0,C,46.0,L,6.0,D,76.0,D,31.0,G,42,C,81.0,C,32.0,R,2.0,D,15.0,D,35.0,G,2010-10-07,MTL,TOR,MTL,76.0,SUBBAN,,,N,,
4,2010,20001,6,1,EV,69,1131,GIVE,"TOR&nbsp;GIVEAWAY - #35 GIGUERE, Def. Zone",14,C,81.0,C,46.0,L,6.0,D,76.0,D,31.0,G,42,C,81.0,C,32.0,R,2.0,D,15.0,D,35.0,G,2010-10-07,MTL,TOR,TOR,35.0,GIGUERE,,,D,,


In [56]:
da.shape

(310113, 44)

- create a goal dataframe that will display the number of goal per game.

In [57]:
df = da[['Season', 'GameNumber', 'EventNumber', 'AdvantageType', 'Period', 'Zone', 'EventType', 'EventTimeFromZero', 'VTeamCode', 'HTeamCode', 'EventTeamCode']]
dg = df[df['EventType'] == 'GOAL']
dg['Goal'] = dg.apply(lambda x: 1 if (x['EventType'] == 'GOAL') else 0, axis=1)
dg['GoalNumber'] = dg.groupby(['Season', 'GameNumber']).cumcount()+1
dg.head()
dg = dg[['Season', 'GameNumber', 'EventNumber', 'AdvantageType', 'Period', 'Zone', 'EventType', 'EventTimeFromZero', 'EventTeamCode', 'VTeamCode', 'HTeamCode', 'GoalNumber']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


- merge dg onto df to display the goal number per game. Group by season, game number and period to backwardfill advantage type and goal number.

In [58]:
df = pd.merge(df, dg, on=['Season', 'GameNumber', 'EventNumber', 'AdvantageType', 'Period', 'Zone', 'EventType', 'EventTimeFromZero', 'EventTeamCode', 'VTeamCode', 'HTeamCode'], how='left')
df['AdvantageType'] = df.groupby(['Season', 'GameNumber'])['AdvantageType'].apply(lambda x: x.bfill())
df['GoalNumber'] = df.groupby(['Season', 'GameNumber', 'Period'])['GoalNumber'].apply(lambda x: x.bfill())
df.head()

Unnamed: 0,Season,GameNumber,EventNumber,AdvantageType,Period,Zone,EventType,EventTimeFromZero,VTeamCode,HTeamCode,EventTeamCode,GoalNumber
0,2010,20001,1,EV,1,N,FAC,0,MTL,TOR,MTL,1.0
1,2010,20001,3,EV,1,O,HIT,15,MTL,TOR,TOR,1.0
2,2010,20001,4,EV,1,O,HIT,46,MTL,TOR,MTL,1.0
3,2010,20001,5,EV,1,N,HIT,57,MTL,TOR,MTL,1.0
4,2010,20001,6,EV,1,D,GIVE,69,MTL,TOR,TOR,1.0


##  all on-ice events prior to a goal

- display the home goal number and visitor goal number by game number and season. Keep all on-ice events that happened prior to a goal when the score differential was between -1 and 1. Exclude all other events.

In [59]:
dz = dg[dg['EventTeamCode'] == dg['HTeamCode']]
dz['HGoalNumber'] = dz.groupby(['Season', 'GameNumber']).cumcount()+1
dy = dg[dg['EventTeamCode'] == dg['VTeamCode']]
dy['VGoalNumber'] = dy.groupby(['Season', 'GameNumber']).cumcount()+1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


- merge visitor goal number dataframe (dy) and home goal number dataframe (dz) onto goal dataframe (dg). 

In [60]:
dg = pd.merge(dg, dy, on=['Season', 'GameNumber', 'EventNumber', 'AdvantageType', 'Period', 'Zone', 'EventType', 'EventTimeFromZero', 'EventTeamCode', 'VTeamCode', 'HTeamCode', 'GoalNumber'], how='left')
dg = pd.merge(dg, dz, on=['Season', 'GameNumber', 'EventNumber', 'AdvantageType', 'Period', 'Zone', 'EventType', 'EventTimeFromZero', 'EventTeamCode', 'VTeamCode', 'HTeamCode', 'GoalNumber'], how='left')
dg.head()

Unnamed: 0,Season,GameNumber,EventNumber,AdvantageType,Period,Zone,EventType,EventTimeFromZero,EventTeamCode,VTeamCode,HTeamCode,GoalNumber,VGoalNumber,HGoalNumber
0,2010,20001,35,EV,1,O,GOAL,402,TOR,MTL,TOR,1,,1.0
1,2010,20001,49,EV,1,O,GOAL,537,TOR,MTL,TOR,2,,2.0
2,2010,20001,68,EV,1,O,GOAL,739,MTL,MTL,TOR,3,1.0,
3,2010,20001,223,EV,3,O,GOAL,96,TOR,MTL,TOR,4,,3.0
4,2010,20001,232,EV,3,O,GOAL,148,MTL,MTL,TOR,5,2.0,


- forward fill home goal number and visitor game number by season and game number. Fill in 'NaN' values with zero for home and visitor game number.

In [61]:
dg['HGoalNumber'] = dg.groupby(['Season', 'GameNumber'])['HGoalNumber'].apply(lambda x: x.ffill())
dg['VGoalNumber'] = dg.groupby(['Season', 'GameNumber'])['VGoalNumber'].apply(lambda x: x.ffill())
dg['VGoalNumber'] = dg['VGoalNumber'].fillna(0)
dg['HGoalNumber'] = dg['HGoalNumber'].fillna(0)
dg.head()

Unnamed: 0,Season,GameNumber,EventNumber,AdvantageType,Period,Zone,EventType,EventTimeFromZero,EventTeamCode,VTeamCode,HTeamCode,GoalNumber,VGoalNumber,HGoalNumber
0,2010,20001,35,EV,1,O,GOAL,402,TOR,MTL,TOR,1,0.0,1.0
1,2010,20001,49,EV,1,O,GOAL,537,TOR,MTL,TOR,2,0.0,2.0
2,2010,20001,68,EV,1,O,GOAL,739,MTL,MTL,TOR,3,1.0,2.0
3,2010,20001,223,EV,3,O,GOAL,96,TOR,MTL,TOR,4,1.0,3.0
4,2010,20001,232,EV,3,O,GOAL,148,MTL,MTL,TOR,5,2.0,3.0


- merge goal dataframe on dk and backward fill by home goal number and visitor goal number.

In [62]:
dk = da[['Season', 'GameNumber', 'EventNumber', 'AdvantageType', 'Period', 'Zone', 'EventType', 'EventTimeFromZero', 'VTeamCode', 'HTeamCode', 'EventTeamCode']]
dk = pd.merge(dk, dg, on=['Season', 'GameNumber', 'EventNumber', 'AdvantageType', 'Period', 'Zone', 'EventType', 'EventTimeFromZero', 'EventTeamCode', 'VTeamCode', 'HTeamCode'], how='left')
dk['AdvantageType'] = dk.groupby(['Season', 'GameNumber'])['AdvantageType'].apply(lambda x: x.bfill())
dk['GoalNumber'] = dk.groupby(['Season', 'GameNumber', 'Period'])['GoalNumber'].apply(lambda x: x.bfill())
dk['HGoalNumber'] = dk.groupby(['Season', 'GameNumber', 'Period'])['HGoalNumber'].apply(lambda x: x.bfill())
dk['VGoalNumber'] = dk.groupby(['Season', 'GameNumber', 'Period'])['VGoalNumber'].apply(lambda x: x.bfill())
dk = dk[dk['AdvantageType'] == 'EV']
dk.head()

Unnamed: 0,Season,GameNumber,EventNumber,AdvantageType,Period,Zone,EventType,EventTimeFromZero,VTeamCode,HTeamCode,EventTeamCode,GoalNumber,VGoalNumber,HGoalNumber
0,2010,20001,1,EV,1,N,FAC,0,MTL,TOR,MTL,1.0,0.0,1.0
1,2010,20001,3,EV,1,O,HIT,15,MTL,TOR,TOR,1.0,0.0,1.0
2,2010,20001,4,EV,1,O,HIT,46,MTL,TOR,MTL,1.0,0.0,1.0
3,2010,20001,5,EV,1,N,HIT,57,MTL,TOR,MTL,1.0,0.0,1.0
4,2010,20001,6,EV,1,D,GIVE,69,MTL,TOR,TOR,1.0,0.0,1.0


- **display home zone and visitor zone based on event team code.** If event team code is the same with home team code, home zone will be assigned the same value. If not, it will be assigned the opposite. Neutral zone is the same for both teams.

In [63]:
#dk['HZone'] = dk.apply(lambda x: x['Zone'] if (x['EventTeamCode'] == x['HTeamCode']) else np.nan, axis=1)

In [64]:
#dk.head()

- display the goal differential per game for each team.

In [65]:
dk['GD'] = dk.apply(lambda x: x['HGoalNumber'] - x['VGoalNumber'] if (x['EventTeamCode'] == x['HTeamCode']) else x['VGoalNumber'] - x['HGoalNumber'], axis=1)
dk.head()

Unnamed: 0,Season,GameNumber,EventNumber,AdvantageType,Period,Zone,EventType,EventTimeFromZero,VTeamCode,HTeamCode,EventTeamCode,GoalNumber,VGoalNumber,HGoalNumber,GD
0,2010,20001,1,EV,1,N,FAC,0,MTL,TOR,MTL,1.0,0.0,1.0,-1.0
1,2010,20001,3,EV,1,O,HIT,15,MTL,TOR,TOR,1.0,0.0,1.0,1.0
2,2010,20001,4,EV,1,O,HIT,46,MTL,TOR,MTL,1.0,0.0,1.0,-1.0
3,2010,20001,5,EV,1,N,HIT,57,MTL,TOR,MTL,1.0,0.0,1.0,-1.0
4,2010,20001,6,EV,1,D,GIVE,69,MTL,TOR,TOR,1.0,0.0,1.0,1.0


In [66]:
dk.shape

(256153, 15)

- On-ice events that occured in a different period from a goal or after a goal are excluded from the dataframe.

In [67]:
dk = dk.dropna(subset=['GoalNumber'])
dk = dk.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dk = dk.drop_duplicates(['Season', 'GameNumber', 'EventNumber', 'EventTeamCode'])
dk.head()

Unnamed: 0,Season,GameNumber,EventNumber,AdvantageType,Period,Zone,EventType,EventTimeFromZero,VTeamCode,HTeamCode,EventTeamCode,GoalNumber,VGoalNumber,HGoalNumber,GD
0,2010,20001,1,EV,1,N,FAC,0,MTL,TOR,MTL,1.0,0.0,1.0,-1.0
1,2010,20001,3,EV,1,O,HIT,15,MTL,TOR,TOR,1.0,0.0,1.0,1.0
2,2010,20001,4,EV,1,O,HIT,46,MTL,TOR,MTL,1.0,0.0,1.0,-1.0
3,2010,20001,5,EV,1,N,HIT,57,MTL,TOR,MTL,1.0,0.0,1.0,-1.0
4,2010,20001,6,EV,1,D,GIVE,69,MTL,TOR,TOR,1.0,0.0,1.0,1.0


In [68]:
dk.shape

(147273, 15)

- Assign a value of 1 if an on-ice event is a goal, 0 if not. Follow the same procedure for block, faceoff, giveaway, hits, miss, penalty, shot and takeaway. Group by season, game number zone and event type to find the sum of each on-ice event per game. 

In [69]:
dk['Goal'] = dk.apply(lambda x: 1 if (x['EventType'] == 'GOAL') else np.nan, axis=1)
dk['Block'] = dk.apply(lambda x: 1 if (x['EventType'] == 'BLOCK') else np.nan, axis=1)
dk['Faceoff'] = dk.apply(lambda x: 1 if (x['EventType'] == 'FAC') else np.nan, axis=1)
dk['Giveaway'] = dk.apply(lambda x: 1 if (x['EventType'] == 'GIVE') else np.nan, axis=1)
dk['Hit'] = dk.apply(lambda x: 1 if (x['EventType'] == 'HIT') else np.nan, axis=1)
dk['Miss'] = dk.apply(lambda x: 1 if (x['EventType'] == 'MISS') else np.nan, axis=1)
dk['Penalty'] = dk.apply(lambda x: 1 if (x['EventType'] == 'PENL') else np.nan, axis=1)
dk['Shot'] = dk.apply(lambda x: 1 if (x['EventType'] == 'SHOT') else np.nan, axis=1)
dk['Takeaway'] = dk.apply(lambda x: 1 if (x['EventType'] == 'TAKE') else np.nan, axis=1)

In [70]:
dk['Blocks'] = dk.groupby(['Season','GameNumber', 'Zone', 'EventTeamCode', 'EventType', 'GoalNumber'])['Block'].transform('sum')
dk['Faceoffs'] = dk.groupby(['Season','GameNumber', 'Zone', 'EventTeamCode', 'EventType', 'GoalNumber'])['Faceoff'].transform('sum')
dk['Giveaways'] = dk.groupby(['Season','GameNumber', 'Zone', 'EventTeamCode', 'EventType', 'GoalNumber'])['Giveaway'].transform('sum')
dk['Goals'] = dk.groupby(['Season','GameNumber', 'Zone', 'EventTeamCode', 'EventType', 'GoalNumber'])['Goal'].transform('sum')
dk['Hits'] = dk.groupby(['Season','GameNumber', 'Zone', 'EventTeamCode', 'EventType', 'GoalNumber'])['Hit'].transform('sum')
dk['Misses'] = dk.groupby(['Season','GameNumber', 'Zone', 'EventTeamCode', 'EventType', 'GoalNumber'])['Miss'].transform('sum')
dk['Penalties'] = dk.groupby(['Season','GameNumber', 'Zone', 'EventTeamCode', 'EventType', 'GoalNumber'])['Penalty'].transform('sum')
dk['Shots'] = dk.groupby(['Season','GameNumber', 'Zone', 'EventTeamCode', 'EventType', 'GoalNumber'])['Shot'].transform('sum')
dk['Takeaways'] = dk.groupby(['Season','GameNumber', 'Zone', 'EventTeamCode', 'EventType', 'GoalNumber'])['Takeaway'].transform('sum')

In [71]:
dk.head()

Unnamed: 0,Season,GameNumber,EventNumber,AdvantageType,Period,Zone,EventType,EventTimeFromZero,VTeamCode,HTeamCode,EventTeamCode,GoalNumber,VGoalNumber,HGoalNumber,GD,Goal,Block,Faceoff,Giveaway,Hit,Miss,Penalty,Shot,Takeaway,Blocks,Faceoffs,Giveaways,Goals,Hits,Misses,Penalties,Shots,Takeaways
0,2010,20001,1,EV,1,N,FAC,0,MTL,TOR,MTL,1.0,0.0,1.0,-1.0,,,1.0,,,,,,,,1.0,,,,,,,
1,2010,20001,3,EV,1,O,HIT,15,MTL,TOR,TOR,1.0,0.0,1.0,1.0,,,,,1.0,,,,,,,,,3.0,,,,
2,2010,20001,4,EV,1,O,HIT,46,MTL,TOR,MTL,1.0,0.0,1.0,-1.0,,,,,1.0,,,,,,,,,5.0,,,,
3,2010,20001,5,EV,1,N,HIT,57,MTL,TOR,MTL,1.0,0.0,1.0,-1.0,,,,,1.0,,,,,,,,,1.0,,,,
4,2010,20001,6,EV,1,D,GIVE,69,MTL,TOR,TOR,1.0,0.0,1.0,1.0,,,,1.0,,,,,,,,2.0,,,,,,


In [72]:
dk.shape

(147273, 33)

- reshape data wide to long.

In [73]:
dk = dk.rename(columns={'EventTeamCode': 'EventTeam'})
a = [col for col in dk.columns if 'TeamCode' in col]
dk = pd.lreshape(dk, {'TeamCode' : a})
dk = dk.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dk = dk.rename(columns={'EventTeam': 'EventTeamCode'})
dk.head()

Unnamed: 0,AdvantageType,Block,Blocks,EventNumber,EventTeamCode,EventTimeFromZero,EventType,Faceoff,Faceoffs,GD,GameNumber,Giveaway,Giveaways,Goal,GoalNumber,Goals,HGoalNumber,Hit,Hits,Miss,Misses,Penalties,Penalty,Period,Season,Shot,Shots,Takeaway,Takeaways,VGoalNumber,Zone,TeamCode
0,EV,,,1,MTL,0,FAC,1.0,1.0,-1.0,20001,,,,1.0,,1.0,,,,,,,1,2010,,,,,0.0,N,MTL
147273,EV,,,1,MTL,0,FAC,1.0,1.0,-1.0,20001,,,,1.0,,1.0,,,,,,,1,2010,,,,,0.0,N,TOR
1,EV,,,3,TOR,15,HIT,,,1.0,20001,,,,1.0,,1.0,1.0,3.0,,,,,1,2010,,,,,0.0,O,MTL
147274,EV,,,3,TOR,15,HIT,,,1.0,20001,,,,1.0,,1.0,1.0,3.0,,,,,1,2010,,,,,0.0,O,TOR
2,EV,,,4,MTL,46,HIT,,,-1.0,20001,,,,1.0,,1.0,1.0,5.0,,,,,1,2010,,,,,0.0,O,MTL


In [74]:
dk.shape

(294546, 32)

- drop duplicates by season, game number, team code and event type.

In [75]:
dk = dk.drop_duplicates(['Season', 'GameNumber', 'TeamCode', 'EventTeamCode', 'EventType', 'GoalNumber'])
dk = dk [['Season', 'GameNumber', 'AdvantageType', 'Zone', 'Period', 'TeamCode', 'EventNumber', 'EventType', 'EventTeamCode', 'GoalNumber', 'GD',  'Blocks', 'Faceoffs', 'Giveaways', 'Goals', 'Hits', 'Misses', 'Penalties', 'Shots', 'Takeaways']]
dk = dk.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dk.shape

(125428, 20)

- assign all on-ice events to their respectful teams by zone. If team code is the same as event team code, then the on-ice event is assigned to that team. If not it is assigned to the opposing team. Each on-ice event generates two variables per team: For (F) and Against (A).

In [76]:
dk['Blocks_F'] = dk.apply(lambda x: x['Blocks'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Blocks_A'] = dk.apply(lambda x: x['Blocks'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Faceoffs_F'] = dk.apply(lambda x: x['Faceoffs'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Faceoffs_A'] = dk.apply(lambda x: x['Faceoffs'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Giveaways_F'] = dk.apply(lambda x: x['Giveaways'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Giveaways_A'] = dk.apply(lambda x: x['Giveaways'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Goals_F'] = dk.apply(lambda x: x['Goals'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Goals_A'] = dk.apply(lambda x: x['Goals'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Hits_F'] = dk.apply(lambda x: x['Hits'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Hits_A'] = dk.apply(lambda x: x['Hits'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Miss_F'] = dk.apply(lambda x: x['Misses'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Miss_A'] = dk.apply(lambda x: x['Misses'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Penalties_F'] = dk.apply(lambda x: x['Penalties'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Penalties_A'] = dk.apply(lambda x: x['Penalties'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Shots_F'] = dk.apply(lambda x: x['Shots'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Shots_A'] = dk.apply(lambda x: x['Shots'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Takeaways_F'] = dk.apply(lambda x: x['Takeaways'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Takeaways_A'] = dk.apply(lambda x: x['Takeaways'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk = dk.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dk.head()

Unnamed: 0,Season,GameNumber,AdvantageType,Zone,Period,TeamCode,EventNumber,EventType,EventTeamCode,GoalNumber,GD,Blocks,Faceoffs,Giveaways,Goals,Hits,Misses,Penalties,Shots,Takeaways,Blocks_F,Blocks_A,Faceoffs_F,Faceoffs_A,Giveaways_F,Giveaways_A,Goals_F,Goals_A,Hits_F,Hits_A,Miss_F,Miss_A,Penalties_F,Penalties_A,Shots_F,Shots_A,Takeaways_F,Takeaways_A
0,2010,20001,EV,N,1,MTL,1,FAC,MTL,1.0,-1.0,,1.0,,,,,,,,,,1.0,,,,,,,,,,,,,,,
147273,2010,20001,EV,N,1,TOR,1,FAC,MTL,1.0,-1.0,,1.0,,,,,,,,,,,1.0,,,,,,,,,,,,,,
1,2010,20001,EV,O,1,MTL,3,HIT,TOR,1.0,1.0,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,
147274,2010,20001,EV,O,1,TOR,3,HIT,TOR,1.0,1.0,,,,,3.0,,,,,,,,,,,,,3.0,,,,,,,,,
2,2010,20001,EV,O,1,MTL,4,HIT,MTL,1.0,-1.0,,,,,5.0,,,,,,,,,,,,,5.0,,,,,,,,,


- backward and forward fill of on-ice events by season, game number and team code.

In [77]:
dk['Blocks_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Blocks_F'].apply(lambda x: x.ffill().bfill())
dk['Faceoffs_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Faceoffs_F'].apply(lambda x: x.ffill().bfill())
dk['Giveaways_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Giveaways_F'].apply(lambda x: x.ffill().bfill())
dk['Goals_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Goals_F'].apply(lambda x: x.ffill().bfill())
dk['Hits_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Hits_F'].apply(lambda x: x.ffill().bfill())
dk['Miss_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Miss_F'].apply(lambda x: x.ffill().bfill())
dk['Penalties_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Penalties_F'].apply(lambda x: x.ffill().bfill())
dk['Shots_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Shots_F'].apply(lambda x: x.ffill().bfill())
dk['Takeaways_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Takeaways_F'].apply(lambda x: x.ffill().bfill())
dk['Blocks_A'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Blocks_A'].apply(lambda x: x.ffill().bfill())
dk['Faceoffs_A'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Faceoffs_A'].apply(lambda x: x.ffill().bfill())
dk['Giveaways_A'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Giveaways_A'].apply(lambda x: x.ffill().bfill())
dk['Goals_A'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Goals_A'].apply(lambda x: x.ffill().bfill())
dk['Hits_A'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Hits_A'].apply(lambda x: x.ffill().bfill())
dk['Miss_A'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Miss_A'].apply(lambda x: x.ffill().bfill())
dk['Penalties_A'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Penalties_A'].apply(lambda x: x.ffill().bfill())
dk['Shots_A'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Shots_A'].apply(lambda x: x.ffill().bfill())
dk['Takeaways_A'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'Zone', 'GoalNumber'])['Takeaways_A'].apply(lambda x: x.ffill().bfill())
dk = dk.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dk = dk.fillna(0)
dk.head()

Unnamed: 0,Season,GameNumber,AdvantageType,Zone,Period,TeamCode,EventNumber,EventType,EventTeamCode,GoalNumber,GD,Blocks,Faceoffs,Giveaways,Goals,Hits,Misses,Penalties,Shots,Takeaways,Blocks_F,Blocks_A,Faceoffs_F,Faceoffs_A,Giveaways_F,Giveaways_A,Goals_F,Goals_A,Hits_F,Hits_A,Miss_F,Miss_A,Penalties_F,Penalties_A,Shots_F,Shots_A,Takeaways_F,Takeaways_A
0,2010,20001,EV,N,1,MTL,1,FAC,MTL,1.0,-1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
147273,2010,20001,EV,N,1,TOR,1,FAC,MTL,1.0,-1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2010,20001,EV,O,1,MTL,3,HIT,TOR,1.0,1.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,5.0,3.0,1.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0
147274,2010,20001,EV,O,1,TOR,3,HIT,TOR,1.0,1.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3.0,5.0,0.0,1.0,0.0,0.0,3.0,2.0,0.0,0.0
2,2010,20001,EV,O,1,MTL,4,HIT,MTL,1.0,-1.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,5.0,3.0,1.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0


- keep only relative columns and drop duplicates by season, gamenumber and teamcode, to have two observations per game.

In [78]:
dk = dk[['Season', 'GameNumber', 'Zone', 'EventTeamCode', 'GoalNumber', 'GD', 'Blocks_F', 'Blocks_A', 'Faceoffs_F', 'Faceoffs_A', 'Giveaways_F', 'Giveaways_A', 'Goals_F', 'Goals_A', 'Hits_F', 'Hits_A', 'Miss_F', 'Miss_A', 'Penalties_F', 'Penalties_A', 'Shots_F', 'Shots_A', 'Takeaways_F', 'Takeaways_A']]
dk = dk.sort_values(['Season', 'GameNumber'], ascending=[True, True])
dk = dk.drop_duplicates(['Season', 'GameNumber', 'EventTeamCode', 'GoalNumber', 'GD', 'Zone'])
dk = dk.rename(columns={'EventTeamCode': 'TeamCode'})
dk.head()

Unnamed: 0,Season,GameNumber,Zone,TeamCode,GoalNumber,GD,Blocks_F,Blocks_A,Faceoffs_F,Faceoffs_A,Giveaways_F,Giveaways_A,Goals_F,Goals_A,Hits_F,Hits_A,Miss_F,Miss_A,Penalties_F,Penalties_A,Shots_F,Shots_A,Takeaways_F,Takeaways_A
0,2010,20001,N,MTL,1.0,-1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2010,20001,O,TOR,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,5.0,3.0,1.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0
2,2010,20001,O,MTL,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,5.0,3.0,1.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0
4,2010,20001,D,TOR,1.0,1.0,3.0,4.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11,2010,20001,D,MTL,1.0,-1.0,3.0,4.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [79]:
dk.shape

(30219, 24)

In [80]:
dk.isnull().sum()

Season         0
GameNumber     0
Zone           0
TeamCode       0
GoalNumber     0
GD             0
Blocks_F       0
Blocks_A       0
Faceoffs_F     0
Faceoffs_A     0
Giveaways_F    0
Giveaways_A    0
Goals_F        0
Goals_A        0
Hits_F         0
Hits_A         0
Miss_F         0
Miss_A         0
Penalties_F    0
Penalties_A    0
Shots_F        0
Shots_A        0
Takeaways_F    0
Takeaways_A    0
dtype: int64

- group by season, team code and goal differential to compute the mean of each on-ice events while score differential was the same throughout the season.

In [81]:
dk['MBlocks_F'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Blocks_F'].transform('mean')
dk['MFaceoffs_F'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Faceoffs_F'].transform('mean')
dk['MGiveaways_F'] = dk.groupby(['Season', 'TeamCode', 'GD'])['Giveaways_F'].transform('mean')
dk['MGoals_F'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Goals_F'].transform('mean')
dk['MHits_F'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Hits_F'].transform('mean')
dk['MMiss_F'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Miss_F'].transform('mean')
dk['MPenalties_F'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Penalties_F'].transform('mean')
dk['MShots_F'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Shots_F'].transform('mean')
dk['MTakeaways_F'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Takeaways_F'].transform('mean')
dk['MBlocks_A'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Blocks_A'].transform('mean')
dk['MFaceoffs_A'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Faceoffs_A'].transform('mean')
dk['MGiveaways_A'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Giveaways_A'].transform('mean')
dk['MGoals_A'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Goals_A'].transform('mean')
dk['MHits_A'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Hits_A'].transform('mean')
dk['MMiss_A'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Miss_A'].transform('mean')
dk['MPenalties_A'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Penalties_A'].transform('mean')
dk['MShots_A'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Shots_A'].transform('mean')
dk['MTakeaways_A'] = dk.groupby(['Season', 'TeamCode', 'Zone', 'GD'])['Takeaways_A'].transform('mean')
dk.head()

Unnamed: 0,Season,GameNumber,Zone,TeamCode,GoalNumber,GD,Blocks_F,Blocks_A,Faceoffs_F,Faceoffs_A,Giveaways_F,Giveaways_A,Goals_F,Goals_A,Hits_F,Hits_A,Miss_F,Miss_A,Penalties_F,Penalties_A,Shots_F,Shots_A,Takeaways_F,Takeaways_A,MBlocks_F,MFaceoffs_F,MGiveaways_F,MGoals_F,MHits_F,MMiss_F,MPenalties_F,MShots_F,MTakeaways_F,MBlocks_A,MFaceoffs_A,MGiveaways_A,MGoals_A,MHits_A,MMiss_A,MPenalties_A,MShots_A,MTakeaways_A
0,2010,20001,N,MTL,1.0,-1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016667,1.283333,0.361809,0.016667,0.166667,0.016667,0.166667,0.05,0.166667,0.0,0.966667,0.1,0.0,0.283333,0.0,0.1,0.033333,0.216667
1,2010,20001,O,TOR,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,5.0,3.0,1.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,0.103896,0.277512,0.363636,0.61039,1.012987,0.116883,2.298701,0.12987,0.0,0.311688,0.116883,0.441558,0.805195,1.207792,0.077922,2.363636,0.246753
2,2010,20001,O,MTL,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,5.0,3.0,1.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,0.140845,0.361809,0.366197,0.802817,1.366197,0.112676,2.633803,0.15493,0.014085,0.253521,0.126761,0.43662,1.239437,1.591549,0.211268,3.366197,0.352113
4,2010,20001,D,TOR,1.0,1.0,3.0,4.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.791045,0.298507,0.277512,0.014925,1.134328,0.0,0.238806,0.0,0.283582,1.820896,0.283582,1.0,0.0,0.746269,0.0,0.179104,0.0,0.208955
11,2010,20001,D,MTL,1.0,-1.0,3.0,4.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.970588,0.279412,0.361809,0.0,0.823529,0.0,0.264706,0.014706,0.191176,1.764706,0.470588,0.941176,0.0,0.323529,0.014706,0.220588,0.0,0.294118


- drop duplicates by season, team code and goal differential.

In [82]:
dk = dk.drop_duplicates(['Season', 'TeamCode', 'Zone', 'GD'])
dk = dk [['Season', 'TeamCode', 'Zone', 'GD','MBlocks_F', 'MFaceoffs_F', 'MGiveaways_F', 'MGoals_F','MHits_F', 'MMiss_F', 'MPenalties_F', 'MShots_F', 'MTakeaways_F','MBlocks_A', 'MFaceoffs_A', 'MGiveaways_A', 'MGoals_A', 'MHits_A','MMiss_A', 'MPenalties_A', 'MShots_A', 'MTakeaways_A']]
dk.head()

Unnamed: 0,Season,TeamCode,Zone,GD,MBlocks_F,MFaceoffs_F,MGiveaways_F,MGoals_F,MHits_F,MMiss_F,MPenalties_F,MShots_F,MTakeaways_F,MBlocks_A,MFaceoffs_A,MGiveaways_A,MGoals_A,MHits_A,MMiss_A,MPenalties_A,MShots_A,MTakeaways_A
0,2010,MTL,N,-1.0,0.016667,1.283333,0.361809,0.016667,0.166667,0.016667,0.166667,0.05,0.166667,0.0,0.966667,0.1,0.0,0.283333,0.0,0.1,0.033333,0.216667
1,2010,TOR,O,1.0,0.0,0.103896,0.277512,0.363636,0.61039,1.012987,0.116883,2.298701,0.12987,0.0,0.311688,0.116883,0.441558,0.805195,1.207792,0.077922,2.363636,0.246753
2,2010,MTL,O,-1.0,0.0,0.140845,0.361809,0.366197,0.802817,1.366197,0.112676,2.633803,0.15493,0.014085,0.253521,0.126761,0.43662,1.239437,1.591549,0.211268,3.366197,0.352113
4,2010,TOR,D,1.0,1.791045,0.298507,0.277512,0.014925,1.134328,0.0,0.238806,0.0,0.283582,1.820896,0.283582,1.0,0.0,0.746269,0.0,0.179104,0.0,0.208955
11,2010,MTL,D,-1.0,1.970588,0.279412,0.361809,0.0,0.823529,0.0,0.264706,0.014706,0.191176,1.764706,0.470588,0.941176,0.0,0.323529,0.014706,0.220588,0.0,0.294118


### summary analysis

In [83]:
dk['TBlocks_F'] = dk.groupby(['Season', 'Zone', 'GD'])['MBlocks_F'].transform('mean')
dk['TFaceoffs_F'] = dk.groupby(['Season', 'Zone', 'GD'])['MFaceoffs_F'].transform('mean')
dk['TGiveaways_F'] = dk.groupby(['Season', 'Zone', 'GD'])['MGiveaways_F'].transform('mean')
dk['TGoals_F'] = dk.groupby(['Season','Zone', 'GD'])['MGoals_F'].transform('mean')
dk['THits_F'] = dk.groupby(['Season', 'Zone', 'GD'])['MHits_F'].transform('mean')
dk['TMiss_F'] = dk.groupby(['Season', 'Zone', 'GD'])['MMiss_F'].transform('mean')
dk['TPenalties_F'] = dk.groupby(['Season', 'Zone', 'GD'])['MPenalties_F'].transform('mean')
dk['TShots_F'] = dk.groupby(['Season', 'Zone', 'GD'])['MShots_F'].transform('mean')
dk['TTakeaways_F'] = dk.groupby(['Season', 'Zone', 'GD'])['MTakeaways_F'].transform('mean')
dk['TBlocks_A'] = dk.groupby(['Season', 'Zone', 'GD'])['MBlocks_A'].transform('mean')
dk['TFaceoffs_A'] = dk.groupby(['Season', 'Zone', 'GD'])['MFaceoffs_A'].transform('mean')
dk['TGiveaways_A'] = dk.groupby(['Season', 'Zone', 'GD'])['MGiveaways_A'].transform('mean')
dk['TGoals_A'] = dk.groupby(['Season', 'Zone', 'GD'])['MGoals_A'].transform('mean')
dk['THits_A'] = dk.groupby(['Season', 'Zone', 'GD'])['MHits_A'].transform('mean')
dk['TMiss_A'] = dk.groupby(['Season', 'Zone', 'GD'])['MMiss_A'].transform('mean')
dk['TPenalties_A'] = dk.groupby(['Season', 'Zone', 'GD'])['MPenalties_A'].transform('mean')
dk['TShots_A'] = dk.groupby(['Season', 'Zone', 'GD'])['MShots_A'].transform('mean')
dk['TTakeaways_A'] = dk.groupby(['Season', 'Zone', 'GD'])['MTakeaways_A'].transform('mean')
dk.head()

Unnamed: 0,Season,TeamCode,Zone,GD,MBlocks_F,MFaceoffs_F,MGiveaways_F,MGoals_F,MHits_F,MMiss_F,MPenalties_F,MShots_F,MTakeaways_F,MBlocks_A,MFaceoffs_A,MGiveaways_A,MGoals_A,MHits_A,MMiss_A,MPenalties_A,MShots_A,MTakeaways_A,TBlocks_F,TFaceoffs_F,TGiveaways_F,TGoals_F,THits_F,TMiss_F,TPenalties_F,TShots_F,TTakeaways_F,TBlocks_A,TFaceoffs_A,TGiveaways_A,TGoals_A,THits_A,TMiss_A,TPenalties_A,TShots_A,TTakeaways_A
0,2010,MTL,N,-1.0,0.016667,1.283333,0.361809,0.016667,0.166667,0.016667,0.166667,0.05,0.166667,0.0,0.966667,0.1,0.0,0.283333,0.0,0.1,0.033333,0.216667,0.001019,0.951604,0.219524,0.001093,0.164387,0.014127,0.125689,0.039736,0.120866,0.000505,0.942758,0.096092,0.001677,0.162392,0.008537,0.11684,0.0394,0.153622
1,2010,TOR,O,1.0,0.0,0.103896,0.277512,0.363636,0.61039,1.012987,0.116883,2.298701,0.12987,0.0,0.311688,0.116883,0.441558,0.805195,1.207792,0.077922,2.363636,0.246753,0.005156,0.236208,0.213541,0.37236,0.69585,1.076839,0.096877,2.526075,0.177241,0.008098,0.260032,0.17374,0.406754,0.860531,1.135992,0.103198,2.654044,0.228705
2,2010,MTL,O,-1.0,0.0,0.140845,0.361809,0.366197,0.802817,1.366197,0.112676,2.633803,0.15493,0.014085,0.253521,0.126761,0.43662,1.239437,1.591549,0.211268,3.366197,0.352113,0.006305,0.255126,0.219524,0.370462,0.748076,1.138242,0.099228,2.690892,0.182709,0.008356,0.269037,0.178857,0.388819,0.930989,1.205694,0.109002,2.784155,0.236438
4,2010,TOR,D,1.0,1.791045,0.298507,0.277512,0.014925,1.134328,0.0,0.238806,0.0,0.283582,1.820896,0.283582,1.0,0.0,0.746269,0.0,0.179104,0.0,0.208955,1.634582,0.310933,0.213541,0.000914,0.742224,0.00434,0.233873,0.007065,0.25253,1.584137,0.337389,0.566149,0.002034,0.776256,0.004806,0.212238,0.007162,0.289375
11,2010,MTL,D,-1.0,1.970588,0.279412,0.361809,0.0,0.823529,0.0,0.264706,0.014706,0.191176,1.764706,0.470588,0.941176,0.0,0.323529,0.014706,0.220588,0.0,0.294118,1.607473,0.307599,0.219524,0.00079,0.73091,0.00514,0.240369,0.006519,0.240733,1.558148,0.336761,0.57195,0.002655,0.743028,0.004407,0.213483,0.006342,0.276171


In [84]:
dk = dk[['GD', 'Zone', 'TBlocks_F', 'TFaceoffs_F', 'TGiveaways_F', 'TGoals_F', 'THits_F', 'TMiss_F', 'TPenalties_F', 'TShots_F', 'TTakeaways_F', 'TBlocks_A', 'TFaceoffs_A', 'TGiveaways_A', 'TGoals_A', 'THits_A', 'TMiss_A', 'TPenalties_A', 'TShots_A', 'TTakeaways_A']]
dk = dk.drop_duplicates(['GD', 'Zone'])
dk = dk.sort_values(['GD'], ascending=[False])
dk.set_index(['GD', 'Zone'])

Unnamed: 0_level_0,Unnamed: 1_level_0,TBlocks_F,TFaceoffs_F,TGiveaways_F,TGoals_F,THits_F,TMiss_F,TPenalties_F,TShots_F,TTakeaways_F,TBlocks_A,TFaceoffs_A,TGiveaways_A,TGoals_A,THits_A,TMiss_A,TPenalties_A,TShots_A,TTakeaways_A
GD,Zone,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
8.0,N,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8.0,O,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,0.000000,5.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,2.000000,0.000000,4.000000,0.000000
8.0,D,4.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000
7.0,D,1.611111,0.759259,0.142593,0.000000,0.777778,0.000000,0.444444,0.000000,0.333333,1.240741,0.092593,0.148148,0.000000,0.500000,0.000000,0.055556,0.000000,0.314815
7.0,O,0.000000,0.030303,0.116667,0.454545,0.166667,0.575758,0.060606,1.666667,0.060606,0.000000,0.363636,0.000000,0.409091,0.530303,0.848485,0.121212,2.969697,0.151515
7.0,N,0.000000,1.018519,0.087037,0.000000,0.000000,0.000000,0.000000,0.055556,0.111111,0.000000,0.666667,0.092593,0.000000,0.037037,0.000000,0.055556,0.000000,0.000000
6.0,D,0.789683,0.238095,0.172786,0.000000,0.435714,0.015873,0.152381,0.047619,0.120635,0.764286,0.317460,0.157937,0.015873,0.642857,0.000000,0.184127,0.000000,0.182540
6.0,N,0.000000,1.245614,0.164658,0.000000,0.026316,0.000000,0.210526,0.000000,0.052632,0.000000,0.938596,0.039474,0.000000,0.096491,0.000000,0.236842,0.030702,0.131579
6.0,O,0.000000,0.170000,0.181426,0.340000,0.590833,1.156667,0.143333,1.705833,0.085000,0.012500,0.135833,0.086667,0.425000,0.439167,0.679167,0.116667,1.611667,0.080000
5.0,D,0.987607,0.264316,0.119500,0.000000,0.645299,0.019231,0.148504,0.000000,0.159188,1.060470,0.349573,0.283120,0.004274,0.637607,0.000000,0.208333,0.000000,0.327350


In [85]:
dk = pd.pivot_table(dk, values=(['TBlocks_F', 'TFaceoffs_F', 'TGiveaways_F', 'TGoals_F', 'THits_F', 'TMiss_F', 'TPenalties_F', 'TShots_F', 'TTakeaways_F', 'TBlocks_A', 'TFaceoffs_A', 'TGiveaways_A', 'TGoals_A', 'THits_A', 'TMiss_A', 'TPenalties_A', 'TShots_A', 'TTakeaways_A']), index=['GD'], columns=['Zone'])
dk.head()

Unnamed: 0_level_0,TBlocks_F,TBlocks_F,TBlocks_F,TFaceoffs_F,TFaceoffs_F,TFaceoffs_F,TGiveaways_F,TGiveaways_F,TGiveaways_F,TGoals_F,TGoals_F,TGoals_F,THits_F,THits_F,THits_F,TMiss_F,TMiss_F,TMiss_F,TPenalties_F,TPenalties_F,TPenalties_F,TShots_F,TShots_F,TShots_F,TTakeaways_F,TTakeaways_F,TTakeaways_F,TBlocks_A,TBlocks_A,TBlocks_A,TFaceoffs_A,TFaceoffs_A,TFaceoffs_A,TGiveaways_A,TGiveaways_A,TGiveaways_A,TGoals_A,TGoals_A,TGoals_A,THits_A,THits_A,THits_A,TMiss_A,TMiss_A,TMiss_A,TPenalties_A,TPenalties_A,TPenalties_A,TShots_A,TShots_A,TShots_A,TTakeaways_A,TTakeaways_A,TTakeaways_A
Zone,D,N,O,D,N,O,D,N,O,D,N,O,D,N,O,D,N,O,D,N,O,D,N,O,D,N,O,D,N,O,D,N,O,D,N,O,D,N,O,D,N,O,D,N,O,D,N,O,D,N,O,D,N,O
GD,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2
-8.0,4.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0
-7.0,1.555556,0.0,0.0,0.611111,1.714286,0.05,0.111111,0.142857,0.1,0.0,0.0,0.35,0.666667,0.285714,0.216667,0.0,0.0,0.95,0.333333,0.0,0.15,0.0,0.071429,1.633333,0.222222,0.214286,0.1,1.222222,0.0,0.0,0.277778,0.5,0.283333,0.222222,0.142857,0.0,0.0,0.0,0.516667,0.5,0.071429,0.583333,0.0,0.0,0.716667,0.166667,0.142857,0.1,0.0,0.0,2.55,0.333333,0.142857,0.183333
-6.0,1.007407,0.0,0.0,0.196296,1.4875,0.134058,0.18254,0.160119,0.153727,0.0,0.0,0.294928,0.372222,0.05,0.746377,0.027778,0.029167,0.866667,0.133333,0.183333,0.122464,0.018519,0.0,1.747101,0.112963,0.075,0.105072,0.687037,0.0,0.014493,0.390741,0.654167,0.218841,0.135185,0.075,0.16087,0.027778,0.0,0.406522,0.766667,0.133333,0.439855,0.0,0.0,0.71087,0.346296,0.15,0.073913,0.0,0.066667,1.723913,0.231481,0.166667,0.17029
-5.0,1.112835,0.0,0.0,0.156705,0.921605,0.270443,0.125016,0.134276,0.125016,0.0,0.0,0.274877,0.656322,0.138889,0.579885,0.003831,0.0,0.72225,0.151341,0.442593,0.178489,0.0,0.08642,2.448686,0.179119,0.108025,0.135386,1.11705,0.0,0.0,0.20977,0.698457,0.407225,0.328544,0.132716,0.042693,0.011494,0.0,0.427586,0.47567,0.047531,0.82225,0.0,0.00463,1.04647,0.353448,0.27284,0.085386,0.005747,0.060185,1.815764,0.395211,0.117284,0.051724
-4.0,1.465011,0.0,0.0,0.268784,0.904286,0.511379,0.172695,0.158946,0.172695,0.011111,0.0,0.252141,0.560919,0.143818,0.767231,0.0,0.008425,1.290259,0.254049,0.230969,0.149649,0.00303,0.023563,2.294445,0.224776,0.066891,0.196044,1.480752,0.011494,0.0,0.218294,0.797001,0.327991,0.417189,0.074011,0.172009,0.003704,0.006897,0.471297,0.729626,0.09227,0.608516,0.008889,0.027011,0.852285,0.330652,0.194219,0.15658,0.006364,0.014475,2.197196,0.307306,0.066437,0.169125


In [86]:
beginningtex = """\\documentclass{report}
\\usepackage{booktabs}
\\begin{document}"""
endtex = "\end{document}"

f = open('/Users/stefanostselios/Brock University/Kevin Mongeon - StephanosShare/out/latex/events/gd_even_strength_events_with_zones.tex', 'w')
f.write(beginningtex)
f.write(dk.to_latex())
f.write(endtex)
f.close()