# Occurance of on-ice events by minutes

In [202]:
import sys
import os
import pandas as pd
import numpy as np
import datetime, time
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols
from pylab import hist, show
import scipy
import zipfile


pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 200)

In [203]:
pwd

'/Users/stefanostselios/Desktop/nhl_roster_design-master'

In [204]:
da = pd.read_csv('/Users/stefanostselios/Brock University/Kevin Mongeon - StephanosShare/out/pbp_merged.csv')
#da = pd.read_csv('/Users/kevinmongeon/Brock University/Steve Tselios - StephanosShare/out/pbp_merged.csv')
da = da.drop('Unnamed: 0', axis=1)
da = da.rename(columns={'TeamCode': 'EventTeamCode'})

- keep regular season games and relevant on-ice events in **regulation time**. Drop duplicates by season, game number, event number and event team to have one obsrevation per event per game.

In [205]:
da = da[da['GameNumber'] <= 21230]
da = da[da['Period'] <= 3]
da = da[da['Period'] >= 1]
da = da[da['EventType']!='STOP']
da = da[da['EventType']!='EISTR']
da = da[da['EventType']!='EIEND']
da = da[da['EventType'] !='FIGHT']
da = da.dropna(subset=['EventNumber'])

In [206]:
da.head()

Unnamed: 0,Season,GameNumber,EventNumber,Period,AdvantageType,EventTimeFromZero,EventTimeFromTwenty,EventType,EventDetail,VPlayer1,VPosition1,VPlayer2,VPosition2,VPlayer3,VPosition3,VPlayer4,VPosition4,VPlayer5,VPosition5,VPlayer6,VPosition6,HPlayer1,HPosition1,HPlayer2,HPosition2,HPlayer3,HPosition3,HPlayer4,HPosition4,HPlayer5,HPosition5,HPlayer6,HPosition6,GameDate,VTeamCode,HTeamCode,EventTeamCode,PlayerNumber,PlayerName,ShotType,ShotResult,Zone,Length,PenaltyType
0,2010,20001,1,1,,0,1200,FAC,MTL won Neu. Zone - MTL #11 GOMEZ vs TOR #37 B...,11,C,21.0,R,57.0,L,26.0,D,75.0,D,31.0,G,37,C,9.0,R,11.0,L,3.0,D,22.0,D,35.0,G,2010-10-07,MTL,TOR,MTL,11.0,GOMEZ,,,N,,
1,2010,20001,3,1,EV,15,1185,HIT,"TOR #37 BRENT HIT MTL #26 GORGES, Off. Zone",11,C,21.0,R,57.0,L,26.0,D,75.0,D,31.0,G,37,C,9.0,R,11.0,L,3.0,D,22.0,D,35.0,G,2010-10-07,MTL,TOR,TOR,37.0,BRENT,,,O,,
2,2010,20001,4,1,EV,46,1154,HIT,"MTL #14 PLEKANEC HIT TOR #2 SCHENN, Off. Zone",14,C,81.0,C,46.0,L,6.0,D,76.0,D,31.0,G,42,C,81.0,C,32.0,R,2.0,D,15.0,D,35.0,G,2010-10-07,MTL,TOR,MTL,14.0,PLEKANEC,,,O,,
3,2010,20001,5,1,EV,57,1143,HIT,"MTL #76 SUBBAN HIT TOR #15 KABERLE, Neu. Zone",14,C,81.0,C,46.0,L,6.0,D,76.0,D,31.0,G,42,C,81.0,C,32.0,R,2.0,D,15.0,D,35.0,G,2010-10-07,MTL,TOR,MTL,76.0,SUBBAN,,,N,,
4,2010,20001,6,1,EV,69,1131,GIVE,"TOR&nbsp;GIVEAWAY - #35 GIGUERE, Def. Zone",14,C,81.0,C,46.0,L,6.0,D,76.0,D,31.0,G,42,C,81.0,C,32.0,R,2.0,D,15.0,D,35.0,G,2010-10-07,MTL,TOR,TOR,35.0,GIGUERE,,,D,,


In [207]:
da.shape

(310113, 44)

- create a goal dataframe that will display the number of goal per game.

In [208]:
df = da[['Season', 'GameNumber', 'EventNumber', 'AdvantageType', 'Period', 'EventType', 'EventTimeFromZero', 'VTeamCode', 'HTeamCode', 'EventTeamCode']]
dg = df[df['EventType'] == 'GOAL']
dg['Goal'] = dg.apply(lambda x: 1 if (x['EventType'] == 'GOAL') else 0, axis=1)
dg['GoalNumber'] = dg.groupby(['Season', 'GameNumber']).cumcount()+1
dg.head()
dg = dg[['Season', 'GameNumber', 'EventNumber', 'AdvantageType', 'Period', 'EventType', 'EventTimeFromZero', 'EventTeamCode', 'VTeamCode', 'HTeamCode', 'GoalNumber']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


- merge dg onto df to display the goal number per game. Group by season, game number and period to backwardfill advantage type and goal number.

In [209]:
df = pd.merge(df, dg, on=['Season', 'GameNumber', 'EventNumber', 'AdvantageType', 'Period', 'EventType', 'EventTimeFromZero', 'EventTeamCode', 'VTeamCode', 'HTeamCode'], how='left')
df['AdvantageType'] = df.groupby(['Season', 'GameNumber'])['AdvantageType'].apply(lambda x: x.bfill())
df['GoalNumber'] = df.groupby(['Season', 'GameNumber', 'Period'])['GoalNumber'].apply(lambda x: x.bfill())
df.head()

Unnamed: 0,Season,GameNumber,EventNumber,AdvantageType,Period,EventType,EventTimeFromZero,VTeamCode,HTeamCode,EventTeamCode,GoalNumber
0,2010,20001,1,EV,1,FAC,0,MTL,TOR,MTL,1.0
1,2010,20001,3,EV,1,HIT,15,MTL,TOR,TOR,1.0
2,2010,20001,4,EV,1,HIT,46,MTL,TOR,MTL,1.0
3,2010,20001,5,EV,1,HIT,57,MTL,TOR,MTL,1.0
4,2010,20001,6,EV,1,GIVE,69,MTL,TOR,TOR,1.0


- display the home goal number and visitor goal number by game number and season. Keep all on-ice events that happened prior to a goal when the score differential was between -1 and 1. Exclude all other events.

In [210]:
dz = dg[dg['EventTeamCode'] == dg['HTeamCode']]
dz['HGoalNumber'] = dz.groupby(['Season', 'GameNumber']).cumcount()+1
dy = dg[dg['EventTeamCode'] == dg['VTeamCode']]
dy['VGoalNumber'] = dy.groupby(['Season', 'GameNumber']).cumcount()+1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


- merge visitor goal number dataframe (dy) and home goal number dataframe (dz) onto goal dataframe (dg). 

In [211]:
dg = pd.merge(dg, dy, on=['Season', 'GameNumber', 'EventNumber', 'AdvantageType', 'Period', 'EventType', 'EventTimeFromZero', 'EventTeamCode', 'VTeamCode', 'HTeamCode', 'GoalNumber'], how='left')
dg = pd.merge(dg, dz, on=['Season', 'GameNumber', 'EventNumber', 'AdvantageType', 'Period', 'EventType', 'EventTimeFromZero', 'EventTeamCode', 'VTeamCode', 'HTeamCode', 'GoalNumber'], how='left')
dg.head()

Unnamed: 0,Season,GameNumber,EventNumber,AdvantageType,Period,EventType,EventTimeFromZero,EventTeamCode,VTeamCode,HTeamCode,GoalNumber,VGoalNumber,HGoalNumber
0,2010,20001,35,EV,1,GOAL,402,TOR,MTL,TOR,1,,1.0
1,2010,20001,49,EV,1,GOAL,537,TOR,MTL,TOR,2,,2.0
2,2010,20001,68,EV,1,GOAL,739,MTL,MTL,TOR,3,1.0,
3,2010,20001,223,EV,3,GOAL,96,TOR,MTL,TOR,4,,3.0
4,2010,20001,232,EV,3,GOAL,148,MTL,MTL,TOR,5,2.0,


- forward fill home goal number and visitor game number by season and game number. Fill in 'NaN' values with zero for home and visitor game number.

In [212]:
dg['HGoalNumber'] = dg.groupby(['Season', 'GameNumber'])['HGoalNumber'].apply(lambda x: x.ffill())
dg['VGoalNumber'] = dg.groupby(['Season', 'GameNumber'])['VGoalNumber'].apply(lambda x: x.ffill())
dg['VGoalNumber'] = dg['VGoalNumber'].fillna(0)
dg['HGoalNumber'] = dg['HGoalNumber'].fillna(0)
dg.head()

Unnamed: 0,Season,GameNumber,EventNumber,AdvantageType,Period,EventType,EventTimeFromZero,EventTeamCode,VTeamCode,HTeamCode,GoalNumber,VGoalNumber,HGoalNumber
0,2010,20001,35,EV,1,GOAL,402,TOR,MTL,TOR,1,0.0,1.0
1,2010,20001,49,EV,1,GOAL,537,TOR,MTL,TOR,2,0.0,2.0
2,2010,20001,68,EV,1,GOAL,739,MTL,MTL,TOR,3,1.0,2.0
3,2010,20001,223,EV,3,GOAL,96,TOR,MTL,TOR,4,1.0,3.0
4,2010,20001,232,EV,3,GOAL,148,MTL,MTL,TOR,5,2.0,3.0


- merge goal dataframe on dk and backward fill by home goal number and visitor goal number.

In [213]:
dk = da[['Season', 'GameNumber', 'EventNumber', 'AdvantageType', 'Period', 'EventType', 'EventTimeFromZero', 'VTeamCode', 'HTeamCode', 'EventTeamCode']]
dk = pd.merge(dk, dg, on=['Season', 'GameNumber', 'EventNumber', 'AdvantageType', 'Period', 'EventType', 'EventTimeFromZero', 'EventTeamCode', 'VTeamCode', 'HTeamCode'], how='left')
dk['AdvantageType'] = dk.groupby(['Season', 'GameNumber'])['AdvantageType'].apply(lambda x: x.bfill())
dk['GoalNumber'] = dk.groupby(['Season', 'GameNumber', 'Period'])['GoalNumber'].apply(lambda x: x.bfill())
dk['HGoalNumber'] = dk.groupby(['Season', 'GameNumber', 'Period'])['HGoalNumber'].apply(lambda x: x.bfill())
dk['VGoalNumber'] = dk.groupby(['Season', 'GameNumber', 'Period'])['VGoalNumber'].apply(lambda x: x.bfill())
dk.head()

Unnamed: 0,Season,GameNumber,EventNumber,AdvantageType,Period,EventType,EventTimeFromZero,VTeamCode,HTeamCode,EventTeamCode,GoalNumber,VGoalNumber,HGoalNumber
0,2010,20001,1,EV,1,FAC,0,MTL,TOR,MTL,1.0,0.0,1.0
1,2010,20001,3,EV,1,HIT,15,MTL,TOR,TOR,1.0,0.0,1.0
2,2010,20001,4,EV,1,HIT,46,MTL,TOR,MTL,1.0,0.0,1.0
3,2010,20001,5,EV,1,HIT,57,MTL,TOR,MTL,1.0,0.0,1.0
4,2010,20001,6,EV,1,GIVE,69,MTL,TOR,TOR,1.0,0.0,1.0


- create a column that will display the time of a goal. Backward fill so events prior to a goal will have the same goal time. Compute the difference betweewn time of event and time of goal. Categorize events based on their time differnce from goal. If event happened within a minute, 2 minutes, 3 minutes, 4 minutes, 5 minutes or 6 + minutes prior to a goal.

In [214]:
dk['GoalTime'] = dk.apply(lambda x: x['EventTimeFromZero'] if (x['EventType'] == 'GOAL') else np.nan, axis=1)
dk['GoalTime'] = dk.groupby(['Season', 'GameNumber', 'GoalNumber'])['GoalTime'].apply(lambda x: x.bfill())
dk['TimeFromGoal'] = dk['GoalTime'] - dk['EventTimeFromZero']
dk['Minute'] = dk.apply(lambda x: 1 if (x['TimeFromGoal'] <= 60) else 2 if ((x['TimeFromGoal'] > 60) & (x['TimeFromGoal'] <=120)) else 3 if ((x['TimeFromGoal'] > 120) & (x['TimeFromGoal'] <=180)) else 4 if ((x['TimeFromGoal'] > 180) & (x['TimeFromGoal'] <=240)) else 5 if ((x['TimeFromGoal'] > 240) & (x['TimeFromGoal'] <=300)) else 6, axis=1)
dk.head()

Unnamed: 0,Season,GameNumber,EventNumber,AdvantageType,Period,EventType,EventTimeFromZero,VTeamCode,HTeamCode,EventTeamCode,GoalNumber,VGoalNumber,HGoalNumber,GoalTime,TimeFromGoal,Minute
0,2010,20001,1,EV,1,FAC,0,MTL,TOR,MTL,1.0,0.0,1.0,402.0,402.0,6
1,2010,20001,3,EV,1,HIT,15,MTL,TOR,TOR,1.0,0.0,1.0,402.0,387.0,6
2,2010,20001,4,EV,1,HIT,46,MTL,TOR,MTL,1.0,0.0,1.0,402.0,356.0,6
3,2010,20001,5,EV,1,HIT,57,MTL,TOR,MTL,1.0,0.0,1.0,402.0,345.0,6
4,2010,20001,6,EV,1,GIVE,69,MTL,TOR,TOR,1.0,0.0,1.0,402.0,333.0,6


- create zone for home and visitor team. Offensive zone for the home team is the defensive zone of the visitor team and defensive zone for the home team is the offensive zone for the visitor team. Neutral zone is the same for both teams.

In [215]:
#dk['VZone'] = dk.apply(lambda x: x['Zone'] if (x['EventTeamCode'] == x['VTeamCode']) else 'D' if ((x['EventTeamCode'] ==x['HTeamCode']) & (x['Zone'] == 'O')) else 'O' if ((x['EventTeamCode'] == x['HTeamCode']) & (x['Zone'] == 'D')) else 'N', axis=1)
#dk['HZone'] = dk.apply(lambda x: x['Zone'] if (x['EventTeamCode'] == x['HTeamCode']) else 'D' if ((x['EventTeamCode'] ==x['VTeamCode']) & (x['Zone'] == 'O')) else 'O' if ((x['EventTeamCode'] == x['VTeamCode']) & (x['Zone'] == 'D')) else 'N', axis=1)
#dk.head()

- ** for even strength events only!**

In [216]:
#dk = dk[dk['AdvantageType'] == 'EV']

- display the goal differential per game for each team.

In [217]:
dk['GD'] = dk.apply(lambda x: x['HGoalNumber'] - x['VGoalNumber'] if (x['EventTeamCode'] == x['HTeamCode']) else x['VGoalNumber'] - x['HGoalNumber'], axis=1)
dk.head()

Unnamed: 0,Season,GameNumber,EventNumber,AdvantageType,Period,EventType,EventTimeFromZero,VTeamCode,HTeamCode,EventTeamCode,GoalNumber,VGoalNumber,HGoalNumber,GoalTime,TimeFromGoal,Minute,GD
0,2010,20001,1,EV,1,FAC,0,MTL,TOR,MTL,1.0,0.0,1.0,402.0,402.0,6,-1.0
1,2010,20001,3,EV,1,HIT,15,MTL,TOR,TOR,1.0,0.0,1.0,402.0,387.0,6,1.0
2,2010,20001,4,EV,1,HIT,46,MTL,TOR,MTL,1.0,0.0,1.0,402.0,356.0,6,-1.0
3,2010,20001,5,EV,1,HIT,57,MTL,TOR,MTL,1.0,0.0,1.0,402.0,345.0,6,-1.0
4,2010,20001,6,EV,1,GIVE,69,MTL,TOR,TOR,1.0,0.0,1.0,402.0,333.0,6,1.0


In [218]:
dk.shape

(310113, 17)

- On-ice events that occured in a different period from a goal or after a goal are excluded from the dataframe.

In [219]:
dk = dk.dropna(subset=['GoalNumber'])
dk = dk.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dk = dk.drop_duplicates(['Season', 'GameNumber', 'EventNumber', 'EventTeamCode'])
dk.head()

Unnamed: 0,Season,GameNumber,EventNumber,AdvantageType,Period,EventType,EventTimeFromZero,VTeamCode,HTeamCode,EventTeamCode,GoalNumber,VGoalNumber,HGoalNumber,GoalTime,TimeFromGoal,Minute,GD
0,2010,20001,1,EV,1,FAC,0,MTL,TOR,MTL,1.0,0.0,1.0,402.0,402.0,6,-1.0
1,2010,20001,3,EV,1,HIT,15,MTL,TOR,TOR,1.0,0.0,1.0,402.0,387.0,6,1.0
2,2010,20001,4,EV,1,HIT,46,MTL,TOR,MTL,1.0,0.0,1.0,402.0,356.0,6,-1.0
3,2010,20001,5,EV,1,HIT,57,MTL,TOR,MTL,1.0,0.0,1.0,402.0,345.0,6,-1.0
4,2010,20001,6,EV,1,GIVE,69,MTL,TOR,TOR,1.0,0.0,1.0,402.0,333.0,6,1.0


In [220]:
dk.shape

(178759, 17)

- Assign a value of 1 if an on-ice event is a goal, 0 if not. Follow the same procedure for block, faceoff, giveaway, hits, miss, penalty, shot and takeaway. Group by season, game number, zone, event type, goal number and minute from goal to find the sum of each on-ice event per game. 

In [221]:
dk['Goal'] = dk.apply(lambda x: 1 if (x['EventType'] == 'GOAL') else np.nan, axis=1)
dk['Block'] = dk.apply(lambda x: 1 if (x['EventType'] == 'BLOCK') else np.nan, axis=1)
dk['Faceoff'] = dk.apply(lambda x: 1 if (x['EventType'] == 'FAC') else np.nan, axis=1)
dk['Giveaway'] = dk.apply(lambda x: 1 if (x['EventType'] == 'GIVE') else np.nan, axis=1)
dk['Hit'] = dk.apply(lambda x: 1 if (x['EventType'] == 'HIT') else np.nan, axis=1)
dk['Miss'] = dk.apply(lambda x: 1 if (x['EventType'] == 'MISS') else np.nan, axis=1)
dk['Penalty'] = dk.apply(lambda x: 1 if (x['EventType'] == 'PENL') else np.nan, axis=1)
dk['Shot'] = dk.apply(lambda x: 1 if (x['EventType'] == 'SHOT') else np.nan, axis=1)
dk['Takeaway'] = dk.apply(lambda x: 1 if (x['EventType'] == 'TAKE') else np.nan, axis=1)

In [222]:
dk['Blocks'] = dk.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType', 'GoalNumber', 'Minute'])['Block'].transform('sum')
dk['Faceoffs'] = dk.groupby(['Season','GameNumber',  'EventTeamCode', 'EventType', 'GoalNumber', 'Minute'])['Faceoff'].transform('sum')
dk['Giveaways'] = dk.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType', 'GoalNumber', 'Minute'])['Giveaway'].transform('sum')
dk['Goals'] = dk.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType', 'GoalNumber', 'Minute'])['Goal'].transform('sum')
dk['Hits'] = dk.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType', 'GoalNumber', 'Minute'])['Hit'].transform('sum')
dk['Misses'] = dk.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType', 'GoalNumber', 'Minute'])['Miss'].transform('sum')
dk['Penalties'] = dk.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType', 'GoalNumber', 'Minute'])['Penalty'].transform('sum')
dk['Shots'] = dk.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType', 'GoalNumber', 'Minute'])['Shot'].transform('sum')
dk['Takeaways'] = dk.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType', 'GoalNumber', 'Minute'])['Takeaway'].transform('sum')

In [223]:
dk.head()

Unnamed: 0,Season,GameNumber,EventNumber,AdvantageType,Period,EventType,EventTimeFromZero,VTeamCode,HTeamCode,EventTeamCode,GoalNumber,VGoalNumber,HGoalNumber,GoalTime,TimeFromGoal,Minute,GD,Goal,Block,Faceoff,Giveaway,Hit,Miss,Penalty,Shot,Takeaway,Blocks,Faceoffs,Giveaways,Goals,Hits,Misses,Penalties,Shots,Takeaways
0,2010,20001,1,EV,1,FAC,0,MTL,TOR,MTL,1.0,0.0,1.0,402.0,402.0,6,-1.0,,,1.0,,,,,,,,1.0,,,,,,,
1,2010,20001,3,EV,1,HIT,15,MTL,TOR,TOR,1.0,0.0,1.0,402.0,387.0,6,1.0,,,,,1.0,,,,,,,,,1.0,,,,
2,2010,20001,4,EV,1,HIT,46,MTL,TOR,MTL,1.0,0.0,1.0,402.0,356.0,6,-1.0,,,,,1.0,,,,,,,,,2.0,,,,
3,2010,20001,5,EV,1,HIT,57,MTL,TOR,MTL,1.0,0.0,1.0,402.0,345.0,6,-1.0,,,,,1.0,,,,,,,,,2.0,,,,
4,2010,20001,6,EV,1,GIVE,69,MTL,TOR,TOR,1.0,0.0,1.0,402.0,333.0,6,1.0,,,,1.0,,,,,,,,1.0,,,,,,


In [224]:
dk.shape

(178759, 35)

### reshape data wide to long.

In [225]:
dk = dk.rename(columns={'EventTeamCode': 'EventTeam', 'Zone': 'Z'})
a = [col for col in dk.columns if 'TeamCode' in col]
dk = pd.lreshape(dk, {'TeamCode' : a})
dk = dk.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dk = dk.rename(columns={'EventTeam': 'EventTeamCode'})
dk.head()

Unnamed: 0,AdvantageType,Block,Blocks,EventNumber,EventTeamCode,EventTimeFromZero,EventType,Faceoff,Faceoffs,GD,GameNumber,Giveaway,Giveaways,Goal,GoalNumber,GoalTime,Goals,HGoalNumber,Hit,Hits,Minute,Miss,Misses,Penalties,Penalty,Period,Season,Shot,Shots,Takeaway,Takeaways,TimeFromGoal,VGoalNumber,TeamCode
0,EV,,,1,MTL,0,FAC,1.0,1.0,-1.0,20001,,,,1.0,402.0,,1.0,,,6,,,,,1,2010,,,,,402.0,0.0,MTL
178759,EV,,,1,MTL,0,FAC,1.0,1.0,-1.0,20001,,,,1.0,402.0,,1.0,,,6,,,,,1,2010,,,,,402.0,0.0,TOR
1,EV,,,3,TOR,15,HIT,,,1.0,20001,,,,1.0,402.0,,1.0,1.0,1.0,6,,,,,1,2010,,,,,387.0,0.0,MTL
178760,EV,,,3,TOR,15,HIT,,,1.0,20001,,,,1.0,402.0,,1.0,1.0,1.0,6,,,,,1,2010,,,,,387.0,0.0,TOR
2,EV,,,4,MTL,46,HIT,,,-1.0,20001,,,,1.0,402.0,,1.0,1.0,2.0,6,,,,,1,2010,,,,,356.0,0.0,MTL


In [226]:
dk.shape

(357518, 34)

- drop duplicates by season, game number, team code and event type.

In [227]:
dk = dk.drop_duplicates(['Season', 'GameNumber', 'TeamCode', 'EventTeamCode', 'EventType', 'GoalNumber'])
dk = dk [['Season', 'GameNumber', 'AdvantageType','Period', 'TeamCode', 'EventNumber', 'EventType', 'EventTeamCode', 'GoalNumber', 'GD', 'GoalTime', 'EventTimeFromZero', 'TimeFromGoal', 'Minute',  'Blocks', 'Faceoffs', 'Giveaways', 'Goals', 'Hits', 'Misses', 'Penalties', 'Shots', 'Takeaways']]
dk = dk.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dk.shape

(138546, 23)

- assign all on-ice events to their respectful teams by zone. If team code is the same as event team code, then the on-ice event is assigned to that team. If not it is assigned to the opposing team. Each on-ice event generates two variables per team: For (F) and Against (A).

In [228]:
dk['Blocks_F'] = dk.apply(lambda x: x['Blocks'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Blocks_A'] = dk.apply(lambda x: x['Blocks'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Faceoffs_F'] = dk.apply(lambda x: x['Faceoffs'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Faceoffs_A'] = dk.apply(lambda x: x['Faceoffs'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Giveaways_F'] = dk.apply(lambda x: x['Giveaways'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Giveaways_A'] = dk.apply(lambda x: x['Giveaways'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Goals_F'] = dk.apply(lambda x: x['Goals'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Goals_A'] = dk.apply(lambda x: x['Goals'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Hits_F'] = dk.apply(lambda x: x['Hits'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Hits_A'] = dk.apply(lambda x: x['Hits'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Miss_F'] = dk.apply(lambda x: x['Misses'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Miss_A'] = dk.apply(lambda x: x['Misses'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Penalties_F'] = dk.apply(lambda x: x['Penalties'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Penalties_A'] = dk.apply(lambda x: x['Penalties'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Shots_F'] = dk.apply(lambda x: x['Shots'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Shots_A'] = dk.apply(lambda x: x['Shots'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk['Takeaways_F'] = dk.apply(lambda x: x['Takeaways'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dk['Takeaways_A'] = dk.apply(lambda x: x['Takeaways'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)
dk = dk.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dk.head()

Unnamed: 0,Season,GameNumber,AdvantageType,Period,TeamCode,EventNumber,EventType,EventTeamCode,GoalNumber,GD,GoalTime,EventTimeFromZero,TimeFromGoal,Minute,Blocks,Faceoffs,Giveaways,Goals,Hits,Misses,Penalties,Shots,Takeaways,Blocks_F,Blocks_A,Faceoffs_F,Faceoffs_A,Giveaways_F,Giveaways_A,Goals_F,Goals_A,Hits_F,Hits_A,Miss_F,Miss_A,Penalties_F,Penalties_A,Shots_F,Shots_A,Takeaways_F,Takeaways_A
0,2010,20001,EV,1,MTL,1,FAC,MTL,1.0,-1.0,402.0,0,402.0,6,,1.0,,,,,,,,,,1.0,,,,,,,,,,,,,,,
178759,2010,20001,EV,1,TOR,1,FAC,MTL,1.0,-1.0,402.0,0,402.0,6,,1.0,,,,,,,,,,,1.0,,,,,,,,,,,,,,
1,2010,20001,EV,1,MTL,3,HIT,TOR,1.0,1.0,402.0,15,387.0,6,,,,,1.0,,,,,,,,,,,,,,1.0,,,,,,,,
178760,2010,20001,EV,1,TOR,3,HIT,TOR,1.0,1.0,402.0,15,387.0,6,,,,,1.0,,,,,,,,,,,,,1.0,,,,,,,,,
2,2010,20001,EV,1,MTL,4,HIT,MTL,1.0,-1.0,402.0,46,356.0,6,,,,,2.0,,,,,,,,,,,,,2.0,,,,,,,,,


- backward and forward fill of on-ice events by season, game number and team code.

In [229]:
dk['Blocks_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'GoalNumber', 'Minute'])['Blocks_F'].apply(lambda x: x.ffill().bfill())
dk['Faceoffs_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'GoalNumber', 'Minute'])['Faceoffs_F'].apply(lambda x: x.ffill().bfill())
dk['Giveaways_F'] = dk.groupby(['Season','GameNumber', 'TeamCode',  'GoalNumber', 'Minute'])['Giveaways_F'].apply(lambda x: x.ffill().bfill())
dk['Goals_F'] = dk.groupby(['Season','GameNumber', 'TeamCode' , 'GoalNumber', 'Minute'])['Goals_F'].apply(lambda x: x.ffill().bfill())
dk['Hits_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'GoalNumber', 'Minute'])['Hits_F'].apply(lambda x: x.ffill().bfill())
dk['Miss_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'GoalNumber', 'Minute'])['Miss_F'].apply(lambda x: x.ffill().bfill())
dk['Penalties_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'GoalNumber', 'Minute'])['Penalties_F'].apply(lambda x: x.ffill().bfill())
dk['Shots_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'GoalNumber', 'Minute'])['Shots_F'].apply(lambda x: x.ffill().bfill())
dk['Takeaways_F'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'GoalNumber', 'Minute'])['Takeaways_F'].apply(lambda x: x.ffill().bfill())
dk['Blocks_A'] = dk.groupby(['Season','GameNumber', 'TeamCode',  'GoalNumber', 'Minute'])['Blocks_A'].apply(lambda x: x.ffill().bfill())
dk['Faceoffs_A'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'GoalNumber', 'Minute'])['Faceoffs_A'].apply(lambda x: x.ffill().bfill())
dk['Giveaways_A'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'GoalNumber', 'Minute'])['Giveaways_A'].apply(lambda x: x.ffill().bfill())
dk['Goals_A'] = dk.groupby(['Season','GameNumber', 'TeamCode',  'GoalNumber', 'Minute'])['Goals_A'].apply(lambda x: x.ffill().bfill())
dk['Hits_A'] = dk.groupby(['Season','GameNumber', 'TeamCode',  'GoalNumber', 'Minute'])['Hits_A'].apply(lambda x: x.ffill().bfill())
dk['Miss_A'] = dk.groupby(['Season','GameNumber', 'TeamCode',  'GoalNumber'])['Miss_A'].apply(lambda x: x.ffill().bfill())
dk['Penalties_A'] = dk.groupby(['Season','GameNumber', 'TeamCode',  'GoalNumber', 'Minute'])['Penalties_A'].apply(lambda x: x.ffill().bfill())
dk['Shots_A'] = dk.groupby(['Season','GameNumber', 'TeamCode',  'GoalNumber', 'Minute'])['Shots_A'].apply(lambda x: x.ffill().bfill())
dk['Takeaways_A'] = dk.groupby(['Season','GameNumber', 'TeamCode', 'GoalNumber', 'Minute'])['Takeaways_A'].apply(lambda x: x.ffill().bfill())
dk = dk.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dk = dk.fillna(0)
dk.head()

Unnamed: 0,Season,GameNumber,AdvantageType,Period,TeamCode,EventNumber,EventType,EventTeamCode,GoalNumber,GD,GoalTime,EventTimeFromZero,TimeFromGoal,Minute,Blocks,Faceoffs,Giveaways,Goals,Hits,Misses,Penalties,Shots,Takeaways,Blocks_F,Blocks_A,Faceoffs_F,Faceoffs_A,Giveaways_F,Giveaways_A,Goals_F,Goals_A,Hits_F,Hits_A,Miss_F,Miss_A,Penalties_F,Penalties_A,Shots_F,Shots_A,Takeaways_F,Takeaways_A
0,2010,20001,EV,1,MTL,1,FAC,MTL,1.0,-1.0,402.0,0,402.0,6,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0
178759,2010,20001,EV,1,TOR,1,FAC,MTL,1.0,-1.0,402.0,0,402.0,6,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0
1,2010,20001,EV,1,MTL,3,HIT,TOR,1.0,1.0,402.0,15,387.0,6,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0
178760,2010,20001,EV,1,TOR,3,HIT,TOR,1.0,1.0,402.0,15,387.0,6,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0
2,2010,20001,EV,1,MTL,4,HIT,MTL,1.0,-1.0,402.0,46,356.0,6,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0


- keep only relative columns and drop duplicates by season, gamenumber and teamcode, to have two observations per game.

In [230]:
dk = dk[['Season', 'GameNumber', 'TeamCode', 'GoalNumber', 'GD', 'Minute', 'Blocks_F', 'Blocks_A', 'Faceoffs_F', 'Faceoffs_A', 'Giveaways_F', 'Giveaways_A', 'Goals_F', 'Goals_A', 'Hits_F', 'Hits_A', 'Miss_F', 'Miss_A', 'Penalties_F', 'Penalties_A', 'Shots_F', 'Shots_A', 'Takeaways_F', 'Takeaways_A']]
dk = dk.sort_values(['Season', 'GameNumber'], ascending=[True, True])
dk = dk.drop_duplicates(['Season', 'GameNumber', 'TeamCode', 'GoalNumber', 'GD', 'Minute'])
dk.head()

Unnamed: 0,Season,GameNumber,TeamCode,GoalNumber,GD,Minute,Blocks_F,Blocks_A,Faceoffs_F,Faceoffs_A,Giveaways_F,Giveaways_A,Goals_F,Goals_A,Hits_F,Hits_A,Miss_F,Miss_A,Penalties_F,Penalties_A,Shots_F,Shots_A,Takeaways_F,Takeaways_A
0,2010,20001,MTL,1.0,-1.0,6,0.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0
178759,2010,20001,TOR,1.0,-1.0,6,2.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0
1,2010,20001,MTL,1.0,1.0,6,0.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0
178760,2010,20001,TOR,1.0,1.0,6,2.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0
9,2010,20001,MTL,1.0,-1.0,5,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [231]:
dk.shape

(62986, 24)

In [232]:
dk.isnull().sum()

Season         0
GameNumber     0
TeamCode       0
GoalNumber     0
GD             0
Minute         0
Blocks_F       0
Blocks_A       0
Faceoffs_F     0
Faceoffs_A     0
Giveaways_F    0
Giveaways_A    0
Goals_F        0
Goals_A        0
Hits_F         0
Hits_A         0
Miss_F         0
Miss_A         0
Penalties_F    0
Penalties_A    0
Shots_F        0
Shots_A        0
Takeaways_F    0
Takeaways_A    0
dtype: int64

- group by season, team code and goal differential to compute the mean and sum of each on-ice events while score differential was the same throughout the season.

In [233]:
#dk['MBlocks_F'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Blocks_F'].transform('mean')
#dk['MFaceoffs_F'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Faceoffs_F'].transform('mean')
#dk['MGiveaways_F'] = dk.groupby(['Season', 'TeamCode', 'GD', 'Minute'])['Giveaways_F'].transform('mean')
#dk['MGoals_F'] = dk.groupby(['Season', 'TeamCode', 'GD', 'Minute'])['Goals_F'].transform('mean')
#dk['MHits_F'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Hits_F'].transform('mean')
#dk['MMiss_F'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Miss_F'].transform('mean')
#dk['MPenalties_F'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Penalties_F'].transform('mean')
#dk['MShots_F'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Shots_F'].transform('mean')
#dk['MTakeaways_F'] = dk.groupby(['Season', 'TeamCode', 'GD', 'Minute'])['Takeaways_F'].transform('mean')
#dk['MBlocks_A'] = dk.groupby(['Season', 'TeamCode', 'GD', 'Minute'])['Blocks_A'].transform('mean')
#dk['MFaceoffs_A'] = dk.groupby(['Season', 'TeamCode', 'GD', 'Minute'])['Faceoffs_A'].transform('mean')
#dk['MGiveaways_A'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Giveaways_A'].transform('mean')
#dk['MGoals_A'] = dk.groupby(['Season', 'TeamCode', 'GD', 'Minute'])['Goals_A'].transform('mean')
#dk['MHits_A'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Hits_A'].transform('mean')
#dk['MMiss_A'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Miss_A'].transform('mean')
#dk['MPenalties_A'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Penalties_A'].transform('mean')
#dk['MShots_A'] = dk.groupby(['Season', 'TeamCode', 'GD', 'Minute'])['Shots_A'].transform('mean')
#dk['MTakeaways_A'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Takeaways_A'].transform('mean')
#dk.head()

In [234]:
dk['MBlocks_F'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Blocks_F'].transform('sum')
dk['MFaceoffs_F'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Faceoffs_F'].transform('sum')
dk['MGiveaways_F'] = dk.groupby(['Season', 'TeamCode', 'GD', 'Minute'])['Giveaways_F'].transform('sum')
dk['MGoals_F'] = dk.groupby(['Season', 'TeamCode', 'GD', 'Minute'])['Goals_F'].transform('sum')
dk['MHits_F'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Hits_F'].transform('sum')
dk['MMiss_F'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Miss_F'].transform('sum')
dk['MPenalties_F'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Penalties_F'].transform('sum')
dk['MShots_F'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Shots_F'].transform('sum')
dk['MTakeaways_F'] = dk.groupby(['Season', 'TeamCode', 'GD', 'Minute'])['Takeaways_F'].transform('sum')
dk['MBlocks_A'] = dk.groupby(['Season', 'TeamCode', 'GD', 'Minute'])['Blocks_A'].transform('sum')
dk['MFaceoffs_A'] = dk.groupby(['Season', 'TeamCode', 'GD', 'Minute'])['Faceoffs_A'].transform('sum')
dk['MGiveaways_A'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Giveaways_A'].transform('sum')
dk['MGoals_A'] = dk.groupby(['Season', 'TeamCode', 'GD', 'Minute'])['Goals_A'].transform('sum')
dk['MHits_A'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Hits_A'].transform('sum')
dk['MMiss_A'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Miss_A'].transform('sum')
dk['MPenalties_A'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Penalties_A'].transform('sum')
dk['MShots_A'] = dk.groupby(['Season', 'TeamCode', 'GD', 'Minute'])['Shots_A'].transform('sum')
dk['MTakeaways_A'] = dk.groupby(['Season', 'TeamCode',  'GD', 'Minute'])['Takeaways_A'].transform('sum')
dk.head()

Unnamed: 0,Season,GameNumber,TeamCode,GoalNumber,GD,Minute,Blocks_F,Blocks_A,Faceoffs_F,Faceoffs_A,Giveaways_F,Giveaways_A,Goals_F,Goals_A,Hits_F,Hits_A,Miss_F,Miss_A,Penalties_F,Penalties_A,Shots_F,Shots_A,Takeaways_F,Takeaways_A,MBlocks_F,MFaceoffs_F,MGiveaways_F,MGoals_F,MHits_F,MMiss_F,MPenalties_F,MShots_F,MTakeaways_F,MBlocks_A,MFaceoffs_A,MGiveaways_A,MGoals_A,MHits_A,MMiss_A,MPenalties_A,MShots_A,MTakeaways_A
0,2010,20001,MTL,1.0,-1.0,6,0.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,126.0,277.0,104.0,0.0,220.0,117.0,50.0,291.0,52.0,137.0,290.0,87.0,0.0,221.0,137.0,31.0,275.0,70.0
178759,2010,20001,TOR,1.0,-1.0,6,2.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,132.0,260.0,102.0,0.0,217.0,91.0,32.0,219.0,68.0,126.0,243.0,101.0,0.0,226.0,170.0,40.0,231.0,68.0
1,2010,20001,MTL,1.0,1.0,6,0.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,126.0,278.0,104.0,0.0,217.0,115.0,50.0,289.0,52.0,137.0,290.0,87.0,0.0,221.0,139.0,31.0,275.0,70.0
178760,2010,20001,TOR,1.0,1.0,6,2.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,132.0,260.0,102.0,0.0,217.0,91.0,32.0,219.0,68.0,125.0,242.0,99.0,0.0,225.0,168.0,40.0,230.0,68.0
9,2010,20001,MTL,1.0,-1.0,5,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,7.0,6.0,0.0,10.0,12.0,5.0,16.0,11.0,6.0,8.0,11.0,0.0,17.0,56.0,4.0,13.0,5.0


- drop duplicates by season, team code and goal differential.

In [235]:
dk = dk.drop_duplicates(['Season', 'TeamCode', 'GD', 'Minute'])
dk = dk [['Season', 'TeamCode', 'Minute', 'GD','MBlocks_F', 'MFaceoffs_F', 'MGiveaways_F', 'MGoals_F','MHits_F', 'MMiss_F', 'MPenalties_F', 'MShots_F', 'MTakeaways_F','MBlocks_A', 'MFaceoffs_A', 'MGiveaways_A', 'MGoals_A', 'MHits_A','MMiss_A', 'MPenalties_A', 'MShots_A', 'MTakeaways_A']]
dk.head()

Unnamed: 0,Season,TeamCode,Minute,GD,MBlocks_F,MFaceoffs_F,MGiveaways_F,MGoals_F,MHits_F,MMiss_F,MPenalties_F,MShots_F,MTakeaways_F,MBlocks_A,MFaceoffs_A,MGiveaways_A,MGoals_A,MHits_A,MMiss_A,MPenalties_A,MShots_A,MTakeaways_A
0,2010,MTL,6,-1.0,126.0,277.0,104.0,0.0,220.0,117.0,50.0,291.0,52.0,137.0,290.0,87.0,0.0,221.0,137.0,31.0,275.0,70.0
178759,2010,TOR,6,-1.0,132.0,260.0,102.0,0.0,217.0,91.0,32.0,219.0,68.0,126.0,243.0,101.0,0.0,226.0,170.0,40.0,231.0,68.0
1,2010,MTL,6,1.0,126.0,278.0,104.0,0.0,217.0,115.0,50.0,289.0,52.0,137.0,290.0,87.0,0.0,221.0,139.0,31.0,275.0,70.0
178760,2010,TOR,6,1.0,132.0,260.0,102.0,0.0,217.0,91.0,32.0,219.0,68.0,125.0,242.0,99.0,0.0,225.0,168.0,40.0,230.0,68.0
9,2010,MTL,5,-1.0,15.0,7.0,6.0,0.0,10.0,12.0,5.0,16.0,11.0,6.0,8.0,11.0,0.0,17.0,56.0,4.0,13.0,5.0


### summary analysis

- mean and sum of all events prior to a goal by minute

In [236]:
#dk['TBlocks_F'] = dk.groupby(['Season',  'GD', 'Minute'])['MBlocks_F'].transform('mean')
#dk['TFaceoffs_F'] = dk.groupby(['Season',  'GD', 'Minute'])['MFaceoffs_F'].transform('mean')
#dk['TGiveaways_F'] = dk.groupby(['Season',  'GD', 'Minute'])['MGiveaways_F'].transform('mean')
#dk['TGoals_F'] = dk.groupby(['Season', 'GD', 'Minute'])['MGoals_F'].transform('mean')
#dk['THits_F'] = dk.groupby(['Season', 'GD', 'Minute'])['MHits_F'].transform('mean')
#dk['TMiss_F'] = dk.groupby(['Season', 'GD', 'Minute'])['MMiss_F'].transform('mean')
#dk['TPenalties_F'] = dk.groupby(['Season', 'GD', 'Minute'])['MPenalties_F'].transform('mean')
#dk['TShots_F'] = dk.groupby(['Season',  'GD', 'Minute'])['MShots_F'].transform('mean')
#dk['TTakeaways_F'] = dk.groupby(['Season', 'GD', 'Minute'])['MTakeaways_F'].transform('mean')
#dk['TBlocks_A'] = dk.groupby(['Season',  'GD', 'Minute'])['MBlocks_A'].transform('mean')
#dk['TFaceoffs_A'] = dk.groupby(['Season',  'GD', 'Minute'])['MFaceoffs_A'].transform('mean')
#dk['TGiveaways_A'] = dk.groupby(['Season',  'GD', 'Minute'])['MGiveaways_A'].transform('mean')
#dk['TGoals_A'] = dk.groupby(['Season', 'GD', 'Minute'])['MGoals_A'].transform('mean')
#dk['THits_A'] = dk.groupby(['Season',  'GD', 'Minute'])['MHits_A'].transform('mean')
#dk['TMiss_A'] = dk.groupby(['Season', 'GD', 'Minute'])['MMiss_A'].transform('mean')
#dk['TPenalties_A'] = dk.groupby(['Season', 'GD', 'Minute'])['MPenalties_A'].transform('mean')
#dk['TShots_A'] = dk.groupby(['Season', 'GD', 'Minute'])['MShots_A'].transform('mean')
#dk['TTakeaways_A'] = dk.groupby(['Season',  'GD', 'Minute'])['MTakeaways_A'].transform('mean')
#dk.head()

In [237]:
dk['TBlocks_F'] = dk.groupby(['Season',  'GD', 'Minute'])['MBlocks_F'].transform('sum')
dk['TFaceoffs_F'] = dk.groupby(['Season',  'GD', 'Minute'])['MFaceoffs_F'].transform('sum')
dk['TGiveaways_F'] = dk.groupby(['Season',  'GD', 'Minute'])['MGiveaways_F'].transform('sum')
dk['TGoals_F'] = dk.groupby(['Season', 'GD', 'Minute'])['MGoals_F'].transform('sum')
dk['THits_F'] = dk.groupby(['Season', 'GD', 'Minute'])['MHits_F'].transform('sum')
dk['TMiss_F'] = dk.groupby(['Season', 'GD', 'Minute'])['MMiss_F'].transform('sum')
dk['TPenalties_F'] = dk.groupby(['Season', 'GD', 'Minute'])['MPenalties_F'].transform('sum')
dk['TShots_F'] = dk.groupby(['Season',  'GD', 'Minute'])['MShots_F'].transform('sum')
dk['TTakeaways_F'] = dk.groupby(['Season', 'GD', 'Minute'])['MTakeaways_F'].transform('sum')
dk['TBlocks_A'] = dk.groupby(['Season',  'GD', 'Minute'])['MBlocks_A'].transform('sum')
dk['TFaceoffs_A'] = dk.groupby(['Season',  'GD', 'Minute'])['MFaceoffs_A'].transform('sum')
dk['TGiveaways_A'] = dk.groupby(['Season',  'GD', 'Minute'])['MGiveaways_A'].transform('sum')
dk['TGoals_A'] = dk.groupby(['Season', 'GD', 'Minute'])['MGoals_A'].transform('sum')
dk['THits_A'] = dk.groupby(['Season',  'GD', 'Minute'])['MHits_A'].transform('sum')
dk['TMiss_A'] = dk.groupby(['Season', 'GD', 'Minute'])['MMiss_A'].transform('sum')
dk['TPenalties_A'] = dk.groupby(['Season', 'GD', 'Minute'])['MPenalties_A'].transform('sum')
dk['TShots_A'] = dk.groupby(['Season', 'GD', 'Minute'])['MShots_A'].transform('sum')
dk['TTakeaways_A'] = dk.groupby(['Season',  'GD', 'Minute'])['MTakeaways_A'].transform('sum')
dk.head()

Unnamed: 0,Season,TeamCode,Minute,GD,MBlocks_F,MFaceoffs_F,MGiveaways_F,MGoals_F,MHits_F,MMiss_F,MPenalties_F,MShots_F,MTakeaways_F,MBlocks_A,MFaceoffs_A,MGiveaways_A,MGoals_A,MHits_A,MMiss_A,MPenalties_A,MShots_A,MTakeaways_A,TBlocks_F,TFaceoffs_F,TGiveaways_F,TGoals_F,THits_F,TMiss_F,TPenalties_F,TShots_F,TTakeaways_F,TBlocks_A,TFaceoffs_A,TGiveaways_A,TGoals_A,THits_A,TMiss_A,TPenalties_A,TShots_A,TTakeaways_A
0,2010,MTL,6,-1.0,126.0,277.0,104.0,0.0,220.0,117.0,50.0,291.0,52.0,137.0,290.0,87.0,0.0,221.0,137.0,31.0,275.0,70.0,3707.0,7746.0,2277.0,0.0,6505.0,3050.0,970.0,7274.0,1973.0,3707.0,7746.0,2277.0,0.0,6505.0,3898.0,970.0,7274.0,1973.0
178759,2010,TOR,6,-1.0,132.0,260.0,102.0,0.0,217.0,91.0,32.0,219.0,68.0,126.0,243.0,101.0,0.0,226.0,170.0,40.0,231.0,68.0,3707.0,7746.0,2277.0,0.0,6505.0,3050.0,970.0,7274.0,1973.0,3707.0,7746.0,2277.0,0.0,6505.0,3898.0,970.0,7274.0,1973.0
1,2010,MTL,6,1.0,126.0,278.0,104.0,0.0,217.0,115.0,50.0,289.0,52.0,137.0,290.0,87.0,0.0,221.0,139.0,31.0,275.0,70.0,3705.0,7766.0,2276.0,0.0,6501.0,3053.0,970.0,7273.0,1975.0,3705.0,7766.0,2276.0,0.0,6501.0,3902.0,970.0,7273.0,1975.0
178760,2010,TOR,6,1.0,132.0,260.0,102.0,0.0,217.0,91.0,32.0,219.0,68.0,125.0,242.0,99.0,0.0,225.0,168.0,40.0,230.0,68.0,3705.0,7766.0,2276.0,0.0,6501.0,3053.0,970.0,7273.0,1975.0,3705.0,7766.0,2276.0,0.0,6501.0,3902.0,970.0,7273.0,1975.0
9,2010,MTL,5,-1.0,15.0,7.0,6.0,0.0,10.0,12.0,5.0,16.0,11.0,6.0,8.0,11.0,0.0,17.0,56.0,4.0,13.0,5.0,262.0,381.0,165.0,0.0,342.0,196.0,132.0,370.0,149.0,262.0,381.0,165.0,0.0,342.0,1547.0,132.0,370.0,149.0


In [238]:
dk = dk[['GD',  'Minute', 'TBlocks_F', 'TFaceoffs_F', 'TGiveaways_F', 'TGoals_F', 'THits_F', 'TMiss_F', 'TPenalties_F', 'TShots_F', 'TTakeaways_F', 'TBlocks_A', 'TFaceoffs_A', 'TGiveaways_A', 'TGoals_A', 'THits_A', 'TMiss_A', 'TPenalties_A', 'TShots_A', 'TTakeaways_A']]
dk = dk.drop_duplicates(['GD',  'Minute'])
dk = dk.sort_values(['GD'], ascending=[False])
#dk.set_index(['GD', 'Minute'])

In [239]:
#dk = pd.pivot_table(dk, values=(['TBlocks_F', 'TFaceoffs_F', 'TGiveaways_F', 'TGoals_F', 'THits_F', 'TMiss_F', 'TPenalties_F', 'TShots_F', 'TTakeaways_F', 'TBlocks_A', 'TFaceoffs_A', 'TGiveaways_A', 'TGoals_A', 'THits_A', 'TMiss_A', 'TPenalties_A', 'TShots_A', 'TTakeaways_A']), index=['GD'], columns=['Zone'])
#dk.head()

### display all events that happened by minute 

1) display all events that happened a minute prior to a goal

In [240]:
dm1 = dk[dk['Minute'] ==1]
dm1a = pd.pivot_table(dm1, values=(['TShots_F', 'TShots_A', 'TMiss_F', 'TMiss_A', 'TBlocks_F', 'TBlocks_A', 'TFaceoffs_F', 'TFaceoffs_A']), index=['GD'])
dm1a = dm1a.fillna(0)
dm1a.head()

Unnamed: 0_level_0,TBlocks_A,TBlocks_F,TFaceoffs_A,TFaceoffs_F,TMiss_A,TMiss_F,TShots_A,TShots_F
GD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-8.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,2.0
-7.0,4.0,4.0,2.0,2.0,11.0,1.0,5.0,5.0
-6.0,14.0,14.0,10.0,10.0,39.0,8.0,11.0,11.0
-5.0,19.0,19.0,18.0,18.0,59.0,20.0,36.0,36.0
-4.0,44.0,44.0,55.0,55.0,170.0,49.0,79.0,79.0


In [241]:
beginningtex = """\\documentclass{report}
\\usepackage{booktabs}
\\begin{document}"""
endtex = "\end{document}"

f = open('/Users/stefanostselios/Brock University/Kevin Mongeon - StephanosShare/out/latex/events/minutes/1/sum_all_on_ice_events_a_minute_prior_to_a_goal_1.tex', 'w')
f.write(beginningtex)
f.write(dm1a.to_latex())
f.write(endtex)
f.close()

In [242]:
dm1b = pd.pivot_table(dm1, values=(['TPenalties_F', 'TPenalties_A', 'THits_F', 'THits_A', 'TGiveaways_F', 'TGiveaways_A', 'TTakeaways_F', 'TTakeaways_A' ]), index=['GD'])
dm1b = dm1b.fillna(0)
dm1b.head()

Unnamed: 0_level_0,TGiveaways_A,TGiveaways_F,THits_A,THits_F,TPenalties_A,TPenalties_F,TTakeaways_A,TTakeaways_F
GD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-8.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0
-7.0,1.0,1.0,3.0,3.0,1.0,1.0,2.0,2.0
-6.0,8.0,8.0,10.0,10.0,14.0,14.0,8.0,8.0
-5.0,25.0,25.0,23.0,23.0,39.0,39.0,10.0,10.0
-4.0,42.0,42.0,45.0,45.0,55.0,55.0,30.0,30.0


In [243]:
beginningtex = """\\documentclass{report}
\\usepackage{booktabs}
\\begin{document}"""
endtex = "\end{document}"

f = open('/Users/stefanostselios/Brock University/Kevin Mongeon - StephanosShare/out/latex/events/minutes/1/sum_all_on_ice_events_a_minute_prior_to_a_goal_2.tex', 'w')
f.write(beginningtex)
f.write(dm1b.to_latex())
f.write(endtex)
f.close()

2) display all events that happened between a minute and 2 minutes prior to a goal.

In [244]:
dm2 = dk[dk['Minute'] ==2]
dm2a = pd.pivot_table(dm2, values=(['TShots_F', 'TShots_A', 'TMiss_F', 'TMiss_A', 'TBlocks_F', 'TBlocks_A', 'TFaceoffs_F', 'TFaceoffs_A']), index=['GD'])
dm2a = dm2a.fillna(0)
dm2a.head()

Unnamed: 0_level_0,TBlocks_A,TBlocks_F,TFaceoffs_A,TFaceoffs_F,TMiss_A,TMiss_F,TShots_A,TShots_F
GD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-7.0,7.0,7.0,5.0,5.0,14.0,2.0,2.0,2.0
-6.0,11.0,11.0,21.0,21.0,41.0,15.0,19.0,19.0
-5.0,18.0,18.0,36.0,36.0,50.0,10.0,36.0,36.0
-4.0,30.0,30.0,59.0,59.0,141.0,30.0,64.0,64.0
-3.0,77.0,77.0,119.0,119.0,379.0,89.0,162.0,162.0


In [245]:
beginningtex = """\\documentclass{report}
\\usepackage{booktabs}
\\begin{document}"""
endtex = "\end{document}"

f = open('/Users/stefanostselios/Brock University/Kevin Mongeon - StephanosShare/out/latex/events/minutes/2/sum_all_on_ice_events_2_minutes_prior_to_a_goal_1.tex', 'w')
f.write(beginningtex)
f.write(dm2a.to_latex())
f.write(endtex)
f.close()

In [246]:
dm2b = pd.pivot_table(dm2, values=(['TPenalties_F', 'TPenalties_A', 'THits_F', 'THits_A', 'TGiveaways_F', 'TGiveaways_A', 'TTakeaways_F', 'TTakeaways_A' ]), index=['GD'])
dm2b = dm2b.fillna(0)
dm2b.head()

Unnamed: 0_level_0,TGiveaways_A,TGiveaways_F,THits_A,THits_F,TPenalties_A,TPenalties_F,TTakeaways_A,TTakeaways_F
GD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-7.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
-6.0,8.0,8.0,10.0,10.0,4.0,4.0,3.0,3.0
-5.0,6.0,6.0,25.0,25.0,25.0,25.0,10.0,10.0
-4.0,25.0,25.0,54.0,54.0,40.0,40.0,25.0,25.0
-3.0,64.0,64.0,102.0,102.0,87.0,87.0,61.0,61.0


In [247]:
beginningtex = """\\documentclass{report}
\\usepackage{booktabs}
\\begin{document}"""
endtex = "\end{document}"

f = open('/Users/stefanostselios/Brock University/Kevin Mongeon - StephanosShare/out/latex/events/minutes/2/sum_all_on_ice_events_2_minutes_prior_to_a_goal_2.tex', 'w')
f.write(beginningtex)
f.write(dm2b.to_latex())
f.write(endtex)
f.close()

3) display all events that happened between 2 minutes and 3 minutes prior to a goal.

In [248]:
dm3 = dk[dk['Minute'] ==3]
dm3a = pd.pivot_table(dm3, values=(['TShots_F', 'TShots_A', 'TMiss_F', 'TMiss_A', 'TBlocks_F', 'TBlocks_A', 'TFaceoffs_F', 'TFaceoffs_A']), index=['GD'])
dm3a = dm3a.fillna(0)
dm3a.head()

Unnamed: 0_level_0,TBlocks_A,TBlocks_F,TFaceoffs_A,TFaceoffs_F,TMiss_A,TMiss_F,TShots_A,TShots_F
GD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-7.0,1.0,1.0,0.0,0.0,8.0,1.0,2.0,2.0
-6.0,6.0,6.0,9.0,9.0,39.0,3.0,5.0,5.0
-5.0,16.0,16.0,28.0,28.0,54.0,11.0,22.0,22.0
-4.0,29.0,29.0,55.0,55.0,101.0,20.0,42.0,42.0
-3.0,85.0,85.0,114.0,114.0,383.0,86.0,112.0,112.0


In [249]:
beginningtex = """\\documentclass{report}
\\usepackage{booktabs}
\\begin{document}"""
endtex = "\end{document}"

f = open('/Users/stefanostselios/Brock University/Kevin Mongeon - StephanosShare/out/latex/events/minutes/3/sum_all_on_ice_events_3_minutes_prior_to_a_goal_1.tex', 'w')
f.write(beginningtex)
f.write(dm3a.to_latex())
f.write(endtex)
f.close()

In [250]:
dm3b = pd.pivot_table(dm3, values=(['TPenalties_F', 'TPenalties_A', 'THits_F', 'THits_A', 'TGiveaways_F', 'TGiveaways_A', 'TTakeaways_F', 'TTakeaways_A' ]), index=['GD'])
dm3b = dm3b.fillna(0)
dm3b.head()

Unnamed: 0_level_0,TGiveaways_A,TGiveaways_F,THits_A,THits_F,TPenalties_A,TPenalties_F,TTakeaways_A,TTakeaways_F
GD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-7.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0
-6.0,4.0,4.0,9.0,9.0,3.0,3.0,3.0,3.0
-5.0,6.0,6.0,14.0,14.0,6.0,6.0,9.0,9.0
-4.0,18.0,18.0,33.0,33.0,19.0,19.0,14.0,14.0
-3.0,68.0,68.0,96.0,96.0,38.0,38.0,55.0,55.0


In [251]:
beginningtex = """\\documentclass{report}
\\usepackage{booktabs}
\\begin{document}"""
endtex = "\end{document}"

f = open('/Users/stefanostselios/Brock University/Kevin Mongeon - StephanosShare/out/latex/events/minutes/3/sum_all_on_ice_events_3_minutes_prior_to_a_goal_2.tex', 'w')
f.write(beginningtex)
f.write(dm3b.to_latex())
f.write(endtex)
f.close()

4) display all events that happened between 3 minutes and 4 minutes prior to a goal.

In [252]:
dm4 = dk[dk['Minute'] ==4]
dm4a = pd.pivot_table(dm4, values=(['TShots_F', 'TShots_A', 'TMiss_F', 'TMiss_A', 'TBlocks_F', 'TBlocks_A', 'TFaceoffs_F', 'TFaceoffs_A']), index=['GD'])
dm4a = dm4a.fillna(0)
dm4a.head()

Unnamed: 0_level_0,TBlocks_A,TBlocks_F,TFaceoffs_A,TFaceoffs_F,TMiss_A,TMiss_F,TShots_A,TShots_F
GD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-7.0,1.0,1.0,1.0,1.0,4.0,2.0,2.0,2.0
-6.0,8.0,8.0,8.0,8.0,34.0,17.0,7.0,7.0
-5.0,9.0,9.0,18.0,18.0,51.0,9.0,19.0,19.0
-4.0,19.0,19.0,37.0,37.0,100.0,17.0,29.0,29.0
-3.0,96.0,96.0,108.0,108.0,378.0,66.0,107.0,107.0


In [253]:
beginningtex = """\\documentclass{report}
\\usepackage{booktabs}
\\begin{document}"""
endtex = "\end{document}"

f = open('/Users/stefanostselios/Brock University/Kevin Mongeon - StephanosShare/out/latex/events/minutes/4/sum_all_on_ice_events_4_minutes_prior_to_a_goal_1.tex', 'w')
f.write(beginningtex)
f.write(dm4a.to_latex())
f.write(endtex)
f.close()

In [254]:
dm4b = pd.pivot_table(dm4, values=(['TPenalties_F', 'TPenalties_A', 'THits_F', 'THits_A', 'TGiveaways_F', 'TGiveaways_A', 'TTakeaways_F', 'TTakeaways_A' ]), index=['GD'])
dm4b = dm4b.fillna(0)
dm4b.head()

Unnamed: 0_level_0,TGiveaways_A,TGiveaways_F,THits_A,THits_F,TPenalties_A,TPenalties_F,TTakeaways_A,TTakeaways_F
GD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-7.0,2.0,2.0,3.0,3.0,0.0,0.0,1.0,1.0
-6.0,2.0,2.0,7.0,7.0,2.0,2.0,5.0,5.0
-5.0,7.0,7.0,9.0,9.0,10.0,10.0,8.0,8.0
-4.0,13.0,13.0,30.0,30.0,22.0,22.0,14.0,14.0
-3.0,49.0,49.0,96.0,96.0,48.0,48.0,43.0,43.0


In [255]:
beginningtex = """\\documentclass{report}
\\usepackage{booktabs}
\\begin{document}"""
endtex = "\end{document}"

f = open('/Users/stefanostselios/Brock University/Kevin Mongeon - StephanosShare/out/latex/events/minutes/4/sum_all_on_ice_events_4_minutes_prior_to_a_goal_2.tex', 'w')
f.write(beginningtex)
f.write(dm4b.to_latex())
f.write(endtex)
f.close()

5) display all events that happened between 4 minutes and 5 minutes prior to a goal.

In [256]:
dm5 = dk[dk['Minute'] ==5]
dm5a = pd.pivot_table(dm5, values=(['TShots_F', 'TShots_A', 'TMiss_F', 'TMiss_A', 'TBlocks_F', 'TBlocks_A', 'TFaceoffs_F', 'TFaceoffs_A']), index=['GD'])
dm5a = dm5a.fillna(0)
dm5a.head()

Unnamed: 0_level_0,TBlocks_A,TBlocks_F,TFaceoffs_A,TFaceoffs_F,TMiss_A,TMiss_F,TShots_A,TShots_F
GD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-8.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0
-7.0,1.0,1.0,0.0,0.0,6.0,3.0,0.0,0.0
-6.0,4.0,4.0,11.0,11.0,41.0,4.0,7.0,7.0
-5.0,4.0,4.0,17.0,17.0,46.0,10.0,10.0,10.0
-4.0,22.0,22.0,25.0,25.0,110.0,24.0,21.0,21.0


In [257]:
beginningtex = """\\documentclass{report}
\\usepackage{booktabs}
\\begin{document}"""
endtex = "\end{document}"

f = open('/Users/stefanostselios/Brock University/Kevin Mongeon - StephanosShare/out/latex/events/minutes/5/sum__all_on_ice_events_5_minutes_prior_to_a_goal_1.tex', 'w')
f.write(beginningtex)
f.write(dm5a.to_latex())
f.write(endtex)
f.close()

In [258]:
dm5b = pd.pivot_table(dm5, values=(['TPenalties_F', 'TPenalties_A', 'THits_F', 'THits_A', 'TGiveaways_F', 'TGiveaways_A', 'TTakeaways_F', 'TTakeaways_A' ]), index=['GD'])
dm5b = dm5b.fillna(0)
dm5b.head()

Unnamed: 0_level_0,TGiveaways_A,TGiveaways_F,THits_A,THits_F,TPenalties_A,TPenalties_F,TTakeaways_A,TTakeaways_F
GD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-8.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
-7.0,0.0,0.0,2.0,2.0,1.0,1.0,1.0,1.0
-6.0,8.0,8.0,10.0,10.0,5.0,5.0,0.0,0.0
-5.0,3.0,3.0,19.0,19.0,6.0,6.0,0.0,0.0
-4.0,11.0,11.0,29.0,29.0,18.0,18.0,15.0,15.0


In [259]:
beginningtex = """\\documentclass{report}
\\usepackage{booktabs}
\\begin{document}"""
endtex = "\end{document}"

f = open('/Users/stefanostselios/Brock University/Kevin Mongeon - StephanosShare/out/latex/events/minutes/5/sum_all_on_ice_events_5_minutes_prior_to_a_goal_2.tex', 'w')
f.write(beginningtex)
f.write(dm5b.to_latex())
f.write(endtex)
f.close()

6) display all events that happened 5 minutes plus prior to a goal.

In [260]:
dm6 = dk[dk['Minute'] ==6]
dm6a = pd.pivot_table(dm6, values=(['TShots_F', 'TShots_A', 'TMiss_F', 'TMiss_A', 'TBlocks_F', 'TBlocks_A', 'TFaceoffs_F', 'TFaceoffs_A']), index=['GD'])
dm6a = dm6a.fillna(0)
dm6a.head()

Unnamed: 0_level_0,TBlocks_A,TBlocks_F,TFaceoffs_A,TFaceoffs_F,TMiss_A,TMiss_F,TShots_A,TShots_F
GD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-8.0,3.0,3.0,5.0,5.0,2.0,1.0,7.0,7.0
-7.0,14.0,14.0,41.0,41.0,16.0,6.0,46.0,46.0
-6.0,21.0,21.0,79.0,79.0,48.0,27.0,56.0,56.0
-5.0,75.0,75.0,177.0,177.0,82.0,53.0,142.0,142.0
-4.0,232.0,232.0,578.0,578.0,275.0,203.0,473.0,473.0


In [261]:
beginningtex = """\\documentclass{report}
\\usepackage{booktabs}
\\begin{document}"""
endtex = "\end{document}"

f = open('/Users/stefanostselios/Brock University/Kevin Mongeon - StephanosShare/out/latex/events/minutes/6/sum_all_on_ice_events_6_minutes_prior_to_a_goal_1.tex', 'w')
f.write(beginningtex)
f.write(dm6a.to_latex())
f.write(endtex)
f.close()

In [262]:
dm6b = pd.pivot_table(dm6, values=(['TPenalties_F', 'TPenalties_A', 'THits_F', 'THits_A', 'TGiveaways_F', 'TGiveaways_A', 'TTakeaways_F', 'TTakeaways_A' ]), index=['GD'])
dm6b = dm6b.fillna(0)
dm6b.head()

Unnamed: 0_level_0,TGiveaways_A,TGiveaways_F,THits_A,THits_F,TPenalties_A,TPenalties_F,TTakeaways_A,TTakeaways_F
GD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-8.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
-7.0,6.0,6.0,19.0,19.0,4.0,4.0,6.0,6.0
-6.0,10.0,10.0,39.0,39.0,13.0,13.0,10.0,10.0
-5.0,37.0,37.0,132.0,132.0,25.0,25.0,34.0,34.0
-4.0,122.0,122.0,405.0,405.0,108.0,108.0,110.0,110.0


In [263]:
beginningtex = """\\documentclass{report}
\\usepackage{booktabs}
\\begin{document}"""
endtex = "\end{document}"

f = open('/Users/stefanostselios/Brock University/Kevin Mongeon - StephanosShare/out/latex/events/minutes/6/sum_all_on_ice_events_6_minutes_prior_to_a_goal_2.tex', 'w')
f.write(beginningtex)
f.write(dm6b.to_latex())
f.write(endtex)
f.close()