In [180]:
import sys
import os
import pandas as pd
import numpy as np
import datetime, time
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols
from pylab import hist, show
import scipy
import zipfile


pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 200)

In [181]:
da = pd.read_csv('pbp_merged.csv')
da = da.drop('Unnamed: 0', axis=1)
da = da.rename(columns={'TeamCode': 'EventTeamCode'})

In [182]:
da = da[da['GameNumber'] <= 21230]

In [183]:
da = da[da['EventType']!='STOP']
da = da[da['EventType']!='EISTR']
da = da[da['EventType']!='EIEND']

In [184]:
dg = da[['Season', 'GameNumber', 'VTeamCode', 'HTeamCode', 'EventNumber', 'EventType', 'EventTeamCode']]
dg = dg.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dg = dg.drop_duplicates(['Season', 'GameNumber', 'EventNumber', 'EventTeamCode'])
dg.head()

Unnamed: 0,Season,GameNumber,VTeamCode,HTeamCode,EventNumber,EventType,EventTeamCode
0,2010,20001,MTL,TOR,1,FAC,MTL
1,2010,20001,MTL,TOR,3,HIT,TOR
2,2010,20001,MTL,TOR,4,HIT,MTL
3,2010,20001,MTL,TOR,5,HIT,MTL
4,2010,20001,MTL,TOR,6,GIVE,TOR


In [185]:
dg['Goal'] = dg.apply(lambda x: 1 if (x['EventType'] == 'GOAL') else np.nan, axis=1)
dg['Block'] = dg.apply(lambda x: 1 if (x['EventType'] == 'BLOCK') else np.nan, axis=1)
dg['Faceoff'] = dg.apply(lambda x: 1 if (x['EventType'] == 'FAC') else np.nan, axis=1)
dg['Giveaway'] = dg.apply(lambda x: 1 if (x['EventType'] == 'GIVE') else np.nan, axis=1)
dg['Hit'] = dg.apply(lambda x: 1 if (x['EventType'] == 'HIT') else np.nan, axis=1)
dg['Miss'] = dg.apply(lambda x: 1 if (x['EventType'] == 'MISS') else np.nan, axis=1)
dg['Penalty'] = dg.apply(lambda x: 1 if (x['EventType'] == 'PENL') else np.nan, axis=1)
dg['Shot'] = dg.apply(lambda x: 1 if (x['EventType'] == 'SHOT') else np.nan, axis=1)
dg['Takeaway'] = dg.apply(lambda x: 1 if (x['EventType'] == 'TAKE') else np.nan, axis=1)

In [186]:
dg['Blocks'] = dg.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType'])['Block'].transform('sum')
dg['Faceoffs'] = dg.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType'])['Faceoff'].transform('sum')
dg['Giveaways'] = dg.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType'])['Giveaway'].transform('sum')
dg['Goals'] = dg.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType'])['Goal'].transform('sum')
dg['Hits'] = dg.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType'])['Hit'].transform('sum')
dg['Misses'] = dg.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType'])['Miss'].transform('sum')
dg['Penalties'] = dg.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType'])['Penalty'].transform('sum')
dg['Shots'] = dg.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType'])['Shot'].transform('sum')
dg['Takeaways'] = dg.groupby(['Season','GameNumber', 'EventTeamCode', 'EventType'])['Takeaway'].transform('sum')

In [187]:
dg = dg.rename(columns={'EventTeamCode': 'EventTeam'})
a = [col for col in dg.columns if 'TeamCode' in col]
dg = pd.lreshape(dg, {'TeamCode' : a})
dg.head()

Unnamed: 0,Block,Blocks,EventNumber,EventTeam,EventType,Faceoff,Faceoffs,GameNumber,Giveaway,Giveaways,Goal,Goals,Hit,Hits,Miss,Misses,Penalties,Penalty,Season,Shot,Shots,Takeaway,Takeaways,TeamCode
0,,,1,MTL,FAC,1.0,23.0,20001,,,,,,,,,,,2010,,,,,MTL
1,,,3,TOR,HIT,,,20001,,,,,1.0,27.0,,,,,2010,,,,,MTL
2,,,4,MTL,HIT,,,20001,,,,,1.0,34.0,,,,,2010,,,,,MTL
3,,,5,MTL,HIT,,,20001,,,,,1.0,34.0,,,,,2010,,,,,MTL
4,,,6,TOR,GIVE,,,20001,1.0,16.0,,,,,,,,,2010,,,,,MTL


In [188]:
dg = dg.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dg = dg.rename(columns={'EventTeam': 'EventTeamCode'})
dg.head()

Unnamed: 0,Block,Blocks,EventNumber,EventTeamCode,EventType,Faceoff,Faceoffs,GameNumber,Giveaway,Giveaways,Goal,Goals,Hit,Hits,Miss,Misses,Penalties,Penalty,Season,Shot,Shots,Takeaway,Takeaways,TeamCode
0,,,1,MTL,FAC,1.0,23.0,20001,,,,,,,,,,,2010,,,,,MTL
315775,,,1,MTL,FAC,1.0,23.0,20001,,,,,,,,,,,2010,,,,,TOR
1,,,3,TOR,HIT,,,20001,,,,,1.0,27.0,,,,,2010,,,,,MTL
315776,,,3,TOR,HIT,,,20001,,,,,1.0,27.0,,,,,2010,,,,,TOR
2,,,4,MTL,HIT,,,20001,,,,,1.0,34.0,,,,,2010,,,,,MTL


In [189]:
dg.shape

(631550, 24)

In [190]:
dg = dg.drop_duplicates(['Season', 'GameNumber', 'TeamCode', 'EventTeamCode', 'EventType'])

In [191]:
dg = dg [['Season', 'GameNumber', 'TeamCode', 'EventNumber', 'EventType', 'EventTeamCode',  'Blocks', 'Faceoffs', 'Giveaways', 'Goals', 'Hits', 'Misses', 'Penalties', 'Shots', 'Takeaways']]

In [192]:
dg.shape

(44022, 15)

In [193]:
dg['Blocks_For'] = dg.apply(lambda x: x['Blocks'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dg['Blocks_Against'] = dg.apply(lambda x: x['Blocks'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)

In [194]:
dg['Faceoffs_For'] = dg.apply(lambda x: x['Faceoffs'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dg['Faceoffs_Against'] = dg.apply(lambda x: x['Faceoffs'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)

In [195]:
dg['Giveaways_For'] = dg.apply(lambda x: x['Giveaways'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dg['Giveaways_Against'] = dg.apply(lambda x: x['Giveaways'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)

In [196]:
dg['Goals_For'] = dg.apply(lambda x: x['Goals'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dg['Goals_Against'] = dg.apply(lambda x: x['Goals'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)

In [197]:
dg['Hits_For'] = dg.apply(lambda x: x['Hits'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dg['Hits_Against'] = dg.apply(lambda x: x['Hits'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)

In [198]:
dg['Misses_For'] = dg.apply(lambda x: x['Misses'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dg['Misses_Against'] = dg.apply(lambda x: x['Misses'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)

In [199]:
dg['Penalties_For'] = dg.apply(lambda x: x['Penalties'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dg['Penalties_Against'] = dg.apply(lambda x: x['Penalties'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)

In [200]:
dg['Shots_For'] = dg.apply(lambda x: x['Shots'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dg['Shots_Against'] = dg.apply(lambda x: x['Shots'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)

In [201]:
dg['Takeaways_For'] = dg.apply(lambda x: x['Takeaways'] if (x['TeamCode'] == x['EventTeamCode']) else np.nan, axis=1)
dg['Takeaways_Against'] = dg.apply(lambda x: x['Takeaways'] if (x['TeamCode'] != x['EventTeamCode']) else np.nan, axis=1)

In [202]:
dg = dg.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])

In [203]:
dg['Blocks_For'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Blocks_For'].apply(lambda x: x.ffill().bfill())
dg['Faceoffs_For'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Faceoffs_For'].apply(lambda x: x.ffill().bfill())
dg['Giveaways_For'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Giveaways_For'].apply(lambda x: x.ffill().bfill())
dg['Goals_For'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Goals_For'].apply(lambda x: x.ffill().bfill())
dg['Hits_For'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Hits_For'].apply(lambda x: x.ffill().bfill())
dg['Misses_For'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Misses_For'].apply(lambda x: x.ffill().bfill())
dg['Penalties_For'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Penalties_For'].apply(lambda x: x.ffill().bfill())
dg['Shots_For'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Shots_For'].apply(lambda x: x.ffill().bfill())
dg['Takeaways_For'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Takeaways_For'].apply(lambda x: x.ffill().bfill())

In [204]:
dg['Blocks_Against'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Blocks_Against'].apply(lambda x: x.ffill().bfill())
dg['Faceoffs_Against'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Faceoffs_Against'].apply(lambda x: x.ffill().bfill())
dg['Giveaways_Against'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Giveaways_Against'].apply(lambda x: x.ffill().bfill())
dg['Goals_Against'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Goals_Against'].apply(lambda x: x.ffill().bfill())
dg['Hits_Against'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Hits_Against'].apply(lambda x: x.ffill().bfill())
dg['Misses_Against'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Misses_Against'].apply(lambda x: x.ffill().bfill())
dg['Penalties_Against'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Penalties_Against'].apply(lambda x: x.ffill().bfill())
dg['Shots_Against'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Shots_Against'].apply(lambda x: x.ffill().bfill())
dg['Takeaways_Against'] = dg.groupby(['Season','GameNumber', 'TeamCode'])['Takeaways_Against'].apply(lambda x: x.ffill().bfill())

In [205]:
dg.head()

Unnamed: 0,Season,GameNumber,TeamCode,EventNumber,EventType,EventTeamCode,Blocks,Faceoffs,Giveaways,Goals,Hits,Misses,Penalties,Shots,Takeaways,Blocks_For,Blocks_Against,Faceoffs_For,Faceoffs_Against,Giveaways_For,Giveaways_Against,Goals_For,Goals_Against,Hits_For,Hits_Against,Misses_For,Misses_Against,Penalties_For,Penalties_Against,Shots_For,Shots_Against,Takeaways_For,Takeaways_Against
0,2010,20001,MTL,1,FAC,MTL,,23.0,,,,,,,,21.0,22.0,23.0,20.0,7.0,16.0,2.0,3.0,34.0,27.0,15.0,9.0,5.0,3.0,26.0,21.0,7.0,6.0
315775,2010,20001,TOR,1,FAC,MTL,,23.0,,,,,,,,22.0,21.0,20.0,23.0,16.0,7.0,3.0,2.0,27.0,34.0,9.0,15.0,3.0,5.0,21.0,26.0,6.0,7.0
1,2010,20001,MTL,3,HIT,TOR,,,,,27.0,,,,,21.0,22.0,23.0,20.0,7.0,16.0,2.0,3.0,34.0,27.0,15.0,9.0,5.0,3.0,26.0,21.0,7.0,6.0
315776,2010,20001,TOR,3,HIT,TOR,,,,,27.0,,,,,22.0,21.0,20.0,23.0,16.0,7.0,3.0,2.0,27.0,34.0,9.0,15.0,3.0,5.0,21.0,26.0,6.0,7.0
2,2010,20001,MTL,4,HIT,MTL,,,,,34.0,,,,,21.0,22.0,23.0,20.0,7.0,16.0,2.0,3.0,34.0,27.0,15.0,9.0,5.0,3.0,26.0,21.0,7.0,6.0


In [206]:
dg.shape

(44022, 33)

In [207]:
dg = dg[['Season', 'GameNumber', 'TeamCode', 'Blocks_For', 'Blocks_Against', 'Faceoffs_For', 'Faceoffs_Against', 'Giveaways_For', 'Giveaways_Against', 'Goals_For', 'Goals_Against', 'Hits_For', 'Hits_Against', 'Misses_For', 'Misses_Against', 'Penalties_For', 'Penalties_Against', 'Shots_For', 'Shots_Against', 'Takeaways_For', 'Takeaways_Against']]
dg = dg.sort_values(['Season', 'GameNumber'], ascending=[True, True])

In [208]:
dg.shape

(44022, 21)

In [209]:
dg = dg.drop_duplicates(['Season', 'GameNumber', 'TeamCode'])
dg.head()

merge

In [253]:
db = pd.read_csv('team_roster_player_rank.csv')
db = db.drop('Unnamed: 0', axis=1)
db.head()

Unnamed: 0,GD,GameNumber,LossTeam,Season,WinTeam,PlayerNumber,PlayerPosition,TeamCode,GF,GA,Rank,RosterCount,Position,PositionCount,FCount,DCount
0,1,20001,MTL,2010,TOR,11.0,C,MTL,2,3,2,18.0,F,12.0,12.0,6.0
1,1,20001,MTL,2010,TOR,21.0,R,MTL,2,3,1,18.0,F,12.0,12.0,6.0
2,1,20001,MTL,2010,TOR,57.0,L,MTL,2,3,2,18.0,F,12.0,12.0,6.0
3,1,20001,MTL,2010,TOR,26.0,D,MTL,2,3,2,18.0,D,6.0,12.0,6.0
4,1,20001,MTL,2010,TOR,75.0,D,MTL,2,3,2,18.0,D,6.0,12.0,6.0


In [254]:
db.shape

(36540, 16)

In [255]:
dv = pd.merge(db, dg, on=['Season', 'GameNumber', 'TeamCode'], how='left')
dv = dw.drop('Unnamed: 0', axis=1)
dv = dv.drop_duplicates(['Season', 'GameNumber', 'TeamCode'])
dv = dv.sort_values(['Season', 'GameNumber'], ascending=[True, True])

In [256]:
dv.shape

(2030, 34)

In [257]:
dv.head()

Unnamed: 0,GD,GameNumber,LossTeam,Season,WinTeam,PlayerNumber,PlayerPosition,TeamCode,GF,GA,Rank,RosterCount,Position,PositionCount,FCount,DCount,Blocks_For,Blocks_Against,Faceoffs_For,Faceoffs_Against,Giveaways_For,Giveaways_Against,Goals_For,Goals_Against,Hits_For,Hits_Against,Misses_For,Misses_Against,Penalties_For,Penalties_Against,Shots_For,Shots_Against,Takeaways_For,Takeaways_Against
0,1,20001,MTL,2010,TOR,11.0,C,MTL,2,3,2,18.0,F,12.0,12.0,6.0,21.0,22.0,23.0,20.0,7.0,16.0,2.0,3.0,34.0,27.0,15.0,9.0,5.0,3.0,26.0,21.0,7.0,6.0
18270,1,20001,MTL,2010,TOR,37.0,C,TOR,3,2,2,18.0,F,12.0,12.0,6.0,22.0,21.0,20.0,23.0,16.0,7.0,3.0,2.0,27.0,34.0,9.0,15.0,3.0,5.0,21.0,26.0,6.0,7.0
18,-1,20002,PIT,2010,PHI,17.0,C,PHI,3,2,1,18.0,F,12.0,12.0,6.0,16.0,14.0,22.0,34.0,9.0,11.0,3.0,2.0,34.0,32.0,10.0,18.0,6.0,5.0,24.0,29.0,1.0,9.0
18288,-1,20002,PIT,2010,PHI,71.0,C,PIT,2,3,1,18.0,F,12.0,12.0,6.0,14.0,16.0,34.0,22.0,11.0,9.0,2.0,3.0,32.0,34.0,18.0,10.0,5.0,6.0,29.0,24.0,9.0,1.0
36,-1,20003,MIN,2010,CAR,53.0,C,CAR,4,3,1,18.0,F,12.0,12.0,6.0,19.0,19.0,33.0,52.0,11.0,11.0,4.0,3.0,14.0,19.0,9.0,8.0,5.0,5.0,27.0,26.0,3.0,8.0


- create columns for team win and team loss. 

In [258]:
dv['TeamWin'] =  dv.apply(lambda x: 1 if x['TeamCode']==x['WinTeam'] else 0, 1)
dv['TeamLos'] =  dv.apply(lambda x: 1 if x['TeamCode']!=x['WinTeam'] else 0, 1)


- display games played, games won, games loss, goals for and goals against by team for the season.

In [259]:
dv['GP'] = dv.groupby(['Season','TeamCode'])['GameNumber'].transform('count')
dv['GW'] = dv.groupby(['Season','WinTeam'])['TeamWin'].transform('sum')
dv['GL'] = dv.groupby(['Season','LossTeam'])['TeamLos'].transform('sum')
dv['GF'] = dv.groupby(['Season','TeamCode'])['GF'].transform('sum')
dv['GA'] = dv.groupby(['Season','TeamCode'])['GA'].transform('sum')
dv['Blocks_For'] = dv.groupby(['Season','TeamCode'])['Blocks_For'].transform('sum')
dv['Faceoffs_For'] = dv.groupby(['Season','TeamCode'])['Faceoffs_For'].transform('sum')
dv['Giveaways_For'] = dv.groupby(['Season','TeamCode'])['Giveaways_For'].transform('sum')
dv['Goals_For'] = dv.groupby(['Season','TeamCode'])['Goals_For'].transform('sum')
dv['Hits_For'] = dv.groupby(['Season','TeamCode'])['Hits_For'].transform('sum')
dv['Misses_For'] = dv.groupby(['Season','TeamCode'])['Misses_For'].transform('sum')
dv['Penalties_For'] = dv.groupby(['Season','TeamCode'])['Penalties_For'].transform('sum')
dv['Shots_For'] = dv.groupby(['Season','TeamCode'])['Shots_For'].transform('sum')
dv['Takeaways_For'] = dv.groupby(['Season','TeamCode'])['Takeaways_For'].transform('sum')
dv['Blocks_Against'] = dv.groupby(['Season','TeamCode'])['Blocks_Against'].transform('sum') 
dv['Faceoffs_Against'] = dv.groupby(['Season','TeamCode'])['Faceoffs_Against'].transform('sum')
dv['Giveaways_Against'] = dv.groupby(['Season','TeamCode'])['Giveaways_Against'].transform('sum')
dv['Goals_Against'] = dv.groupby(['Season','TeamCode'])['Goals_Against'].transform('sum')
dv['Hits_Against'] = dv.groupby(['Season','TeamCode'])['Hits_Against'].transform('sum')
dv['Misses_Against'] = dv.groupby(['Season','TeamCode'])['Misses_Against'].transform('sum')
dv['Penalties_Against'] = dv.groupby(['Season','TeamCode'])['Penalties_Against'].transform('sum')
dv['Shots_Against'] = dv.groupby(['Season','TeamCode'])['Shots_Against'].transform('sum')
dv['Takeaways_Against'] = dv.groupby(['Season','TeamCode'])['Takeaways_Against'].transform('sum')
dv.head()

Unnamed: 0,GD,GameNumber,LossTeam,Season,WinTeam,PlayerNumber,PlayerPosition,TeamCode,GF,GA,Rank,RosterCount,Position,PositionCount,FCount,DCount,Blocks_For,Blocks_Against,Faceoffs_For,Faceoffs_Against,Giveaways_For,Giveaways_Against,Goals_For,Goals_Against,Hits_For,Hits_Against,Misses_For,Misses_Against,Penalties_For,Penalties_Against,Shots_For,Shots_Against,Takeaways_For,Takeaways_Against,TeamWin,TeamLos,GP,GW,GL
0,1,20001,MTL,2010,TOR,11.0,C,MTL,188,181,2,18.0,F,12.0,12.0,6.0,1025.0,1008.0,1951.0,1969.0,584.0,501.0,188.0,181.0,1353.0,1549.0,835.0,768.0,355.0,339.0,2014.0,1919.0,434.0,388.0,0,1,68,34,31
18270,1,20001,MTL,2010,TOR,37.0,C,TOR,195,218,2,18.0,F,12.0,12.0,6.0,1150.0,1138.0,2022.0,1975.0,769.0,701.0,195.0,218.0,1777.0,1643.0,865.0,929.0,326.0,365.0,1831.0,1965.0,498.0,567.0,1,0,70,34,31
18,-1,20002,PIT,2010,PHI,17.0,C,PHI,235,207,1,18.0,F,12.0,12.0,6.0,1157.0,1077.0,2166.0,2165.0,575.0,566.0,235.0,207.0,1690.0,1598.0,845.0,917.0,366.0,355.0,2061.0,2003.0,511.0,555.0,1,0,72,41,31
18288,-1,20002,PIT,2010,PHI,71.0,C,PIT,204,180,1,18.0,F,12.0,12.0,6.0,955.0,1136.0,2022.0,2088.0,444.0,416.0,204.0,180.0,1999.0,1784.0,849.0,760.0,425.0,413.0,2095.0,1884.0,393.0,419.0,0,1,71,41,31
36,-1,20003,MIN,2010,CAR,53.0,C,CAR,222,218,1,18.0,F,12.0,12.0,6.0,1113.0,1251.0,2030.0,2520.0,543.0,569.0,222.0,218.0,2005.0,1578.0,974.0,1097.0,309.0,390.0,2124.0,2307.0,658.0,594.0,1,0,76,38,35


In [260]:
dv['L'] = dv.apply(lambda x: x['GL'] if x['TeamCode']== x['LossTeam'] else (x['GP'] - x['GW']), 1)
dv['W'] = dv.apply(lambda x: x['GW'] if x['TeamCode']== x['WinTeam'] else (x['GP'] - x['GL']), 1)
dv.head()

Unnamed: 0,GD,GameNumber,LossTeam,Season,WinTeam,PlayerNumber,PlayerPosition,TeamCode,GF,GA,Rank,RosterCount,Position,PositionCount,FCount,DCount,Blocks_For,Blocks_Against,Faceoffs_For,Faceoffs_Against,Giveaways_For,Giveaways_Against,Goals_For,Goals_Against,Hits_For,Hits_Against,Misses_For,Misses_Against,Penalties_For,Penalties_Against,Shots_For,Shots_Against,Takeaways_For,Takeaways_Against,TeamWin,TeamLos,GP,GW,GL,L,W
0,1,20001,MTL,2010,TOR,11.0,C,MTL,188,181,2,18.0,F,12.0,12.0,6.0,1025.0,1008.0,1951.0,1969.0,584.0,501.0,188.0,181.0,1353.0,1549.0,835.0,768.0,355.0,339.0,2014.0,1919.0,434.0,388.0,0,1,68,34,31,31,37
18270,1,20001,MTL,2010,TOR,37.0,C,TOR,195,218,2,18.0,F,12.0,12.0,6.0,1150.0,1138.0,2022.0,1975.0,769.0,701.0,195.0,218.0,1777.0,1643.0,865.0,929.0,326.0,365.0,1831.0,1965.0,498.0,567.0,1,0,70,34,31,36,34
18,-1,20002,PIT,2010,PHI,17.0,C,PHI,235,207,1,18.0,F,12.0,12.0,6.0,1157.0,1077.0,2166.0,2165.0,575.0,566.0,235.0,207.0,1690.0,1598.0,845.0,917.0,366.0,355.0,2061.0,2003.0,511.0,555.0,1,0,72,41,31,31,41
18288,-1,20002,PIT,2010,PHI,71.0,C,PIT,204,180,1,18.0,F,12.0,12.0,6.0,955.0,1136.0,2022.0,2088.0,444.0,416.0,204.0,180.0,1999.0,1784.0,849.0,760.0,425.0,413.0,2095.0,1884.0,393.0,419.0,0,1,71,41,31,31,40
36,-1,20003,MIN,2010,CAR,53.0,C,CAR,222,218,1,18.0,F,12.0,12.0,6.0,1113.0,1251.0,2030.0,2520.0,543.0,569.0,222.0,218.0,2005.0,1578.0,974.0,1097.0,309.0,390.0,2124.0,2307.0,658.0,594.0,1,0,76,38,35,38,38


In [262]:
dv = dv.drop_duplicates(['Season', 'TeamCode'])
dv['WinPc'] = dv['W']/ dv['GP']
dv['LossPc'] = dv['L']/ dv['GP']
dv['Mean_Blocks_For'] = dv['Blocks_For']/ dv['GP']
dv['Mean_Faceoffs_For'] = dv['Faceoffs_For']/ dv['GP']
dv['Mean_Giveaways_For'] = dv['Giveaways_For']/ dv['GP']
dv['Mean_Goals_For'] = dv['Goals_For']/ dv['GP']
dv['Mean_Hits_For'] = dv['Hits_For']/ dv['GP']
dv['Mean_Misses_For'] = dv['Misses_For']/ dv['GP']
dv['Mean_Penalties_For'] = dv['Penalties_For']/ dv['GP']
dv['Mean_Shots_For'] = dv['Shots_For']/ dv['GP']
dv['Mean_Takeaways_For'] = dv['Takeaways_For']/ dv['GP']
dv['Mean_Blocks_Against'] = dv['Blocks_Against']/ dv['GP']
dv['Mean_Faceoffs_Against'] = dv['Faceoffs_Against']/ dv['GP']
dv['Mean_Giveaways_Against'] = dv['Giveaways_Against']/ dv['GP']
dv['Mean_Goals_Against'] = dv['Goals_Against']/ dv['GP']
dv['Mean_Hits_Against'] = dv['Hits_Against']/ dv['GP']
dv['Mean_Misses_Against'] = dv['Misses_Against']/ dv['GP']
dv['Mean_Penalties_Against'] = dv['Penalties_Against']/ dv['GP']
dv['Mean_Shots_Against'] = dv['Shots_Against']/ dv['GP']
dv['Mean_Takeaways_Against'] = dv['Takeaways_Against']/ dv['GP']
dv.head()

Unnamed: 0,GD,GameNumber,LossTeam,Season,WinTeam,PlayerNumber,PlayerPosition,TeamCode,GF,GA,Rank,RosterCount,Position,PositionCount,FCount,DCount,Blocks_For,Blocks_Against,Faceoffs_For,Faceoffs_Against,Giveaways_For,Giveaways_Against,Goals_For,Goals_Against,Hits_For,Hits_Against,Misses_For,Misses_Against,Penalties_For,Penalties_Against,Shots_For,Shots_Against,Takeaways_For,Takeaways_Against,TeamWin,TeamLos,GP,GW,GL,L,W,WinPc,LossPc,Mean_Blocks_For,Mean_Faceoffs_For,Mean_Giveaways_For,Mean_Goals_For,Mean_Hits_For,Mean_Misses_For,Mean_Penalties_For,Mean_Shots_For,Mean_Takeaways_For,Mean_Blocks_Against,Mean_Faceoffs_Against,Mean_Giveaways_Against,Mean_Goals_Against,Mean_Hits_Against,Mean_Misses_Against,Mean_Penalties_Against,Mean_Shots_Against,Mean_Takeaways_Against
0,1,20001,MTL,2010,TOR,11.0,C,MTL,188,181,2,18.0,F,12.0,12.0,6.0,1025.0,1008.0,1951.0,1969.0,584.0,501.0,188.0,181.0,1353.0,1549.0,835.0,768.0,355.0,339.0,2014.0,1919.0,434.0,388.0,0,1,68,34,31,31,37,0.544118,0.455882,15.073529,28.691176,8.588235,2.764706,19.897059,12.279412,5.220588,29.617647,6.382353,14.823529,28.955882,7.367647,2.661765,22.779412,11.294118,4.985294,28.220588,5.705882
18270,1,20001,MTL,2010,TOR,37.0,C,TOR,195,218,2,18.0,F,12.0,12.0,6.0,1150.0,1138.0,2022.0,1975.0,769.0,701.0,195.0,218.0,1777.0,1643.0,865.0,929.0,326.0,365.0,1831.0,1965.0,498.0,567.0,1,0,70,34,31,36,34,0.485714,0.514286,16.428571,28.885714,10.985714,2.785714,25.385714,12.357143,4.657143,26.157143,7.114286,16.257143,28.214286,10.014286,3.114286,23.471429,13.271429,5.214286,28.071429,8.1
18,-1,20002,PIT,2010,PHI,17.0,C,PHI,235,207,1,18.0,F,12.0,12.0,6.0,1157.0,1077.0,2166.0,2165.0,575.0,566.0,235.0,207.0,1690.0,1598.0,845.0,917.0,366.0,355.0,2061.0,2003.0,511.0,555.0,1,0,72,41,31,31,41,0.569444,0.430556,16.069444,30.083333,7.986111,3.263889,23.472222,11.736111,5.083333,28.625,7.097222,14.958333,30.069444,7.861111,2.875,22.194444,12.736111,4.930556,27.819444,7.708333
18288,-1,20002,PIT,2010,PHI,71.0,C,PIT,204,180,1,18.0,F,12.0,12.0,6.0,955.0,1136.0,2022.0,2088.0,444.0,416.0,204.0,180.0,1999.0,1784.0,849.0,760.0,425.0,413.0,2095.0,1884.0,393.0,419.0,0,1,71,41,31,31,40,0.56338,0.43662,13.450704,28.478873,6.253521,2.873239,28.15493,11.957746,5.985915,29.507042,5.535211,16.0,29.408451,5.859155,2.535211,25.126761,10.704225,5.816901,26.535211,5.901408
36,-1,20003,MIN,2010,CAR,53.0,C,CAR,222,218,1,18.0,F,12.0,12.0,6.0,1113.0,1251.0,2030.0,2520.0,543.0,569.0,222.0,218.0,2005.0,1578.0,974.0,1097.0,309.0,390.0,2124.0,2307.0,658.0,594.0,1,0,76,38,35,38,38,0.5,0.5,14.644737,26.710526,7.144737,2.921053,26.381579,12.815789,4.065789,27.947368,8.657895,16.460526,33.157895,7.486842,2.868421,20.763158,14.434211,5.131579,30.355263,7.815789


In [263]:
dv = dv[['Season', 'TeamCode', 'GP', 'GW', 'GL', 'L', 'W','WinPc', 'LossPc', 'Mean_Blocks_For', 'Mean_Faceoffs_For', 'Mean_Giveaways_For', 'Mean_Goals_For','Mean_Hits_For', 'Mean_Misses_For', 'Mean_Penalties_For','Mean_Shots_For', 'Mean_Takeaways_For', 'Mean_Blocks_Against','Mean_Faceoffs_Against', 'Mean_Giveaways_Against', 'Mean_Goals_Against','Mean_Hits_Against', 'Mean_Misses_Against', 'Mean_Penalties_Against','Mean_Shots_Against', 'Mean_Takeaways_Against']]

In [265]:
dv['Rank_W'] = dv.groupby(['Season'])['WinPc'].rank(ascending=False)
dv = dv.sort_values(['Season', 'Rank_W'], ascending=[True, True])
dv.head(50)

Unnamed: 0,Season,TeamCode,GP,GW,GL,L,W,WinPc,LossPc,Mean_Blocks_For,Mean_Faceoffs_For,Mean_Giveaways_For,Mean_Goals_For,Mean_Hits_For,Mean_Misses_For,Mean_Penalties_For,Mean_Shots_For,Mean_Takeaways_For,Mean_Blocks_Against,Mean_Faceoffs_Against,Mean_Giveaways_Against,Mean_Goals_Against,Mean_Hits_Against,Mean_Misses_Against,Mean_Penalties_Against,Mean_Shots_Against,Mean_Takeaways_Against,Rank_W
18576,2010,VAN,73,40,25,25,48,0.657534,0.342466,13.109589,32.643836,6.849315,3.260274,22.287671,12.589041,4.712329,29.520548,7.342466,15.410959,26.739726,7.493151,2.39726,22.90411,11.273973,4.657534,28.123288,7.191781,1.0
90,2010,SJ,65,41,44,24,41,0.630769,0.369231,14.369231,31.784615,10.2,3.169231,21.938462,13.569231,4.384615,31.892308,8.430769,15.8,27.0,9.892308,2.430769,24.092308,11.4,4.446154,26.6,6.984615,2.0
18432,2010,BOS,76,38,31,31,45,0.592105,0.407895,14.421053,30.184211,6.644737,3.0,21.118421,11.789474,4.671053,29.828947,5.197368,15.802632,27.907895,8.342105,2.315789,24.210526,11.434211,4.684211,30.5,7.763158,3.0
18396,2010,DET,68,40,27,28,40,0.588235,0.411765,11.058824,30.911765,8.897059,3.235294,21.514706,13.544118,4.132353,30.632353,7.102941,13.852941,28.647059,7.470588,2.955882,23.544118,11.279412,4.191176,27.926471,7.220588,4.0
126,2010,ANA,65,40,27,27,38,0.584615,0.415385,15.507692,27.246154,7.446154,2.861538,23.215385,10.538462,5.230769,25.569231,5.323077,10.338462,29.769231,8.984615,2.784615,21.815385,14.538462,4.738462,31.307692,6.169231,5.0
18720,2010,WSH,72,42,44,30,42,0.583333,0.416667,15.583333,30.430556,8.222222,2.638889,22.847222,12.111111,4.430556,28.777778,7.888889,15.625,28.958333,8.013889,2.347222,23.222222,11.791667,4.083333,26.916667,6.916667,6.0
306,2010,LA,70,40,25,30,40,0.571429,0.428571,12.642857,29.814286,10.3,2.771429,25.757143,13.157143,4.428571,26.457143,5.471429,14.814286,28.142857,9.628571,2.542857,28.571429,12.157143,4.671429,26.285714,5.285714,7.0
18,2010,PHI,72,41,31,31,41,0.569444,0.430556,16.069444,30.083333,7.986111,3.263889,23.472222,11.736111,5.083333,28.625,7.097222,14.958333,30.069444,7.861111,2.875,22.194444,12.736111,4.930556,27.819444,7.708333,8.0
18288,2010,PIT,71,41,31,31,40,0.56338,0.43662,13.450704,28.478873,6.253521,2.873239,28.15493,11.957746,5.985915,29.507042,5.535211,16.0,29.408451,5.859155,2.535211,25.126761,10.704225,5.816901,26.535211,5.901408,9.0
180,2010,NYR,73,41,30,32,41,0.561644,0.438356,15.753425,26.890411,5.164384,3.054795,28.150685,11.534247,4.712329,27.726027,7.30137,12.808219,29.273973,6.863014,2.424658,27.054795,10.958904,5.219178,27.561644,6.712329,10.0


In [266]:
dv.to_csv('season_team_event_mean_ranking.csv', index='False')