# Examination of effecient NHL roster design

In [1]:
import sys
import os
import pandas as pd
import numpy as np
import datetime, time
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols
from pylab import hist, show
import scipy

## Import dataframes

In [2]:
dm = pd.read_csv('t_play_by_play_o.csv')
dm = dm.sort_values(['Season', 'GameNumber', 'Period', 'EventNumber'], ascending=[True, True, True, True])
dm = dm [['Season', 'GameNumber', 'EventNumber', 'Period', 'AdvantageType', 'EventTimeFromZero', 'EventTimeFromTwenty', 'EventType', 'EventDetail', 'VPlayer1', 'VPlayer1Position', 'VPlayer2', 'VPlayer2Position', 'VPlayer3', 'VPlayer3Position', 'VPlayer4', 'VPlayer4Position', 'VPlayer5', 'VPlayer5Position', 'VPlayer6', 'VPlayer6Position', 'HPlayer1', 'HPlayer1Position', 'HPlayer2', 'HPlayer2Position', 'HPlayer3', 'HPlayer3Position', 'HPlayer4', 'HPlayer4Position', 'HPlayer5', 'HPlayer5Position', 'HPlayer6', 'HPlayer6Position']]

In [3]:
dg = pd.read_csv('t_play_by_play_giveaway_detail_o.csv')
dg = dg.rename(columns={'GivePlayerNumber': 'PlayerNumber', 'GivePlayerLName': 'PlayerName', 'GiveTeamCode': 'TeamCode'})

In [4]:
dt = pd.read_csv('t_play_by_play_takeaway_detail_o.csv')
dt = dt.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dt = dt.rename(columns={'TakePlayerNumber': 'PlayerNumber', 'TakePlayerLName': 'PlayerName', 'TakeTeamCode': 'TeamCode'})

In [5]:
dl = pd.read_csv('t_play_by_play_goal_detail_o.csv')
dl = dl.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dl = dl.rename(columns={'PlayerLName': 'PlayerName'})

In [6]:
ds = pd.read_csv('t_play_by_play_shot_detail_o.csv')
ds = ds.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
ds = ds.rename(columns={'PlayerLName': 'PlayerName'})

In [7]:
dn = pd.read_csv('t_play_by_play_miss_detail_o.csv')
dn = dn.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dn = dn.rename(columns={'PlayerLName': 'PlayerName'})

In [8]:
db = pd.read_csv('t_play_by_play_block_detail_o.csv')
db = db.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
db = db.rename(columns={'ShotPlayerLName': 'ShotPlayerName', 'BlockPlayerLName': 'PlayerName', 'BlockPlayerNumber': 'PlayerNumber', 'BlockTeamCode': 'TeamCode'})

In [9]:
dh = pd.read_csv('t_play_by_play_hit_detail_o.csv')
dh = dh.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dh = dh.rename(columns={'HitterPlayerLName': 'HitterPlayerName', 'HitteePlayerLName': 'HitteePlayerName'})

In [10]:
df = pd.read_csv('t_play_by_play_faceoff_detail_o.csv')
df = df.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
df = df.rename(columns={'VPlayerLName': 'VPlayerName', 'HPlayerLName': 'HPlayerName'})

In [11]:
dp = pd.read_csv('t_play_by_play_penalty_detail_o.csv')
dp = dp.sort_values(['Season', 'GameNumber', 'EventNumber'], ascending=[True, True, True])
dp = dp.rename(columns={'PenaltyPlayerLName': 'PenaltyPlayerName', 'DrawnByPlayerLName': 'DrawnByPlayerName'})

In [12]:
dd = pd.read_csv('t_game_detail_o.csv')

# merge event details into play by play

In [13]:
len(dm)

399135

In [14]:
dz = pd.merge(dm,dg, on=['Season', 'GameNumber', 'EventNumber'], how='left')
dz = pd.merge(dm,dt, on=['Season', 'GameNumber', 'EventNumber'], how='left')
dz = pd.merge(dm,ds, on=['Season', 'GameNumber', 'EventNumber'], how='left')
dz = pd.merge(dm,dn, on=['Season', 'GameNumber', 'EventNumber'], how='left')
dz = pd.merge(dm,db, on=['Season', 'GameNumber', 'EventNumber'], how='left')
dz = pd.merge(dm,df, on=['Season', 'GameNumber', 'EventNumber'], how='left')
dz = pd.merge(dm,dp, on=['Season', 'GameNumber', 'EventNumber'], how='left')
dz = pd.merge(dm,dl, on=['Season', 'GameNumber', 'EventNumber'], how='left')
del dl, dp, df, db, dn, ds, dt, dg

In [15]:
len(dm)

399135

In [16]:
dm.columns

Index(['Season', 'GameNumber', 'EventNumber', 'Period', 'AdvantageType',
       'EventTimeFromZero', 'EventTimeFromTwenty', 'EventType', 'EventDetail',
       'VPlayer1', 'VPlayer1Position', 'VPlayer2', 'VPlayer2Position',
       'VPlayer3', 'VPlayer3Position', 'VPlayer4', 'VPlayer4Position',
       'VPlayer5', 'VPlayer5Position', 'VPlayer6', 'VPlayer6Position',
       'HPlayer1', 'HPlayer1Position', 'HPlayer2', 'HPlayer2Position',
       'HPlayer3', 'HPlayer3Position', 'HPlayer4', 'HPlayer4Position',
       'HPlayer5', 'HPlayer5Position', 'HPlayer6', 'HPlayer6Position'],
      dtype='object')

# remove irrelevant observations

In [17]:
dm = dm[dm['GameNumber'] <= 20002]
dm = dm[dm['EventType']!='STOP']

# man-advantage scenarios

In [18]:
value_list = ['PP', 'SH']
dm[dm['AdvantageType'].isin(value_list)]
dm = dm[dm['AdvantageType'] != 'PP']
dm = dm[dm['AdvantageType'] != 'SH']
dm['AdvantageType'] = dm['AdvantageType'].fillna('EV')

# generate time between events

In [19]:
# set_index (unit of observation: season, gn, en)
dm['TimeBetweenEvent'] = dm['EventTimeFromZero'] - dm['EventTimeFromZero'].shift(1)
dm['TimeBetweenEvent'] = dm['TimeBetweenEvent'].abs()

In [20]:
dm.to_csv('pbpmerge.csv', index='False', sep=',')

Unnamed: 0,Season,GameNumber,EventNumber,Period,AdvantageType,EventTimeFromZero,EventTimeFromTwenty,EventType,EventDetail,VPlayer1,...,HPlayer2Position,HPlayer3,HPlayer3Position,HPlayer4,HPlayer4Position,HPlayer5,HPlayer5Position,HPlayer6,HPlayer6Position,TimeBetweenEvent
0,2010,20001,1,1,EV,0,1200,FAC,MTL won Neu. Zone - MTL #11 GOMEZ vs TOR #37 B...,11,...,R,11.0,L,3.0,D,22.0,D,35.0,G,
68,2010,20001,3,1,EV,15,1185,HIT,"TOR #37 BRENT HIT MTL #26 GORGES, Off. Zone",11,...,R,11.0,L,3.0,D,22.0,D,35.0,G,15.0
67,2010,20001,4,1,EV,46,1154,HIT,"MTL #14 PLEKANEC HIT TOR #2 SCHENN, Off. Zone",14,...,C,32.0,R,2.0,D,15.0,D,35.0,G,31.0
66,2010,20001,5,1,EV,57,1143,HIT,"MTL #76 SUBBAN HIT TOR #15 KABERLE, Neu. Zone",14,...,C,32.0,R,2.0,D,15.0,D,35.0,G,11.0
41,2010,20001,6,1,EV,69,1131,GIVE,"TOR&nbsp;GIVEAWAY - #35 GIGUERE, Def. Zone",14,...,C,32.0,R,2.0,D,15.0,D,35.0,G,12.0
21,2010,20001,7,1,EV,73,1127,BLOCK,"MTL #76 SUBBAN BLOCKED BY TOR #2 SCHENN, Wris...",14,...,C,32.0,R,2.0,D,15.0,D,35.0,G,4.0
90,2010,20001,8,1,EV,86,1114,SHOT,"MTL ONGOAL - #81 ELLER, Wrist, Off. Zone, 11 ft.",14,...,C,32.0,R,2.0,D,15.0,D,35.0,G,13.0
89,2010,20001,9,1,EV,91,1109,SHOT,"MTL ONGOAL - #46 KOSTITSYN, Snap, Off. Zone, 8...",14,...,C,32.0,R,2.0,D,15.0,D,35.0,G,5.0
20,2010,20001,10,1,EV,95,1105,BLOCK,"MTL #76 SUBBAN BLOCKED BY TOR #32 VERSTEEG, S...",14,...,C,32.0,R,2.0,D,15.0,D,35.0,G,4.0
74,2010,20001,11,1,EV,102,1098,MISS,"MTL #76 SUBBAN, Slap, Wide of Net, Off. Zone, ...",15,...,C,16.0,L,8.0,D,36.0,D,35.0,G,7.0
