# Home vs Away Stats

In [1]:
import os
import pandas as pd
import numpy as np
import pprint

In [2]:
# Load the pickled event table from the `data` folder that sits next to the notebook's working directory.
# NOTE: unpickling can execute arbitrary code -- only load this file if it comes from a trusted source.
path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'data/df_all_events.pkl'))
print(path)

df = pd.read_pickle(path)
df.head()

/Users/williamshaw/Projects/NHL/Big-Data-Cup-2021/data/df_all_events.pkl


Unnamed: 0,game_date,home_team,away_team,period,clock,home_team_skaters,away_team_skaters,home_team_goals,away_team_goals,team,...,event,x_coord_1,y_coord_1,detail_1,detail_2,detail_3,detail_4,player_2,x_coord_2,y_coord_2
0,2019-09-20,Erie Otters,Sudbury Wolves,1,20:00,5,5,0,0,Sudbury Wolves,...,Faceoff Win,100,43,Forehand,,,,Connor Lockhart,,
1,2019-09-20,Erie Otters,Sudbury Wolves,1,19:58,5,5,0,0,Sudbury Wolves,...,Puck Recovery,67,77,,,,,,,
2,2019-09-20,Erie Otters,Sudbury Wolves,1,19:53,5,5,0,0,Sudbury Wolves,...,Incomplete Play,45,49,Direct,,,,Blake Murray,74.0,35.0
3,2019-09-20,Erie Otters,Sudbury Wolves,1,19:52,5,5,0,0,Erie Otters,...,Puck Recovery,89,36,,,,,,,
4,2019-09-20,Erie Otters,Sudbury Wolves,1,19:51,5,5,0,0,Erie Otters,...,Incomplete Play,82,49,Direct,,,,Connor Lockhart,94.0,46.0


In [3]:
# For every event type, list how often each `detail_1` value occurs
detail_1_by_event = df.groupby('event')['detail_1']
detail_1_by_event.value_counts()

event            detail_1                 
Dump In/Out      Lost                          4143
                 Retained                       745
Faceoff Win      Backhand                      2179
                 Forehand                       245
                 Feet                            17
Goal             Snapshot                       148
                 Wristshot                      115
                 Deflection                      17
                 Slapshot                         8
                 Wrap Around                      5
Incomplete Play  Direct                        5692
                 Indirect                      3198
Penalty Taken    Roughing                        50
                 Slashing                        44
                 Tripping                        44
                 Hooking                         35
                 Fighting                        32
                 High-sticking                   32
                 Cros

In [4]:
# Flatten the frame into a list with one plain dict per row so the cells below can loop over events
events = df.to_dict('records')

In [5]:
# Tally, for every Erie player, how many events he recorded on each game date.
# The number of distinct dates stored per player is what later serves as his games-played count.
home_gp = {}   # player -> {game_date: number of events} for games hosted by Erie
away_gp = {}   # same structure for games where Erie is not the home team
cur_date = ''
num_home_games = 0
num_away_games = 0
date_change = 0

for e in events:
    erie_is_home = e['home_team'] == 'Erie Otters'

    # a different date than the previous event is treated as the start of a new game
    if e['game_date'] != cur_date:
        cur_date = e['game_date']
        date_change += 1
        if erie_is_home:
            num_home_games += 1
            print('date changed to ' + cur_date + ' | Home game')
        else:
            num_away_games += 1
            print('date changed to ' + cur_date + ' |              Away game')

    # only events credited to Erie count towards a player's appearances
    if e['team'] == 'Erie Otters':
        gp_by_player = home_gp if erie_is_home else away_gp

        # if the player has not appeared in any game yet, start an empty record for him,
        # then bump his event count for the current date (starting at zero for a new date)
        events_per_date = gp_by_player.setdefault(e['player_1'], {})
        events_per_date[cur_date] = events_per_date.get(cur_date, 0) + 1

print('\ntotal games: ' + str(date_change))
print('total home games: ' + str(num_home_games))
print('total away games: ' + str(num_away_games))

date changed to 2019-09-20 | Home game
date changed to 2019-09-21 |              Away game
date changed to 2019-09-26 |              Away game
date changed to 2019-10-04 | Home game
date changed to 2019-10-11 | Home game
date changed to 2019-10-12 | Home game
date changed to 2019-10-17 |              Away game
date changed to 2019-10-18 |              Away game
date changed to 2019-10-19 | Home game
date changed to 2019-10-23 | Home game
date changed to 2019-10-26 | Home game
date changed to 2019-11-09 |              Away game
date changed to 2019-11-15 | Home game
date changed to 2019-11-17 |              Away game
date changed to 2019-11-21 |              Away game
date changed to 2019-11-22 |              Away game
date changed to 2019-12-08 | Home game
date changed to 2019-12-13 |              Away game
date changed to 2019-12-15 |              Away game
date changed to 2019-12-29 |              Away game
date changed to 2019-12-31 | Home game
date changed to 2020-01-03 |          

In [6]:
# Disabled debugging snippet: when uncommented it prints, for each player in home_gp and away_gp,
# the number of dates stored for him (i.e. len() of his per-date dict).
# print('Home games played for each player:')
# for key, value in home_gp.items():
#     print(str(key) + ' : ' + str(len(value)))
    
# print('\nAway games played for each player:')
# for key, value in away_gp.items():
#     print(str(key) + ' : ' + str(len(value)))

In [7]:
# Count faceoff losses per Erie player, split by home and away games.
# The data only records 'Faceoff Win' events, so a faceoff won by the other team
# is counted as a loss for the player listed as `player_2` on that event.
erie_home_faceoff_losses = {}
erie_away_faceoff_losses = {}

for e in events:
    # skip everything that is not a faceoff won by Erie's opponent
    if e['event'] != 'Faceoff Win' or e['team'] == 'Erie Otters':
        continue

    # pick the home or away tally depending on who hosts the game
    if e['home_team'] == 'Erie Otters':
        losses = erie_home_faceoff_losses
    else:
        losses = erie_away_faceoff_losses

    # add one loss for this player (first loss starts from zero)
    loser = e['player_2']
    losses[loser] = losses.get(loser, 0) + 1

In [8]:
# Pretty-print the home tally, a blank line, then the away tally
print(
    pprint.pformat(erie_home_faceoff_losses),
    pprint.pformat(erie_away_faceoff_losses),
    sep='\n\n',
)

{'Alex Gritz': 3,
 'Austen Swankler': 86,
 'Brendan Hoffmann': 126,
 'Brendan Sellan': 1,
 'Chad Yetman': 184,
 'Connor Lockhart': 70,
 'Danial Singer': 3,
 "Daniel D'Amato": 6,
 'Elias Cohen': 31,
 'Emmett Sproule': 4,
 'Hayden Fowler': 59,
 'Jamie Drysdale': 1,
 'Kyen Sopa': 2,
 'Matthew MacDougall': 1,
 'Maxim Golod': 2,
 'Noah Sedore': 13}

{'Alex Gritz': 8,
 'Austen Swankler': 79,
 'Brendan Hoffmann': 165,
 'Brendan Sellan': 5,
 'Brett Bressette': 6,
 'Chad Yetman': 197,
 'Connor Lockhart': 45,
 'Danial Singer': 3,
 "Daniel D'Amato": 22,
 'Elias Cohen': 61,
 'Emmett Sproule': 7,
 'Hayden Fowler': 102,
 'Jamie Drysdale': 1,
 'Kyen Sopa': 6,
 'Matthew MacDougall': 3,
 'Maxim Golod': 11,
 'Noah Sedore': 25}


In [9]:
# Count home and away stats of Erie Otters players in separate dicts.
# Each dict maps player name -> a copy of `new_player_dict` holding that player's totals.
home_stats_erie = {}
away_stats_erie = {}
new_player_dict = {
    'dump_retained': 0,       # successful dump in attempt
    'dump_lost': 0,           # unsuccessful dump in attempt
    'faceoff_win': 0,         # face off won
    'faceoff_lost': 0,        # face off lost
    'goals': 0,               # goals
    'shots': 0,               # total shots
    'shots_on_net': 0,        # number of shots on goal
    'shots_missed': 0,        # number of attempted shots that miss net
    'shots_blocked': 0,       # number of attempted shots that are blocked
    'zone_entry_carried': 0,  # puck carried into offensive zone
    'zone_entry_dumped': 0,   # puck dumped into offensive zone
    'zone_entry_played': 0,   # puck played into offensive zone via pass
    'penalty_taken': 0,       # number of penalties taken
    'play_indirect': 0,       # successful indirect passes (i.e. pass rimmed off boards)
    'play_direct': 0,         # successful direct passes (i.e. tape to tape pass)
    'inc_play_indirect': 0,   # unsuccessful indirect passes (i.e. pass rimmed off boards)
    'inc_play_direct': 0,     # unsuccessful direct passes (i.e. tape to tape pass)
    'games_played': 0
}

# stats that are identified by the event type alone
stat_by_event = {
    'Faceoff Win': 'faceoff_win',
    'Goal': 'goals',
    'Penalty Taken': 'penalty_taken',
}

# stats that are identified by the (event, detail_1) pair
stat_by_event_and_detail = {
    ('Dump In/Out', 'Lost'): 'dump_lost',
    ('Dump In/Out', 'Retained'): 'dump_retained',
    ('Zone Entry', 'Carried'): 'zone_entry_carried',
    ('Zone Entry', 'Dumped'): 'zone_entry_dumped',
    ('Zone Entry', 'Played'): 'zone_entry_played',
    ('Play', 'Indirect'): 'play_indirect',
    ('Play', 'Direct'): 'play_direct',
    ('Incomplete Play', 'Indirect'): 'inc_play_indirect',
    ('Incomplete Play', 'Direct'): 'inc_play_direct',
}

# for 'Shot' events the outcome is stored in detail_2
stat_by_shot_result = {
    'On Net': 'shots_on_net',
    'Missed': 'shots_missed',
    'Blocked': 'shots_blocked',
}

for e in events:
    # only events credited to Erie are tallied
    if e['team'] != 'Erie Otters':
        continue

    # figure out if this event belongs to the home or away dataset
    home_or_away_dict = home_stats_erie if e['home_team'] == 'Erie Otters' else away_stats_erie

    # if this player has not been seen yet, add them to the dict so we can track their stats
    player_name = e['player_1']
    if player_name not in home_or_away_dict:
        home_or_away_dict[player_name] = new_player_dict.copy()
    player_stats = home_or_away_dict[player_name]

    # identify the stat (if any) this event contributes to; '' means "not tracked"
    event_type = e['event']
    if event_type == 'Shot':
        # every shot counts towards the total, whatever its outcome
        # NOTE(review): 'Goal' events are tallied separately and do not add to
        # 'shots' / 'shots_on_net' -- confirm that is the intended definition.
        player_stats['shots'] += 1
        event_name = stat_by_shot_result.get(e['detail_2'], '')
    elif event_type in stat_by_event:
        event_name = stat_by_event[event_type]
    else:
        event_name = stat_by_event_and_detail.get((event_type, e['detail_1']), '')

    # if identified an event we care about, increment that stat
    if event_name:
        player_stats[event_name] += 1

# Append games played and faceoffs lost to the matching table.
# Each table is walked over its OWN players: the previous version looped over the home
# players only, so players seen exclusively in away games never received their
# games_played / faceoff_lost values, and indexing away_stats_erie with a home-only
# player who lost an away faceoff raised a KeyError.
for stats_table, gp_table, faceoff_loss_table in (
    (home_stats_erie, home_gp, erie_home_faceoff_losses),
    (away_stats_erie, away_gp, erie_away_faceoff_losses),
):
    for player_name, player_stats in stats_table.items():
        if player_name in gp_table:
            # one entry per game date -> number of games the player appeared in
            player_stats['games_played'] = len(gp_table[player_name])
        if player_name in faceoff_loss_table:
            player_stats['faceoff_lost'] = faceoff_loss_table[player_name]
        


In [10]:
# Spot check: print the home team of every game in which Jamie Drysdale is credited with a goal
drysdale_goal_hosts = [
    ev['home_team']
    for ev in events
    if ev['player_1'] == 'Jamie Drysdale' and ev['event'] == 'Goal'
]
for host in drysdale_goal_hosts:
    pprint.pprint(host)

'Hamilton Bulldogs'
'Niagara Ice Dogs'
'Niagara Ice Dogs'
'Guelph Storm'
'Erie Otters'
'Kitchener Rangers'


In [11]:
# One row per player, one column per stat
df_home = pd.DataFrame.from_dict(home_stats_erie, orient='index')
df_away = pd.DataFrame.from_dict(away_stats_erie, orient='index')

# Normalise every stat by the player's games played in that setting
df_home_per_game = df_home.div(df_home['games_played'], axis='index')
df_away_per_game = df_away.div(df_away['games_played'], axis='index')

# Home-minus-away difference of the per-game rates; the games-played columns are
# replaced / added afterwards so they hold raw game counts instead of rates
df_diff = df_home_per_game.sub(df_away_per_game).assign(
    games_played=df_home['games_played'] + df_away['games_played'],
    games_played_home=df_home['games_played'],
    games_played_away=df_away['games_played'],
)

In [17]:
# Display the per-player totals built from home_stats_erie (one row per player)
df_home

Unnamed: 0,dump_retained,dump_lost,faceoff_win,faceoff_lost,goals,shots,shots_on_net,shots_missed,shots_blocked,zone_entry_carried,zone_entry_dumped,zone_entry_played,penalty_taken,play_indirect,play_direct,inc_play_indirect,inc_play_direct,games_played
Hayden Fowler,13,42,70,59,7,63,30,20,13,69,23,9,3,78,212,26,140,17
Jamie Drysdale,13,48,0,1,1,107,43,30,34,36,22,12,2,146,438,54,91,15
Kurtis Henry,14,78,0,0,1,75,26,25,24,13,26,3,4,150,248,65,73,19
Brendan Sellan,18,86,1,1,5,72,37,21,14,54,51,11,4,94,155,49,62,19
Connor Lockhart,11,41,50,70,2,57,31,19,7,40,33,5,4,75,157,21,62,18
Danial Singer,2,21,1,3,1,46,17,12,17,25,13,6,2,21,66,6,22,10
Jacob Golden,8,57,0,0,1,66,21,19,26,37,28,1,0,101,324,55,72,17
Austen Swankler,12,59,84,86,6,91,49,25,17,93,45,11,8,102,256,45,111,18
Emmett Sproule,9,59,3,4,3,70,33,18,19,76,47,8,2,68,202,30,94,19
Jack Duff,12,79,0,0,2,78,28,24,26,14,27,7,12,113,262,61,72,19


In [208]:
# Average home-minus-away per-game difference over players with enough total games
MIN_GAMES_PLAYED = 8
regular_players = df_diff.loc[df_diff['games_played'].ge(MIN_GAMES_PLAYED)]
regular_players.mean(axis='index')


dump_retained         -0.012076
dump_lost             -0.085745
faceoff_win            0.134902
faceoff_lost          -0.255175
goals                 -0.042511
shots                  0.318385
shots_on_net           0.146620
shots_missed           0.131578
shots_blocked          0.040187
zone_entry_carried     0.110186
zone_entry_dumped      0.089019
zone_entry_played      0.075263
penalty_taken         -0.052666
play_indirect          0.389694
play_direct            0.310010
inc_play_indirect      0.053071
inc_play_direct        0.214446
games_played          30.040000
games_played_home     14.440000
games_played_away     15.600000
dtype: float64