In [1]:
from nhlpy import NHLClient
import pandas as pd

In [3]:
client = NHLClient()
teams = client.teams.teams_info()
team_abbrs = []
for item in teams:
    abbr = item['abbr']
    team_abbrs.append(abbr)

game_ids = []
seasons = [20222023, 20232024, 20242025]

for abbreviation in team_abbrs:
    for season in seasons:

        if abbreviation == 'UTA' and season != 20242025:
            abbreviation = 'ARI'

        games = client.schedule.get_season_schedule(team_abbr=abbreviation, season=season)['games']

        for game in games:
            if game['gameType'] == 1:
                continue

            id = game['id']
            if id not in [game_ids]:
                game_ids.append(id)

In [4]:
def second_diff(time1, time2):
    minutes1 = int(time1[0:2])
    minutes2 = int(time2[0:2])
    seconds1 = int(time1[3:5])
    seconds2 = int(time2[3:5])
    return abs((minutes2 * 60 + seconds2) - (minutes1 * 60 + seconds1))

In [11]:
rows = []
for game_id in game_ids:
    home_id = client.game_center.play_by_play(game_id=game_id)['homeTeam']['id']
    away_id = client.game_center.play_by_play(game_id=game_id)['awayTeam']['id']
    pbp = client.game_center.play_by_play(game_id=game_id)['plays']
    idx = -1

    for play in pbp:
        idx += 1
        if play['typeDescKey'] not in ['missed-shot', 'goal', 'shot-on-goal']:
            continue
        home = 0
        away = 0
        rebound = 0
        rush = 0
        try:
            if home_id == play['details']['eventOwnerTeamId']:
                home = 1
            else:
                away = 1

            if (home == 1 and play['situationCode'][0] == '0') or (away == 1 and play['situationCode'][3] == '0'):
                continue

            if home == 1:
                team_id = home_id
            else:
                team_id = away_id
                
            time_diff = second_diff(play['timeInPeriod'], pbp[idx - 1]['timeInPeriod'])

            if pbp[idx - 1]['typeDescKey'] == 'blocked-shot' and time_diff <= 2:
                rebound = 1
            
            if (pbp[idx-1]['typeDescKey'] in ['missed-shot', 'shot-on-goal']) and time_diff <= 3:
                rebound = 1

            if (pbp[idx-1]['typeDescKey'] in ['takeaway', 'giveaway']) and time_diff <= 4 and pbp[idx-1]['details']['zoneCode'] in ['N', 'D']:
                rush = 1

            assist1 = 0
            assist2 = 0
            home_skaters = play['situationCode'][2]
            away_skaters = play['situationCode'][1]
            shot_class = play['typeDescKey']
            x_coord = abs(play['details']['xCoord'])
            y_coord = play['details']['yCoord']
            shot_type = play['details']['shotType']
            shooter = None
            goalie = play['details']['goalieInNetId']
            last_play = pbp[idx-1]['typeDescKey']

            if shot_class == 'goal':

                shooter = play['details']['scoringPlayerId']

                if 'assist1PlayerId' in play['details']:
                    assist1 = play['details']['assist1PlayerId']

                if 'assist2PlayerId' in play['details']:
                    assist2 = play['details']['assist2PlayerId']
                    
            else:

                shooter = play['details']['shootingPlayerId']

            rows.append([game_id, team_id, home, last_play, rebound, rush, home_skaters, away_skaters, x_coord, y_coord, shooter, assist1, assist2, goalie, shot_type, shot_class])
        except:
            continue

In [13]:
header = ['game_id', 'team_id', 'home', 'last_play', 'rebound', 'rush', 'home_skaters', 'away_skaters', 'x_coord', 'y_coord', 'shooter', 'assist1', 'assist2', 'goalie', 'shot_type', 'shot_class']

In [18]:
df = pd.DataFrame(rows, columns=header)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 586565 entries, 0 to 586564
Data columns (total 16 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   game_id       586565 non-null  int64 
 1   team_id       586565 non-null  int64 
 2   home          586565 non-null  int64 
 3   last_play     586565 non-null  object
 4   rebound       586565 non-null  int64 
 5   rush          586565 non-null  int64 
 6   home_skaters  586565 non-null  object
 7   away_skaters  586565 non-null  object
 8   x_coord       586565 non-null  int64 
 9   y_coord       586565 non-null  int64 
 10  shooter       586565 non-null  int64 
 11  assist1       586565 non-null  int64 
 12  assist2       586565 non-null  int64 
 13  goalie        586565 non-null  int64 
 14  shot_type     586565 non-null  object
 15  shot_class    586565 non-null  object
dtypes: int64(11), object(5)
memory usage: 71.6+ MB


In [16]:
df.to_csv('nhl_shot_data.csv')