In [1]:
from datetime import date, timedelta

import nhl
import pandas as pd
import requests
from pytz import timezone
from sqlalchemy import create_engine, text

from credentials import HOCKEY_DB_NAME, HOCKEY_DB_PWD
from schedule import parse_schedule, get_schedule
from play import Shot, Goal, Penalty, Faceoff, Other, process_game, process_play_bundle
from boxscore import process_player_boxscore, process_team_boxscore

In [2]:
# Root URL of the NHL stats API; the boxscore requests further down are built by
# formatting '<BASE>/game/<game_pk>/boxscore'.
BASE = "http://statsapi.web.nhl.com/api/v1"

### Schedule 

In [3]:
# Connect to the local Postgres instance; the password and database name come from
# credentials.py.
# NOTE(review): the password is interpolated into the URL unescaped, so characters such as
# '@' or '/' in it would corrupt the connection string - confirm it contains none.
engine = create_engine('postgresql://baseball:{}@localhost:5432/{}'.format(HOCKEY_DB_PWD, HOCKEY_DB_NAME))

# Drop the schedule table so the cells below rebuild it from scratch.
# Engine.execute() with a raw string no longer exists in SQLAlchemy 2.0; engine.begin()
# plus text() works on both 1.4 and 2.0, commits on success and always releases the
# connection, so no explicit close() is needed.
with engine.begin() as conn:
    conn.execute(text("DROP TABLE IF EXISTS nhl_schedule"))

In [4]:
# Request the schedule from 2019-01-01 up to yesterday's date.
schedule = get_schedule(
    startdate='2019-01-01',
    enddate=date.today() - timedelta(days=1),
)

In [5]:
# Keep only rows whose game_type is 'R' (presumably regular-season games - confirm against
# the schedule parser). The explicit .copy() makes `schedule` an independent frame, so the
# column assignments in the next cell write to it directly instead of to a slice of the
# unfiltered frame (which raises pandas' SettingWithCopyWarning).
schedule = schedule[schedule['game_type'] == 'R'].copy()

In [6]:
# Normalise game_date in one vectorised pass: parse to timestamps, shift them to US/Eastern,
# then strip the timezone so the column holds naive Eastern wall-clock times.
# The .dt accessors do the same per-value work as the former row-wise .apply() lambdas.
# Like those lambdas, tz_convert requires timezone-aware timestamps and raises on naive ones.
schedule['game_date'] = (
    pd.to_datetime(schedule['game_date'])
    .dt.tz_convert(timezone('US/Eastern'))
    .dt.tz_localize(None)
)

In [7]:
# Write the filtered schedule to nhl_schedule (dropped earlier in the notebook, so the
# append lands in a freshly created table). Left as the last expression so the cell
# displays to_sql's return value.
schedule.to_sql(
    name='nhl_schedule',
    con=engine,
    index=False,
    if_exists='append',
)

350

### Boxscores

In [8]:
#game_pks = pd.read_sql("""SELECT DISTINCT s.game_pk FROM nhl_schedule s LEFT JOIN nhl_team_boxscore b ON s.game_pk = b.game_pk;""", engine)

In [9]:
#res = engine.execute("DROP TABLE IF EXISTS nhl_team_boxscore")
#res.close()

#res = engine.execute("DROP TABLE IF EXISTS nhl_player_boxscore")
#res.close()

In [10]:
# Anti-join: scheduled games that have no row yet in nhl_team_boxscore, i.e. the games
# the loops below still need to download.
game_pks = pd.read_sql(
    sql="""SELECT DISTINCT s.game_pk FROM nhl_schedule s LEFT JOIN nhl_team_boxscore b ON s.game_pk = b.game_pk WHERE b.game_pk IS NULL;""",
    con=engine,
)

In [11]:
# Number of games still to process; also the denominator of the progress printout below.
n_games = len(game_pks)

n_games

7

In [12]:
# Players whose time on ice is not above this many seconds are not stored.
MIN_TIME_ON_ICE_SECONDS = 300


def _toi_seconds(toi):
    """Convert a 'minutes:seconds' time-on-ice string into total seconds."""
    parts = toi.split(':')
    return int(parts[0]) * 60 + int(parts[1])


def _busiest_goalie(side_players):
    """Return the player_name of the goalie with the most time on ice in `side_players`.

    Ice time is compared as seconds, not as the raw string: sorted as text, '9:30' ranks
    above '60:00', which would pick the wrong goalie whenever minutes are not zero-padded.
    Goalies with a missing timeOnIce sort last. Raises IndexError when there is no goalie row.
    """
    goalies = side_players.query('is_goalie == True')
    seconds = goalies['timeOnIce'].map(
        lambda toi: _toi_seconds(toi) if pd.notna(toi) else float('nan')
    )
    ranked = goalies.assign(_toi_seconds=seconds).sort_values('_toi_seconds', ascending=False)
    return ranked.iloc[0]['player_name']


def _side_players(boxscore, side, team_name, opponent_name):
    """Stack the per-player frames of one side ('away' or 'home') and tag both team names."""
    frames = [process_player_boxscore(p) for p in boxscore['teams'][side]['players'].values()]
    side_players = pd.concat(frames).reset_index(drop=True)
    side_players['team'] = team_name
    side_players['opposing_team'] = opponent_name
    return side_players


def _build_boxscore_frames(boxscore, game_pk):
    """Build the (team, players) frames for one game from its boxscore JSON, without writing."""
    team = process_team_boxscore(boxscore['teams'])
    team['game_pk'] = game_pk

    away_name = team['away_team'].iloc[0]
    home_name = team['home_team'].iloc[0]
    away_players = _side_players(boxscore, 'away', away_name, home_name)
    home_players = _side_players(boxscore, 'home', home_name, away_name)

    away_players['opposing_goalie'] = _busiest_goalie(home_players)
    home_players['opposing_goalie'] = _busiest_goalie(away_players)

    players = pd.concat([away_players, home_players]).reset_index(drop=True)
    players['game_pk'] = game_pk
    # Rows without a timeOnIce value cannot be parsed; drop them before converting.
    players = players[players['timeOnIce'].notna()].copy()
    players['time_on_ice'] = players['timeOnIce'].map(_toi_seconds)
    players = players[players['time_on_ice'] > MIN_TIME_ON_ICE_SECONDS]
    return team, players.drop(columns=['timeOnIce'])


# Download and store the team and player boxscores of every game in game_pks.
for i, g in enumerate(game_pks['game_pk'].values):
    try:
        response = requests.get('{}/game/{}/boxscore'.format(BASE, g), timeout=30)
        response.raise_for_status()
        s = response.json()

        team, players = _build_boxscore_frames(s, g)

        # Write both tables in one transaction. The query that selects games to process
        # only looks at nhl_team_boxscore, so a team row without its player rows would
        # mark the game as done and the players would never be loaded.
        with engine.begin() as conn:
            team.to_sql('nhl_team_boxscore', conn, index = False, if_exists = 'append')
            players.to_sql('nhl_player_boxscore', conn, index = False, if_exists = 'append')
    except Exception as exc:
        # Best effort: report the game and move on (it stays eligible for the next run).
        # Catching Exception rather than everything lets Ctrl-C still stop the loop.
        print('Skipping game {}: {!r}'.format(g, exc))

    if i % 20 == 0:
        print(i / n_games)

0.0


In [13]:
# Re-fetch one boxscore for manual inspection. The game id is read from the last row of
# game_pks (the value the loop above ends on) instead of from the leftover loop variable
# `g`. This cell rebinds `g` to the JSON, so on a re-run the old form would have built
# the URL from a dict.
sample_game_pk = game_pks['game_pk'].values[-1]
g = requests.get('{}/game/{}/boxscore'.format(BASE, sample_game_pk)).json()

In [14]:
# Display the raw boxscore JSON fetched in the previous cell (exploratory inspection only).
g

{'copyright': 'NHL and the NHL Shield are registered trademarks of the National Hockey League. NHL and NHL team marks are the property of the NHL and its teams. © NHL 2022. All Rights Reserved.',
 'teams': {'away': {'team': {'id': 3,
    'name': 'New York Rangers',
    'link': '/api/v1/teams/3'},
   'teamStats': {'teamSkaterStats': {'goals': 5,
     'pim': 13,
     'shots': 37,
     'powerPlayPercentage': '40.0',
     'powerPlayGoals': 2.0,
     'powerPlayOpportunities': 5.0,
     'faceOffWinPercentage': '45.9',
     'blocked': 26,
     'takeaways': 6,
     'giveaways': 9,
     'hits': 32}},
   'players': {'ID8482073': {'person': {'id': 8482073,
      'fullName': 'Braden Schneider',
      'link': '/api/v1/people/8482073',
      'firstName': 'Braden',
      'lastName': 'Schneider',
      'primaryNumber': '4',
      'birthDate': '2001-09-20',
      'currentAge': 21,
      'birthCity': 'Prince Albert',
      'birthStateProvince': 'SK',
      'birthCountry': 'CAN',
      'nationality': 'CA

### Play by Play

In [15]:
#res = engine.execute("DROP TABLE IF EXISTS nhl_play")
#res.close()

In [16]:
# Store play-by-play rows for each game in game_pks, printing the completed fraction
# on every 20th game.
n_games = len(game_pks)
for i, g in enumerate(game_pks['game_pk'].values):
    d = process_game(g)

    # process_game may return None; only a returned frame is tagged and written.
    if not (d is None):
        d['game_pk'] = g
        d.to_sql(name='nhl_play', con=engine, index=False, if_exists='append')

    if not i % 20:
        print(i / n_games)

0.0
