In [1]:
import os
import sys
import getopt
import json
import requests
import math
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from sklearn.linear_model import LinearRegression

In [None]:
def get_shots_info(game_id):
    """Fetch one game's live feed and tabulate its shot attempts.

    Every play whose result event is "Blocked Shot", "Shot", "Goal" or
    "Missed Shot" becomes one row of the returned frame.

    Args:
        game_id: Game identifier inserted into the live-feed URL.

    Returns:
        pandas.DataFrame with the columns gameID, teamID, opponentID,
        playerName, playerID, result, Xcoord, Ycoord, period, time,
        shooterTeamGoals, opponentGoals and goalie. Fields that a play does
        not provide are left missing (NaN).

    Raises:
        requests.HTTPError: if the feed request returns an error status.
        KeyError: if the payload lacks the gameData/liveData structure.
    """
    columns = ["gameID", "teamID", "opponentID", "playerName", "playerID",
               "result", "Xcoord", "Ycoord", "period", "time",
               "shooterTeamGoals", "opponentGoals", "goalie"]
    shot_events = {"Blocked Shot", "Shot", "Goal", "Missed Shot"}

    # A timeout keeps a stalled connection from blocking the notebook forever.
    response = requests.get(
        "https://statsapi.web.nhl.com/api/v1/game/" + str(game_id) + "/feed/live",
        timeout=30,
    )
    response.raise_for_status()
    game = response.json()

    away_team_id = game['gameData']['teams']['away']['id']
    home_team_id = game['gameData']['teams']['home']['id']

    # Collect plain dicts and build the frame once; appending to a DataFrame
    # row by row copies the whole frame on every iteration.
    records = []
    for play in game['liveData']['plays']['allPlays']:
        event = play['result']['event']
        if event not in shot_events:
            continue

        record = {'gameID': game_id, 'result': event}

        for player in play.get('players', []):
            player_type = player['playerType']
            if player_type in ('Shooter', 'Scorer'):
                record['playerName'] = player['player']['fullName']
                record['playerID'] = player['player']['id']
            if player_type == 'Goalie':
                record['goalie'] = player['player']['fullName']

        coordinates = play.get('coordinates', {})
        if 'x' in coordinates and 'y' in coordinates:
            record['Xcoord'] = coordinates['x']
            record['Ycoord'] = coordinates['y']

        about = play.get('about', {})
        if 'period' in about:
            record['period'] = about['period']
        if 'periodTime' in about:
            record['time'] = about['periodTime']

        goals = about.get('goals')
        if goals is not None:
            # Any play not credited to the home team is treated as the away team's.
            # NOTE(review): for "Blocked Shot" events the feed's `team` may be the
            # blocking team rather than the shooter's team - confirm against the API.
            if play.get('team', {}).get('id') == home_team_id:
                own_side, other_side = 'home', 'away'
                own_id, other_id = home_team_id, away_team_id
            else:
                own_side, other_side = 'away', 'home'
                own_id, other_id = away_team_id, home_team_id
            record['shooterTeamGoals'] = goals[own_side]
            record['opponentGoals'] = goals[other_side]
            record['opponentID'] = other_id
            record['teamID'] = own_id

        records.append(record)

    return pd.DataFrame(records, columns=columns)

In [None]:
def main(start_date="2018-10-03", end_date="2019-04-06"):
    """Collect the shot attempts of every game scheduled in a date window.

    Args:
        start_date: First schedule date to query, formatted YYYY-MM-DD.
        end_date: Last schedule date to query, formatted YYYY-MM-DD.

    Returns:
        pandas.DataFrame holding the rows returned by get_shots_info for each
        scheduled game, concatenated in schedule order with a fresh 0..n-1
        index. An empty frame with the shot columns is returned when no game
        yields any rows.

    Raises:
        requests.HTTPError: if the schedule request returns an error status.
    """
    columns = ["gameID", "teamID", "opponentID", "playerName", "playerID",
               "result", "Xcoord", "Ycoord", "period", "time",
               "shooterTeamGoals", "opponentGoals", "goalie"]

    response = requests.get(
        "https://statsapi.web.nhl.com/api/v1/schedule?startDate={}&endDate={}".format(
            start_date, end_date),
        timeout=30,
    )
    response.raise_for_status()
    schedule = response.json()

    # Gather the per-game frames and concatenate once; appending inside the
    # loop would copy the accumulated frame for every game.
    frames = []
    for date in schedule['dates']:
        for game in date['games']:
            game_shots = get_shots_info(game['gamePk'])
            if not game_shots.empty:
                frames.append(game_shots)

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)


In [None]:
# Download the shot attempts for the schedule window queried by main()
# (one live-feed request per game, so this cell is slow).
shots = main()
# head must be called: a bare `shots.head` only displays the bound method.
shots.head()

In [40]:
# NOTE(review): get_boxscores is defined with a required `season` argument, so this
# call raises TypeError as written. The saved output below is a raw box-score dict,
# presumably from an earlier version of the function - confirm and pass a season key.
get_boxscores()

{'copyright': 'NHL and the NHL Shield are registered trademarks of the National Hockey League. NHL and NHL team marks are the property of the NHL and its teams. © NHL 2020. All Rights Reserved.',
 'teams': {'away': {'team': {'id': 14,
    'name': 'Tampa Bay Lightning',
    'link': '/api/v1/teams/14'},
   'teamStats': {'teamSkaterStats': {'goals': 5,
     'pim': 10,
     'shots': 26,
     'powerPlayPercentage': '66.7',
     'powerPlayGoals': 2.0,
     'powerPlayOpportunities': 3.0,
     'faceOffWinPercentage': '50.8',
     'blocked': 7,
     'takeaways': 8,
     'giveaways': 8,
     'hits': 19}},
   'players': {'ID8471958': {'person': {'id': 8471958,
      'fullName': 'Dan Girardi',
      'link': '/api/v1/people/8471958',
      'firstName': 'Dan',
      'lastName': 'Girardi',
      'primaryNumber': '5',
      'birthDate': '1984-04-29',
      'currentAge': 35,
      'birthCity': 'Welland',
      'birthStateProvince': 'ON',
      'birthCountry': 'CAN',
      'nationality': 'CAN',
      'h

In [58]:
# Fetch the box scores for the '20142015' entry of season_dates
# (one HTTP request per scheduled game, so this is slow).
get_boxscores('20142015')

Unnamed: 0,game_id,playerName,playerID,playerTeam,TOI,team_goals
0,2014020026,Mark Giordano,8470966,away,21:13,1
1,2014020026,Deryk Engelland,8468674,away,14:31,1
2,2014020026,Kris Russell,8471729,away,19:57,1
3,2014020026,Mason Raymond,8471664,away,17:02,1
4,2014020026,Paul Byron,8474038,away,16:38,1
5,2014020026,Ladislav Smid,8471222,away,20:19,1
6,2014020026,Lance Bouma,8474642,away,13:56,1
7,2014020026,Joe Colborne,8474577,away,16:06,1
8,2014020026,Brandon Bollig,8475650,away,11:09,1
9,2014020026,TJ Brodie,8474673,away,22:20,1


In [2]:
# get all box scores for season passed in (string key in dict season_dates)
def get_boxscores(season):
    """Download the box score of every game in a season and tabulate skater rows.

    Args:
        season: Key of the module-level ``season_dates`` dict (e.g. '20142015')
            whose 'start' and 'end' dates bound the schedule query.

    Returns:
        pandas.DataFrame with the columns game_id, playerName, playerID,
        playerTeam, TOI and team_goals, holding the rows get_game produces for
        each scheduled game, in schedule order with a fresh 0..n-1 index.

    Raises:
        KeyError: if ``season`` is not a key of ``season_dates``.
        requests.HTTPError: if a schedule or box-score request returns an
            error status.
    """
    columns = ['game_id', 'playerName', 'playerID', 'playerTeam', 'TOI', 'team_goals']
    window = season_dates[season]

    # A timeout keeps one stalled connection from hanging the whole download.
    response = requests.get(
        "https://statsapi.web.nhl.com/api/v1/schedule?startDate={}&endDate={}".format(
            window['start'], window['end']),
        timeout=30,
    )
    response.raise_for_status()
    schedule = response.json()

    frames = []
    for date in tqdm(schedule['dates']):
        for game in date['games']:
            game_id = game['gamePk']
            box_response = requests.get(
                "https://statsapi.web.nhl.com/api/v1/game/{0}/boxscore".format(game_id),
                timeout=30,
            )
            box_response.raise_for_status()
            box = box_response.json()
            # get_game returns its input frame plus the new rows. Hand it a
            # fresh empty frame so each game's rows are collected exactly once;
            # appending its result to the running total would duplicate every
            # earlier row on each game.
            game_rows = get_game(pd.DataFrame(columns=columns), box, game_id)
            if not game_rows.empty:
                frames.append(game_rows)

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)

In [9]:
# get player TOI, team goals for a single game
def get_game(df, box, game_id):
    """Append one row per skater of a box score to ``df``.

    Goalies (position code 'G') and players without skater stats are skipped.

    Args:
        df: DataFrame to extend; it is not modified in place.
        box: Box-score payload, a dict with a 'teams' mapping whose values
            carry 'teamStats' and 'players'.
        game_id: Identifier stored in the 'game_id' column of every new row.

    Returns:
        A DataFrame containing the rows of ``df`` followed by the new rows
        (game_id, playerTeam, team_goals, playerID, playerName, TOI), with a
        fresh 0..n-1 index. ``df`` itself is returned when the box score
        yields no skater rows.
    """
    # Build all rows first and combine once, instead of copying the frame
    # for every player.
    records = []
    for side, team_box in box['teams'].items():
        team_goals = team_box['teamStats']['teamSkaterStats']['goals']
        for player in team_box['players'].values():
            skater_stats = player['stats'].get('skaterStats')
            # Covers players with empty stats as well as any non-goalie entry
            # that carries stats but no skater section.
            if player['position']['code'] == 'G' or not skater_stats:
                continue
            records.append({
                'game_id': game_id,
                'playerTeam': side,
                'team_goals': team_goals,
                'playerID': player['person']['id'],
                'playerName': player['person']['fullName'],
                'TOI': skater_stats['timeOnIce'],
            })

    if not records:
        return df

    new_rows = pd.DataFrame(records)
    if df.empty:
        # Keep the caller's column layout without concatenating an empty frame.
        ordered = list(df.columns) + [c for c in new_rows.columns if c not in df.columns]
        return new_rows.reindex(columns=ordered)
    return pd.concat([df, new_rows], ignore_index=True)

In [4]:
# Start/end dates used for the schedule query of each season, keyed by the
# season string (e.g. '20142015').
season_dates = {
    season_key: {'start': first_day, 'end': last_day}
    for season_key, first_day, last_day in (
        ('20142015', '2014-10-11', '2015-04-11'),
        ('20152016', '2015-10-07', '2016-04-10'),
        ('20162017', '2016-10-12', '2017-04-09'),
        ('20172018', '2017-10-04', '2018-04-08'),
        ('20182019', '2018-10-03', '2019-04-06'),
    )
}

In [5]:
# Skater rows for the seasons stored in 20142019.csv, indexed by team and season.
# Open question from the original notes: should ixG be replaced with team xG?
def get_skater_stats():
    """Load skater totals from '20142019.csv' and convert them to per-game rates.

    Returns:
        pandas.DataFrame indexed by (Team, Season) with the columns TOIpG,
        ixGpG, GIVEpG and TAKEpG, each being the matching CSV column divided
        by GP. The raw columns, including Player, are dropped.
    """
    wanted = ['Player', 'Season', 'Team', 'GP', 'TOI', 'ixG', 'GIVE', 'TAKE']
    raw = pd.read_csv('20142019.csv', usecols=wanted, index_col=['Team', 'Season'])
    games_played = raw['GP']
    # The original notes ask whether these should be per minute rather than per game.
    # NOTE(review): the original comments describe GIVE/TAKE as penalties given/taken;
    # the column names may instead mean giveaways/takeaways - confirm against the data source.
    per_game = raw.assign(
        TOIpG=raw['TOI'] / games_played,    # time on ice per game
        ixGpG=raw['ixG'] / games_played,    # individual expected goals per game
        GIVEpG=raw['GIVE'] / games_played,
        TAKEpG=raw['TAKE'] / games_played,
    )
    return per_game.drop(columns=['TOI', 'GP', 'GIVE', 'TAKE', 'Player', 'ixG'])

In [6]:
def get_goalie_stats():
    """Load goalie rows from 'goalies_20142019.csv' and add an xGA rate column.

    Returns:
        pandas.DataFrame indexed by (Team, Season) with the columns Player,
        TOI and xGA from the file plus xGApMin = xGA / TOI
        (presumably expected goals against per minute - confirm the TOI unit).
    """
    goalies = pd.read_csv(
        'goalies_20142019.csv',
        usecols=['Player', 'Season', 'Team', 'TOI', 'xGA'],
        index_col=['Team', 'Season'],
    )
    return goalies.assign(xGApMin=goalies['xGA'] / goalies['TOI'])

In [7]:
def get_team_stats():
    """Sum the skater per-game rates of get_skater_stats within each team and season.

    Returns:
        pandas.DataFrame indexed by (Team, Season) holding the column-wise sum
        of all skater rows that share that index pair.
    """
    return get_skater_stats().groupby(level=['Team', 'Season']).sum()

In [None]:
# get total xGF (expected goals for) for players by team, weighted by avg TOI, 
# for up to & including the season in the parameter
#def get_xGF(season):
#    if(season=='2019'):
#        pl_b_t_s = pd.read_csv('GAR20142019.csv',usecols=['Player', 'Season', 'Team', 'GP', 'TOI', ])
#    xGAR_team

In [None]:
# Display the skater per-game rates summed per (Team, Season).
get_team_stats()

In [None]:
# Display the skater per-game rates indexed by (Team, Season).
get_skater_stats()

In [None]:
def PIM_weights():
    """Placeholder: regress to find the weight of each of the last 5 seasons of PIM.

    Planned player groups, from the original notes: players in their 2nd
    season, 3rd season, 4th season, under 30 with 5+ seasons, aged 30-33 with
    5+ seasons, and 34+ with 5+ seasons.

    Raises:
        NotImplementedError: always; the regression has not been written yet.
    """
    # A def without a body is a SyntaxError, so the stub fails loudly instead.
    raise NotImplementedError("PIM_weights is not implemented yet")

In [None]:
def get_PIM(df):
    """Placeholder: get penalty minutes per season for the players in ``df``.

    The original notes plan to weight the last 5 seasons using weights found
    by regression.

    Args:
        df: Data frame of players to compute penalty minutes for.

    Raises:
        NotImplementedError: always; the computation has not been written yet.
    """
    # A def without a body is a SyntaxError, so the stub fails loudly instead.
    raise NotImplementedError("get_PIM is not implemented yet")

In [None]:
def xGA_weights():
    """Placeholder: regress to find the weight of each of the last 5 seasons of xGA.

    NOTE(review): the original comment said "PIM" and looks copied from
    PIM_weights; xGA (expected goals against) is assumed here - confirm.

    Planned player groups, from the original notes: players in their 2nd
    season, 3rd season, 4th season, under 30 with 5+ seasons, aged 30-33 with
    5+ seasons, and 34+ with 5+ seasons.

    Raises:
        NotImplementedError: always; the regression has not been written yet.
    """
    # A def without a body is a SyntaxError, so the stub fails loudly instead.
    raise NotImplementedError("xGA_weights is not implemented yet")

In [None]:
def get_xGA(df):
    """Placeholder: get expected goals against for the goalies in ``df``.

    The original notes plan to determine the weights by regression.

    Args:
        df: Data frame of goalies to compute expected goals against for.

    Raises:
        NotImplementedError: always; the computation has not been written yet.
    """
    # A def without a body is a SyntaxError, so the stub fails loudly instead.
    raise NotImplementedError("get_xGA is not implemented yet")

In [None]:
def get_game_results():
    """Placeholder: get the total number of goals per player.

    The original notes also list minutes played per player, marked as an
    open question.

    Raises:
        NotImplementedError: always; the computation has not been written yet.
    """
    # A def without a body is a SyntaxError, so the stub fails loudly instead.
    raise NotImplementedError("get_game_results is not implemented yet")

In [None]:
def regress(df):
    """Unfinished model fit: currently only selects the target column.

    Planned design, from the original notes: the target is the number of
    goals; the parameters are home xG, away xG, home goalie xGA, away goalie
    xGA, home penalty minutes, away penalty minutes, home rest and away rest.
    Open question: how to take skater defense into account.

    Args:
        df: Object indexable by 'goals' (a DataFrame with a 'goals' column).

    Returns:
        None; no model is fitted yet.
    """
    goal_target = df['goals']  # looked up but not used yet
    return None

In [None]:
def basic(seasons=('20142015', '20152016', '20162017', '20172018', '20182019')):
    """Download and stack the box-score rows of several seasons.

    Args:
        seasons: Season keys passed one by one to get_boxscores; defaults to
            the five seasons from 2014-15 through 2018-19.

    Returns:
        pandas.DataFrame with the get_boxscores rows of every requested
        season, in the given order with a fresh 0..n-1 index. If no season
        yields rows, the first (empty) season frame is returned, or an empty
        DataFrame when ``seasons`` itself is empty.
    """
    season_frames = [get_boxscores(season) for season in seasons]
    # Concatenate once; skipping empty frames keeps them from affecting dtypes.
    non_empty = [frame for frame in season_frames if not frame.empty]
    if not non_empty:
        return season_frames[0] if season_frames else pd.DataFrame()
    return pd.concat(non_empty, ignore_index=True)

# NOTE(review): this downloads every box score of all five seasons (one request
# per game, very slow) and the result is only displayed, not bound to a name -
# assign it to a variable if it is needed later.
basic()



  0%|          | 0/176 [00:00<?, ?it/s][A[A

  1%|          | 1/176 [00:26<1:16:15, 26.14s/it][A[A

  1%|          | 2/176 [01:12<1:33:34, 32.27s/it][A[A

  2%|▏         | 3/176 [4:13:11<219:03:04, 4558.29s/it][A[A

In [None]:
# test model on 2019-2020 season data
def test():