In [2]:
import os
import sys
import getopt
import json
import requests
import math
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [4]:
def get_shots_info(game_id):
    game = requests.get("https://statsapi.web.nhl.com/api/v1/game/"+str(game_id)+"/feed/live")
    game = game.json()
    playTypes = ["Blocked Shot","Shot","Goal","Missed Shot"]
    all_shots = []
    for play in game['liveData']['plays']['allPlays']:
        if play['result']['event'] in playTypes:
            all_shots.append(play)
    away_team_id = game['gameData']['teams']['away']['id']
    home_team_id = game['gameData']['teams']['home']['id']
    shots_df = pd.DataFrame(columns=["gameID","teamID","opponentID","playerName","playerID","result","Xcoord","Ycoord","period","time","shooterTeamGoals","opponentGoals", "goalie"])
    for shot in all_shots:
        # print(json.dumps(shot,indent=2))
        # print(num)
        dict_to_append = { }
        dict_to_append['gameID'] = game_id
        for player in shot['players']:
            if (player['playerType']=='Shooter' or player['playerType']=='Scorer'):
                dict_to_append['playerName'] = player['player']['fullName']
                dict_to_append['playerID'] = player['player']['id']
            if (player['playerType']=='Goalie'):
                dict_to_append['goalie'] = player['player']['fullName']
        if ('event' in shot['result']):
            dict_to_append['result'] = shot['result']['event']
        if ('x' in shot['coordinates'] and 'y' in shot['coordinates']):
            dict_to_append['Xcoord'] = shot['coordinates']['x']
            dict_to_append['Ycoord'] = shot['coordinates']['y']
        if ('period' in shot['about']):
            dict_to_append['period'] = shot['about']['period']
        if ('periodTime' in shot['about']):
            dict_to_append['time'] = shot['about']['periodTime']
        if ('goals' in shot['about'] and 'id' in shot['team'] and shot['team']['id'] == home_team_id):
            dict_to_append['shooterTeamGoals'] = shot['about']['goals']['home']
            dict_to_append['opponentGoals'] = shot['about']['goals']['away']
            dict_to_append['opponentID'] = away_team_id
            dict_to_append['teamID'] = home_team_id
        elif ('goals' in shot['about']):
            dict_to_append['shooterTeamGoals'] = shot['about']['goals']['away']
            dict_to_append['opponentGoals'] = shot['about']['goals']['home']
            dict_to_append['opponentID'] = home_team_id
            dict_to_append['teamID'] = away_team_id
        # print(pd.DataFrame(dict_to_append, columns=dict_to_append.keys(),index=[0]))
        shots_df = shots_df.append(dict_to_append, ignore_index=True)
        # print(shots_df)
    return shots_df

In [5]:
def main():
    teams = requests.get("https://statsapi.web.nhl.com/api/v1/teams")
    teams = teams.json()
    teams_by_id = {}
    for team in teams['teams']:
        teams_by_id[team['id']] = team['name']
    schedule = requests.get("https://statsapi.web.nhl.com/api/v1/schedule?startDate=2018-10-03&endDate=2019-04-06")
    schedule = schedule.json()
    shots_df = pd.DataFrame(columns=["gameID","teamID","opponentID","playerName","playerID","result","Xcoord","Ycoord","period","time","shooterTeamGoals","opponentGoals", "goalie"])
    for date in schedule['dates']:
        for game in date['games']:
            game_id = game['gamePk']
            shots_df = shots_df.append(get_shots_info(game_id), ignore_index=True)
    return shots_df


In [6]:
shots = main()
shots.head

<bound method NDFrame.head of             gameID teamID opponentID          playerName playerID  \
0       2018020001      8         10    Artturi Lehkonen  8477476   
1       2018020001     10          8       Morgan Rielly  8476853   
2       2018020001      8         10   Brendan Gallagher  8475848   
3       2018020001     10          8  Jesperi Kotkaniemi  8480829   
4       2018020001     10          8          Jeff Petry  8473507   
...            ...    ...        ...                 ...      ...   
147196  2018021271     21         28    Nathan MacKinnon  8477492   
147197  2018021271     21         28   Gabriel Landeskog  8476455   
147198  2018021271     21         28   Gabriel Landeskog  8476455   
147199  2018021271     21         28      Carl Soderberg  8471262   
147200  2018021271     28         21       Micheal Haley  8474230   

              result  Xcoord  Ycoord period   time shooterTeamGoals  \
0               Shot    78.0   -19.0      1  00:29                0   

Unnamed: 0_level_0,Unnamed: 1_level_0,Player
Team,Season,Unnamed: 2_level_1
COL,16-17,A.J. Greer
COL,17-18,A.J. Greer
COL,18-19,A.J. Greer
FLA,14-15,Aaron Ekblad
FLA,15-16,Aaron Ekblad
...,...,...
BUF,14-15,Zemgus Girgensons
BUF,15-16,Zemgus Girgensons
BUF,16-17,Zemgus Girgensons
BUF,17-18,Zemgus Girgensons


Unnamed: 0_level_0,Unnamed: 1_level_0,xGA/60
Team,Season,Unnamed: 2_level_1
ANA,14-15,2.20
ARI,14-15,2.44
BOS,14-15,1.99
BUF,14-15,2.63
CAR,14-15,2.35
...,...,...
TOR,18-19,2.63
VAN,18-19,2.56
VGK,18-19,2.28
WPG,18-19,2.55


In [59]:
# get the players sorted by teams for last 5 seasons up to & including season
# lets store these in a dataframe with index1 as year, index2 as team
# should i replace ixG with team xG?
def get_skater_stats():
    pl_b_t_s = pd.read_csv('20142019.csv',usecols=['Player', 'Season', 'Team', 'GP', 'TOI', 'ixG', 'GIVE', 'TAKE'],index_col=['Team', 'Season'])
    pl_b_t_s['TOIpG'] = pl_b_t_s['TOI']/pl_b_t_s['GP'] # time on ice per game
    pl_b_t_s['ixGpG'] = pl_b_t_s['ixG']/pl_b_t_s['GP'] # individual expected goals per game (should be minute?)
    pl_b_t_s['GIVEpG'] = pl_b_t_s['GIVE']/pl_b_t_s['GP'] # penalties given per game (should be minute?)
    pl_b_t_s['TAKEpG'] = pl_b_t_s['TAKE']/pl_b_t_s['GP'] # penalties taken per game (should be minute?)
    pl_b_t_s.drop(['TOI','GP','GIVE','TAKE','Player','ixG'], axis=1, inplace=True)
    return pl_b_t_s

In [60]:
def get_goalie_stats():
    gl_stats = pd.read_csv('goalies_20142019.csv',usecols=['Player', 'Season', 'Team', 'TOI', 'xGA'], index_col=['Team', 'Season'])
    gl_stats['xGApMin'] = gl_stats['xGA']/gl_stats['TOI']
    return gl_stats

In [63]:
def get_team_stats():
    skater_stats = get_skater_stats()
    team_stats = skater_stats.groupby(level=['Team','Season']).sum()
    return team_stats

In [64]:
# get total xGF (expected goals for) for players by team, weighted by avg TOI, 
# for up to & including the season in the parameter
#def get_xGF(season):
#    if(season=='2019'):
#        pl_b_t_s = pd.read_csv('GAR20142019.csv',usecols=['Player', 'Season', 'Team', 'GP', 'TOI', ])
#    xGAR_team

In [65]:
get_team_stats()

Unnamed: 0_level_0,Unnamed: 1_level_0,TOIpG,ixGpG,GIVEpG,TAKEpG
Team,Season,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ANA,14-15,447.802023,3.098800,13.137067,8.125733
ANA,15-16,364.522430,2.914753,11.621591,6.201971
ANA,16-17,353.145437,2.822349,9.964968,5.507809
ANA,17-18,385.938425,2.934956,13.468714,7.652497
ANA,18-19,520.704017,3.874048,17.206454,10.838892
...,...,...,...,...,...
WSH,14-15,305.888736,2.279482,8.033977,7.392817
WSH,15-16,334.090355,2.467307,9.113687,7.553425
WSH,16-17,304.368227,2.341737,10.799457,8.714015
WSH,17-18,329.603939,2.278966,11.501763,7.067806


In [58]:
get_skater_stats()

Unnamed: 0_level_0,Unnamed: 1_level_0,TOIpG,ixGpMin,GIVEpMin,TAKEpMin
Team,Season,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
COL,16-17,12.760000,0.011129,0.031348,0.015674
COL,17-18,7.192353,0.005398,0.016357,0.024536
COL,18-19,6.253333,0.013433,0.010661,0.042644
FLA,14-15,18.342346,0.003069,0.025577,0.014134
FLA,15-16,17.667949,0.004586,0.026849,0.014513
...,...,...,...,...,...
BUF,14-15,14.658197,0.010233,0.013421,0.017894
BUF,15-16,12.866197,0.008057,0.018610,0.014231
BUF,16-17,11.895067,0.010413,0.017935,0.016814
BUF,17-18,11.820704,0.009318,0.013107,0.013107


In [None]:
# do regression to determine weights to give each of the last 5 seasons of PIM
# split into: players in 2nd season, players in 3rd ssn, players in 4th ssn, 
# under 30 w/ 5+ seasons, 30-33 w/ 5+ seasons, 34+ w/5+season
def PIM_weights():

In [None]:
# get penalty minutes per season for players 
# ***do a regression of last 5 seasons to determine weights**** 
# for passed in data frame
def get_PIM(df):

In [None]:
# do regression to determine weights to give each of the last 5 seasons of PIM
# split into: players in 2nd season, players in 3rd ssn, players in 4th ssn, 
# under 30 w/ 5+ seasons, 30-33 w/ 5+ seasons, 34+ w/5+season
def xGA_weights():

In [None]:
# get expected goals against for goalies for the passed in dataframe
# use regression to determine weights
def get_xGA(df):

In [None]:
# run model - target is num goals, 
# parameters are home xG, away xG, home goalie xGA, away goalie xGA, home penalty minutes, away penalty minutes, home rest, away rest
# how can I take skater defense into account?
def regress():

In [None]:
# test model on 2019-2020 season data
def test():