## Testing out some code to hit the NHL API


Resources

* https://towardsdatascience.com/nhl-analytics-with-python-6390c5d3206d
* https://nhl-api-explorer.herokuapp.com/
* https://gitlab.com/dword4/nhlapi
* https://gitlab.com/dword4/nhlapi/-/blob/master/stats-api.md#game-ids
* https://github.com/dword4/nhlapi

Game IDs
* The first 4 digits identify the season of the game (ie. 2017 for the 2017-2018 season). 
* The next 2 digits give the type of game, where 01 = preseason, 02 = regular season, 03 = playoffs, 04 = all-star. 
* The final 4 digits identify the specific game number. 
* For regular season and preseason games, this ranges from 0001 to the number of games played. (1271 for seasons with 31 teams (2017 and onwards) and 1230 for seasons with 30 teams). 
* For playoff games, the 2nd digit of the specific number gives the round of the playoffs, the 3rd digit specifies the matchup, and the 4th digit specifies the game (out of 7).

In [8]:
# Load dependencies
import requests
import pickle
import pandas as pd
import json

In [9]:
# Set up the API call variables
year = '2021'
season_type = '02' 
max_game_id = 1290
boxscore_url = '/boxscore'
livefeed_url = '/feed/live'
game_url = 'http://statsapi.web.nhl.com/api/v1/game/'

In [10]:
# Test hitting the boxscore and live feed endpoints, save as JSON files
def request_get_data(url, 
                    year, 
                    season_type, 
                    max_game_id, 
                    endpoint,
                    empty_list = [],
                    zfill = 4, 
                    multiple_games = True):

    # If multiple_games param is true, the get data for n number of games (uses for loop)
    if multiple_games == True:    
        for i in range(1, max_game_id, 1):
            game_id = str(year) + str(season_type) + str(i).zfill(zfill)
            game_id_endpoint = str(url) + game_id + str(endpoint)
            r = requests.get(game_id_endpoint)
            data = r.json()
            data['game_id'] = game_id
            empty_list.append(data)

    # If multiple_games param is not true, then get data for the 1 game (no for loop)
    else:
        game_id = str(year) + str(season_type) + str(max_game_id).zfill(zfill)
        game_id_endpoint = str(url) +  game_id + str(endpoint)
        r = requests.get(game_id_endpoint)
        data = r.json()
        data['game_id'] = game_id
        empty_list.append(data)

    # Rename to game_data_json
    game_data_list = empty_list

    return game_data_list

# Function that takes in a list, converts to json, then saves a copy of that json file in the local folder
def save_as_json(list, 
                 file_name = 'data'):

    # Save as json file in same folder the notebook is run (relative)
    with open(f'{file_name}.json', 'w') as f:
        json.dump(list, f)

    # Convert list to json
    j_data = json.dumps(list)
 
    
    return j_data

In [11]:
# For a single game, save json files for both the boxscore and livefeed api
single_game_id = 5

# Boxscore
boxscore_list = request_get_data(game_url, 
                                year, 
                                season_type, 
                                single_game_id, 
                                boxscore_url,
                                multiple_games = True)
boxscore_json = save_as_json(boxscore_list, 'test_data/boxscore')

# Livefeed
livefeed_list = request_get_data(game_url, 
                                year, 
                                season_type, 
                                single_game_id, 
                                livefeed_url,
                                multiple_games = True)
livefeed_json = save_as_json(livefeed_list, 'test_data/livefeed')

In [12]:
# Uncomment the below code to print the game ids used to get each game
'''
max_game_id = 1290

#z_id = str(game_url) + str(year) + str(season_type) + str(max_game_id).zfill(4) + str(boxscore_url)
#for i in range(0, max_game_id, 1):
#    z_id = str(game_url) + str(year) + str(season_type) + str(i).zfill(4) + str(boxscore_url)
#    print(f'game: {i}, game_id: {z_id}')
'''

"\nmax_game_id = 1290\n\n#z_id = str(game_url) + str(year) + str(season_type) + str(max_game_id).zfill(4) + str(boxscore_url)\n#for i in range(0, max_game_id, 1):\n#    z_id = str(game_url) + str(year) + str(season_type) + str(i).zfill(4) + str(boxscore_url)\n#    print(f'game: {i}, game_id: {z_id}')\n"

In [13]:
# Setup json object
json_object = json.loads(boxscore_json)

# Set up iterable variables
games = len(json_object)
#player_id = list(json_object[0]['teams']['home']['players'].keys())
raw = []

# Loop over every game
for game in range(0, (games), 1):
    game_id = json_object[game]["game_id"]

    # Loop over each home player in each team
    for player in list(json_object[game]['teams']['home']['players'].keys()):
        player_data = json_object[game]['teams']['home']['players'][player]['person']
        player_data ["home_team"] = 1
        player_data["pid"] = player
        player_data["game_id"] = game_id
        raw.append(player_data)

    # Loop over each away player in each team
    for player in list(json_object[game]['teams']['away']['players'].keys()):
        player_data = json_object[game]['teams']['away']['players'][player]['person']
        player_data ["home_team"] = 0
        player_data["pid"] = player
        player_data["game_id"] = game_id
        raw.append(player_data)

# Convert to pandas dataframe
df1 = pd.DataFrame.from_dict(raw)
df1 = df1.rename(columns={"id": "pid_num"})


# Flatten currentTeam
df2 = pd.concat([df1.drop(['currentTeam'], axis=1), df1['currentTeam'].apply(pd.Series)], axis=1)
df2 = df2.rename(columns={"id": "team_id", "name": "team_name", "link": "team_link"})

# Flatten primaryPosition
df2 = pd.concat([df2.drop(['primaryPosition'], axis=1), df2['primaryPosition'].apply(pd.Series)], axis=1)
df2 = df2.rename(columns={"code": "pos_code", "name": "pos_name", "type": "pos_type", "abbreviation": "pos_abbr"})

# Copy to player_stats
player_data = df2

# See the data
player_data.head(3)

Unnamed: 0,pid_num,fullName,team_link,firstName,lastName,primaryNumber,birthDate,currentAge,birthCity,birthCountry,...,pid,game_id,birthStateProvince,team_id,team_name,team_link.1,pos_code,pos_name,pos_type,pos_abbr
0,8480172,Jan Rutta,/api/v1/people/8480172,Jan,Rutta,44,1990-07-29,31,Pisek,CZE,...,ID8480172,2021020001,,14,Tampa Bay Lightning,/api/v1/teams/14,D,Defenseman,Defenseman,D
1,8478519,Anthony Cirelli,/api/v1/people/8478519,Anthony,Cirelli,71,1997-07-15,24,Etobicoke,CAN,...,ID8478519,2021020001,ON,14,Tampa Bay Lightning,/api/v1/teams/14,C,Center,Forward,C
2,8478416,Erik Cernak,/api/v1/people/8478416,Erik,Cernak,81,1997-05-28,24,Kosice,SVK,...,ID8478416,2021020001,,14,Tampa Bay Lightning,/api/v1/teams/14,D,Defenseman,Defenseman,D


In [14]:
#df2.groupby('team_name').count()

In [15]:
# Setup json object
json_object = json.loads(boxscore_json)

# Set up iterable variables
games = len(json_object)
#player_id = list(json_object[0]['teams']['home']['players'].keys())
raw = []

# Loop over every game
for game in range(0, (games), 1):
    game_id = json_object[game]["game_id"]

    # Loop over each home player in each team
    for player in list(json_object[game]['teams']['home']['players'].keys()):
        player_stats = json_object[game]['teams']['home']['players'][player]['stats']
        player_stats ["home_team"] = 1
        player_stats["pid"] = player
        player_stats["game_id"] = game_id
        raw.append(player_stats)

    # Loop over each away player in each team
    for player in list(json_object[game]['teams']['away']['players'].keys()):
        player_stats = json_object[game]['teams']['away']['players'][player]['stats']
        player_stats ["home_team"] = 0
        player_stats["pid"] = player
        player_stats["game_id"] = game_id
        raw.append(player_stats)

# Convert to pandas dataframe
df1 = pd.DataFrame.from_dict(raw)
df1.head(3)

# Flatten the pandas dataframe
df1 = pd.json_normalize(raw, sep='_')

# Copy to player_stats
player_stats = df1

# Show first few rows
player_stats.head(3)

Unnamed: 0,home_team,pid,game_id,skaterStats_timeOnIce,skaterStats_assists,skaterStats_goals,skaterStats_shots,skaterStats_hits,skaterStats_powerPlayGoals,skaterStats_powerPlayAssists,...,goalieStats_shortHandedSaves,goalieStats_evenSaves,goalieStats_shortHandedShotsAgainst,goalieStats_evenShotsAgainst,goalieStats_powerPlayShotsAgainst,goalieStats_decision,goalieStats_savePercentage,goalieStats_evenStrengthSavePercentage,goalieStats_powerPlaySavePercentage,goalieStats_shortHandedSavePercentage
0,1,ID8480172,2021020001,15:55,0.0,0.0,3.0,0.0,0.0,0.0,...,,,,,,,,,,
1,1,ID8478519,2021020001,17:38,1.0,1.0,2.0,3.0,0.0,0.0,...,,,,,,,,,,
2,1,ID8478416,2021020001,20:16,0.0,0.0,4.0,2.0,0.0,0.0,...,,,,,,,,,,


In [16]:
player_boxscore =  pd.merge(left = player_stats, 
                            right = player_data, 
                            how = 'left',
                            left_on = ['pid', 'game_id', 'home_team'],
                            right_on = ['pid', 'game_id', 'home_team'])

player_boxscore.head(3)

Unnamed: 0,home_team,pid,game_id,skaterStats_timeOnIce,skaterStats_assists,skaterStats_goals,skaterStats_shots,skaterStats_hits,skaterStats_powerPlayGoals,skaterStats_powerPlayAssists,...,shootsCatches,rosterStatus,birthStateProvince,team_id,team_name,team_link,pos_code,pos_name,pos_type,pos_abbr
0,1,ID8480172,2021020001,15:55,0.0,0.0,3.0,0.0,0.0,0.0,...,R,Y,,14,Tampa Bay Lightning,/api/v1/teams/14,D,Defenseman,Defenseman,D
1,1,ID8478519,2021020001,17:38,1.0,1.0,2.0,3.0,0.0,0.0,...,L,Y,ON,14,Tampa Bay Lightning,/api/v1/teams/14,C,Center,Forward,C
2,1,ID8478416,2021020001,20:16,0.0,0.0,4.0,2.0,0.0,0.0,...,R,Y,,14,Tampa Bay Lightning,/api/v1/teams/14,D,Defenseman,Defenseman,D


In [17]:
player_boxscore.columns

Index(['home_team', 'pid', 'game_id', 'skaterStats_timeOnIce',
       'skaterStats_assists', 'skaterStats_goals', 'skaterStats_shots',
       'skaterStats_hits', 'skaterStats_powerPlayGoals',
       'skaterStats_powerPlayAssists', 'skaterStats_penaltyMinutes',
       'skaterStats_faceOffWins', 'skaterStats_faceoffTaken',
       'skaterStats_takeaways', 'skaterStats_giveaways',
       'skaterStats_shortHandedGoals', 'skaterStats_shortHandedAssists',
       'skaterStats_blocked', 'skaterStats_plusMinus',
       'skaterStats_evenTimeOnIce', 'skaterStats_powerPlayTimeOnIce',
       'skaterStats_shortHandedTimeOnIce', 'skaterStats_faceOffPct',
       'goalieStats_timeOnIce', 'goalieStats_assists', 'goalieStats_goals',
       'goalieStats_pim', 'goalieStats_shots', 'goalieStats_saves',
       'goalieStats_powerPlaySaves', 'goalieStats_shortHandedSaves',
       'goalieStats_evenSaves', 'goalieStats_shortHandedShotsAgainst',
       'goalieStats_evenShotsAgainst', 'goalieStats_powerPlayShotsAga

In [18]:
mini_player_boxscore = player_boxscore[['pid', 'game_id','fullName', 'pos_type', 'pos_name', 'rookie', 'home_team', 'team_id',
       'team_name', 'skaterStats_timeOnIce',
       'skaterStats_assists', 'skaterStats_goals', 'skaterStats_shots',
       'skaterStats_hits', 'skaterStats_powerPlayGoals',
       'skaterStats_powerPlayAssists', 'skaterStats_penaltyMinutes',
       'skaterStats_faceOffWins', 'skaterStats_faceoffTaken',
       'skaterStats_takeaways', 'skaterStats_giveaways',
       'skaterStats_shortHandedGoals', 'skaterStats_shortHandedAssists',
       'skaterStats_blocked', 'skaterStats_plusMinus',
       'skaterStats_evenTimeOnIce', 'skaterStats_powerPlayTimeOnIce',
       'skaterStats_shortHandedTimeOnIce', 'skaterStats_faceOffPct']]

mini_player_boxscore.head(5)

Unnamed: 0,pid,game_id,fullName,pos_type,pos_name,rookie,home_team,team_id,team_name,skaterStats_timeOnIce,...,skaterStats_takeaways,skaterStats_giveaways,skaterStats_shortHandedGoals,skaterStats_shortHandedAssists,skaterStats_blocked,skaterStats_plusMinus,skaterStats_evenTimeOnIce,skaterStats_powerPlayTimeOnIce,skaterStats_shortHandedTimeOnIce,skaterStats_faceOffPct
0,ID8480172,2021020001,Jan Rutta,Defenseman,Defenseman,False,1,14,Tampa Bay Lightning,15:55,...,0.0,0.0,0.0,0.0,1.0,0.0,15:55,0:00,0:00,
1,ID8478519,2021020001,Anthony Cirelli,Forward,Center,False,1,14,Tampa Bay Lightning,17:38,...,1.0,0.0,0.0,0.0,0.0,0.0,16:06,0:27,1:05,66.67
2,ID8478416,2021020001,Erik Cernak,Defenseman,Defenseman,False,1,14,Tampa Bay Lightning,20:16,...,0.0,0.0,0.0,0.0,0.0,-1.0,19:37,0:00,0:39,
3,ID8474567,2021020001,Zach Bogosian,Defenseman,Defenseman,False,1,14,Tampa Bay Lightning,14:14,...,0.0,1.0,0.0,0.0,0.0,-2.0,14:14,0:00,0:00,
4,ID8470621,2021020001,Corey Perry,Forward,Right Wing,False,1,14,Tampa Bay Lightning,12:40,...,0.0,0.0,0.0,0.0,1.0,-2.0,12:13,0:27,0:00,
