In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pprint import pprint
from datetime import datetime, timedelta, timezone
import pytz
import scipy.stats
from dateutil.relativedelta import relativedelta

import warnings

# Suppress the specific warnings
warnings.filterwarnings("ignore")

today = datetime.utcnow()

from api_keys import espn_link

last_scrape = '2023-12-04'

# Get current standings

In [2]:
teamURL = 'https://api-web.nhle.com/v1/standings/now'

teams = requests.get(teamURL).json()
teams = teams['standings']

team_names = pd.DataFrame()

for team in teams:
    abbreviation = team['teamAbbrev']['default']
    name = team['teamName']['default']
    logoURL = team['teamLogo']
    gamesPlayed = team['gamesPlayed']
    gamesRemaining = 82 - gamesPlayed
    goalsFor = team['goalFor']
    goalsAgainst = team['goalAgainst']
    conference = team['conferenceName']
    division = team['divisionName']

    temp = pd.DataFrame({'abbreviation': [abbreviation], 'name': [name], 'gamesPlayed': [gamesPlayed],
                         'gamesRemaining': [gamesRemaining], 'goalsFor': [goalsFor], 'goalsAgainst': [goalsAgainst], 'logo': [logoURL],
                        'conference': [conference], 'division': [division]})

    team_names = pd.concat([team_names, temp], ignore_index=True)

    
teamList = team_names['abbreviation'].tolist()
team_names

team_names.to_csv('data/team_names.csv', index=False)
team_names

Unnamed: 0,abbreviation,name,gamesPlayed,gamesRemaining,goalsFor,goalsAgainst,logo,conference,division
0,VGK,Vegas Golden Knights,27,55,89,64,https://assets.nhle.com/logos/nhl/svg/VGK_ligh...,Western,Pacific
1,NYR,New York Rangers,24,58,82,66,https://assets.nhle.com/logos/nhl/svg/NYR_ligh...,Eastern,Metropolitan
2,BOS,Boston Bruins,24,58,81,59,https://assets.nhle.com/logos/nhl/svg/BOS_ligh...,Eastern,Atlantic
3,COL,Colorado Avalanche,25,57,90,73,https://assets.nhle.com/logos/nhl/svg/COL_ligh...,Western,Central
4,LAK,Los Angeles Kings,22,60,85,53,https://assets.nhle.com/logos/nhl/svg/LAK_ligh...,Western,Pacific
5,VAN,Vancouver Canucks,26,56,101,70,https://assets.nhle.com/logos/nhl/svg/VAN_ligh...,Western,Pacific
6,FLA,Florida Panthers,25,57,77,66,https://assets.nhle.com/logos/nhl/svg/FLA_ligh...,Eastern,Atlantic
7,DET,Detroit Red Wings,24,58,91,71,https://assets.nhle.com/logos/nhl/svg/DET_ligh...,Eastern,Atlantic
8,DAL,Dallas Stars,24,58,83,71,https://assets.nhle.com/logos/nhl/svg/DAL_ligh...,Western,Central
9,WPG,Winnipeg Jets,24,58,77,66,https://assets.nhle.com/logos/nhl/svg/WPG_ligh...,Western,Central


# Get schedule

This includes the winning goalie

In [3]:
baseURL = 'https://api-web.nhle.com/v1/club-schedule-season/'
season = '/20232024'

completeSked = pd.DataFrame()
homeOnlySked = pd.DataFrame()

date_format = "%d-%m-%Y"
eastern_timezone = pytz.timezone('US/Eastern')
today = datetime.utcnow()

for team in teamList:
    
    skedURL = baseURL + team + season
    sked = requests.get(skedURL).json()
    sked = sked['games']

    sked = [entry for entry in sked if entry.get('gameType') == 2]
    
    for game in sked:
        gameID = game['id']
        gameDate = game['startTimeUTC']
        awayTeam = game['awayTeam']['abbrev']
        homeTeam = game['homeTeam']['abbrev']
        
        # GET WINNING GOALIE
        
        if game['gameState'] == 'OFF':
            winningGoalie = game['winningGoalie']['playerId']
        else:
            winningGoalie = 0
        
        datetime_obj = datetime.strptime(gameDate, '%Y-%m-%dT%H:%M:%SZ')
        utc_timezone = pytz.timezone('UTC')
        utc_datetime = utc_timezone.localize(datetime_obj)
        eastern_timezone = pytz.timezone('US/Eastern')
        eastern_datetime = utc_datetime.astimezone(eastern_timezone)
        formatted_date_string = eastern_datetime.strftime('%d-%m-%Y')
        game_date = datetime.strptime(formatted_date_string, date_format)
        game_time = eastern_datetime.strftime("%A %I:%M %p")
        

        gameTemp = pd.DataFrame({'gameID': [gameID], 'gameDate': [game_date], 'gameTime': [game_time],
                                 'awayTeam': [awayTeam], 'homeTeam': [homeTeam], 'gameDT': [datetime_obj],
                                'winningGoalie': [winningGoalie]})
        completeSked = pd.concat([completeSked, gameTemp], ignore_index=True)

        if homeTeam == team:
            homeOnlySked = pd.concat([homeOnlySked, gameTemp], ignore_index=True)
    
homeOnlySked.to_csv('data/sked.csv', index=False)

completeSked.to_csv('data/sked_full.csv', index=False)

## Show remaining sked. Adjust time if needed

In [4]:
# remainSked = homeOnlySked.loc[homeOnlySked['gameDT'] >= (datetime.utcnow() - timedelta(days=0.5))]

remainSked = homeOnlySked.loc[homeOnlySked['gameDT'] >= (datetime.utcnow())]
remainSked.sort_values('gameDate')

Unnamed: 0,gameID,gameDate,gameTime,awayTeam,homeTeam,gameDT,winningGoalie
709,2023020396,2023-12-07,Thursday 08:00 PM,DAL,WSH,2023-12-08 01:00:00,0
1163,2023020395,2023-12-07,Thursday 07:00 PM,TOR,OTT,2023-12-08 00:00:00,0
134,2023020402,2023-12-07,Thursday 09:00 PM,WPG,COL,2023-12-08 02:00:00,0
217,2023020404,2023-12-07,Thursday 10:00 PM,MIN,VAN,2023-12-08 03:00:00,0
1036,2023020403,2023-12-07,Thursday 10:30 PM,NJD,SEA,2023-12-08 03:30:00,0
...,...,...,...,...,...,...,...
409,2023021308,2024-04-18,Thursday 08:00 PM,VAN,WPG,2024-04-19 00:00:00,0
942,2023021309,2024-04-18,Thursday 09:00 PM,SJS,CGY,2024-04-19 01:00:00,0
40,2023021311,2024-04-18,Thursday 10:00 PM,ANA,VGK,2024-04-19 02:00:00,0
983,2023021307,2024-04-18,Thursday 07:00 PM,SEA,MIN,2024-04-18 23:00:00,0


## Show completed sked. Adjust time if needed

In [5]:
# completedSked = homeOnlySked.loc[homeOnlySked['gameDT'] <= (datetime.utcnow() - timedelta(days=0.5))]

completedSked = homeOnlySked.loc[homeOnlySked['gameDT'] <= (datetime.utcnow())]
completedSked.sort_values('gameDate')

Unnamed: 0,gameID,gameDate,gameTime,awayTeam,homeTeam,gameDT,winningGoalie
0,2023020003,2023-10-10,Tuesday 10:30 PM,SEA,VGK,2023-10-11 02:30:00,8478499
451,2023020001,2023-10-10,Tuesday 05:30 PM,NSH,TBL,2023-10-10 21:30:00,8477992
820,2023020002,2023-10-10,Tuesday 08:00 PM,CHI,PIT,2023-10-11 00:00:00,8475852
902,2023020007,2023-10-11,Wednesday 10:00 PM,WPG,CGY,2023-10-12 02:00:00,8474593
410,2023020004,2023-10-11,Wednesday 07:00 PM,OTT,CAR,2023-10-11 23:00:00,8475883
...,...,...,...,...,...,...,...
995,2023020380,2023-12-05,Tuesday 07:30 PM,DET,BUF,2023-12-06 00:30:00,8479312
257,2023020388,2023-12-06,Wednesday 07:00 PM,DAL,FLA,2023-12-07 00:00:00,8475683
666,2023020390,2023-12-06,Wednesday 09:00 PM,VGK,STL,2023-12-07 02:00:00,8480313
464,2023020389,2023-12-06,Wednesday 07:00 PM,PIT,TBL,2023-12-07 00:00:00,8476883


# Bring in the most recent game logs. Manually adjust date, likely

In [6]:
# file_name = f"data/allG_df_raw-{today.strftime('%Y-%m-%d')}.csv"
file_name = f"data/allG_df_raw-{last_scrape}.csv"
allG_df_raw = pd.read_csv(file_name, index_col=False)
allG_df_raw

# file_name = f"data/all_df_raw-{today.strftime('%Y-%m-%d')}.csv"
file_name = f"data/all_df_raw-{last_scrape}.csv"
all_df_raw = pd.read_csv(file_name, index_col=False)
all_df_raw

Unnamed: 0,playerId,sweaterNumber,name,position,goals,assists,points,plusMinus,pim,hits,...,toi,powerPlayToi,shorthandedToi,team,opponent,secondaryPosition,tertiaryPosition,gameDate,gameTime,gameId
0,8478178,43,D. Raddysh,D,0,0,0,1,0,1,...,1084,5,19,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
1,8475177,44,C. de Haan,D,0,0,0,0,0,2,...,821,0,172,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
2,8480246,48,N. Perbix,D,0,0,0,-1,2,0,...,702,0,0,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
3,8475167,77,V. Hedman,D,0,1,1,-1,0,0,...,1615,386,248,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
4,8478416,81,E. Cernak,D,0,0,0,0,0,6,...,1160,0,276,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13417,8477425,28,M. Wood,L,0,0,0,-1,0,0,...,841,0,14,COL,LAK,W,F,2023-12-03 00:00:00,Sunday 08:00 PM,2023020373
13418,8477492,29,N. MacKinnon,C,0,1,1,-1,0,1,...,1269,183,0,COL,LAK,C,F,2023-12-03 00:00:00,Sunday 08:00 PM,2023020373
13419,8475193,90,T. Tatar,L,0,0,0,0,0,1,...,631,57,0,COL,LAK,W,F,2023-12-03 00:00:00,Sunday 08:00 PM,2023020373
13420,8481641,94,J. Kiviranta,L,0,0,0,-1,0,2,...,708,0,0,COL,LAK,W,F,2023-12-03 00:00:00,Sunday 08:00 PM,2023020373


# Show games missing from logs

In [7]:
all_games = homeOnlySked['gameID'].unique().tolist()

games_saved = all_df_raw['gameId'].unique().tolist()

games_done = completedSked['gameID'].unique().tolist()

missing_games = list(set(games_done) - set(games_saved))

missing_games

[2023020384,
 2023020385,
 2023020386,
 2023020387,
 2023020388,
 2023020389,
 2023020390,
 2023020391,
 2023020374,
 2023020375,
 2023020376,
 2023020377,
 2023020378,
 2023020379,
 2023020380,
 2023020381,
 2023020382,
 2023020383]

# Scrape for nex game logs only

In [8]:
baseURL = 'https://api-web.nhle.com/v1/gamecenter/'
appendix = '/boxscore'

playsBaseURL = 'https://api-web.nhle.com/v1/gamecenter/'
playsAppendix = '/play-by-play'

date_format = "%d-%m-%Y"
eastern_timezone = pytz.timezone('US/Eastern')
today = datetime.utcnow()

awayLogs = {}
awayLogsG = {}
homeLogs = {}
homeLogsG = {}

for Id in missing_games:
    
    gameURL = baseURL + str(Id) + appendix
    
    print(gameURL)
    
    game = requests.get(gameURL).json()

    # pprint(game)

    numPeriods = len(game['boxscore']['linescore']['byPeriod'])
    score = game['boxscore']['linescore']['totals']
    awayTeam = game['awayTeam']
    homeTeam = game['homeTeam']
    awayTeamName = game['awayTeam']['abbrev']
    homeTeamName = game['homeTeam']['abbrev']
    awayTeamId = game['awayTeam']['id']
    homeTeamId = game['homeTeam']['id']
    gameDate = game['startTimeUTC']
    gameOutcome = game['gameOutcome']
    gameID = game['id']

    playerStatsAway = game['boxscore']['playerByGameStats']['awayTeam']
    playerStatsHome = game['boxscore']['playerByGameStats']['homeTeam']

    # pprint(playerStatsHome)

    # GET THE STARTING GOALTENDERS

    
    playsURL = playsBaseURL + str(Id) + playsAppendix
    plays = requests.get(playsURL).json()
    plays = plays['plays']

    homeStartingG = next((play['details']['goalieInNetId'] for play in plays if 
                          ((play['typeDescKey'] == 'shot-on-goal') and (play['details']['awaySOG'] == 1))), None)
    awayStartingG = next((play['details']['goalieInNetId'] for play in plays if 
                          ((play['typeDescKey'] == 'shot-on-goal') and (play['details']['homeSOG'] == 1))), None)

    homeEndingG = next((play['details']['goalieInNetId'] for play in reversed(plays) if 
                        ((play['typeDescKey'] == 'shot-on-goal') and (play['details']['eventOwnerTeamId'] == awayTeamId))), None)
    awayEndingG = next((play['details']['goalieInNetId'] for play in reversed(plays) if 
                        ((play['typeDescKey'] == 'shot-on-goal') and (play['details']['eventOwnerTeamId'] == homeTeamId))), None)

    # Get THE WINNING GOALTENDER


    winningGoalie = completedSked.loc[completedSked['gameID'] == Id]['winningGoalie'].iloc[0]


    # FIGURE OUT THE TYPE OF FINISH

    if numPeriods > 3:
        OT = 1
    else:
        OT = 0

    if numPeriods > 4:
        SO = 1
    else:
        SO = 0

    if score['home'] > score['away']:
        homeTeamWin = 1
        awayTeamWin = 0
    else:
        homeTeamWin = 0
        awayTeamWin = 1

    # FORMAT THE DATES AND TIMES

    datetime_obj = datetime.strptime(gameDate, '%Y-%m-%dT%H:%M:%SZ')
    utc_timezone = pytz.timezone('UTC')
    utc_datetime = utc_timezone.localize(datetime_obj)
    eastern_timezone = pytz.timezone('US/Eastern')
    eastern_datetime = utc_datetime.astimezone(eastern_timezone)
    formatted_date_string = eastern_datetime.strftime('%d-%m-%Y')
    game_date = datetime.strptime(formatted_date_string, date_format)
    game_time = eastern_datetime.strftime("%A %I:%M %p")

    # AWAY LOG ASSEMBLER
    
    awaySOGcheck = 0

    for defender in playerStatsAway['defense']:
        logName = str(gameID) + str(defender['playerId'])
        awayLogs[logName] = defender
        awayLogs[logName]['name'] = awayLogs[logName]['name']['default']
        awayLogs[logName]['team'] = awayTeamName
        awayLogs[logName]['opponent'] = homeTeamName
        awayLogs[logName]['secondaryPosition'] = 'D'
        awayLogs[logName]['tertiaryPosition'] = 'D'
        awayLogs[logName]['gameDate'] = game_date
        awayLogs[logName]['gameTime'] = game_time
        awayLogs[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['toi'].split(':'))))
        awayLogs[logName]['powerPlayToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['powerPlayToi'].split(':'))))
        awayLogs[logName]['shorthandedToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['shorthandedToi'].split(':'))))
        awayLogs[logName]['gameId'] = Id
        
        awaySOGcheck = awaySOGcheck + awayLogs[logName]['shots']

    for forward in playerStatsAway['forwards']:
        logName = str(gameID) + str(forward['playerId'])
        awayLogs[logName] = forward
        awayLogs[logName]['name'] = awayLogs[logName]['name']['default']
        awayLogs[logName]['team'] = awayTeamName
        awayLogs[logName]['opponent'] = homeTeamName

        if forward['position'] == 'C':
            awayLogs[logName]['secondaryPosition'] = 'C'
        else:
            awayLogs[logName]['secondaryPosition'] = 'W'
        awayLogs[logName]['tertiaryPosition'] = 'F'

        awayLogs[logName]['gameDate'] = game_date
        awayLogs[logName]['gameTime'] = game_time
        awayLogs[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['toi'].split(':'))))
        awayLogs[logName]['powerPlayToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['powerPlayToi'].split(':'))))
        awayLogs[logName]['shorthandedToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['shorthandedToi'].split(':'))))
        awayLogs[logName]['gameId'] = Id
        
        awaySOGcheck = awaySOGcheck + awayLogs[logName]['shots']
        
    awayGshots = 0
        
    for goalie in playerStatsAway['goalies']:
        if goalie['toi'] != '00:00':
            logName = str(gameID) + str(goalie['playerId'])
            awayLogsG[logName] = goalie
            awayLogsG[logName]['name'] = awayLogsG[logName]['name']['default']
            awayLogsG[logName]['team'] = awayTeamName
            awayLogsG[logName]['opponent'] = homeTeamName
            awayLogsG[logName]['gameDate'] = game_date
            awayLogsG[logName]['gameTime'] = game_time
            awayLogsG[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogsG[logName]['toi'].split(':'))))
            awayLogsG[logName]['gameId'] = Id
            
            # FIND THE STARTER

            if goalie['playerId'] == awayStartingG:
                awayLogsG[logName]['start'] = 1
            else:
                awayLogsG[logName]['start'] = 0

            # DETERMINE SHUTOUT

            shots = awayLogsG[logName]['saveShotsAgainst'].split('/')[1]
            saves = awayLogsG[logName]['saveShotsAgainst'].split('/')[0]
            if (shots == saves) & (awayLogsG[logName]['toi'] > 3446):
                awayLogsG[logName]['shutout'] = 1
            else:
                awayLogsG[logName]['shutout'] = 0
                
            awayGshots = awayGshots + int(shots)

            # DETERMINE WIN

            if goalie['playerId'] == winningGoalie:
                awayLogsG[logName]['win'] = 4
            elif (awayTeamWin == 0) & (OT == 1) & (awayEndingG == goalie['playerId']):
                awayLogsG[logName]['win'] = 1
            else:
                awayLogsG[logName]['win'] = 0
                
            # CHECK FOR GOALIE ABBERATION
            
            if int(shots) - int(saves) != awayLogsG[logName]['goalsAgainst']:
                print(f"Error for goalies: {awayLogsG[logName]['name']}")



    # HOME LOG ASSEMBLER
                      
    homeSOGcheck = 0

    for defender in playerStatsHome['defense']:
        logName = str(gameID) + str(defender['playerId'])
        homeLogs[logName] = defender
        homeLogs[logName]['name'] = homeLogs[logName]['name']['default']
        homeLogs[logName]['team'] = homeTeamName
        homeLogs[logName]['opponent'] = awayTeamName
        homeLogs[logName]['secondaryPosition'] = 'D'
        homeLogs[logName]['tertiaryPosition'] = 'D'
        homeLogs[logName]['gameDate'] = game_date
        homeLogs[logName]['gameTime'] = game_time
        homeLogs[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['toi'].split(':'))))
        homeLogs[logName]['powerPlayToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['powerPlayToi'].split(':'))))
        homeLogs[logName]['shorthandedToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['shorthandedToi'].split(':'))))
        homeLogs[logName]['gameId'] = Id
        
        homeSOGcheck = homeSOGcheck + homeLogs[logName]['shots']
                      
    for forward in playerStatsHome['forwards']:
        logName = str(gameID) + str(forward['playerId'])
        homeLogs[logName] = forward
        homeLogs[logName]['name'] = homeLogs[logName]['name']['default']
        homeLogs[logName]['team'] = homeTeamName
        homeLogs[logName]['opponent'] = awayTeamName

        if forward['position'] == 'C':
            homeLogs[logName]['secondaryPosition'] = 'C'
        else:
            homeLogs[logName]['secondaryPosition'] = 'W'
        homeLogs[logName]['tertiaryPosition'] = 'F'

        homeLogs[logName]['gameDate'] = game_date
        homeLogs[logName]['gameTime'] = game_time
        homeLogs[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['toi'].split(':'))))
        homeLogs[logName]['powerPlayToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['powerPlayToi'].split(':'))))
        homeLogs[logName]['shorthandedToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['shorthandedToi'].split(':'))))
        homeLogs[logName]['gameId'] = Id
        
        homeSOGcheck = homeSOGcheck + homeLogs[logName]['shots']
        
    homeGshots = 0
    
    for goalie in playerStatsHome['goalies']:
        if goalie['toi'] != '00:00':
            logName = str(gameID) + str(goalie['playerId'])
            homeLogsG[logName] = goalie
            homeLogsG[logName]['name'] = homeLogsG[logName]['name']['default']
            homeLogsG[logName]['team'] = homeTeamName
            homeLogsG[logName]['opponent'] = awayTeamName
            homeLogsG[logName]['gameDate'] = game_date
            homeLogsG[logName]['gameTime'] = game_time
            homeLogsG[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogsG[logName]['toi'].split(':'))))
            homeLogsG[logName]['gameId'] = Id
            
            # FIND THE STARTER

            if goalie['playerId'] == homeStartingG:
                homeLogsG[logName]['start'] = 1
            else:
                homeLogsG[logName]['start'] = 0

            # DETERMINE SHUTOUT

            shots = homeLogsG[logName]['saveShotsAgainst'].split('/')[1]
            saves = homeLogsG[logName]['saveShotsAgainst'].split('/')[0]
            if (shots == saves) & (homeLogsG[logName]['toi'] > 3446):
                homeLogsG[logName]['shutout'] = 1
            else:
                homeLogsG[logName]['shutout'] = 0
                
            homeGshots = homeGshots + int(shots)

            # DETERMINE WIN

            if goalie['playerId'] == winningGoalie:
                homeLogsG[logName]['win'] = 4
            elif (homeTeamWin == 0) & (OT == 1) & (homeEndingG == goalie['playerId']):
                homeLogsG[logName]['win'] = 1
            else:
                homeLogsG[logName]['win'] = 0
                
            # CHECK FOR GOALIE ABBERATION
            
            if int(shots) - int(saves) != homeLogsG[logName]['goalsAgainst']:
                print(f"Error for goalies: {homeLogsG[logName]['name']} vs {homeGshots}")
        

https://api-web.nhle.com/v1/gamecenter/2023020384/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020385/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020386/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020387/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020388/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020389/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020390/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020391/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020374/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020375/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020376/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020377/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020378/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020379/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020380/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020381/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020382/boxsco

# Assemble and append new game logs

In [9]:
homeG_df = pd.DataFrame(homeLogsG)
homeG_df = homeG_df.transpose()

awayG_df = pd.DataFrame(awayLogsG)
awayG_df = awayG_df.transpose()

allG_df_raw = pd.concat([allG_df_raw, homeG_df, awayG_df], axis=0)

file_name = f"data/allG_df_raw-{today.strftime('%Y-%m-%d')}.csv"
allG_df_raw.to_csv(file_name, index=False)

home_df = pd.DataFrame(homeLogs)
home_df = home_df.transpose()

away_df = pd.DataFrame(awayLogs)
away_df = away_df.transpose()

all_df_raw = pd.concat([all_df_raw, home_df, away_df], axis=0)

file_name = f"data/all_df_raw-{today.strftime('%Y-%m-%d')}.csv"
all_df_raw.to_csv(file_name, index=False)

# Clean and add FP to goalies; show summary stats

In [10]:
allG_df = allG_df_raw.copy()

allG_df[['saves', 'shots']] = allG_df['saveShotsAgainst'].str.split('/', expand=True).apply(pd.to_numeric)
allG_df[['evSaves', 'evShots']] = allG_df['evenStrengthShotsAgainst'].str.split('/', expand=True).apply(pd.to_numeric)
allG_df[['ppSaves', 'ppShots']] = allG_df['powerPlayShotsAgainst'].str.split('/', expand=True).apply(pd.to_numeric)

allG_df = allG_df.drop(columns=(['saveShotsAgainst', 'savePctg', 'evenStrengthShotsAgainst',
                                   'powerPlayShotsAgainst', 'shorthandedShotsAgainst', 'evenStrengthGoalsAgainst',
                                  'powerPlayGoalsAgainst', 'shorthandedGoalsAgainst']))

columns_to_convert1 = ['toi', 'start', 'shutout', 'win', 'pim']
allG_df[columns_to_convert1] = allG_df[columns_to_convert1].apply(pd.to_numeric)

for index, row in allG_df.iterrows():
    decisionPoints = 0
    shutoutPoints = 0
    decisionType = 0

    # ADD A GAMES PLAYED TALLY
    
    if row['toi'] > 0:
        allG_df.at[index, 'gamesPlayed'] = 1

    # CALCULATE SHUTOUT POINTS

    if (row['shutout'] == 1):
        shutoutPoints = 3
    else:
        shutoutPoints = 0
        
    # CALCULATE DECISION POINTS
    
    if row['win'] == 4:
        decisionType = 'W'
    elif row['win'] == 1:
        decisionType = 'OTL'
    else:
        decisionType = 'L'
    
    decisionPoints = row['win']

    goalsAgainstPoints = (row['goalsAgainst']) * -2
    
    savesPoints = (row['shots'] - row['goalsAgainst']) * 0.2
    
    # TOTAL FANTASY POINTS
    
    fantasyPoints = decisionPoints + shutoutPoints + goalsAgainstPoints + savesPoints
    
    # ADD TO DATAFRAME
    
    allG_df.at[index, 'fantasyPoints'] = fantasyPoints
    allG_df.at[index, 'decisionType'] = decisionType
    
columns_to_convert3 = ['gamesPlayed']
allG_df[columns_to_convert3] = allG_df[columns_to_convert3].astype(int)

summary_statsG = allG_df.groupby('playerId').agg({
    'name': 'first',
    'team': 'last',
    'position': 'first',
    'toi': 'sum',
    'gamesPlayed': 'sum',
    'saves': 'sum',
    'shots': 'sum',
#     'evenSaves': 'sum',
    'shutout': 'sum',
    'fantasyPoints': 'sum'
}).reset_index()

summary_statsG.sort_values('fantasyPoints', ascending=False).head(25)

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,saves,shots,shutout,fantasyPoints
47,8478499,A. Hill,VGK,G,50083,14,378,404,2,71.6
50,8478971,C. Ingram,ARI,G,50020,15,412,443,2,70.4
7,8475660,C. Talbot,LAK,G,56970,16,409,440,1,67.8
34,8477967,T. Demko,VAN,G,67310,19,511,557,2,64.2
61,8480280,J. Swayman,BOS,G,44988,13,357,383,2,63.4
8,8475683,S. Bobrovsky,FLA,G,67861,19,476,523,2,56.2
30,8477465,T. Jarry,PIT,G,64690,19,495,540,3,52.0
24,8476945,C. Hellebuyck,WPG,G,64950,18,469,514,1,51.8
63,8480382,A. Georgiev,COL,G,74374,21,535,593,1,48.8
58,8479979,J. Oettinger,DAL,G,64749,18,491,539,1,47.2


# Function to caluclate missed games from injury based on sked

In [11]:
def missed_games (team, returnDate):
    tempDate = datetime.strptime(returnDate, '%Y-%m-%d')
#     utc_datetime = datetime.strptime(returnDate, '%Y-%m-%d').replace(tzinfo=timezone.utc)
    temp = homeOnlySked.loc[(homeOnlySked['awayTeam'] == team) | (homeOnlySked['homeTeam'] == team)]
    temp = temp.loc[(temp['gameDT'] >= datetime.utcnow()) & (temp['gameDate'] < tempDate)]
                     
    return len(temp)

# Set all players to 0 missed games; then manually adjust dictionary

In [12]:
summary_statsG['missedGames'] = 0

In [13]:
injuries_listG = {
    'F. Andersen': '2023-12-30',
#     'V. Husso': '2023-11-22',
#     'J. Korpisalo': '2023-11-27',
    'A. Hill': '2023-12-10',
    'I. Samsonov': '2023-12-07'

}

for player, date in injuries_listG.items():
    team = summary_statsG.loc[summary_statsG['name'] == player]['team'].iloc[0]
    missedGames = missed_games(team, date)
    summary_statsG.loc[summary_statsG['name'] == player, 'missedGames'] = missedGames

# Add crease minutes to team_names to calculate share %

In [14]:
for index, row in team_names.iterrows():
    team_names.at[index, 'creaseMins'] = allG_df.loc[allG_df['team'] == row['abbreviation']]['toi'].sum()

team_names['creaseMins'] = team_names['creaseMins'].astype(int)

for index, row in summary_statsG.iterrows():
    
    creaseShare = row['toi'] / team_names.loc[team_names['abbreviation'] == row['team']]['creaseMins'].sum()
    summary_statsG.at[index, 'creaseShare'] = round(creaseShare * 100, 2)
    
    gamesRemaining = team_names.loc[team_names['abbreviation'] == row['team']]['gamesRemaining'].iloc[0] - row['missedGames']
    summary_statsG.at[index, 'gamesRemaining'] = round(gamesRemaining * creaseShare, 2)
    
    FPP60 = row['fantasyPoints'] / row['toi'] * 3600
    summary_statsG.at[index, 'FPP60'] = round(FPP60, 2)
    
    FPPG = row['fantasyPoints'] / row['gamesPlayed']
    summary_statsG.at[index, 'FPPG'] = round(FPPG, 2)
    
    FPremain = FPP60 * gamesRemaining * creaseShare
    summary_statsG.at[index, 'fantasyPointsRemain'] = round(FPremain, 2)

summary_statsG['gamesRemaining'] = summary_statsG['gamesRemaining'].astype(int)

summary_statsG.loc[summary_statsG['team'] == 'VGK']

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,saves,shots,shutout,fantasyPoints,missedGames,creaseShare,gamesRemaining,FPP60,FPPG,fantasyPointsRemain
47,8478499,A. Hill,VGK,G,50083,14,378,404,2,71.6,1,50.85,27,5.15,5.11,141.31
62,8480313,L. Thompson,VGK,G,48414,14,354,386,0,37.8,0,49.15,27,2.81,2.7,75.99


In [15]:
# Fix errors

# Georgiev versus Coyotes
# Hill versus Pittsburgh
# deSmith versus ... someone Flordia?

summary_statsG.loc[summary_statsG['team'] == 'COL']

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,saves,shots,shutout,fantasyPoints,missedGames,creaseShare,gamesRemaining,FPP60,FPPG,fantasyPointsRemain
63,8480382,A. Georgiev,COL,G,74374,21,535,593,1,48.8,0,82.16,46,2.36,2.32,110.63
68,8481031,I. Prosvetov,COL,G,16145,6,125,136,0,12.0,0,17.84,10,2.68,2.0,27.2


# Function for summary statistics

In [16]:
def summary_statistics(df):
    temp_build = df.groupby('playerId').agg({
        'name': 'first',
        'team': 'last',
        'position': 'first',
        'toi': 'sum',
        'gamesPlayed': 'sum',
        'goals': 'sum',
        'assists': 'sum',
        'specialTeams': 'sum',
        'shots': 'sum',
        'hits': 'sum',
        'blockedShots': 'sum',
        'powerPlayToi': 'sum',
        'shorthandedToi': 'sum',
        'plusMinus': 'sum',
        'fantasyPoints': 'sum',
        'secondaryPosition': 'first',
        'tertiaryPosition': 'first'
    }).reset_index()


    temp_build['FPP60'] = (temp_build['fantasyPoints'] / temp_build['toi'] * 3600).round(2)
    temp_build['FPPG'] = (temp_build['fantasyPoints'] / temp_build['gamesPlayed']).round(2)

    temp_build = temp_build.sort_values(by='FPPG', ascending=False)
    
    return temp_build

# Clean the skaters; add FP and show summary stats

In [17]:
all_df = all_df_raw.copy()

all_df = all_df.drop(columns=(['faceoffWinningPctg']))

columns_to_convert1 = ['goals', 'assists', 'points', 'plusMinus', 'pim', 'hits', 'blockedShots',
                      'powerPlayGoals', 'powerPlayPoints', 'shorthandedGoals', 'shPoints', 'shots',
                       'toi', 'powerPlayToi', 'shorthandedToi']
all_df[columns_to_convert1] = all_df[columns_to_convert1].apply(pd.to_numeric)


for index, row in all_df.iterrows():
    
    if row['toi'] > 0:
        all_df.at[index, 'gamesPlayed'] = 1
    
    specialTeams = row['powerPlayPoints'] + row['shPoints']
    fantasyPoints = (row['goals']*2) + row['assists'] + (specialTeams * .5) + (row['blockedShots'] * .5) + ((row['hits'] + row['shots']) * .1)
    all_df.at[index, 'fantasyPoints'] = fantasyPoints
    all_df.at[index, 'specialTeams'] = specialTeams
    
columns_to_convert4 = ['specialTeams', 'gamesPlayed']
all_df[columns_to_convert4] = all_df[columns_to_convert4].astype(int)

summary_stats = summary_statistics(all_df).sort_values('fantasyPoints', ascending=False)
all_df

Unnamed: 0,playerId,sweaterNumber,name,position,goals,assists,points,plusMinus,pim,hits,...,team,opponent,secondaryPosition,tertiaryPosition,gameDate,gameTime,gameId,gamesPlayed,fantasyPoints,specialTeams
0,8478178,43,D. Raddysh,D,0,0,0,1,0,1,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,0.9,0
1,8475177,44,C. de Haan,D,0,0,0,0,0,2,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,0.7,0
2,8480246,48,N. Perbix,D,0,0,0,-1,2,0,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,0.6,0
3,8475167,77,V. Hedman,D,0,1,1,-1,0,0,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,1.9,0
4,8478416,81,E. Cernak,D,0,0,0,0,0,6,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,1.3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20230203838475798,8475798,64,M. Granlund,C,0,2,2,2,0,0,...,SJS,NYI,C,F,2023-12-05 00:00:00,Tuesday 07:30 PM,2023020383,1,3.4,0
20230203838474884,8474884,68,M. Hoffman,C,0,0,0,0,0,0,...,SJS,NYI,C,F,2023-12-05 00:00:00,Tuesday 07:30 PM,2023020383,1,0.1,0
20230203838482667,8482667,72,W. Eklund,L,1,2,3,1,0,0,...,SJS,NYI,W,F,2023-12-05 00:00:00,Tuesday 07:30 PM,2023020383,1,4.8,0
20230203838477473,8477473,90,J. Bailey,R,0,0,0,1,0,1,...,SJS,NYI,W,F,2023-12-05 00:00:00,Tuesday 07:30 PM,2023020383,1,0.7,0


# Set missed games as 0 and manually adjust injury dictionary

In [18]:
summary_stats['missedGames'] = 0

In [19]:
injuries_list = {
    'T. Zegras': '2023-12-05',
    'J. Drysdale': '2023-12-05',
    'T. Boyd': '2023-12-11',
    'J. McBain': '2023-12-11',
    'B. Hayton': '2023-12-22',
#     'J. Valimaki': '2023-11-28',
    'T. Dermott': '2023-12-04',
#     'M. Dumba': '2023-11-28',
    'M. Lucic': '2023-12-17',
#     'M. Grzelcyk': '2023-11-25',
    'T. Thompson': '2023-12-05',
    'Z. Girgensons': '2023-12-09',
    'J. Greenway': '2023-12-05',
#     'Z. Benson': '2023-11-17',
    'T. Hall': '2024-04-20',
    'A. Athanasiou': '2023-12-07',
    'C. Perry': '2024-04-20',
    'A. Lehkonen': '2023-12-17',
    'S. Girard': '2024-01-16',
    'D. Severson': '2024-01-02',
    'J. Roslovic': '2023-12-14',
#     'M. Janmark': '2023-11-22',
    'D. Holloway': '2023-12-10',
#     'A. Barkov': '2023-11-22',
#     'J. Mahura': '2023-11-27',
    'R. Harvey-Pinard': '2024-01-17',
    'A. Newhook': '2024-02-17',
    'D. Savard': '2023-12-07',
    'J. Harris': '2023-12-07',
    'A. Xhekaj': '2023-12-07',
    'K. Dach': '2024-04-20',
#     'L. Schenn': '2023-11-26',
    'T. Novak': '2023-12-12',
    'A. Carrier': '2023-12-16',
#     'N. Hischier': '2023-11-22',
#     'T. Meier': '2023-11-28',
    'D. Hamilton': '2024-01-11',
    'T. Nosek': '2023-12-16',
#     'J. Hughes': '2023-11-14',
    'A. Pelech': '2023-12-19',
    'F. Chytil': '2023-10-08',
#     'A. Fox': '2023-11-29',
    'K. Kakko': '2023-12-22',
    'R. Greig': '2023-12-05',
#     'T. Chabot': '2023-12-01',
    'N. Cates': '2024-01-20',
#     'B. Rust': '2023-11-28',
    'R. Rakell': '2023-12-16',
    'A. Barabanov': '2023-12-05',
    'L. Kunin': '2023-12-10',
    'J. Rutta': '2023-12-05',
    'A. Burakovsky': '2023-12-04',
    'J. Schwartz': '2024-01-11',
#     'L. Brown': '2023-12-06',
    'T. Liljegren': '2023-12-07',
    'M. Giordano': '2023-12-16',
    'J. Klingberg': '2023-12-12',
    'C. Soucy': '2023-12-28',
    'P. Suter': '2023-12-30',
    'S. Theodore': '2023-12-17',
    'A. Martinez': '2023-12-12',
#     'M. Fehervary': '2023-11-27',
    'T. Oshie': '2023-12-07',
    'N. Backstrom': '2024-04-20',
#     'A. Mantha': '2023-11-18',
#     'G. Vilardi': '2023-11-28',
    'R. Kupari': '2023-12-16'
    
}

for player, date in injuries_list.items():
#     print(player)
    team = summary_stats.loc[summary_stats['name'] == player]['team'].iloc[0]
    missedGames = missed_games(team, date)
    summary_stats.loc[summary_stats['name'] == player, 'missedGames'] = missedGames

# Add fantasy stats and missing games

In [20]:
for index, row in summary_stats.iterrows():
    
    gamesRemaining = team_names.loc[team_names['abbreviation'] == row['team']]['gamesRemaining']

    summary_stats.loc[index, 'gamesRemaining'] = int(gamesRemaining) - row['missedGames']

    summary_stats.loc[index, 'fantasyPointsRemain'] = row['FPPG'] * (int(gamesRemaining) - row['missedGames'])
    
summary_stats['gamesRemaining'] = summary_stats['gamesRemaining'].astype(int)


summary_stats.loc[summary_stats['team'] == 'NYR']

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,goals,assists,specialTeams,shots,...,shorthandedToi,plusMinus,fantasyPoints,secondaryPosition,tertiaryPosition,FPP60,FPPG,missedGames,gamesRemaining,fantasyPointsRemain
396,8478550,A. Panarin,NYR,L,28278,24,16,21,15,96,...,4,1,71.7,W,F,9.13,2.99,0,58,173.42
196,8476885,J. Trouba,NYR,D,32589,24,2,10,1,46,...,4984,10,65.1,D,D,7.19,2.71,0,58,157.18
81,8475184,C. Kreider,NYR,L,26954,24,14,8,11,59,...,2663,6,53.0,W,F,7.08,2.21,0,58,128.18
145,8476389,V. Trocheck,NYR,C,29183,24,6,16,7,56,...,1886,2,50.8,C,F,6.27,2.12,0,58,122.96
162,8476459,M. Zibanejad,NYR,C,28275,24,6,14,11,60,...,3271,6,50.0,C,F,6.37,2.08,0,58,120.64
567,8480817,K. Miller,NYR,D,32284,24,5,7,1,31,...,3161,7,39.3,D,D,4.38,1.64,0,58,95.12
219,8476979,E. Gustafsson,NYR,D,26538,24,3,12,6,36,...,505,3,38.7,D,D,5.25,1.61,0,58,93.38
424,8479323,A. Fox,NYR,D,17974,14,3,13,12,22,...,1339,-1,37.6,D,D,7.53,2.69,0,58,156.02
685,8482109,A. Lafrenière,NYR,L,24465,24,8,8,2,59,...,35,-2,34.8,W,F,5.12,1.45,0,58,84.1
671,8482073,B. Schneider,NYR,D,22614,24,1,5,0,31,...,1949,-2,32.5,D,D,5.17,1.35,0,58,78.3


# Single Frame to add bio details

In [21]:
trim_stats = summary_stats.drop(columns=['goals', 'assists', 'specialTeams', 'shots', 'hits', 'blockedShots', 'powerPlayToi',
                                        'shorthandedToi', 'plusMinus', 'secondaryPosition', 'tertiaryPosition'])

trim_statsG = summary_statsG.drop(columns=['saves', 'shots', 'shutout'])

column_order = ['playerId', 'name', 'team', 'position', 'toi', 'gamesPlayed', 'fantasyPoints', 'FPP60', 'FPPG', 'missedGames', 'gamesRemaining', 'fantasyPointsRemain']

trim_stats = trim_stats[column_order]
trim_statsG = trim_statsG[column_order]

print(trim_statsG.columns.to_list())
print(trim_stats.columns.to_list())

trimmed_stats = pd.concat([trim_stats, trim_statsG])
trimmed_stats = trimmed_stats.sort_values('fantasyPointsRemain', ascending = False)
trimmed_stats

['playerId', 'name', 'team', 'position', 'toi', 'gamesPlayed', 'fantasyPoints', 'FPP60', 'FPPG', 'missedGames', 'gamesRemaining', 'fantasyPointsRemain']
['playerId', 'name', 'team', 'position', 'toi', 'gamesPlayed', 'fantasyPoints', 'FPP60', 'FPPG', 'missedGames', 'gamesRemaining', 'fantasyPointsRemain']


Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,fantasyPoints,FPP60,FPPG,missedGames,gamesRemaining,fantasyPointsRemain
638,8481559,J. Hughes,NJD,C,22761,18,62.7,9.92,3.48,0,59,205.32
421,8479318,A. Matthews,TOR,C,28457,22,71.4,9.03,3.25,0,60,195.00
157,8476453,N. Kucherov,TBL,R,33535,26,90.5,9.72,3.48,0,55,191.40
520,8480069,C. Makar,COL,D,33757,23,75.9,8.09,3.30,0,57,188.10
7,8475660,C. Talbot,LAK,G,56970,16,67.8,4.28,4.24,0,42,184.10
...,...,...,...,...,...,...,...,...,...,...,...,...
73,8482221,D. Levi,BUF,G,29916,9,-5.0,-0.60,-0.56,0,18,-10.86
67,8480992,M. Chrona,SJS,G,1830,1,-5.4,-10.62,-5.40,0,1,-11.64
10,8475789,J. Campbell,EDM,G,16007,5,-8.6,-1.93,-1.72,0,11,-22.10
75,8482821,A. Soderblom,CHI,G,39789,12,-9.6,-0.87,-0.80,0,26,-23.28


# Get ESPN ownership %

In [22]:
rosters = requests.get(espn_link).json()

# pprint(rosters[9])

rostership = pd.DataFrame()

for player in rosters:
    n = player['fullName']
    try:
        percent = player['ownership']['percentOwned']
    except:
        percent = 0
    pos = player['eligibleSlots']
    dpos = player['defaultPositionId']
    if (n == 'Sebastian Aho') & (dpos == 4):
        n = 'Sebastian Aho (D)'
#     print(n)
    temp = pd.DataFrame({'fullName': [n], 'rostered': [percent], 'pos': [dpos]})
    rostership = pd.concat([rostership, temp], ignore_index=True)
    
rostership.head()

fixes = {
    'Tim Stutzle': 'Tim Stützle',
    'Alex Barre-Boulet': 'Alex Barré-Boulet',
    'Jani Hakanpaa': 'Jani Hakanpää',
    'Jesse Ylonen': 'Jesse Ylönen',
    'Alexis Lafreniere': 'Alexis Lafrenière',
    'Gustav Lindstrom': 'Gustav Lindström',
    'Alexander Kerfoot': 'Alex Kerfoot',
    'Johnny Beecher': 'John Beecher',
    'Samuel Walker': 'Sammy Walker',
    'Maxime Lajoie': 'Max Lajoie'
}

rostership['fullName'].replace(fixes, inplace=True)

pos_fixes = {
    1: 'C',
    2: 'LW',
    3: 'RW',
    4: 'D',
    5: 'G'
}

rostership['pos'].replace(pos_fixes, inplace=True)

rostership.loc[rostership['fullName'] == 'Sebastian Aho (D)']
rostership.loc[rostership['fullName'] == 'Andrei Vasilevskiy']

Unnamed: 0,fullName,rostered,pos
1361,Andrei Vasilevskiy,94.032193,G


# Read in saved player bios

In [23]:
player_bios = pd.read_csv('data/playerbios.csv', encoding='utf-8')
player_bios = player_bios.rename(columns={'id': 'playerId', 'name': 'fullName'})
player_bios

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight
0,8477967,Thatcher Demko,1995-12-08,L,76,192
1,8481559,Jack Hughes,2001-05-14,L,71,175
2,8479318,Auston Matthews,1997-09-17,L,75,215
3,8476453,Nikita Kucherov,1993-06-17,L,71,182
4,8480012,Elias Pettersson,1998-11-12,L,74,176
...,...,...,...,...,...,...
827,8479249,Andrew Poturalski,1994-01-14,R,70,187
828,8480990,Declan Chisholm,2000-01-12,L,73,190
829,8484287,Cole McWard,2001-06-09,R,73,192
830,8476873,Mark Jankowski,1994-09-13,L,76,212


# Function for getting bios of missing players

In [24]:
def add_player_bio (missing_id_list):
    
    new_player_bios = pd.DataFrame()
    
    for player in missing_id_list:
        URL = 'https://api-web.nhle.com/v1/player/' + str(player) + '/landing'
        data = requests.get(URL).json()

        first = data['firstName']['default']
        last = data['lastName']['default']
        birthDate = data['birthDate']
        hand = data['shootsCatches']
        height = data['heightInInches']
        weight = data['weightInPounds']

        temp_df = pd.DataFrame({'playerId': [player], 
                                'fullName': first + ' ' + last, 
                                'birthDate': [birthDate], 
                                'shootsCatches': [hand],
                                'height': [height],
                                'weight': [weight]
                               })

#         print(temp_df['name'])

        new_player_bios = pd.concat([new_player_bios, temp_df])
        
    return new_player_bios

# Get missing players bios

In [25]:
player_bios_list = player_bios['playerId'].to_list()
rankings_list = trimmed_stats['playerId'].to_list()

ids_not_in_bios = set(rankings_list) - set(player_bios_list)

list(ids_not_in_bios)

[8482712, 8476323]

In [26]:
missing_players = add_player_bio(list(ids_not_in_bios))
missing_players

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight
0,8482712,Oskar Olausson,2002-11-10,L,73,180
0,8476323,Max McCormick,1992-05-01,L,71,188


# Add missing bios and re-save bios file

In [27]:
updated_player_bios = pd.concat([player_bios, missing_players], axis=0)
updated_player_bios['playerId'] = updated_player_bios['playerId'].astype(int)

updated_player_bios.tail(18)

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight
816,8476883,Andrei Vasilevskiy,1994-07-25,L,76,220
817,8481593,Jayden Struble,2001-09-08,L,72,202
818,8480890,Jan Jenik,2000-09-15,L,73,185
819,8477499,Rasmus Ristolainen,1994-10-27,R,76,208
820,8479423,Alex Nylander,1998-03-02,R,73,192
821,8477473,Justin Bailey,1995-07-01,R,76,214
822,8480226,Marián Studenic,1998-10-28,L,73,190
823,8483495,Simon Nemec,2004-02-15,R,73,190
824,8478956,Samuel Laberge,1997-04-10,L,74,206
825,8479533,Joona Koppanen,1998-02-25,L,77,216


In [28]:
file_name = f"data/playerbios.csv"
updated_player_bios.to_csv(file_name, index=False)

# Get ESPN Ids

In [29]:
espn_ids = pd.read_csv('data/espn_ids.csv', encoding='utf-8')
# espn_ids = espn_ids.rename(columns={'id': 'espnId'})

for index, row in espn_ids.iterrows():
    playerId = row['id,fullName'].split(',')[0]
    fullName = row['id,fullName'].split(',')[1]
    
    espn_ids.at[index, 'espnId'] = playerId
    espn_ids.at[index, 'fullName'] = fullName

    
fixes = {
    'Tim Stutzle': 'Tim Stützle',
    'Jani Hakanpaa': 'Jani Hakanpää',
    'Benoit-Olivier Groulx': 'Bo Groulx',
    'Jesse Ylonen': 'Jesse Ylönen',
    'Alexis Lafreniere': 'Alexis Lafrenière',
    'Gustav Lindstrom': 'Gustav Lindström',
    'Alexander Kerfoot': 'Alex Kerfoot',
#     'Johnny Beecher': 'John Beecher',
    'Samuel Walker': 'Sammy Walker',
    'Alex Barre-Boulet': 'Alex Barré-Boulet'
}

inverse_fixes = {value: key for key, value in fixes.items()}

espn_ids['fullName'].replace(fixes, inplace=True)
    
espn_ids.loc[espn_ids['fullName'] == 'Sebastian Aho (D)']
espn_ids.loc[espn_ids['fullName'] == 'Andrei Vasilevskiy']

Unnamed: 0,"id,fullName",espnId,fullName
440,"2976847,Andrei Vasilevskiy",2976847,Andrei Vasilevskiy


# Add Roster Percent to bios

In [30]:
bios_on_date = updated_player_bios.copy()

for index, row in bios_on_date.iterrows():
    try:
        roster_percent = rostership.loc[rostership['fullName'] == row['fullName']]['rostered'].iloc[0]
        default_pos = rostership.loc[rostership['fullName'] == row['fullName']]['pos'].iloc[0]
    except:
        roster_percent = -2
        default_pos = 'S'
        
    bios_on_date.at[index, 'roster_percent'] = roster_percent
    bios_on_date.at[index, 'default_pos'] = default_pos
    
bios_on_date.sort_values('roster_percent')

bios_on_date.loc[bios_on_date['fullName'] == 'Sebastian Aho (D)']
bios_on_date.loc[bios_on_date['fullName'] == 'Andrei Vasilevskiy']

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight,roster_percent,default_pos
816,8476883,Andrei Vasilevskiy,1994-07-25,L,76,220,94.032193,G


# Add ESPNID to Bios

In [31]:
for index, row in bios_on_date.iterrows():
    try:
        espnId = espn_ids.loc[espn_ids['fullName'] == row['fullName']]['espnId'].iloc[0]
    
        bios_on_date.at[index, 'espnId'] = espnId
    except:
        bios_on_date.at[index, 'espnId'] = 0
    
bios_on_date['espnId'] = bios_on_date['espnId'].astype(int)

bios_on_date.loc[bios_on_date['espnId'] == 0]

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight,roster_percent,default_pos,espnId
633,8479320,Max Lajoie,1997-11-05,L,73,191,0.014378,D,0
641,8482411,Hunter Shepard,1995-11-07,L,72,215,0.071988,G,0
651,8483482,Tristan Luneau,2004-01-12,R,73,195,0.086274,D,0
654,8483489,Fraser Minten,2004-07-05,L,74,192,0.056071,C,0
656,8482470,Ilya Solovyov,2000-07-20,L,75,208,0.018693,D,0
663,8483512,Matt Savoie,2004-01-01,R,69,179,0.175401,C,0
672,8482511,Mason Lohrei,2001-01-17,L,77,211,0.255924,D,0
677,8480992,Magnus Chrona,2000-08-28,L,76,194,0.018719,G,0
693,8481534,Raphael Lavoie,2000-09-25,R,76,215,0.024443,C,0
697,8481028,Martin Pospisil,1999-11-19,L,74,173,0.867015,LW,0


# Create summary snapshot file and save

In [32]:
summary_stats_snapshot = pd.merge(trimmed_stats, bios_on_date, how='left', on='playerId')
summary_stats_snapshot

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,fantasyPoints,FPP60,FPPG,missedGames,gamesRemaining,fantasyPointsRemain,fullName,birthDate,shootsCatches,height,weight,roster_percent,default_pos,espnId
0,8481559,J. Hughes,NJD,C,22761,18,62.7,9.92,3.48,0,59,205.32,Jack Hughes,2001-05-14,L,71,175,99.752714,C,4565222
1,8479318,A. Matthews,TOR,C,28457,22,71.4,9.03,3.25,0,60,195.00,Auston Matthews,1997-09-17,L,75,215,99.905111,C,4024123
2,8476453,N. Kucherov,TBL,R,33535,26,90.5,9.72,3.48,0,55,191.40,Nikita Kucherov,1993-06-17,L,71,182,99.882108,RW,2563060
3,8480069,C. Makar,COL,D,33757,23,75.9,8.09,3.30,0,57,188.10,Cale Makar,1998-10-30,R,71,187,99.906537,D,4233563
4,8475660,C. Talbot,LAK,G,56970,16,67.8,4.28,4.24,0,42,184.10,Cam Talbot,1987-07-05,L,76,200,83.668092,G,5734
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
829,8482221,D. Levi,BUF,G,29916,9,-5.0,-0.60,-0.56,0,18,-10.86,Devon Levi,2001-12-27,L,72,184,13.227079,G,4894487
830,8480992,M. Chrona,SJS,G,1830,1,-5.4,-10.62,-5.40,0,1,-11.64,Magnus Chrona,2000-08-28,L,76,194,0.018719,G,0
831,8475789,J. Campbell,EDM,G,16007,5,-8.6,-1.93,-1.72,0,11,-22.10,Jack Campbell,1992-01-09,L,75,200,2.385889,G,5473
832,8482821,A. Soderblom,CHI,G,39789,12,-9.6,-0.87,-0.80,0,26,-23.28,Arvid Soderblom,1999-08-19,L,75,180,0.814975,G,4894729


In [33]:
fileName = f"data/summary_stats-{today.strftime('%Y-%m-%d')}.csv"
summary_stats_snapshot.to_csv(fileName, index=False)

In [34]:
inverse_fixes

{'Tim Stützle': 'Tim Stutzle',
 'Jani Hakanpää': 'Jani Hakanpaa',
 'Bo Groulx': 'Benoit-Olivier Groulx',
 'Jesse Ylönen': 'Jesse Ylonen',
 'Alexis Lafrenière': 'Alexis Lafreniere',
 'Gustav Lindström': 'Gustav Lindstrom',
 'Alex Kerfoot': 'Alexander Kerfoot',
 'Sammy Walker': 'Samuel Walker',
 'Alex Barré-Boulet': 'Alex Barre-Boulet'}

In [35]:
file_name = f"data/allG_df_fp-{today.strftime('%Y-%m-%d')}.csv"
allG_df.to_csv(file_name, index=False)

file_name = f"data/all_df_fp-{today.strftime('%Y-%m-%d')}.csv"
all_df.to_csv(file_name, index=False)

In [36]:
file_name = f"data/summary_statsG-{today.strftime('%Y-%m-%d')}.csv"
summary_statsG.to_csv(file_name, index=False)

file_name = f"data/summary_statsS-{today.strftime('%Y-%m-%d')}.csv"
summary_stats.to_csv(file_name, index=False)

In [37]:

# file_name = f"data/goaliesSummary-{today.strftime('%Y-%m-%d')}.csv"
# summary_statsG.to_csv(file_name, encoding='utf-8')

# file_name = f"data/skatersSummary-{today.strftime('%Y-%m-%d')}.csv"
# summary_stats.to_csv(file_name, encoding='utf-8')

# file_name = f"data/goaliesLog-{today.strftime('%Y-%m-%d')}.csv"
# allG_df.to_csv(file_name, encoding='utf-8')

# file_name = f"data/skatersLog-{today.strftime('%Y-%m-%d')}.csv"
# all_df.to_csv(file_name, encoding='utf-8')