In [76]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pprint import pprint
from datetime import datetime, timedelta, timezone
import pytz
import scipy.stats
from dateutil.relativedelta import relativedelta

import warnings

# Suppress the specific warnings
warnings.filterwarnings("ignore")

today = datetime.utcnow()

from api_keys import espn_link

# Get current standings

In [77]:
teamURL = 'https://api-web.nhle.com/v1/standings/now'

teams = requests.get(teamURL).json()
teams = teams['standings']

team_names = pd.DataFrame()

for team in teams:
    abbreviation = team['teamAbbrev']['default']
    name = team['teamName']['default']
    logoURL = team['teamLogo']
    gamesPlayed = team['gamesPlayed']
    gamesRemaining = 82 - gamesPlayed
    goalsFor = team['goalFor']
    goalsAgainst = team['goalAgainst']
    conference = team['conferenceName']
    division = team['divisionName']

    temp = pd.DataFrame({'abbreviation': [abbreviation], 'name': [name], 'gamesPlayed': [gamesPlayed],
                         'gamesRemaining': [gamesRemaining], 'goalsFor': [goalsFor], 'goalsAgainst': [goalsAgainst], 'logo': [logoURL],
                        'conference': [conference], 'division': [division]})

    team_names = pd.concat([team_names, temp], ignore_index=True)

    
teamList = team_names['abbreviation'].tolist()
team_names

team_names.to_csv('data/team_names.csv', index=False)
team_names

Unnamed: 0,abbreviation,name,gamesPlayed,gamesRemaining,goalsFor,goalsAgainst,logo,conference,division
0,BOS,Boston Bruins,16,66,56,32,https://assets.nhle.com/logos/nhl/svg/BOS_ligh...,Eastern,Atlantic
1,VGK,Vegas Golden Knights,19,63,67,47,https://assets.nhle.com/logos/nhl/svg/VGK_ligh...,Western,Pacific
2,NYR,New York Rangers,15,67,52,34,https://assets.nhle.com/logos/nhl/svg/NYR_ligh...,Eastern,Metropolitan
3,VAN,Vancouver Canucks,18,64,75,45,https://assets.nhle.com/logos/nhl/svg/VAN_ligh...,Western,Pacific
4,LAK,Los Angeles Kings,16,66,63,44,https://assets.nhle.com/logos/nhl/svg/LAK_ligh...,Western,Pacific
5,DAL,Dallas Stars,16,66,55,45,https://assets.nhle.com/logos/nhl/svg/DAL_ligh...,Western,Central
6,FLA,Florida Panthers,17,65,52,46,https://assets.nhle.com/logos/nhl/svg/FLA_ligh...,Eastern,Atlantic
7,COL,Colorado Avalanche,16,66,62,49,https://assets.nhle.com/logos/nhl/svg/COL_ligh...,Western,Central
8,WPG,Winnipeg Jets,17,65,63,54,https://assets.nhle.com/logos/nhl/svg/WPG_ligh...,Western,Central
9,TOR,Toronto Maple Leafs,17,65,62,59,https://assets.nhle.com/logos/nhl/svg/TOR_ligh...,Eastern,Atlantic


# Get schedule

This includes the winning goalie

In [78]:
baseURL = 'https://api-web.nhle.com/v1/club-schedule-season/'
season = '/20232024'

completeSked = pd.DataFrame()
homeOnlySked = pd.DataFrame()

date_format = "%d-%m-%Y"
eastern_timezone = pytz.timezone('US/Eastern')
today = datetime.utcnow()

for team in teamList:
    
    skedURL = baseURL + team + season
    sked = requests.get(skedURL).json()
    sked = sked['games']

    sked = [entry for entry in sked if entry.get('gameType') == 2]
    
    for game in sked:
        gameID = game['id']
        gameDate = game['startTimeUTC']
        awayTeam = game['awayTeam']['abbrev']
        homeTeam = game['homeTeam']['abbrev']
        
        # GET WINNING GOALIE
        
        if game['gameState'] == 'OFF':
            winningGoalie = game['winningGoalie']['playerId']
        else:
            winningGoalie = 0
        
        datetime_obj = datetime.strptime(gameDate, '%Y-%m-%dT%H:%M:%SZ')
        utc_timezone = pytz.timezone('UTC')
        utc_datetime = utc_timezone.localize(datetime_obj)
        eastern_timezone = pytz.timezone('US/Eastern')
        eastern_datetime = utc_datetime.astimezone(eastern_timezone)
        formatted_date_string = eastern_datetime.strftime('%d-%m-%Y')
        game_date = datetime.strptime(formatted_date_string, date_format)
        game_time = eastern_datetime.strftime("%A %I:%M %p")
        

        gameTemp = pd.DataFrame({'gameID': [gameID], 'gameDate': [game_date], 'gameTime': [game_time],
                                 'awayTeam': [awayTeam], 'homeTeam': [homeTeam], 'gameDT': [datetime_obj],
                                'winningGoalie': [winningGoalie]})
        completeSked = pd.concat([completeSked, gameTemp], ignore_index=True)

        if homeTeam == team:
            homeOnlySked = pd.concat([homeOnlySked, gameTemp], ignore_index=True)
    
homeOnlySked.to_csv('data/sked.csv', index=False)

completeSked.to_csv('data/sked_full.csv', index=False)

## Show remaining sked. Adjust time if needed

In [79]:
# remainSked = homeOnlySked.loc[homeOnlySked['gameDT'] >= (datetime.utcnow() + timedelta(days=0.5))]

remainSked = homeOnlySked.loc[homeOnlySked['gameDT'] >= (datetime.utcnow())]
remainSked.sort_values('gameDate')

Unnamed: 0,gameID,gameDate,gameTime,awayTeam,homeTeam,gameDT,winningGoalie
703,2023020276,2023-11-20,Monday 09:00 PM,LAK,ARI,2023-11-21 02:00:00,0
1115,2023020275,2023-11-20,Monday 08:00 PM,COL,NSH,2023-11-21 01:00:00,0
212,2023020274,2023-11-20,Monday 08:00 PM,NYR,DAL,2023-11-21 01:00:00,0
253,2023020272,2023-11-20,Monday 07:00 PM,EDM,FLA,2023-11-21 00:00:00,0
787,2023020277,2023-11-20,Monday 10:00 PM,CGY,SEA,2023-11-21 03:00:00,0
...,...,...,...,...,...,...,...
368,2023021308,2024-04-18,Thursday 08:00 PM,VAN,WPG,2024-04-19 00:00:00,0
81,2023021311,2024-04-18,Thursday 10:00 PM,ANA,VGK,2024-04-19 02:00:00,0
1065,2023021309,2024-04-18,Thursday 09:00 PM,SJS,CGY,2024-04-19 01:00:00,0
1106,2023021307,2024-04-18,Thursday 07:00 PM,SEA,MIN,2024-04-18 23:00:00,0


## Show completed sked. Adjust time if needed

In [80]:
# completedSked = homeOnlySked.loc[homeOnlySked['gameDT'] <= (datetime.utcnow() - timedelta(days=0.5))]

completedSked = homeOnlySked.loc[homeOnlySked['gameDT'] <= (datetime.utcnow())]
completedSked.sort_values('gameDate')

Unnamed: 0,gameID,gameDate,gameTime,awayTeam,homeTeam,gameDT,winningGoalie
533,2023020001,2023-10-10,Tuesday 05:30 PM,NSH,TBL,2023-10-10 21:30:00,8477992
41,2023020003,2023-10-10,Tuesday 10:30 PM,SEA,VGK,2023-10-11 02:30:00,8478499
656,2023020002,2023-10-10,Tuesday 08:00 PM,CHI,PIT,2023-10-11 00:00:00,8475852
0,2023020006,2023-10-11,Wednesday 07:30 PM,CHI,BOS,2023-10-11 23:30:00,8476999
164,2023020008,2023-10-11,Wednesday 10:00 PM,COL,LAK,2023-10-12 02:00:00,8480382
...,...,...,...,...,...,...,...
1073,2023020267,2023-11-19,Sunday 08:00 AM,TOR,MIN,2023-11-19 13:00:00,8479361
1235,2023020270,2023-11-19,Sunday 07:00 PM,BUF,CHI,2023-11-20 00:00:00,8480045
418,2023020268,2023-11-19,Sunday 05:30 PM,CBJ,PHI,2023-11-19 22:30:00,8481035
664,2023020269,2023-11-19,Sunday 06:00 PM,VGK,PIT,2023-11-19 23:00:00,8477968


# Bring in the most recent game logs. Manually adjust date, likely

In [81]:
# file_name = f"data/allG_df_raw-{today.strftime('%Y-%m-%d')}.csv"
file_name = f"data/allG_df_raw-2023-11-19.csv"
allG_df_raw = pd.read_csv(file_name, index_col=False)
allG_df_raw

# file_name = f"data/all_df_raw-{today.strftime('%Y-%m-%d')}.csv"
file_name = f"data/all_df_raw-2023-11-19.csv"
all_df_raw = pd.read_csv(file_name, index_col=False)
all_df_raw

Unnamed: 0,playerId,sweaterNumber,name,position,goals,assists,points,plusMinus,pim,hits,...,toi,powerPlayToi,shorthandedToi,team,opponent,secondaryPosition,tertiaryPosition,gameDate,gameTime,gameId
0,8478178,43,D. Raddysh,D,0,0,0,1,0,1,...,1084,5,19,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
1,8475177,44,C. de Haan,D,0,0,0,0,0,2,...,821,0,172,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
2,8480246,48,N. Perbix,D,0,0,0,-1,2,0,...,702,0,0,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
3,8475167,77,V. Hedman,D,0,1,1,-1,0,0,...,1615,386,248,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
4,8478416,81,E. Cernak,D,0,0,0,0,0,6,...,1160,0,276,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9601,8480144,64,D. Kampf,C,0,0,0,0,0,0,...,811,0,165,TOR,MIN,C,F,2023-11-19 00:00:00,Sunday 08:00 AM,2023020267
9602,8482259,74,B. McMann,C,0,0,0,0,0,0,...,493,0,0,TOR,MIN,C,F,2023-11-19 00:00:00,Sunday 08:00 AM,2023020267
9603,8477939,88,W. Nylander,R,1,1,2,0,0,0,...,1389,88,76,TOR,MIN,W,F,2023-11-19 00:00:00,Sunday 08:00 AM,2023020267
9604,8481582,89,N. Robertson,L,0,0,0,-1,0,1,...,462,0,2,TOR,MIN,W,F,2023-11-19 00:00:00,Sunday 08:00 AM,2023020267


# Show games missing from logs

In [82]:
all_games = homeOnlySked['gameID'].unique().tolist()

games_saved = all_df_raw['gameId'].unique().tolist()

games_done = completedSked['gameID'].unique().tolist()

missing_games = list(set(games_done) - set(games_saved))

missing_games

[2023020268, 2023020269, 2023020270, 2023020271]

# Scrape for nex game logs only

In [83]:
baseURL = 'https://api-web.nhle.com/v1/gamecenter/'
appendix = '/boxscore'

playsBaseURL = 'https://api-web.nhle.com/v1/gamecenter/'
playsAppendix = '/play-by-play'

date_format = "%d-%m-%Y"
eastern_timezone = pytz.timezone('US/Eastern')
today = datetime.utcnow()

awayLogs = {}
awayLogsG = {}
homeLogs = {}
homeLogsG = {}

for Id in missing_games:
    
    gameURL = baseURL + str(Id) + appendix
    
    print(gameURL)
    
    game = requests.get(gameURL).json()

    # pprint(game)

    numPeriods = len(game['boxscore']['linescore']['byPeriod'])
    score = game['boxscore']['linescore']['totals']
    awayTeam = game['awayTeam']
    homeTeam = game['homeTeam']
    awayTeamName = game['awayTeam']['abbrev']
    homeTeamName = game['homeTeam']['abbrev']
    awayTeamId = game['awayTeam']['id']
    homeTeamId = game['homeTeam']['id']
    gameDate = game['startTimeUTC']
    gameOutcome = game['gameOutcome']
    gameID = game['id']

    playerStatsAway = game['boxscore']['playerByGameStats']['awayTeam']
    playerStatsHome = game['boxscore']['playerByGameStats']['homeTeam']

    # pprint(playerStatsHome)

    # GET THE STARTING GOALTENDERS

    
    playsURL = playsBaseURL + str(Id) + playsAppendix
    plays = requests.get(playsURL).json()
    plays = plays['plays']

    homeStartingG = next((play['details']['goalieInNetId'] for play in plays if 
                          ((play['typeDescKey'] == 'shot-on-goal') and (play['details']['awaySOG'] == 1))), None)
    awayStartingG = next((play['details']['goalieInNetId'] for play in plays if 
                          ((play['typeDescKey'] == 'shot-on-goal') and (play['details']['homeSOG'] == 1))), None)

    homeEndingG = next((play['details']['goalieInNetId'] for play in reversed(plays) if 
                        ((play['typeDescKey'] == 'shot-on-goal') and (play['details']['eventOwnerTeamId'] == awayTeamId))), None)
    awayEndingG = next((play['details']['goalieInNetId'] for play in reversed(plays) if 
                        ((play['typeDescKey'] == 'shot-on-goal') and (play['details']['eventOwnerTeamId'] == homeTeamId))), None)

    # Get THE WINNING GOALTENDER


    winningGoalie = completedSked.loc[completedSked['gameID'] == Id]['winningGoalie'].iloc[0]


    # FIGURE OUT THE TYPE OF FINISH

    if numPeriods > 3:
        OT = 1
    else:
        OT = 0

    if numPeriods > 4:
        SO = 1
    else:
        SO = 0

    if score['home'] > score['away']:
        homeTeamWin = 1
        awayTeamWin = 0
    else:
        homeTeamWin = 0
        awayTeamWin = 1

    # FORMAT THE DATES AND TIMES

    datetime_obj = datetime.strptime(gameDate, '%Y-%m-%dT%H:%M:%SZ')
    utc_timezone = pytz.timezone('UTC')
    utc_datetime = utc_timezone.localize(datetime_obj)
    eastern_timezone = pytz.timezone('US/Eastern')
    eastern_datetime = utc_datetime.astimezone(eastern_timezone)
    formatted_date_string = eastern_datetime.strftime('%d-%m-%Y')
    game_date = datetime.strptime(formatted_date_string, date_format)
    game_time = eastern_datetime.strftime("%A %I:%M %p")

    # AWAY LOG ASSEMBLER
    
    awaySOGcheck = 0

    for defender in playerStatsAway['defense']:
        logName = str(gameID) + str(defender['playerId'])
        awayLogs[logName] = defender
        awayLogs[logName]['name'] = awayLogs[logName]['name']['default']
        awayLogs[logName]['team'] = awayTeamName
        awayLogs[logName]['opponent'] = homeTeamName
        awayLogs[logName]['secondaryPosition'] = 'D'
        awayLogs[logName]['tertiaryPosition'] = 'D'
        awayLogs[logName]['gameDate'] = game_date
        awayLogs[logName]['gameTime'] = game_time
        awayLogs[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['toi'].split(':'))))
        awayLogs[logName]['powerPlayToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['powerPlayToi'].split(':'))))
        awayLogs[logName]['shorthandedToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['shorthandedToi'].split(':'))))
        awayLogs[logName]['gameId'] = Id
        
        awaySOGcheck = awaySOGcheck + awayLogs[logName]['shots']

    for forward in playerStatsAway['forwards']:
        logName = str(gameID) + str(forward['playerId'])
        awayLogs[logName] = forward
        awayLogs[logName]['name'] = awayLogs[logName]['name']['default']
        awayLogs[logName]['team'] = awayTeamName
        awayLogs[logName]['opponent'] = homeTeamName

        if forward['position'] == 'C':
            awayLogs[logName]['secondaryPosition'] = 'C'
        else:
            awayLogs[logName]['secondaryPosition'] = 'W'
        awayLogs[logName]['tertiaryPosition'] = 'F'

        awayLogs[logName]['gameDate'] = game_date
        awayLogs[logName]['gameTime'] = game_time
        awayLogs[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['toi'].split(':'))))
        awayLogs[logName]['powerPlayToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['powerPlayToi'].split(':'))))
        awayLogs[logName]['shorthandedToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['shorthandedToi'].split(':'))))
        awayLogs[logName]['gameId'] = Id
        
        awaySOGcheck = awaySOGcheck + awayLogs[logName]['shots']
        
    awayGshots = 0
        
    for goalie in playerStatsAway['goalies']:
        if goalie['toi'] != '00:00':
            logName = str(gameID) + str(goalie['playerId'])
            awayLogsG[logName] = goalie
            awayLogsG[logName]['name'] = awayLogsG[logName]['name']['default']
            awayLogsG[logName]['team'] = awayTeamName
            awayLogsG[logName]['opponent'] = homeTeamName
            awayLogsG[logName]['gameDate'] = game_date
            awayLogsG[logName]['gameTime'] = game_time
            awayLogsG[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogsG[logName]['toi'].split(':'))))
            awayLogsG[logName]['gameId'] = Id
            
            # FIND THE STARTER

            if goalie['playerId'] == awayStartingG:
                awayLogsG[logName]['start'] = 1
            else:
                awayLogsG[logName]['start'] = 0

            # DETERMINE SHUTOUT

            shots = awayLogsG[logName]['saveShotsAgainst'].split('/')[1]
            saves = awayLogsG[logName]['saveShotsAgainst'].split('/')[0]
            if (shots == saves) & (awayLogsG[logName]['toi'] > 3446):
                awayLogsG[logName]['shutout'] = 1
            else:
                awayLogsG[logName]['shutout'] = 0
                
            awayGshots = awayGshots + int(shots)

            # DETERMINE WIN

            if goalie['playerId'] == winningGoalie:
                awayLogsG[logName]['win'] = 4
            elif (awayTeamWin == 0) & (OT == 1) & (awayEndingG == goalie['playerId']):
                awayLogsG[logName]['win'] = 1
            else:
                awayLogsG[logName]['win'] = 0
                
            # CHECK FOR GOALIE ABBERATION
            
            if int(shots) - int(saves) != awayLogsG[logName]['goalsAgainst']:
                print(f"Error for goalies: {awayLogsG[logName]['name']}")



    # HOME LOG ASSEMBLER
                      
    homeSOGcheck = 0

    for defender in playerStatsHome['defense']:
        logName = str(gameID) + str(defender['playerId'])
        homeLogs[logName] = defender
        homeLogs[logName]['name'] = homeLogs[logName]['name']['default']
        homeLogs[logName]['team'] = homeTeamName
        homeLogs[logName]['opponent'] = awayTeamName
        homeLogs[logName]['secondaryPosition'] = 'D'
        homeLogs[logName]['tertiaryPosition'] = 'D'
        homeLogs[logName]['gameDate'] = game_date
        homeLogs[logName]['gameTime'] = game_time
        homeLogs[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['toi'].split(':'))))
        homeLogs[logName]['powerPlayToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['powerPlayToi'].split(':'))))
        homeLogs[logName]['shorthandedToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['shorthandedToi'].split(':'))))
        homeLogs[logName]['gameId'] = Id
        
        homeSOGcheck = homeSOGcheck + homeLogs[logName]['shots']
                      
    for forward in playerStatsHome['forwards']:
        logName = str(gameID) + str(forward['playerId'])
        homeLogs[logName] = forward
        homeLogs[logName]['name'] = homeLogs[logName]['name']['default']
        homeLogs[logName]['team'] = homeTeamName
        homeLogs[logName]['opponent'] = awayTeamName

        if forward['position'] == 'C':
            homeLogs[logName]['secondaryPosition'] = 'C'
        else:
            homeLogs[logName]['secondaryPosition'] = 'W'
        homeLogs[logName]['tertiaryPosition'] = 'F'

        homeLogs[logName]['gameDate'] = game_date
        homeLogs[logName]['gameTime'] = game_time
        homeLogs[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['toi'].split(':'))))
        homeLogs[logName]['powerPlayToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['powerPlayToi'].split(':'))))
        homeLogs[logName]['shorthandedToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['shorthandedToi'].split(':'))))
        homeLogs[logName]['gameId'] = Id
        
        homeSOGcheck = homeSOGcheck + homeLogs[logName]['shots']
        
    homeGshots = 0
    
    for goalie in playerStatsHome['goalies']:
        if goalie['toi'] != '00:00':
            logName = str(gameID) + str(goalie['playerId'])
            homeLogsG[logName] = goalie
            homeLogsG[logName]['name'] = homeLogsG[logName]['name']['default']
            homeLogsG[logName]['team'] = homeTeamName
            homeLogsG[logName]['opponent'] = awayTeamName
            homeLogsG[logName]['gameDate'] = game_date
            homeLogsG[logName]['gameTime'] = game_time
            homeLogsG[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogsG[logName]['toi'].split(':'))))
            homeLogsG[logName]['gameId'] = Id
            
            # FIND THE STARTER

            if goalie['playerId'] == homeStartingG:
                homeLogsG[logName]['start'] = 1
            else:
                homeLogsG[logName]['start'] = 0

            # DETERMINE SHUTOUT

            shots = homeLogsG[logName]['saveShotsAgainst'].split('/')[1]
            saves = homeLogsG[logName]['saveShotsAgainst'].split('/')[0]
            if (shots == saves) & (homeLogsG[logName]['toi'] > 3446):
                homeLogsG[logName]['shutout'] = 1
            else:
                homeLogsG[logName]['shutout'] = 0
                
            homeGshots = homeGshots + int(shots)

            # DETERMINE WIN

            if goalie['playerId'] == winningGoalie:
                homeLogsG[logName]['win'] = 4
            elif (homeTeamWin == 0) & (OT == 1) & (homeEndingG == goalie['playerId']):
                homeLogsG[logName]['win'] = 1
            else:
                homeLogsG[logName]['win'] = 0
                
            # CHECK FOR GOALIE ABBERATION
            
            if int(shots) - int(saves) != homeLogsG[logName]['goalsAgainst']:
                print(f"Error for goalies: {homeLogsG[logName]['name']} vs {homeGshots}")
        

https://api-web.nhle.com/v1/gamecenter/2023020268/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020269/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020270/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020271/boxscore


# Assemble and append new game logs

In [84]:
homeG_df = pd.DataFrame(homeLogsG)
homeG_df = homeG_df.transpose()

awayG_df = pd.DataFrame(awayLogsG)
awayG_df = awayG_df.transpose()

allG_df_raw = pd.concat([allG_df_raw, homeG_df, awayG_df], axis=0)

file_name = f"data/allG_df_raw-{today.strftime('%Y-%m-%d')}.csv"
allG_df_raw.to_csv(file_name, index=False)

home_df = pd.DataFrame(homeLogs)
home_df = home_df.transpose()

away_df = pd.DataFrame(awayLogs)
away_df = away_df.transpose()

all_df_raw = pd.concat([all_df_raw, home_df, away_df], axis=0)

file_name = f"data/all_df_raw-{today.strftime('%Y-%m-%d')}.csv"
all_df_raw.to_csv(file_name, index=False)

# Clean and add FP to goalies; show summary stats

In [85]:
allG_df = allG_df_raw.copy()

allG_df[['saves', 'shots']] = allG_df['saveShotsAgainst'].str.split('/', expand=True).apply(pd.to_numeric)
allG_df[['evSaves', 'evShots']] = allG_df['evenStrengthShotsAgainst'].str.split('/', expand=True).apply(pd.to_numeric)
allG_df[['ppSaves', 'ppShots']] = allG_df['powerPlayShotsAgainst'].str.split('/', expand=True).apply(pd.to_numeric)

allG_df = allG_df.drop(columns=(['saveShotsAgainst', 'savePctg', 'evenStrengthShotsAgainst',
                                   'powerPlayShotsAgainst', 'shorthandedShotsAgainst', 'evenStrengthGoalsAgainst',
                                  'powerPlayGoalsAgainst', 'shorthandedGoalsAgainst']))

columns_to_convert1 = ['toi', 'start', 'shutout', 'win', 'pim']
allG_df[columns_to_convert1] = allG_df[columns_to_convert1].apply(pd.to_numeric)

for index, row in allG_df.iterrows():
    decisionPoints = 0
    shutoutPoints = 0
    decisionType = 0

    # ADD A GAMES PLAYED TALLY
    
    if row['toi'] > 0:
        allG_df.at[index, 'gamesPlayed'] = 1

    # CALCULATE SHUTOUT POINTS

    if (row['shutout'] == 1):
        shutoutPoints = 3
    else:
        shutoutPoints = 0
        
    # CALCULATE DECISION POINTS
    
    if row['win'] == 4:
        decisionType = 'W'
    elif row['win'] == 1:
        decisionType = 'OTL'
    else:
        decisionType = 'L'
    
    decisionPoints = row['win']

    goalsAgainstPoints = (row['goalsAgainst']) * -2
    
    savesPoints = (row['shots'] - row['goalsAgainst']) * 0.2
    
    # TOTAL FANTASY POINTS
    
    fantasyPoints = decisionPoints + shutoutPoints + goalsAgainstPoints + savesPoints
    
    # ADD TO DATAFRAME
    
    allG_df.at[index, 'fantasyPoints'] = fantasyPoints
    allG_df.at[index, 'decisionType'] = decisionType
    
columns_to_convert3 = ['gamesPlayed']
allG_df[columns_to_convert3] = allG_df[columns_to_convert3].astype(int)

summary_statsG = allG_df.groupby('playerId').agg({
    'name': 'first',
    'team': 'last',
    'position': 'first',
    'toi': 'sum',
    'gamesPlayed': 'sum',
    'saves': 'sum',
    'shots': 'sum',
#     'evenSaves': 'sum',
    'shutout': 'sum',
    'fantasyPoints': 'sum'
}).reset_index()

summary_statsG.sort_values('fantasyPoints', ascending=False).head(25)

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,saves,shots,shutout,fantasyPoints
7,8475660,C. Talbot,LAK,G,46333,13,347,373,1,57.4
45,8478499,A. Hill,VGK,G,40091,11,299,322,2,52.8
32,8477967,T. Demko,VAN,G,42179,12,325,351,2,51.0
59,8480280,J. Swayman,BOS,G,29134,8,224,238,1,48.8
56,8479979,J. Oettinger,DAL,G,43263,12,343,373,0,41.6
23,8476999,L. Ullmark,BOS,G,29045,8,233,251,0,35.6
17,8476412,J. Binnington,STL,G,42250,12,344,373,1,34.8
8,8475683,S. Bobrovsky,FLA,G,46488,13,330,364,1,34.0
28,8477465,T. Jarry,PIT,G,43012,13,313,344,3,33.6
53,8479394,C. Hart,PHI,G,32949,10,248,270,1,32.6


# Function to caluclate missed games from injury based on sked

In [86]:
def missed_games (team, returnDate):
    tempDate = datetime.strptime(returnDate, '%Y-%m-%d')
#     utc_datetime = datetime.strptime(returnDate, '%Y-%m-%d').replace(tzinfo=timezone.utc)
    temp = homeOnlySked.loc[(homeOnlySked['awayTeam'] == team) | (homeOnlySked['homeTeam'] == team)]
    temp = temp.loc[(temp['gameDT'] >= datetime.utcnow()) & (temp['gameDate'] < tempDate)]
                     
    return len(temp)

# Set all players to 0 missed games; then manually adjust dictionary

In [87]:
summary_statsG['missedGames'] = 0

In [88]:
injuries_listG = {
    'F. Andersen': '2023-12-02',
    'V. Husso': '2023-11-22'

}

for player, date in injuries_listG.items():
    team = summary_statsG.loc[summary_statsG['name'] == player]['team'].iloc[0]
    missedGames = missed_games(team, date)
    summary_statsG.loc[summary_statsG['name'] == player, 'missedGames'] = missedGames

# Add crease minutes to team_names to calculate share %

In [89]:
for index, row in team_names.iterrows():
    team_names.at[index, 'creaseMins'] = allG_df.loc[allG_df['team'] == row['abbreviation']]['toi'].sum()

team_names['creaseMins'] = team_names['creaseMins'].astype(int)

for index, row in summary_statsG.iterrows():
    
    gamesRemaining = team_names.loc[team_names['abbreviation'] == row['team']]['gamesRemaining'].iloc[0] - row['missedGames']
    summary_statsG.at[index, 'gamesRemaining'] = gamesRemaining
    
    creaseShare = row['toi'] / team_names.loc[team_names['abbreviation'] == row['team']]['creaseMins'].sum()
    summary_statsG.at[index, 'creaseShare'] = round(creaseShare * 100, 2)
    
    FPP60 = row['fantasyPoints'] / row['toi'] * 3600
    summary_statsG.at[index, 'FPP60'] = round(FPP60, 2)
    
    FPPG = row['fantasyPoints'] / row['gamesPlayed']
    summary_statsG.at[index, 'FPPG'] = round(FPPG, 2)
    
    FPremain = FPP60 * gamesRemaining * creaseShare
    summary_statsG.at[index, 'fantasyPointsRemain'] = round(FPremain, 2)

summary_statsG['gamesRemaining'] = summary_statsG['gamesRemaining'].astype(int)

summary_statsG.loc[summary_statsG['team'] == 'NYR']

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,saves,shots,shutout,fantasyPoints,missedGames,gamesRemaining,creaseShare,FPP60,FPPG,fantasyPointsRemain
1,8471734,J. Quick,NYR,G,20028,6,141,152,1,26.2,0,67,36.64,4.71,4.37,115.61
12,8475839,L. Domingue,NYR,G,3600,1,25,26,0,7.0,0,67,6.59,7.0,7.0,30.89
40,8478048,I. Shesterkin,NYR,G,31035,9,218,239,0,29.6,0,67,56.78,3.43,3.29,130.61


# Function for summary statistics

In [90]:
def summary_statistics(df):
    temp_build = df.groupby('playerId').agg({
        'name': 'first',
        'team': 'last',
        'position': 'first',
        'toi': 'sum',
        'gamesPlayed': 'sum',
        'goals': 'sum',
        'assists': 'sum',
        'specialTeams': 'sum',
        'shots': 'sum',
        'hits': 'sum',
        'blockedShots': 'sum',
        'powerPlayToi': 'sum',
        'shorthandedToi': 'sum',
        'plusMinus': 'sum',
        'fantasyPoints': 'sum',
        'secondaryPosition': 'first',
        'tertiaryPosition': 'first'
    }).reset_index()


    temp_build['FPP60'] = (temp_build['fantasyPoints'] / temp_build['toi'] * 3600).round(2)
    temp_build['FPPG'] = (temp_build['fantasyPoints'] / temp_build['gamesPlayed']).round(2)

    temp_build = temp_build.sort_values(by='FPPG', ascending=False)
    
    return temp_build

# Clean the skaters; add FP and show summary stats

In [91]:
all_df = all_df_raw.copy()

all_df = all_df.drop(columns=(['faceoffWinningPctg']))

columns_to_convert1 = ['goals', 'assists', 'points', 'plusMinus', 'pim', 'hits', 'blockedShots',
                      'powerPlayGoals', 'powerPlayPoints', 'shorthandedGoals', 'shPoints', 'shots',
                       'toi', 'powerPlayToi', 'shorthandedToi']
all_df[columns_to_convert1] = all_df[columns_to_convert1].apply(pd.to_numeric)


for index, row in all_df.iterrows():
    
    if row['toi'] > 0:
        all_df.at[index, 'gamesPlayed'] = 1
    
    specialTeams = row['powerPlayPoints'] + row['shPoints']
    fantasyPoints = (row['goals']*2) + row['assists'] + (specialTeams * .5) + (row['blockedShots'] * .5) + ((row['hits'] + row['shots']) * .1)
    all_df.at[index, 'fantasyPoints'] = fantasyPoints
    all_df.at[index, 'specialTeams'] = specialTeams
    
columns_to_convert4 = ['specialTeams', 'gamesPlayed']
all_df[columns_to_convert4] = all_df[columns_to_convert4].astype(int)

summary_stats = summary_statistics(all_df).sort_values('fantasyPoints', ascending=False)
all_df

Unnamed: 0,playerId,sweaterNumber,name,position,goals,assists,points,plusMinus,pim,hits,...,team,opponent,secondaryPosition,tertiaryPosition,gameDate,gameTime,gameId,gamesPlayed,fantasyPoints,specialTeams
0,8478178,43,D. Raddysh,D,0,0,0,1,0,1,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,0.9,0
1,8475177,44,C. de Haan,D,0,0,0,0,0,2,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,0.7,0
2,8480246,48,N. Perbix,D,0,0,0,-1,2,0,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,0.6,0
3,8475167,77,V. Hedman,D,0,1,1,-1,0,0,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,1.9,0
4,8478416,81,E. Cernak,D,0,0,0,0,0,6,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,1.3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20230202718477953,8477953,42,K. Kapanen,R,0,0,0,-1,2,0,...,STL,ANA,W,F,2023-11-19 00:00:00,Sunday 08:00 PM,2023020271,1,0.0,0
20230202718481543,8481543,59,N. Alexandrov,C,0,0,0,1,0,2,...,STL,ANA,C,F,2023-11-19 00:00:00,Sunday 08:00 PM,2023020271,1,0.7,0
20230202718482089,8482089,63,J. Neighbours,L,1,0,1,1,0,0,...,STL,ANA,W,F,2023-11-19 00:00:00,Sunday 08:00 PM,2023020271,1,2.6,0
20230202718476897,8476897,70,O. Sundqvist,C,0,1,1,1,0,0,...,STL,ANA,C,F,2023-11-19 00:00:00,Sunday 08:00 PM,2023020271,1,1.0,0


# Set missed games as 0 and manually adjust injury dictionary

In [94]:
summary_stats['missedGames'] = 0

In [95]:
injuries_list = {
    'T. Zegras': '2023-11-22',
    'J. Drysdale': '2023-11-22',
    'J. McBain': '2023-11-20',
    'B. Hayton': '2023-12-22',
    'J. Valimaki': '2023-11-22',
    'T. Dermott': '2023-11-22',
    'D. Savard': '2023-12-07',
    'M. Grzelcyk': '2023-11-25',
    'T. Thompson': '2023-12-02',
#     'Z. Benson': '2023-11-17',
#     'T. Hall': '2023-11-16',
    'A. Athanasiou': '2023-11-22',
    'A. Lehkonen': '2023-12-05',
    'J. Roslovic': '2023-12-14',
    'M. Janmark': '2023-11-22',
    'D. Holloway': '2023-12-10',
    'A. Barkov': '2023-11-22',
    'J. Mahura': '2023-11-24',
    'K. Dach': '2024-04-20',
    'L. Schenn': '2023-11-24',
    'T. Novak': '2023-12-12',
    'N. Hischier': '2023-11-22',
    'T. Meier': '2023-11-22',
#     'J. Hughes': '2023-11-14',
    'F. Chytil': '2023-11-20',
    'A. Fox': '2023-11-29',
    'R. Greig': '2023-11-24',
    'T. Chabot': '2023-12-01',
    'A. Barabanov': '2023-12-05',
    'A. Burakovsky': '2023-12-02',
    'T. Liljegren': '2023-11-30',
    'J. Klingberg': '2023-11-24',
    'C. Soucy': '2023-12-28',
    'M. Fehervary': '2023-11-22',
#     'A. Mantha': '2023-11-18',
    'G. Vilardi': '2023-11-22'
    
}

for player, date in injuries_list.items():
    team = summary_stats.loc[summary_stats['name'] == player]['team'].iloc[0]
    missedGames = missed_games(team, date)
    summary_stats.loc[summary_stats['name'] == player, 'missedGames'] = missedGames

# Add fantasy stats and missing games

In [96]:
for index, row in summary_stats.iterrows():
    
    gamesRemaining = team_names.loc[team_names['abbreviation'] == row['team']]['gamesRemaining']

    summary_stats.loc[index, 'gamesRemaining'] = int(gamesRemaining) - row['missedGames']

    summary_stats.loc[index, 'fantasyPointsRemain'] = row['FPPG'] * (int(gamesRemaining) - row['missedGames'])
    
summary_stats['gamesRemaining'] = summary_stats['gamesRemaining'].astype(int)


summary_stats.loc[summary_stats['team'] == 'NYR']

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,goals,assists,specialTeams,shots,...,shorthandedToi,plusMinus,fantasyPoints,secondaryPosition,tertiaryPosition,FPP60,FPPG,missedGames,gamesRemaining,fantasyPointsRemain
391,8478550,A. Panarin,NYR,L,17703,15,10,16,11,64,...,0,5,49.0,W,F,9.96,3.27,0,67,219.09
194,8476885,J. Trouba,NYR,D,20051,15,1,4,0,29,...,2722,3,39.6,D,D,7.11,2.64,0,67,176.88
81,8475184,C. Kreider,NYR,L,16789,15,10,4,8,38,...,1270,5,35.3,W,F,7.57,2.35,0,67,157.45
144,8476389,V. Trocheck,NYR,C,17927,15,4,10,5,26,...,1154,3,32.6,C,F,6.55,2.17,0,67,145.39
416,8479323,A. Fox,NYR,D,12639,10,3,8,9,15,...,839,1,26.4,D,D,7.52,2.64,5,62,163.68
217,8476979,E. Gustafsson,NYR,D,17041,15,3,8,3,23,...,336,7,26.3,D,D,5.56,1.75,0,67,117.25
161,8476459,M. Zibanejad,NYR,C,17704,15,2,8,7,42,...,1725,7,25.2,C,F,5.12,1.68,0,67,112.56
667,8482109,A. Lafrenière,NYR,L,14948,15,7,4,2,27,...,11,3,23.2,W,F,5.59,1.55,0,67,103.85
653,8482073,B. Schneider,NYR,D,14243,15,1,3,0,18,...,963,-1,22.2,D,D,5.61,1.48,0,67,99.16
554,8480817,K. Miller,NYR,D,20283,15,1,6,1,12,...,1812,6,22.0,D,D,3.9,1.47,0,67,98.49


# Single Frame to add bio details

In [97]:
trim_stats = summary_stats.drop(columns=['goals', 'assists', 'specialTeams', 'shots', 'hits', 'blockedShots', 'powerPlayToi',
                                        'shorthandedToi', 'plusMinus', 'secondaryPosition', 'tertiaryPosition'])

trim_statsG = summary_statsG.drop(columns=['saves', 'shots', 'shutout'])

column_order = ['playerId', 'name', 'team', 'position', 'toi', 'gamesPlayed', 'fantasyPoints', 'FPP60', 'FPPG', 'missedGames', 'gamesRemaining', 'fantasyPointsRemain']

trim_stats = trim_stats[column_order]
trim_statsG = trim_statsG[column_order]

print(trim_statsG.columns.to_list())
print(trim_stats.columns.to_list())

trimmed_stats = pd.concat([trim_stats, trim_statsG])
trimmed_stats = trimmed_stats.sort_values('fantasyPointsRemain', ascending = False)
trimmed_stats

['playerId', 'name', 'team', 'position', 'toi', 'gamesPlayed', 'fantasyPoints', 'FPP60', 'FPPG', 'missedGames', 'gamesRemaining', 'fantasyPointsRemain']
['playerId', 'name', 'team', 'position', 'toi', 'gamesPlayed', 'fantasyPoints', 'FPP60', 'FPPG', 'missedGames', 'gamesRemaining', 'fantasyPointsRemain']


Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,fantasyPoints,FPP60,FPPG,missedGames,gamesRemaining,fantasyPointsRemain
623,8481559,J. Hughes,NJD,C,13377,11,40.0,10.76,3.64,0,66,240.24
7,8475660,C. Talbot,LAK,G,46333,13,57.4,4.46,4.42,0,66,234.68
306,8477956,D. Pastrnak,BOS,R,18279,16,55.0,10.83,3.44,0,66,227.04
156,8476453,N. Kucherov,TBL,R,21962,17,59.4,9.74,3.49,0,64,223.36
413,8479318,A. Matthews,TOR,C,21586,17,57.6,9.61,3.39,0,65,220.35
...,...,...,...,...,...,...,...,...,...,...,...,...
65,8480992,M. Chrona,SJS,G,1830,1,-5.4,-10.62,-5.40,0,65,-20.76
55,8479973,S. Skinner,EDM,G,41419,12,-7.4,-0.64,-0.62,0,66,-30.62
73,8482821,A. Soderblom,CHI,G,23314,7,-7.8,-1.20,-1.11,0,66,-32.55
9,8475789,J. Campbell,EDM,G,16007,5,-8.6,-1.93,-1.72,0,66,-35.58


# Get ESPN ownership %

In [98]:
rosters = requests.get(espn_link).json()

# pprint(rosters[9])

rostership = pd.DataFrame()

for player in rosters:
    n = player['fullName']
    percent = player['ownership']['percentOwned']
    pos = player['eligibleSlots']
    dpos = player['defaultPositionId']
    if (n == 'Sebastian Aho') & (dpos == 4):
        n = 'Sebastian Aho (D)'
#     print(n)
    temp = pd.DataFrame({'fullName': [n], 'rostered': [percent], 'pos': [dpos]})
    rostership = pd.concat([rostership, temp], ignore_index=True)
    
rostership.head()

fixes = {
    'Tim Stutzle': 'Tim Stützle',
    'Alex Barre-Boulet': 'Alex Barré-Boulet',
    'Jani Hakanpaa': 'Jani Hakanpää',
    'Jesse Ylonen': 'Jesse Ylönen',
    'Alexis Lafreniere': 'Alexis Lafrenière',
    'Gustav Lindstrom': 'Gustav Lindström',
    'Alexander Kerfoot': 'Alex Kerfoot',
    'Johnny Beecher': 'John Beecher',
    'Samuel Walker': 'Sammy Walker',
    'Maxime Lajoie': 'Max Lajoie'
}

rostership['fullName'].replace(fixes, inplace=True)

pos_fixes = {
    1: 'C',
    2: 'LW',
    3: 'RW',
    4: 'D',
    5: 'G'
}

rostership['pos'].replace(pos_fixes, inplace=True)

rostership.loc[rostership['fullName'] == 'Sebastian Aho (D)']

Unnamed: 0,fullName,rostered,pos
1509,Sebastian Aho (D),0.861892,D


# Read in saved player bios

In [99]:
player_bios = pd.read_csv('data/playerbios.csv', encoding='utf-8')
player_bios = player_bios.rename(columns={'id': 'playerId', 'name': 'fullName'})

# Function for getting bios of missing players

In [100]:
def add_player_bio (missing_id_list):
    
    new_player_bios = pd.DataFrame()
    
    for player in missing_id_list:
        URL = 'https://api-web.nhle.com/v1/player/' + str(player) + '/landing'
        data = requests.get(URL).json()

        first = data['firstName']['default']
        last = data['lastName']['default']
        birthDate = data['birthDate']
        hand = data['shootsCatches']
        height = data['heightInInches']
        weight = data['weightInPounds']

        temp_df = pd.DataFrame({'playerId': [player], 
                                'fullName': first + ' ' + last, 
                                'birthDate': [birthDate], 
                                'shootsCatches': [hand],
                                'height': [height],
                                'weight': [weight]
                               })

#         print(temp_df['name'])

        new_player_bios = pd.concat([new_player_bios, temp_df])
        
    return new_player_bios

# Get missing players bios

In [101]:
player_bios_list = player_bios['playerId'].to_list()
rankings_list = trimmed_stats['playerId'].to_list()

ids_not_in_bios = set(rankings_list) - set(player_bios_list)

list(ids_not_in_bios)

[]

In [102]:
missing_players = add_player_bio(list(ids_not_in_bios))
missing_players

# Add missing bios and re-save bios file

In [103]:
updated_player_bios = pd.concat([player_bios, missing_players], axis=0)
updated_player_bios['playerId'] = updated_player_bios['playerId'].astype(int)

updated_player_bios.tail(18)

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight
790,8481024,Linus Karlsson,1999-11-16,R,73,178
791,8481056,Spencer Stastney,2000-01-04,L,72,184
792,8477986,Brandon Montour,1994-04-11,R,72,199
793,8476934,Brock McGinn,1994-02-02,L,72,187
794,8477932,Aaron Ekblad,1996-02-07,R,76,215
795,8483597,Milos Kelemen,1999-07-06,L,74,210
796,8479981,Jonah Gadjovich,1998-10-12,L,74,209
797,8476525,Colin Miller,1992-10-29,R,73,200
798,8479312,Alex Lyon,1992-12-09,L,73,196
799,8479536,Axel Jonsson-Fjallby,1998-02-10,L,73,189


In [104]:
file_name = f"data/playerbios.csv"
updated_player_bios.to_csv(file_name, index=False)

# Get ESPN Ids

In [105]:
espn_ids = pd.read_csv('data/espn_ids.csv', encoding='utf-8')
# espn_ids = espn_ids.rename(columns={'id': 'espnId'})

for index, row in espn_ids.iterrows():
    playerId = row['id,fullName'].split(',')[0]
    fullName = row['id,fullName'].split(',')[1]
    
    espn_ids.at[index, 'espnId'] = playerId
    espn_ids.at[index, 'fullName'] = fullName

    
fixes = {
    'Tim Stutzle': 'Tim Stützle',
    'Jani Hakanpaa': 'Jani Hakanpää',
    'Benoit-Olivier Groulx': 'Bo Groulx',
    'Jesse Ylonen': 'Jesse Ylönen',
    'Alexis Lafreniere': 'Alexis Lafrenière',
    'Gustav Lindstrom': 'Gustav Lindström',
    'Alexander Kerfoot': 'Alex Kerfoot',
#     'Johnny Beecher': 'John Beecher',
    'Samuel Walker': 'Sammy Walker',
    'Alex Barre-Boulet': 'Alex Barré-Boulet'
}

inverse_fixes = {value: key for key, value in fixes.items()}

espn_ids['fullName'].replace(fixes, inplace=True)
    
espn_ids.loc[espn_ids['fullName'] == 'Sebastian Aho (D)']

Unnamed: 0,"id,fullName",espnId,fullName
1083,"4272688,Sebastian Aho (D)",4272688,Sebastian Aho (D)


# Add Roster Percent to bios

In [106]:
bios_on_date = player_bios.copy()

for index, row in bios_on_date.iterrows():
    try:
        roster_percent = rostership.loc[rostership['fullName'] == row['fullName']]['rostered'].iloc[0]
        default_pos = rostership.loc[rostership['fullName'] == row['fullName']]['pos'].iloc[0]
    except:
        roster_percent = -2
        default_pos = 'S'
        
    bios_on_date.at[index, 'roster_percent'] = roster_percent
    bios_on_date.at[index, 'default_pos'] = default_pos
    
bios_on_date.sort_values('roster_percent')

bios_on_date.loc[bios_on_date['fullName'] == 'Sebastian Aho (D)']

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight,roster_percent,default_pos
445,8480222,Sebastian Aho (D),1996-02-17,L,70,180,0.861892,D


# Add ESPNID to Bios

In [107]:
for index, row in bios_on_date.iterrows():
    try:
        espnId = espn_ids.loc[espn_ids['fullName'] == row['fullName']]['espnId'].iloc[0]
    
        bios_on_date.at[index, 'espnId'] = espnId
    except:
        bios_on_date.at[index, 'espnId'] = 0
    
bios_on_date['espnId'] = bios_on_date['espnId'].astype(int)

bios_on_date.loc[bios_on_date['espnId'] == 0]

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight,roster_percent,default_pos,espnId
633,8479320,Max Lajoie,1997-11-05,L,73,191,0.011746,D,0
641,8482411,Hunter Shepard,1995-11-07,L,72,215,0.069109,G,0
651,8483482,Tristan Luneau,2004-01-12,R,73,195,0.0279,D,0
654,8483489,Fraser Minten,2004-07-05,L,74,192,0.061666,C,0
656,8482470,Ilya Solovyov,2000-07-20,L,75,208,0.007342,D,0
663,8483512,Matt Savoie,2004-01-01,R,69,179,0.221703,C,0
672,8482511,Mason Lohrei,2001-01-17,L,77,211,0.42287,D,0
677,8480992,Magnus Chrona,2000-08-28,L,76,194,0.014705,G,0
693,8481534,Raphael Lavoie,2000-09-25,R,76,215,0.027899,C,0
697,8481028,Martin Pospisil,1999-11-19,L,74,173,1.562339,LW,0


# Create summary snapshot file and save

In [108]:
summary_stats_snapshot = pd.merge(trimmed_stats, bios_on_date, how='left', on='playerId')
summary_stats_snapshot

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,fantasyPoints,FPP60,FPPG,missedGames,gamesRemaining,fantasyPointsRemain,fullName,birthDate,shootsCatches,height,weight,roster_percent,default_pos,espnId
0,8481559,J. Hughes,NJD,C,13377,11,40.0,10.76,3.64,0,66,240.24,Jack Hughes,2001-05-14,L,71,175,99.528697,C,4565222
1,8475660,C. Talbot,LAK,G,46333,13,57.4,4.46,4.42,0,66,234.68,Cam Talbot,1987-07-05,L,76,200,76.091815,G,5734
2,8477956,D. Pastrnak,BOS,R,18279,16,55.0,10.83,3.44,0,66,227.04,David Pastrnak,1996-05-25,R,72,196,99.906033,RW,3114778
3,8476453,N. Kucherov,TBL,R,21962,17,59.4,9.74,3.49,0,64,223.36,Nikita Kucherov,1993-06-17,L,71,182,99.870795,RW,2563060
4,8479318,A. Matthews,TOR,C,21586,17,57.6,9.61,3.39,0,65,220.35,Auston Matthews,1997-09-17,L,75,215,99.910438,C,4024123
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
803,8480992,M. Chrona,SJS,G,1830,1,-5.4,-10.62,-5.40,0,65,-20.76,Magnus Chrona,2000-08-28,L,76,194,0.014705,G,0
804,8479973,S. Skinner,EDM,G,41419,12,-7.4,-0.64,-0.62,0,66,-30.62,Stuart Skinner,1998-11-01,L,76,230,48.378748,G,4268767
805,8482821,A. Soderblom,CHI,G,23314,7,-7.8,-1.20,-1.11,0,66,-32.55,Arvid Soderblom,1999-08-19,L,75,180,1.049954,G,4894729
806,8475789,J. Campbell,EDM,G,16007,5,-8.6,-1.93,-1.72,0,66,-35.58,Jack Campbell,1992-01-09,L,75,200,2.804288,G,5473


In [109]:
fileName = f"data/summary_stats-{today.strftime('%Y-%m-%d')}.csv"
summary_stats_snapshot.to_csv(fileName, index=False)

In [110]:
inverse_fixes

{'Tim Stützle': 'Tim Stutzle',
 'Jani Hakanpää': 'Jani Hakanpaa',
 'Bo Groulx': 'Benoit-Olivier Groulx',
 'Jesse Ylönen': 'Jesse Ylonen',
 'Alexis Lafrenière': 'Alexis Lafreniere',
 'Gustav Lindström': 'Gustav Lindstrom',
 'Alex Kerfoot': 'Alexander Kerfoot',
 'Sammy Walker': 'Samuel Walker',
 'Alex Barré-Boulet': 'Alex Barre-Boulet'}

In [111]:
file_name = f"data/allG_df_fp-{today.strftime('%Y-%m-%d')}.csv"
allG_df.to_csv(file_name, index=False)

file_name = f"data/all_df_fp-{today.strftime('%Y-%m-%d')}.csv"
all_df.to_csv(file_name, index=False)

In [112]:
all_df

Unnamed: 0,playerId,sweaterNumber,name,position,goals,assists,points,plusMinus,pim,hits,...,team,opponent,secondaryPosition,tertiaryPosition,gameDate,gameTime,gameId,gamesPlayed,fantasyPoints,specialTeams
0,8478178,43,D. Raddysh,D,0,0,0,1,0,1,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,0.9,0
1,8475177,44,C. de Haan,D,0,0,0,0,0,2,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,0.7,0
2,8480246,48,N. Perbix,D,0,0,0,-1,2,0,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,0.6,0
3,8475167,77,V. Hedman,D,0,1,1,-1,0,0,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,1.9,0
4,8478416,81,E. Cernak,D,0,0,0,0,0,6,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,1.3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20230202718477953,8477953,42,K. Kapanen,R,0,0,0,-1,2,0,...,STL,ANA,W,F,2023-11-19 00:00:00,Sunday 08:00 PM,2023020271,1,0.0,0
20230202718481543,8481543,59,N. Alexandrov,C,0,0,0,1,0,2,...,STL,ANA,C,F,2023-11-19 00:00:00,Sunday 08:00 PM,2023020271,1,0.7,0
20230202718482089,8482089,63,J. Neighbours,L,1,0,1,1,0,0,...,STL,ANA,W,F,2023-11-19 00:00:00,Sunday 08:00 PM,2023020271,1,2.6,0
20230202718476897,8476897,70,O. Sundqvist,C,0,1,1,1,0,0,...,STL,ANA,C,F,2023-11-19 00:00:00,Sunday 08:00 PM,2023020271,1,1.0,0


In [113]:

# file_name = f"data/goaliesSummary-{today.strftime('%Y-%m-%d')}.csv"
# summary_statsG.to_csv(file_name, encoding='utf-8')

# file_name = f"data/skatersSummary-{today.strftime('%Y-%m-%d')}.csv"
# summary_stats.to_csv(file_name, encoding='utf-8')

# file_name = f"data/goaliesLog-{today.strftime('%Y-%m-%d')}.csv"
# allG_df.to_csv(file_name, encoding='utf-8')

# file_name = f"data/skatersLog-{today.strftime('%Y-%m-%d')}.csv"
# all_df.to_csv(file_name, encoding='utf-8')