In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pprint import pprint
from datetime import datetime, timedelta, timezone
import pytz
import scipy.stats
from dateutil.relativedelta import relativedelta

import warnings

# Blanket filter: this silences every warning category for the rest of the
# session, not only specific ones.
warnings.filterwarnings("ignore")

# Naive (tz-unaware) UTC timestamp captured when this cell runs.
today = datetime.utcnow()

# Project-local module.
# NOTE(review): espn_link is not referenced in the cells visible here - confirm it is still needed.
from api_keys import espn_link

# Date suffix of the saved game-log CSVs that get loaded later
# (data/allG_df_raw-<last_scrape>.csv and data/all_df_raw-<last_scrape>.csv).
last_scrape = '2023-11-27'

# Get current standings

In [2]:
teamURL = 'https://api-web.nhle.com/v1/standings/now'

# Length of the schedule used to derive games remaining.
SEASON_GAMES = 82

# Fail fast on HTTP errors (and never hang forever) instead of trying to
# parse an error page as JSON.
standings_response = requests.get(teamURL, timeout=30)
standings_response.raise_for_status()
teams = standings_response.json()['standings']

# Collect one plain record per team and build the frame once; concatenating
# inside the loop re-copies the whole frame on every iteration.
team_records = []
for team in teams:
    gamesPlayed = team['gamesPlayed']
    team_records.append({
        'abbreviation': team['teamAbbrev']['default'],
        'name': team['teamName']['default'],
        'gamesPlayed': gamesPlayed,
        'gamesRemaining': SEASON_GAMES - gamesPlayed,
        'goalsFor': team['goalFor'],
        'goalsAgainst': team['goalAgainst'],
        'logo': team['teamLogo'],
        'conference': team['conferenceName'],
        'division': team['divisionName'],
    })

# Explicit column list keeps the schema stable even if the API returns no teams.
team_names = pd.DataFrame(
    team_records,
    columns=['abbreviation', 'name', 'gamesPlayed', 'gamesRemaining', 'goalsFor',
             'goalsAgainst', 'logo', 'conference', 'division'],
)

# Team abbreviations drive the per-team schedule requests in the next section.
teamList = team_names['abbreviation'].tolist()

team_names.to_csv('data/team_names.csv', index=False)
team_names

Unnamed: 0,abbreviation,name,gamesPlayed,gamesRemaining,goalsFor,goalsAgainst,logo,conference,division
0,NYR,New York Rangers,20,62,67,50,https://assets.nhle.com/logos/nhl/svg/NYR_ligh...,Eastern,Metropolitan
1,BOS,Boston Bruins,21,61,71,55,https://assets.nhle.com/logos/nhl/svg/BOS_ligh...,Eastern,Atlantic
2,VGK,Vegas Golden Knights,22,60,70,52,https://assets.nhle.com/logos/nhl/svg/VGK_ligh...,Western,Pacific
3,COL,Colorado Avalanche,21,61,80,59,https://assets.nhle.com/logos/nhl/svg/COL_ligh...,Western,Central
4,LAK,Los Angeles Kings,19,63,76,47,https://assets.nhle.com/logos/nhl/svg/LAK_ligh...,Western,Pacific
5,VAN,Vancouver Canucks,22,60,88,56,https://assets.nhle.com/logos/nhl/svg/VAN_ligh...,Western,Pacific
6,FLA,Florida Panthers,21,61,63,55,https://assets.nhle.com/logos/nhl/svg/FLA_ligh...,Eastern,Atlantic
7,DAL,Dallas Stars,19,63,66,57,https://assets.nhle.com/logos/nhl/svg/DAL_ligh...,Western,Central
8,WPG,Winnipeg Jets,20,62,71,59,https://assets.nhle.com/logos/nhl/svg/WPG_ligh...,Western,Central
9,DET,Detroit Red Wings,20,62,74,60,https://assets.nhle.com/logos/nhl/svg/DET_ligh...,Eastern,Atlantic


# Get schedule

This includes the winning goalie

In [3]:
baseURL = 'https://api-web.nhle.com/v1/club-schedule-season/'
season = '/20232024'

date_format = "%d-%m-%Y"
# Timezone objects are loop-invariant, so build them once.
utc_timezone = pytz.timezone('UTC')
eastern_timezone = pytz.timezone('US/Eastern')
today = datetime.utcnow()

sked_columns = ['gameID', 'gameDate', 'gameTime', 'awayTeam', 'homeTeam', 'gameDT', 'winningGoalie']

# Rows are gathered as plain dicts and turned into frames once at the end;
# calling pd.concat per game re-copies the frame on every iteration.
complete_records = []   # every game as seen from every team's schedule (each game appears twice)
home_records = []       # only the copy taken from the home team's schedule (each game once)

for team in teamList:

    skedURL = baseURL + team + season
    sked_response = requests.get(skedURL, timeout=30)
    sked_response.raise_for_status()

    # Keep only gameType 2 entries.
    # NOTE(review): presumably 2 = regular season - confirm against the API docs.
    sked = [entry for entry in sked_response.json()['games'] if entry.get('gameType') == 2]

    for game in sked:
        homeTeam = game['homeTeam']['abbrev']

        # The winning goalie is only read for games in state 'OFF'; every
        # other state is recorded with the placeholder 0.
        if game['gameState'] == 'OFF':
            winningGoalie = game['winningGoalie']['playerId']
        else:
            winningGoalie = 0

        # gameDT stays a naive UTC datetime; gameDate / gameTime are the
        # US/Eastern calendar day (at midnight) and a display string.
        datetime_obj = datetime.strptime(game['startTimeUTC'], '%Y-%m-%dT%H:%M:%SZ')
        eastern_datetime = utc_timezone.localize(datetime_obj).astimezone(eastern_timezone)
        game_date = datetime(eastern_datetime.year, eastern_datetime.month, eastern_datetime.day)
        game_time = eastern_datetime.strftime("%A %I:%M %p")

        record = {
            'gameID': game['id'],
            'gameDate': game_date,
            'gameTime': game_time,
            'awayTeam': game['awayTeam']['abbrev'],
            'homeTeam': homeTeam,
            'gameDT': datetime_obj,
            'winningGoalie': winningGoalie,
        }
        complete_records.append(record)

        if homeTeam == team:
            home_records.append(record)

completeSked = pd.DataFrame(complete_records, columns=sked_columns)
homeOnlySked = pd.DataFrame(home_records, columns=sked_columns)

homeOnlySked.to_csv('data/sked.csv', index=False)

completeSked.to_csv('data/sked_full.csv', index=False)

## Show remaining sked. Adjust time if needed

In [4]:
# Alternative cutoff with a half-day offset (swap in when needed):
# remainSked = homeOnlySked.loc[homeOnlySked['gameDT'] >= (datetime.utcnow() - timedelta(days=0.5))]

# Games whose (naive UTC) start time is now or later.
starts_now_or_later = homeOnlySked['gameDT'] >= datetime.utcnow()
remainSked = homeOnlySked.loc[starts_now_or_later]
remainSked.sort_values('gameDate')

Unnamed: 0,gameID,gameDate,gameTime,awayTeam,homeTeam,gameDT,winningGoalie
338,2023020332,2023-11-28,Tuesday 08:00 PM,DAL,WPG,2023-11-29 01:00:00,0
1238,2023020333,2023-11-28,Tuesday 08:30 PM,SEA,CHI,2023-11-29 01:30:00,0
501,2023020329,2023-11-28,Tuesday 07:00 PM,FLA,TOR,2023-11-29 00:00:00,0
584,2023020328,2023-11-28,Tuesday 07:30 PM,CAR,PHI,2023-11-29 00:30:00,0
214,2023020336,2023-11-28,Tuesday 10:00 PM,ANA,VAN,2023-11-29 03:00:00,0
...,...,...,...,...,...,...,...
1229,2023021307,2024-04-18,Thursday 07:00 PM,SEA,MIN,2024-04-18 23:00:00,0
368,2023021308,2024-04-18,Thursday 08:00 PM,VAN,WPG,2024-04-19 00:00:00,0
163,2023021310,2024-04-18,Thursday 09:30 PM,EDM,COL,2024-04-19 01:30:00,0
778,2023021309,2024-04-18,Thursday 09:00 PM,SJS,CGY,2024-04-19 01:00:00,0


## Show completed sked. Adjust time if needed

In [5]:
# Alternative cutoff with a half-day offset (swap in when needed):
# completedSked = homeOnlySked.loc[homeOnlySked['gameDT'] <= (datetime.utcnow() - timedelta(days=0.5))]

# Games whose (naive UTC) start time is now or earlier.
# NOTE(review): a game that has started but not finished also passes this
# filter - use the offset variant above if that matters.
already_started = homeOnlySked['gameDT'] <= datetime.utcnow()
completedSked = homeOnlySked.loc[already_started]
completedSked.sort_values('gameDate')

Unnamed: 0,gameID,gameDate,gameTime,awayTeam,homeTeam,gameDT,winningGoalie
410,2023020001,2023-10-10,Tuesday 05:30 PM,NSH,TBL,2023-10-10 21:30:00,8477992
820,2023020002,2023-10-10,Tuesday 08:00 PM,CHI,PIT,2023-10-11 00:00:00,8475852
82,2023020003,2023-10-10,Tuesday 10:30 PM,SEA,VGK,2023-10-11 02:30:00,8478499
492,2023020005,2023-10-11,Wednesday 07:00 PM,MTL,TOR,2023-10-11 23:00:00,8478492
164,2023020008,2023-10-11,Wednesday 10:00 PM,COL,LAK,2023-10-12 02:00:00,8480382
...,...,...,...,...,...,...,...
132,2023020324,2023-11-27,Monday 09:00 PM,TBL,COL,2023-11-28 02:00:00,8480382
1077,2023020321,2023-11-27,Monday 07:00 PM,BOS,CBJ,2023-11-28 00:00:00,8477484
1119,2023020323,2023-11-27,Monday 07:00 PM,FLA,OTT,2023-11-28 00:00:00,8475683
745,2023020325,2023-11-27,Monday 09:30 PM,VGK,CGY,2023-11-28 02:30:00,8478435


# Bring in the most recent game logs. You will likely need to adjust `last_scrape` manually

In [6]:
# Goalie game logs written by the previous scrape.
# To load a file saved today instead, use:
# f"data/allG_df_raw-{today.strftime('%Y-%m-%d')}.csv"
allG_df_raw = pd.read_csv(f"data/allG_df_raw-{last_scrape}.csv", index_col=False)

# Skater game logs written by the previous scrape.
# To load a file saved today instead, use:
# f"data/all_df_raw-{today.strftime('%Y-%m-%d')}.csv"
file_name = f"data/all_df_raw-{last_scrape}.csv"
all_df_raw = pd.read_csv(file_name, index_col=False)

all_df_raw

Unnamed: 0,playerId,sweaterNumber,name,position,goals,assists,points,plusMinus,pim,hits,...,toi,powerPlayToi,shorthandedToi,team,opponent,secondaryPosition,tertiaryPosition,gameDate,gameTime,gameId
0,8478178,43,D. Raddysh,D,0,0,0,1,0,1,...,1084,5,19,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
1,8475177,44,C. de Haan,D,0,0,0,0,0,2,...,821,0,172,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
2,8480246,48,N. Perbix,D,0,0,0,-1,2,0,...,702,0,0,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
3,8475167,77,V. Hedman,D,0,1,1,-1,0,0,...,1615,386,248,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
4,8478416,81,E. Cernak,D,0,0,0,0,0,6,...,1160,0,276,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11509,8476460,55,M. Scheifele,C,0,1,1,-1,0,1,...,1251,76,2,WPG,NSH,C,F,2023-11-26 00:00:00,Sunday 08:00 PM,2023020319
11510,8475799,62,N. Niederreiter,R,0,0,0,1,0,2,...,893,39,0,WPG,NSH,W,F,2023-11-26 00:00:00,Sunday 08:00 PM,2023020319
11511,8479536,71,A. Jonsson-Fjallby,L,0,0,0,0,0,2,...,491,0,22,WPG,NSH,W,F,2023-11-26 00:00:00,Sunday 08:00 PM,2023020319
11512,8478398,81,K. Connor,L,0,1,1,-2,0,0,...,1340,77,2,WPG,NSH,W,F,2023-11-26 00:00:00,Sunday 08:00 PM,2023020319


# Show games missing from logs

In [7]:
# Every game on the schedule (one entry per game, taken from the home-team view).
all_games = homeOnlySked['gameID'].unique().tolist()

# Games that already have skater rows in the saved logs.
games_saved = all_df_raw['gameId'].unique().tolist()

# Games that have started according to the schedule.
games_done = completedSked['gameID'].unique().tolist()

# Started games with no saved rows yet. A raw set difference has arbitrary
# order, so sort it to make the scrape order (and the order of the appended
# rows) reproducible from run to run.
missing_games = sorted(set(games_done) - set(games_saved))

missing_games

[2023020321, 2023020322, 2023020323, 2023020324, 2023020325, 2023020326]

# Scrape new game logs only

In [8]:
baseURL = 'https://api-web.nhle.com/v1/gamecenter/'
appendix = '/boxscore'

playsBaseURL = 'https://api-web.nhle.com/v1/gamecenter/'
playsAppendix = '/play-by-play'

date_format = "%d-%m-%Y"
utc_timezone = pytz.timezone('UTC')
eastern_timezone = pytz.timezone('US/Eastern')
today = datetime.utcnow()

# A goalie must have stopped every shot AND played more than this many
# seconds for the game to be flagged as a shutout.
# NOTE(review): 3446 s is 57:26; the reason for this exact cutoff is not
# visible in this notebook - confirm.
SHUTOUT_MIN_TOI = 3446

# Per-player game logs, keyed by "<gameID><playerId>".
awayLogs = {}
awayLogsG = {}
homeLogs = {}
homeLogsG = {}


def _fetch_json(url):
    """GET `url` and return the decoded JSON body; raises on HTTP errors."""
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return response.json()


def _clock_to_seconds(clock):
    """Convert a colon-separated clock string (e.g. 'MM:SS') to whole seconds."""
    return sum(int(part) * 60**i for i, part in enumerate(reversed(clock.split(':'))))


def _goalie_facing(plays, shooting_team_id, last=False):
    """Return the goalie in net for the first (or last) shot on goal by a team.

    Scans 'shot-on-goal' plays owned by `shooting_team_id` and returns the
    `goalieInNetId` of the first match, searching from the end of the game
    when `last` is true. Returns None when the team has no such play.
    """
    ordered_plays = reversed(plays) if last else plays
    for play in ordered_plays:
        if play['typeDescKey'] != 'shot-on-goal':
            continue
        details = play['details']
        if details['eventOwnerTeamId'] == shooting_team_id:
            return details['goalieInNetId']
    return None


def _add_skater_logs(logs, skaters, is_defense, team, opponent,
                     game_key, game_id, game_date, game_time):
    """Flatten one group of skaters into `logs` and return their combined shots.

    Each skater dict from the boxscore is stored in `logs` (and annotated in
    place) with team/opponent, coarse position buckets, game date/time, and
    the three time-on-ice fields converted to seconds.
    """
    shots_total = 0
    for skater in skaters:
        logs[str(game_key) + str(skater['playerId'])] = skater
        skater['name'] = skater['name']['default']
        skater['team'] = team
        skater['opponent'] = opponent

        if is_defense:
            skater['secondaryPosition'] = 'D'
            skater['tertiaryPosition'] = 'D'
        else:
            # Forwards are bucketed as centre vs. wing, then all as 'F'.
            skater['secondaryPosition'] = 'C' if skater['position'] == 'C' else 'W'
            skater['tertiaryPosition'] = 'F'

        skater['gameDate'] = game_date
        skater['gameTime'] = game_time
        for clock_field in ('toi', 'powerPlayToi', 'shorthandedToi'):
            skater[clock_field] = _clock_to_seconds(skater[clock_field])
        skater['gameId'] = game_id

        shots_total += skater['shots']
    return shots_total


def _add_goalie_logs(logs, goalies, team, opponent, game_key, game_id, game_date, game_time,
                     starter_id, closer_id, winner_id, team_won, went_to_ot):
    """Flatten the goalies who played into `logs` and return total shots faced.

    Goalies with a '00:00' time on ice are skipped. Each remaining goalie dict
    is stored in `logs` (and annotated in place) with:
      start   - 1 if the goalie is `starter_id`, else 0
      shutout - 1 if every shot was saved and toi exceeds SHUTOUT_MIN_TOI
      win     - 4 for the winning goalie, 1 for the goalie in net at the end
                of a loss that went past regulation, otherwise 0
    A line is printed when shots minus saves disagrees with goalsAgainst.
    """
    shots_faced = 0
    for goalie in goalies:
        if goalie['toi'] == '00:00':
            continue

        logs[str(game_key) + str(goalie['playerId'])] = goalie
        goalie['name'] = goalie['name']['default']
        goalie['team'] = team
        goalie['opponent'] = opponent
        goalie['gameDate'] = game_date
        goalie['gameTime'] = game_time
        goalie['toi'] = _clock_to_seconds(goalie['toi'])
        goalie['gameId'] = game_id

        # FIND THE STARTER
        goalie['start'] = int(goalie['playerId'] == starter_id)

        # DETERMINE SHUTOUT ('saveShotsAgainst' is formatted "saves/shots")
        saves, shots = (int(part) for part in goalie['saveShotsAgainst'].split('/'))
        goalie['shutout'] = int(shots == saves and goalie['toi'] > SHUTOUT_MIN_TOI)

        shots_faced += shots

        # DETERMINE WIN
        if goalie['playerId'] == winner_id:
            goalie['win'] = 4
        elif (not team_won) and went_to_ot and closer_id == goalie['playerId']:
            goalie['win'] = 1
        else:
            goalie['win'] = 0

        # CHECK FOR GOALIE ABERRATION
        if shots - saves != goalie['goalsAgainst']:
            print(f"Error for goalies: {goalie['name']} "
                  f"({saves}/{shots} saves, {goalie['goalsAgainst']} goals against)")
    return shots_faced


for Id in missing_games:

    gameURL = baseURL + str(Id) + appendix

    print(gameURL)

    game = _fetch_json(gameURL)

    numPeriods = len(game['boxscore']['linescore']['byPeriod'])
    score = game['boxscore']['linescore']['totals']
    awayTeam = game['awayTeam']
    homeTeam = game['homeTeam']
    awayTeamName = awayTeam['abbrev']
    homeTeamName = homeTeam['abbrev']
    awayTeamId = awayTeam['id']
    homeTeamId = homeTeam['id']
    gameDate = game['startTimeUTC']
    gameOutcome = game['gameOutcome']   # read but not used further in this cell
    gameID = game['id']

    playerStatsAway = game['boxscore']['playerByGameStats']['awayTeam']
    playerStatsHome = game['boxscore']['playerByGameStats']['homeTeam']

    # GET THE STARTING AND ENDING GOALTENDERS
    #
    # A team's goalie is identified from the shots the OTHER team put on goal.
    # Starters use the same eventOwnerTeamId test as the ending goalies, so a
    # starter is still found when shot number 1 is not a 'shot-on-goal' play.
    # NOTE(review): presumably that happens when the first shot is a goal - confirm.

    playsURL = playsBaseURL + str(Id) + playsAppendix
    plays = _fetch_json(playsURL)['plays']

    homeStartingG = _goalie_facing(plays, awayTeamId)
    awayStartingG = _goalie_facing(plays, homeTeamId)

    homeEndingG = _goalie_facing(plays, awayTeamId, last=True)
    awayEndingG = _goalie_facing(plays, homeTeamId, last=True)

    # GET THE WINNING GOALTENDER (recorded when the schedule was pulled)

    winningGoalie = completedSked.loc[completedSked['gameID'] == Id]['winningGoalie'].iloc[0]

    # FIGURE OUT THE TYPE OF FINISH

    OT = 1 if numPeriods > 3 else 0
    SO = 1 if numPeriods > 4 else 0

    homeTeamWin = 1 if score['home'] > score['away'] else 0
    awayTeamWin = 1 - homeTeamWin

    # FORMAT THE DATES AND TIMES (US/Eastern calendar day at midnight + display string)

    datetime_obj = datetime.strptime(gameDate, '%Y-%m-%dT%H:%M:%SZ')
    utc_datetime = utc_timezone.localize(datetime_obj)
    eastern_datetime = utc_datetime.astimezone(eastern_timezone)
    formatted_date_string = eastern_datetime.strftime('%d-%m-%Y')
    game_date = datetime.strptime(formatted_date_string, date_format)
    game_time = eastern_datetime.strftime("%A %I:%M %p")

    # AWAY LOG ASSEMBLER
    # (the *SOGcheck / *Gshots tallies are computed for inspection; nothing in
    # this cell consumes them)

    awaySOGcheck = (
        _add_skater_logs(awayLogs, playerStatsAway['defense'], True, awayTeamName, homeTeamName,
                         gameID, Id, game_date, game_time)
        + _add_skater_logs(awayLogs, playerStatsAway['forwards'], False, awayTeamName, homeTeamName,
                           gameID, Id, game_date, game_time)
    )

    awayGshots = _add_goalie_logs(awayLogsG, playerStatsAway['goalies'], awayTeamName, homeTeamName,
                                  gameID, Id, game_date, game_time,
                                  starter_id=awayStartingG, closer_id=awayEndingG,
                                  winner_id=winningGoalie, team_won=(awayTeamWin == 1),
                                  went_to_ot=(OT == 1))

    # HOME LOG ASSEMBLER

    homeSOGcheck = (
        _add_skater_logs(homeLogs, playerStatsHome['defense'], True, homeTeamName, awayTeamName,
                         gameID, Id, game_date, game_time)
        + _add_skater_logs(homeLogs, playerStatsHome['forwards'], False, homeTeamName, awayTeamName,
                           gameID, Id, game_date, game_time)
    )

    homeGshots = _add_goalie_logs(homeLogsG, playerStatsHome['goalies'], homeTeamName, awayTeamName,
                                  gameID, Id, game_date, game_time,
                                  starter_id=homeStartingG, closer_id=homeEndingG,
                                  winner_id=winningGoalie, team_won=(homeTeamWin == 1),
                                  went_to_ot=(OT == 1))
        

https://api-web.nhle.com/v1/gamecenter/2023020321/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020322/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020323/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020324/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020325/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020326/boxscore


# Assemble and append new game logs

In [9]:
def _as_date_string(value):
    """Render a date-like value as 'YYYY-MM-DD'; missing values pass through unchanged."""
    if pd.isna(value):
        return value
    return pd.Timestamp(value).strftime('%Y-%m-%d')


def _append_new_logs(saved_logs, home_logs, away_logs):
    """Append freshly scraped rows to the saved logs and return the combined frame.

    Two fixes are applied on top of a plain concat:
      * rows already present for the same (gameId, playerId) are dropped, so
        re-running this cell does not append the same games twice;
      * gameDate is normalised to 'YYYY-MM-DD' strings, because rows loaded
        from CSV carry strings while new rows carry datetimes, and the mix
        is otherwise written out in two different formats.
    """
    combined = pd.concat([saved_logs, home_logs, away_logs], axis=0)
    if {'gameId', 'playerId'}.issubset(combined.columns):
        combined = combined.drop_duplicates(subset=['gameId', 'playerId'], keep='first')
    if 'gameDate' in combined.columns:
        combined = combined.assign(gameDate=combined['gameDate'].map(_as_date_string))
    return combined


# Goalies: the logs are dicts keyed by "<gameID><playerId>", so transpose to one row per entry.
homeG_df = pd.DataFrame(homeLogsG)
homeG_df = homeG_df.transpose()

awayG_df = pd.DataFrame(awayLogsG)
awayG_df = awayG_df.transpose()

allG_df_raw = _append_new_logs(allG_df_raw, homeG_df, awayG_df)

file_name = f"data/allG_df_raw-{today.strftime('%Y-%m-%d')}.csv"
allG_df_raw.to_csv(file_name, index=False)

# Skaters: same shape and treatment as the goalies.
home_df = pd.DataFrame(homeLogs)
home_df = home_df.transpose()

away_df = pd.DataFrame(awayLogs)
away_df = away_df.transpose()

all_df_raw = _append_new_logs(all_df_raw, home_df, away_df)

file_name = f"data/all_df_raw-{today.strftime('%Y-%m-%d')}.csv"
all_df_raw.to_csv(file_name, index=False)

# Clean and add FP to goalies; show summary stats

In [10]:
allG_df = allG_df_raw.copy()

# Each "*ShotsAgainst" column is a "saves/shots" string; split it into two numeric columns.
allG_df[['saves', 'shots']] = allG_df['saveShotsAgainst'].str.split('/', expand=True).apply(pd.to_numeric)
allG_df[['evSaves', 'evShots']] = allG_df['evenStrengthShotsAgainst'].str.split('/', expand=True).apply(pd.to_numeric)
allG_df[['ppSaves', 'ppShots']] = allG_df['powerPlayShotsAgainst'].str.split('/', expand=True).apply(pd.to_numeric)

allG_df = allG_df.drop(columns=(['saveShotsAgainst', 'savePctg', 'evenStrengthShotsAgainst',
                                   'powerPlayShotsAgainst', 'shorthandedShotsAgainst', 'evenStrengthGoalsAgainst',
                                  'powerPlayGoalsAgainst', 'shorthandedGoalsAgainst']))

# goalsAgainst is converted too, so the scoring arithmetic below runs on a numeric column.
columns_to_convert1 = ['toi', 'start', 'shutout', 'win', 'pim', 'goalsAgainst']
allG_df[columns_to_convert1] = allG_df[columns_to_convert1].apply(pd.to_numeric)

# GAMES PLAYED TALLY
# Computed for every row (0 when toi is 0) so the column never holds NaN;
# setting it only for toi > 0 made the integer cast fail on any zero-TOI row.
allG_df['gamesPlayed'] = (allG_df['toi'] > 0).astype(int)

# SHUTOUT POINTS: 3 for a shutout, otherwise 0
shutoutPoints = (allG_df['shutout'] == 1).astype(int) * 3

# DECISION POINTS: the 'win' column already holds the points (4 = win, 1 = overtime loss, 0 = otherwise)
decisionPoints = allG_df['win']

goalsAgainstPoints = allG_df['goalsAgainst'] * -2

savesPoints = (allG_df['shots'] - allG_df['goalsAgainst']) * 0.2

# TOTAL FANTASY POINTS
allG_df['fantasyPoints'] = decisionPoints + shutoutPoints + goalsAgainstPoints + savesPoints

# Decision label; any row that is neither a win nor an overtime loss is labelled 'L'.
allG_df['decisionType'] = allG_df['win'].map({4: 'W', 1: 'OTL'}).fillna('L')

columns_to_convert3 = ['gamesPlayed']
allG_df[columns_to_convert3] = allG_df[columns_to_convert3].astype(int)

# Season totals per goalie; 'team' takes the last row's value for that goalie.
summary_statsG = allG_df.groupby('playerId').agg({
    'name': 'first',
    'team': 'last',
    'position': 'first',
    'toi': 'sum',
    'gamesPlayed': 'sum',
    'saves': 'sum',
    'shots': 'sum',
    'shutout': 'sum',
    'fantasyPoints': 'sum'
}).reset_index()

summary_statsG.sort_values('fantasyPoints', ascending=False).head(25)

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,saves,shots,shutout,fantasyPoints
47,8478499,A. Hill,VGK,G,47683,13,362,388,2,64.4
7,8475660,C. Talbot,LAK,G,49933,14,376,404,1,63.2
34,8477967,T. Demko,VAN,G,52944,15,395,427,2,61.0
8,8475683,S. Bobrovsky,FLA,G,60661,17,426,468,2,48.2
30,8477465,T. Jarry,PIT,G,50175,15,379,413,3,44.8
61,8480280,J. Swayman,BOS,G,37814,11,307,332,1,44.4
63,8480382,A. Georgiev,COL,G,63387,18,441,491,1,43.2
24,8476945,C. Hellebuyck,WPG,G,54295,15,394,434,1,42.8
1,8471734,J. Quick,NYR,G,27196,8,200,215,2,41.0
39,8478009,I. Sorokin,NYI,G,47586,13,431,470,2,39.2


# Function to calculate games missed through injury, based on the schedule

In [11]:
def missed_games(team, returnDate):
    """Count a team's games between now and a return date.

    Parameters
    ----------
    team : str
        Team abbreviation, matched against both the home and away columns of
        the module-level `homeOnlySked` frame.
    returnDate : str
        Expected return date in 'YYYY-MM-DD' form; games on that date are
        not counted.

    Returns
    -------
    int
        Number of rows whose `gameDT` is at or after the current naive UTC
        time and whose `gameDate` is strictly before `returnDate`.
    """
    return_day = datetime.strptime(returnDate, '%Y-%m-%d')

    involves_team = (homeOnlySked['awayTeam'] == team) | (homeOnlySked['homeTeam'] == team)
    team_games = homeOnlySked.loc[involves_team]

    not_yet_started = team_games['gameDT'] >= datetime.utcnow()
    before_return = team_games['gameDate'] < return_day

    return len(team_games.loc[not_yet_started & before_return])

# Set all players to 0 missed games; then manually adjust dictionary

In [12]:
# Start every goalie at zero missed games; the injuries dictionary below overrides specific players.
summary_statsG['missedGames'] = 0

In [13]:
# Manually maintained: goalie name -> expected return date ('YYYY-MM-DD').
injuries_listG = {
    'F. Andersen': '2023-12-02',
#     'V. Husso': '2023-11-22',
    'J. Korpisalo': '2023-11-27'

}

for player, date in injuries_listG.items():
    is_player = summary_statsG['name'] == player

    # A goalie with no logged games (or a mistyped name) has no summary row;
    # report and skip it instead of failing on .iloc[0].
    if not is_player.any():
        print(f"Skipping {player}: no goalie with that name in summary_statsG")
        continue

    team = summary_statsG.loc[is_player, 'team'].iloc[0]
    missedGames = missed_games(team, date)
    summary_statsG.loc[is_player, 'missedGames'] = missedGames

# Add crease minutes to team_names to calculate share %

In [14]:
# Total goalie time on ice logged for each team (same units as allG_df['toi']).
team_names['creaseMins'] = [
    allG_df.loc[allG_df['team'] == abbreviation]['toi'].sum()
    for abbreviation in team_names['abbreviation']
]

team_names['creaseMins'] = team_names['creaseMins'].astype(int)

for idx, goalie in summary_statsG.iterrows():

    # The goalie's team row in the standings table.
    team_row = team_names.loc[team_names['abbreviation'] == goalie['team']]

    # Fraction of the team's crease time this goalie has played (stored as a percentage).
    share = goalie['toi'] / team_row['creaseMins'].sum()
    summary_statsG.at[idx, 'creaseShare'] = round(share * 100, 2)

    # Team games left, minus games missed, scaled by the goalie's crease share.
    games_left = team_row['gamesRemaining'].iloc[0] - goalie['missedGames']
    summary_statsG.at[idx, 'gamesRemaining'] = round(games_left * share, 2)

    # Fantasy points per 3600 seconds of ice time.
    per60 = goalie['fantasyPoints'] / goalie['toi'] * 3600
    summary_statsG.at[idx, 'FPP60'] = round(per60, 2)

    # Fantasy points per game played.
    per_game = goalie['fantasyPoints'] / goalie['gamesPlayed']
    summary_statsG.at[idx, 'FPPG'] = round(per_game, 2)

    # Rest-of-season projection: per-60 rate times the expected share of remaining games.
    projected = per60 * games_left * share
    summary_statsG.at[idx, 'fantasyPointsRemain'] = round(projected, 2)

# The cast truncates the rounded value to a whole number of games.
summary_statsG['gamesRemaining'] = summary_statsG['gamesRemaining'].astype(int)

summary_statsG.loc[summary_statsG['team'] == 'VGK']

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,saves,shots,shutout,fantasyPoints,missedGames,creaseShare,gamesRemaining,FPP60,FPPG,fantasyPointsRemain
47,8478499,A. Hill,VGK,G,47683,13,362,388,2,64.4,0,59.44,35,4.86,4.95,173.41
62,8480313,L. Thompson,VGK,G,32536,9,247,268,0,28.4,0,40.56,24,3.14,3.16,76.47


# Function for summary statistics

In [15]:
def summary_statistics(df):
    """Collapse per-game skater rows into one line per player.

    Rows are grouped by 'playerId'. Counting stats are summed; 'name',
    'position' and the two extra position columns keep the first value seen,
    while 'team' keeps the last one. Two rate columns are then added:
    'FPP60' (fantasy points per 3600 units of 'toi') and 'FPPG' (fantasy
    points per game played), both rounded to two decimals.

    Returns a new DataFrame sorted by 'FPPG', highest first.
    """
    summed = ['toi', 'gamesPlayed', 'goals', 'assists', 'specialTeams', 'shots',
              'hits', 'blockedShots', 'powerPlayToi', 'shorthandedToi',
              'plusMinus', 'fantasyPoints']

    # Insertion order of this mapping fixes the output column order.
    how = {'name': 'first', 'team': 'last', 'position': 'first'}
    how.update(dict.fromkeys(summed, 'sum'))
    how.update({'secondaryPosition': 'first', 'tertiaryPosition': 'first'})

    totals = df.groupby('playerId').agg(how).reset_index()

    points_per_toi_unit = totals['fantasyPoints'] / totals['toi']
    totals['FPP60'] = (points_per_toi_unit * 3600).round(2)
    totals['FPPG'] = (totals['fantasyPoints'] / totals['gamesPlayed']).round(2)

    return totals.sort_values(by='FPPG', ascending=False)

# Clean the skaters; add FP and show summary stats

In [16]:
# Work on a copy so the raw game log stays untouched and the cell can be re-run.
all_df = all_df_raw.copy()

all_df = all_df.drop(columns=['faceoffWinningPctg'])

columns_to_convert1 = ['goals', 'assists', 'points', 'plusMinus', 'pim', 'hits', 'blockedShots',
                       'powerPlayGoals', 'powerPlayPoints', 'shorthandedGoals', 'shPoints', 'shots',
                       'toi', 'powerPlayToi', 'shorthandedToi']
all_df[columns_to_convert1] = all_df[columns_to_convert1].apply(pd.to_numeric)

# One row per player per game: the game counts as played only with ice time.
# Rows with no ice time get an explicit 0. The previous row loop left them as
# NaN, which made the integer cast below raise.
all_df['gamesPlayed'] = (all_df['toi'] > 0).astype(int)

# Scoring: goal 2, assist 1, special-teams point 0.5, blocked shot 0.5,
# hit 0.1, shot 0.1.
special_teams = all_df['powerPlayPoints'] + all_df['shPoints']
all_df['fantasyPoints'] = (
    (all_df['goals'] * 2)
    + all_df['assists']
    + (special_teams * .5)
    + (all_df['blockedShots'] * .5)
    + ((all_df['hits'] + all_df['shots']) * .1)
)
all_df['specialTeams'] = special_teams

columns_to_convert4 = ['specialTeams', 'gamesPlayed']
all_df[columns_to_convert4] = all_df[columns_to_convert4].astype(int)

summary_stats = summary_statistics(all_df).sort_values('fantasyPoints', ascending=False)
all_df

Unnamed: 0,playerId,sweaterNumber,name,position,goals,assists,points,plusMinus,pim,hits,...,team,opponent,secondaryPosition,tertiaryPosition,gameDate,gameTime,gameId,gamesPlayed,fantasyPoints,specialTeams
0,8478178,43,D. Raddysh,D,0,0,0,1,0,1,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,0.9,0
1,8475177,44,C. de Haan,D,0,0,0,0,0,2,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,0.7,0
2,8480246,48,N. Perbix,D,0,0,0,-1,2,0,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,0.6,0
3,8475167,77,V. Hedman,D,0,1,1,-1,0,0,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,1.9,0
4,8478416,81,E. Cernak,D,0,0,0,0,0,6,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,1.3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20230203268477511,8477511,39,A. Mantha,R,0,0,0,1,0,1,...,WSH,SJS,W,F,2023-11-27 00:00:00,Monday 10:30 PM,2023020326,1,0.8,0
20230203268476880,8476880,43,T. Wilson,R,0,0,0,0,0,4,...,WSH,SJS,W,F,2023-11-27 00:00:00,Monday 10:30 PM,2023020326,1,1.0,0
20230203268479547,8479547,45,M. Phillips,R,0,0,0,-1,0,0,...,WSH,SJS,W,F,2023-11-27 00:00:00,Monday 10:30 PM,2023020326,1,0.3,0
20230203268479359,8479359,47,B. Malenstyn,L,0,0,0,1,0,3,...,WSH,SJS,W,F,2023-11-27 00:00:00,Monday 10:30 PM,2023020326,1,1.4,0


# Set missed games as 0 and manually adjust injury dictionary

In [17]:
# Default every skater to zero missed games; the loop over injuries_list
# overwrites the value for each listed player.
summary_stats['missedGames'] = 0

In [18]:
# Hand-maintained list of injured skaters, keyed by the abbreviated name used
# in summary_stats['name']. The value is the date handed to missed_games()
# (presumably the expected return date - confirm against that helper).
# Comment an entry out once the player is back.
injuries_list = {
    'T. Zegras': '2023-11-28',
    'J. Drysdale': '2023-11-28',
    'J. McBain': '2023-11-28',
    'B. Hayton': '2023-12-22',
    'J. Valimaki': '2023-11-28',
    'T. Dermott': '2023-11-30',
    'M. Dumba': '2023-11-28',
    'M. Lucic': '2023-12-17',
#     'M. Grzelcyk': '2023-11-25',
    'T. Thompson': '2023-12-02',
    'Z. Girgensons': '2023-12-02',
    'J. Greenway': '2023-11-28',
#     'Z. Benson': '2023-11-17',
    'T. Hall': '2024-04-20',
    'A. Athanasiou': '2023-11-28',
    'C. Perry': '2023-12-14',
    'A. Lehkonen': '2023-12-05',
    'S. Girard': '2024-01-16',
    'D. Severson': '2024-01-02',
    'J. Roslovic': '2023-12-14',
#     'M. Janmark': '2023-11-22',
    'D. Holloway': '2023-12-10',
#     'A. Barkov': '2023-11-22',
    'J. Mahura': '2023-11-27',
    'R. Harvey-Pinard': '2024-01-17',
    'D. Savard': '2023-12-07',
    'J. Harris': '2023-12-07',
    'K. Dach': '2024-04-20',
    'L. Schenn': '2023-11-26',
    'T. Novak': '2023-12-12',
#     'N. Hischier': '2023-11-22',
    'T. Meier': '2023-11-28',
#     'J. Hughes': '2023-11-14',
    'A. Pelech': '2023-12-19',
    'F. Chytil': '2023-11-29',
    'A. Fox': '2023-11-29',
    'R. Greig': '2023-11-27',
    'T. Chabot': '2023-12-01',
    'B. Rust': '2023-11-28',
    'R. Rakell': '2023-12-16',
    'A. Barabanov': '2023-12-05',
    'J. Rutta': '2023-11-30',
    'A. Burakovsky': '2023-12-02',
#     'L. Brown': '2023-12-06',
    'T. Liljegren': '2023-11-30',
    'J. Klingberg': '2023-12-12',
    'C. Soucy': '2023-12-28',
    'P. Suter': '2023-12-30',
    'M. Fehervary': '2023-11-27',
    'T. Oshie': '2023-11-29',
    'N. Backstrom': '2024-04-20',
#     'A. Mantha': '2023-11-18',
    'G. Vilardi': '2023-11-28',
    'R. Kupari': '2023-12-16',
}

for player, date in injuries_list.items():
    is_player = summary_stats['name'] == player

    # A player without a row (typo, or no games yet) used to abort the cell
    # with a bare IndexError; name the entry and carry on instead.
    if not is_player.any():
        print(f"Skipping {player}: no row in summary_stats")
        continue

    # Abbreviated names are not guaranteed unique, so each matching row gets
    # the missed-game count of its own team rather than the first match's.
    # A namesake on another team is still marked as injured - this needs
    # player ids to fix properly.
    for team in summary_stats.loc[is_player, 'team'].unique():
        on_team = is_player & (summary_stats['team'] == team)
        summary_stats.loc[on_team, 'missedGames'] = missed_games(team, date)

# Add fantasy stats and missing games

In [19]:
# Look up each skater's team schedule in one pass. The previous row loop
# called int() on a one-element Series, which pandas has deprecated and will
# turn into a TypeError.
team_games_left = summary_stats['team'].map(
    team_names.set_index('abbreviation')['gamesRemaining']
)

# Fail with the offending teams named instead of an opaque cast error.
unknown_team = team_games_left.isna()
if unknown_team.any():
    missing = sorted(summary_stats.loc[unknown_team, 'team'].astype(str).unique())
    raise KeyError(f"Teams not found in team_names: {missing}")

# Games the player is still expected to play: team games left minus missed games.
games_left = team_games_left.astype(int) - summary_stats['missedGames']

summary_stats['gamesRemaining'] = games_left.astype(int)
# Rest-of-season projection: the current per-game rate over the games left.
summary_stats['fantasyPointsRemain'] = summary_stats['FPPG'] * games_left

summary_stats.loc[summary_stats['team'] == 'NYR']

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,goals,assists,specialTeams,shots,...,shorthandedToi,plusMinus,fantasyPoints,secondaryPosition,tertiaryPosition,FPP60,FPPG,missedGames,gamesRemaining,fantasyPointsRemain
393,8478550,A. Panarin,NYR,L,23355,20,11,19,13,79,...,4,1,57.0,W,F,8.79,2.85,0,62,176.7
194,8476885,J. Trouba,NYR,D,27306,20,1,7,1,39,...,4012,7,53.0,D,D,6.99,2.65,0,62,164.3
81,8475184,C. Kreider,NYR,L,22554,20,13,7,11,52,...,2068,6,48.5,W,F,7.74,2.42,0,62,150.04
144,8476389,V. Trocheck,NYR,C,24046,20,5,11,6,38,...,1555,-1,39.4,C,F,5.9,1.97,0,62,122.14
161,8476459,M. Zibanejad,NYR,C,23749,20,5,10,8,53,...,2652,8,37.9,C,F,5.75,1.9,0,62,117.8
217,8476979,E. Gustafsson,NYR,D,22926,20,3,12,6,31,...,489,5,36.8,D,D,5.78,1.84,0,62,114.08
32,8474009,N. Bonino,NYR,C,14377,20,1,1,0,12,...,2487,-3,29.6,C,F,7.41,1.48,0,62,91.76
662,8482073,B. Schneider,NYR,D,19126,20,1,5,0,25,...,1701,-2,29.0,D,D,5.46,1.45,0,62,89.9
560,8480817,K. Miller,NYR,D,27138,20,2,6,1,22,...,2492,6,28.6,D,D,3.79,1.43,0,62,88.66
676,8482109,A. Lafrenière,NYR,L,20076,20,8,4,2,43,...,34,-2,27.3,W,F,4.9,1.36,0,62,84.32


# Single Frame to add bio details

In [20]:
# Columns shared by skaters and goalies, in the order used for the combined table.
column_order = ['playerId', 'name', 'team', 'position', 'toi', 'gamesPlayed', 'fantasyPoints', 'FPP60', 'FPPG', 'missedGames', 'gamesRemaining', 'fantasyPointsRemain']

skater_only_columns = ['goals', 'assists', 'specialTeams', 'shots', 'hits', 'blockedShots', 'powerPlayToi',
                       'shorthandedToi', 'plusMinus', 'secondaryPosition', 'tertiaryPosition']
goalie_only_columns = ['saves', 'shots', 'shutout']

# Drop the position-specific stats, then keep just the shared columns.
trim_stats = summary_stats.drop(columns=skater_only_columns)[column_order]
trim_statsG = summary_statsG.drop(columns=goalie_only_columns)[column_order]

# Both lists should print identically before the frames are stacked.
print(trim_statsG.columns.to_list())
print(trim_stats.columns.to_list())

trimmed_stats = pd.concat([trim_stats, trim_statsG]).sort_values('fantasyPointsRemain', ascending = False)
trimmed_stats

['playerId', 'name', 'team', 'position', 'toi', 'gamesPlayed', 'fantasyPoints', 'FPP60', 'FPPG', 'missedGames', 'gamesRemaining', 'fantasyPointsRemain']
['playerId', 'name', 'team', 'position', 'toi', 'gamesPlayed', 'fantasyPoints', 'FPP60', 'FPPG', 'missedGames', 'gamesRemaining', 'fantasyPointsRemain']


Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,fantasyPoints,FPP60,FPPG,missedGames,gamesRemaining,fantasyPointsRemain
156,8476453,N. Kucherov,TBL,R,26961,21,75.1,10.03,3.58,0,60,214.80
7,8475660,C. Talbot,LAK,G,49933,14,63.2,4.56,4.51,0,45,208.11
514,8480069,C. Makar,COL,D,30559,21,69.7,8.21,3.32,0,61,202.52
416,8479318,A. Matthews,TOR,C,24219,19,60.0,8.92,3.16,0,63,199.08
630,8481559,J. Hughes,NJD,C,17363,14,44.0,9.12,3.14,0,63,197.82
...,...,...,...,...,...,...,...,...,...,...,...,...
0,8470594,M. Fleury,MIN,G,32280,9,-4.6,-0.51,-0.51,0,29,-15.16
56,8479406,F. Gustavsson,MIN,G,36534,11,-7.8,-0.77,-0.71,0,33,-25.71
17,8476341,A. Forsberg,OTT,G,22557,7,-7.0,-1.12,-1.00,0,23,-26.70
10,8475789,J. Campbell,EDM,G,16007,5,-8.6,-1.93,-1.72,0,13,-26.83


# Get ESPN ownership %

In [22]:
rosters = requests.get(espn_link).json()

# One record per ESPN player, turned into a frame in a single step (growing a
# DataFrame with concat inside the loop is quadratic).
roster_records = []
for player in rosters:
    full_name = player['fullName']
    default_position_id = player['defaultPositionId']

    # Entries without ownership data count as 0% rostered. Only the
    # "missing data" errors are caught, so real bugs still surface.
    try:
        percent = player['ownership']['percentOwned']
    except (KeyError, TypeError):
        percent = 0

    # Two players share this name; tag the one whose default position id is 4
    # (mapped to 'D' below) so the two can be told apart.
    if full_name == 'Sebastian Aho' and default_position_id == 4:
        full_name = 'Sebastian Aho (D)'

    roster_records.append({'fullName': full_name, 'rostered': percent, 'pos': default_position_id})

# Explicit columns keep the frame usable even if the response is empty.
rostership = pd.DataFrame(roster_records, columns=['fullName', 'rostered', 'pos'])

# ESPN spelling -> spelling used in the saved player bios.
fixes = {
    'Tim Stutzle': 'Tim Stützle',
    'Alex Barre-Boulet': 'Alex Barré-Boulet',
    'Jani Hakanpaa': 'Jani Hakanpää',
    'Jesse Ylonen': 'Jesse Ylönen',
    'Alexis Lafreniere': 'Alexis Lafrenière',
    'Gustav Lindstrom': 'Gustav Lindström',
    'Alexander Kerfoot': 'Alex Kerfoot',
    'Johnny Beecher': 'John Beecher',
    'Samuel Walker': 'Sammy Walker',
    'Maxime Lajoie': 'Max Lajoie'
}

# Assign the result back: `df[col].replace(..., inplace=True)` works on a
# temporary and does nothing under pandas Copy-on-Write.
rostership['fullName'] = rostership['fullName'].replace(fixes)

# ESPN default position id -> position label.
pos_fixes = {
    1: 'C',
    2: 'LW',
    3: 'RW',
    4: 'D',
    5: 'G'
}

rostership['pos'] = rostership['pos'].replace(pos_fixes)

# Spot check one known player.
rostership.loc[rostership['fullName'] == 'Andrei Vasilevskiy']

Unnamed: 0,fullName,rostered,pos
1361,Andrei Vasilevskiy,93.876415,G


# Read in saved player bios

In [23]:
# Load the cached bios and rename the key columns to the names used by the
# rest of the notebook.
player_bios = (
    pd.read_csv('data/playerbios.csv', encoding='utf-8')
    .rename(columns={'id': 'playerId', 'name': 'fullName'})
)
player_bios

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight
0,8477967,Thatcher Demko,1995-12-08,L,76,192
1,8481559,Jack Hughes,2001-05-14,L,71,175
2,8479318,Auston Matthews,1997-09-17,L,75,215
3,8476453,Nikita Kucherov,1993-06-17,L,71,182
4,8480012,Elias Pettersson,1998-11-12,L,74,176
...,...,...,...,...,...,...
816,8476883,Andrei Vasilevskiy,1994-07-25,L,76,220
817,8481593,Jayden Struble,2001-09-08,L,72,202
818,8480890,Jan Jenik,2000-09-15,L,73,185
819,8477499,Rasmus Ristolainen,1994-10-27,R,76,208


# Function for getting bios of missing players

In [24]:
def add_player_bio(missing_id_list):
    """Fetch bio details from the NHL web API for each player id given.

    Parameters
    ----------
    missing_id_list : iterable
        NHL player ids to look up; each is requested from
        ``/v1/player/<id>/landing``.

    Returns
    -------
    pandas.DataFrame
        One row per requested id with columns ``playerId``, ``fullName``,
        ``birthDate``, ``shootsCatches``, ``height`` and ``weight``. The
        columns are present even when ``missing_id_list`` is empty, and the
        index runs 0..n-1.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status for any id.
    """
    bio_columns = ['playerId', 'fullName', 'birthDate', 'shootsCatches', 'height', 'weight']

    # Collect plain records and build the frame once, rather than calling
    # concat for every player.
    records = []
    for player in missing_id_list:
        URL = 'https://api-web.nhle.com/v1/player/' + str(player) + '/landing'

        # The timeout stops a stalled connection from hanging the notebook;
        # raise_for_status turns a bad id into a clear HTTP error rather than
        # a confusing JSON or key error further down.
        response = requests.get(URL, timeout=30)
        response.raise_for_status()
        data = response.json()

        records.append({
            'playerId': player,
            'fullName': data['firstName']['default'] + ' ' + data['lastName']['default'],
            'birthDate': data['birthDate'],
            'shootsCatches': data['shootsCatches'],
            'height': data['heightInInches'],
            'weight': data['weightInPounds'],
        })

    return pd.DataFrame(records, columns=bio_columns)

# Get missing players bios

In [25]:
# Ids that appear in the rankings but have no cached bio yet.
rankings_list = trimmed_stats['playerId'].tolist()
player_bios_list = player_bios['playerId'].tolist()

ids_not_in_bios = set(rankings_list) - set(player_bios_list)

list(ids_not_in_bios)

[8477473]

In [26]:
# Fetch bios for the ids found above (one API request per id).
missing_players = add_player_bio(list(ids_not_in_bios))
missing_players

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight
0,8477473,Justin Bailey,1995-07-01,R,76,214


# Add missing bios and re-save bios file

In [27]:
# Append the newly fetched bios to the cached ones and keep playerId as an integer.
updated_player_bios = (
    pd.concat([player_bios, missing_players], axis=0)
    .astype({'playerId': int})
)

updated_player_bios.tail(18)

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight
804,8476441,Joel Edmundson,1993-06-28,L,77,221
805,8477464,Nic Petan,1995-03-22,L,69,175
806,8480441,Trey Fix-Wolansky,1999-05-26,R,67,191
807,8480891,Michael Kesselring,2000-01-13,R,76,190
808,8482496,Nils Aman,2000-02-07,L,74,179
809,8475717,Calvin Pickard,1992-04-15,L,73,206
810,8481609,Vladislav Kolyachonok,2001-05-26,L,73,193
811,8483468,Jiri Kulich,2004-04-14,L,73,186
812,8482765,Isak Rosen,2003-03-15,L,72,173
813,8479982,Conor Timmins,1998-09-18,R,74,206


In [28]:
# Overwrite the bios cache so the ids fetched above are not requested again next run.
file_name = "data/playerbios.csv"
updated_player_bios.to_csv(file_name, index=False)

# Get ESPN Ids

In [29]:
espn_ids = pd.read_csv('data/espn_ids.csv', encoding='utf-8')

# The file loads as one column literally named 'id,fullName'; split it into
# its two fields for the whole column at once instead of row by row.
id_and_name = espn_ids['id,fullName'].str.split(',')
espn_ids['espnId'] = id_and_name.str[0]
espn_ids['fullName'] = id_and_name.str[1]

# ESPN spelling -> spelling used in the saved player bios.
fixes = {
    'Tim Stutzle': 'Tim Stützle',
    'Jani Hakanpaa': 'Jani Hakanpää',
    'Benoit-Olivier Groulx': 'Bo Groulx',
    'Jesse Ylonen': 'Jesse Ylönen',
    'Alexis Lafreniere': 'Alexis Lafrenière',
    'Gustav Lindstrom': 'Gustav Lindström',
    'Alexander Kerfoot': 'Alex Kerfoot',
#     'Johnny Beecher': 'John Beecher',
    'Samuel Walker': 'Sammy Walker',
    'Alex Barre-Boulet': 'Alex Barré-Boulet'
}

# Reverse mapping (bios spelling -> ESPN spelling), displayed in a later cell.
inverse_fixes = {value: key for key, value in fixes.items()}

# Assign the result back: `df[col].replace(..., inplace=True)` works on a
# temporary and does nothing under pandas Copy-on-Write.
espn_ids['fullName'] = espn_ids['fullName'].replace(fixes)

# Spot check one known player.
espn_ids.loc[espn_ids['fullName'] == 'Andrei Vasilevskiy']

Unnamed: 0,"id,fullName",espnId,fullName
440,"2976847,Andrei Vasilevskiy",2976847,Andrei Vasilevskiy


# Add Roster Percent to bios

In [30]:
# Start from the bios that include the players fetched during this run;
# copying the stale `player_bios` left those players without bio data in
# today's snapshot. The index is rebuilt because the concatenated frame
# repeats index labels.
bios_on_date = updated_player_bios.reset_index(drop=True)

# First ESPN listing per name, matching the old "first match wins" lookup.
first_listing = rostership.drop_duplicates('fullName').set_index('fullName')
on_espn = bios_on_date['fullName'].isin(first_listing.index)

# Players ESPN does not list get the sentinels -2 / 'S', as before.
bios_on_date['roster_percent'] = (
    bios_on_date['fullName'].map(first_listing['rostered']).where(on_espn, -2)
)
bios_on_date['default_pos'] = (
    bios_on_date['fullName'].map(first_listing['pos']).where(on_espn, 'S')
)

# Spot check one known player.
bios_on_date.loc[bios_on_date['fullName'] == 'Andrei Vasilevskiy']

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight,roster_percent,default_pos
816,8476883,Andrei Vasilevskiy,1994-07-25,L,76,220,93.876415,G


# Add ESPNID to Bios

In [31]:
# Name -> ESPN id, keeping the first listing for a repeated name (the old
# per-row lookup also took the first match).
espn_id_by_name = espn_ids.drop_duplicates('fullName').set_index('fullName')['espnId']

# Names without an ESPN id get 0. The mapped ids are converted explicitly so a
# malformed id raises here; the old bare `except:` hid every kind of error.
matched_ids = pd.to_numeric(bios_on_date['fullName'].map(espn_id_by_name))
bios_on_date['espnId'] = matched_ids.fillna(0).astype(int)

# Players still lacking an ESPN id - candidates for a new entry in `fixes`.
bios_on_date.loc[bios_on_date['espnId'] == 0]

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight,roster_percent,default_pos,espnId
633,8479320,Max Lajoie,1997-11-05,L,73,191,0.011701,D,0
641,8482411,Hunter Shepard,1995-11-07,L,72,215,0.055656,G,0
651,8483482,Tristan Luneau,2004-01-12,R,73,195,0.032179,D,0
654,8483489,Fraser Minten,2004-07-05,L,74,192,0.059963,C,0
656,8482470,Ilya Solovyov,2000-07-20,L,75,208,0.007313,D,0
663,8483512,Matt Savoie,2004-01-01,R,69,179,0.188665,C,0
672,8482511,Mason Lohrei,2001-01-17,L,77,211,0.346628,D,0
677,8480992,Magnus Chrona,2000-08-28,L,76,194,0.016112,G,0
693,8481534,Raphael Lavoie,2000-09-25,R,76,215,0.02779,C,0
697,8481028,Martin Pospisil,1999-11-19,L,74,173,2.382624,LW,0


# Create summary snapshot file and save

In [32]:
# Attach bio, roster and ESPN id columns to every ranked player. The left join
# keeps players that have no bio row.
summary_stats_snapshot = trimmed_stats.merge(bios_on_date, on='playerId', how='left')
summary_stats_snapshot

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,fantasyPoints,FPP60,FPPG,missedGames,gamesRemaining,fantasyPointsRemain,fullName,birthDate,shootsCatches,height,weight,roster_percent,default_pos,espnId
0,8476453,N. Kucherov,TBL,R,26961,21,75.1,10.03,3.58,0,60,214.80,Nikita Kucherov,1993-06-17,L,71.0,182.0,99.878611,RW,2563060.0
1,8475660,C. Talbot,LAK,G,49933,14,63.2,4.56,4.51,0,45,208.11,Cam Talbot,1987-07-05,L,76.0,200.0,80.821396,G,5734.0
2,8480069,C. Makar,COL,D,30559,21,69.7,8.21,3.32,0,61,202.52,Cale Makar,1998-10-30,R,71.0,187.0,99.904926,D,4233563.0
3,8479318,A. Matthews,TOR,C,24219,19,60.0,8.92,3.16,0,63,199.08,Auston Matthews,1997-09-17,L,75.0,215.0,99.907861,C,4024123.0
4,8481559,J. Hughes,NJD,C,17363,14,44.0,9.12,3.14,0,63,197.82,Jack Hughes,2001-05-14,L,71.0,175.0,99.688483,C,4565222.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
817,8470594,M. Fleury,MIN,G,32280,9,-4.6,-0.51,-0.51,0,29,-15.16,Marc-Andre Fleury,1984-11-28,L,74.0,185.0,13.554605,G,2346.0
818,8479406,F. Gustavsson,MIN,G,36534,11,-7.8,-0.77,-0.71,0,33,-25.71,Filip Gustavsson,1998-06-07,L,74.0,184.0,64.259140,G,4272674.0
819,8476341,A. Forsberg,OTT,G,22557,7,-7.0,-1.12,-1.00,0,23,-26.70,Anton Forsberg,1992-11-27,L,75.0,195.0,1.485127,G,3036851.0
820,8475789,J. Campbell,EDM,G,16007,5,-8.6,-1.93,-1.72,0,13,-26.83,Jack Campbell,1992-01-09,L,75.0,200.0,2.541305,G,5473.0


In [33]:
# Write the snapshot to a file stamped with `today`, the UTC date captured in
# the first cell.
fileName = f"data/summary_stats-{today.strftime('%Y-%m-%d')}.csv"
summary_stats_snapshot.to_csv(fileName, index=False)

In [34]:
# Display the bios-spelling -> ESPN-spelling mapping built from `fixes`.
inverse_fixes

{'Tim Stützle': 'Tim Stutzle',
 'Jani Hakanpää': 'Jani Hakanpaa',
 'Bo Groulx': 'Benoit-Olivier Groulx',
 'Jesse Ylönen': 'Jesse Ylonen',
 'Alexis Lafrenière': 'Alexis Lafreniere',
 'Gustav Lindström': 'Gustav Lindstrom',
 'Alex Kerfoot': 'Alexander Kerfoot',
 'Sammy Walker': 'Samuel Walker',
 'Alex Barré-Boulet': 'Alex Barre-Boulet'}

In [35]:
# Dated copies of the per-game logs (goalies, then skaters) with fantasy points added.
dated_game_logs = [
    (allG_df, f"data/allG_df_fp-{today.strftime('%Y-%m-%d')}.csv"),
    (all_df, f"data/all_df_fp-{today.strftime('%Y-%m-%d')}.csv"),
]

for game_log, file_name in dated_game_logs:
    game_log.to_csv(file_name, index=False)

In [36]:
# Dated copies of the per-player summaries (goalies, then skaters).
dated_summaries = [
    (summary_statsG, f"data/summary_statsG-{today.strftime('%Y-%m-%d')}.csv"),
    (summary_stats, f"data/summary_statsS-{today.strftime('%Y-%m-%d')}.csv"),
]

for summary, file_name in dated_summaries:
    summary.to_csv(file_name, index=False)

In [37]:

# file_name = f"data/goaliesSummary-{today.strftime('%Y-%m-%d')}.csv"
# summary_statsG.to_csv(file_name, encoding='utf-8')

# file_name = f"data/skatersSummary-{today.strftime('%Y-%m-%d')}.csv"
# summary_stats.to_csv(file_name, encoding='utf-8')

# file_name = f"data/goaliesLog-{today.strftime('%Y-%m-%d')}.csv"
# allG_df.to_csv(file_name, encoding='utf-8')

# file_name = f"data/skatersLog-{today.strftime('%Y-%m-%d')}.csv"
# all_df.to_csv(file_name, encoding='utf-8')