In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pprint import pprint
from datetime import datetime, timedelta, timezone
import pytz
import scipy.stats
from dateutil.relativedelta import relativedelta
import time


import warnings

# Suppress the specific warnings
warnings.filterwarnings("ignore")

today = datetime.utcnow()

from api_keys import espn_link

last_scrape = '2023-12-18'

# Get current standings

In [2]:
teamURL = 'https://api-web.nhle.com/v1/standings/now'

teams = requests.get(teamURL).json()
teams = teams['standings']

team_names = pd.DataFrame()

for team in teams:
    abbreviation = team['teamAbbrev']['default']
    name = team['teamName']['default']
    logoURL = team['teamLogo']
    gamesPlayed = team['gamesPlayed']
    gamesRemaining = 82 - gamesPlayed
    goalsFor = team['goalFor']
    goalsAgainst = team['goalAgainst']
    conference = team['conferenceName']
    division = team['divisionName']

    temp = pd.DataFrame({'abbreviation': [abbreviation], 'name': [name], 'gamesPlayed': [gamesPlayed],
                         'gamesRemaining': [gamesRemaining], 'goalsFor': [goalsFor], 'goalsAgainst': [goalsAgainst], 'logo': [logoURL],
                        'conference': [conference], 'division': [division]})

    team_names = pd.concat([team_names, temp], ignore_index=True)

    
teamList = team_names['abbreviation'].tolist()
team_names

team_names.to_csv('data/team_names.csv', index=False)
team_names

Unnamed: 0,abbreviation,name,gamesPlayed,gamesRemaining,goalsFor,goalsAgainst,logo,conference,division
0,VGK,Vegas Golden Knights,32,50,113,81,https://assets.nhle.com/logos/nhl/svg/VGK_ligh...,Western,Pacific
1,VAN,Vancouver Canucks,32,50,120,79,https://assets.nhle.com/logos/nhl/svg/VAN_ligh...,Western,Pacific
2,NYR,New York Rangers,29,53,96,80,https://assets.nhle.com/logos/nhl/svg/NYR_ligh...,Eastern,Metropolitan
3,BOS,Boston Bruins,29,53,94,73,https://assets.nhle.com/logos/nhl/svg/BOS_ligh...,Eastern,Atlantic
4,DAL,Dallas Stars,30,52,107,95,https://assets.nhle.com/logos/nhl/svg/DAL_ligh...,Western,Central
5,COL,Colorado Avalanche,31,51,113,96,https://assets.nhle.com/logos/nhl/svg/COL_ligh...,Western,Central
6,WPG,Winnipeg Jets,30,52,99,79,https://assets.nhle.com/logos/nhl/svg/WPG_ligh...,Western,Central
7,LAK,Los Angeles Kings,27,55,97,67,https://assets.nhle.com/logos/nhl/svg/LAK_ligh...,Western,Pacific
8,TOR,Toronto Maple Leafs,28,54,106,90,https://assets.nhle.com/logos/nhl/svg/TOR_ligh...,Eastern,Atlantic
9,FLA,Florida Panthers,31,51,91,81,https://assets.nhle.com/logos/nhl/svg/FLA_ligh...,Eastern,Atlantic


# Get schedule

This includes the winning goalie

In [3]:
baseURL = 'https://api-web.nhle.com/v1/club-schedule-season/'
season = '/20232024'

completeSked = pd.DataFrame()
homeOnlySked = pd.DataFrame()

date_format = "%d-%m-%Y"
eastern_timezone = pytz.timezone('US/Eastern')
today = datetime.utcnow()

for team in teamList:
    
    skedURL = baseURL + team + season
    sked = requests.get(skedURL).json()
    sked = sked['games']

    sked = [entry for entry in sked if entry.get('gameType') == 2]
    
    for game in sked:
        gameID = game['id']
        gameDate = game['startTimeUTC']
        awayTeam = game['awayTeam']['abbrev']
        homeTeam = game['homeTeam']['abbrev']
        
        # GET WINNING GOALIE
        
        if game['gameState'] == 'OFF':
            winningGoalie = game['winningGoalie']['playerId']
        else:
            winningGoalie = 0
        
        datetime_obj = datetime.strptime(gameDate, '%Y-%m-%dT%H:%M:%SZ')
        utc_timezone = pytz.timezone('UTC')
        utc_datetime = utc_timezone.localize(datetime_obj)
        eastern_timezone = pytz.timezone('US/Eastern')
        eastern_datetime = utc_datetime.astimezone(eastern_timezone)
        formatted_date_string = eastern_datetime.strftime('%d-%m-%Y')
        game_date = datetime.strptime(formatted_date_string, date_format)
        game_time = eastern_datetime.strftime("%A %I:%M %p")
        

        gameTemp = pd.DataFrame({'gameID': [gameID], 'gameDate': [game_date], 'gameTime': [game_time],
                                 'awayTeam': [awayTeam], 'homeTeam': [homeTeam], 'gameDT': [datetime_obj],
                                'winningGoalie': [winningGoalie]})
        completeSked = pd.concat([completeSked, gameTemp], ignore_index=True)

        if homeTeam == team:
            homeOnlySked = pd.concat([homeOnlySked, gameTemp], ignore_index=True)
    
homeOnlySked.to_csv('data/sked.csv', index=False)

completeSked.to_csv('data/sked_full.csv', index=False)

## Show remaining sked. Adjust time if needed

In [4]:
# remainSked = homeOnlySked.loc[homeOnlySked['gameDT'] >= (datetime.utcnow() - timedelta(days=0.5))]

remainSked = homeOnlySked.loc[homeOnlySked['gameDT'] >= (datetime.utcnow())]
remainSked.sort_values('gameDate')

Unnamed: 0,gameID,gameDate,gameTime,awayTeam,homeTeam,gameDT,winningGoalie
468,2023020490,2023-12-19,Tuesday 07:30 PM,EDM,NYI,2023-12-20 00:30:00,0
753,2023020493,2023-12-19,Tuesday 09:00 PM,OTT,ARI,2023-12-20 02:00:00,0
711,2023020488,2023-12-19,Tuesday 07:00 PM,STL,TBL,2023-12-20 00:00:00,0
343,2023020489,2023-12-19,Tuesday 07:00 PM,NYR,TOR,2023-12-20 00:00:00,0
1285,2023020492,2023-12-19,Tuesday 08:30 PM,COL,CHI,2023-12-20 01:30:00,0
...,...,...,...,...,...,...,...
942,2023021309,2024-04-18,Thursday 09:00 PM,SJS,CGY,2024-04-19 01:00:00,0
327,2023021312,2024-04-18,Thursday 10:30 PM,CHI,LAK,2024-04-19 02:30:00,0
40,2023021311,2024-04-18,Thursday 10:00 PM,ANA,VGK,2024-04-19 02:00:00,0
1065,2023021307,2024-04-18,Thursday 07:00 PM,SEA,MIN,2024-04-18 23:00:00,0


## Show completed sked. Adjust time if needed

In [5]:
# completedSked = homeOnlySked.loc[homeOnlySked['gameDT'] <= (datetime.utcnow() - timedelta(days=0.5))]

completedSked = homeOnlySked.loc[homeOnlySked['gameDT'] <= (datetime.utcnow())]
completedSked.sort_values('gameDate')

Unnamed: 0,gameID,gameDate,gameTime,awayTeam,homeTeam,gameDT,winningGoalie
0,2023020003,2023-10-10,Tuesday 10:30 PM,SEA,VGK,2023-10-11 02:30:00,8478499
861,2023020002,2023-10-10,Tuesday 08:00 PM,CHI,PIT,2023-10-11 00:00:00,8475852
697,2023020001,2023-10-10,Tuesday 05:30 PM,NSH,TBL,2023-10-10 21:30:00,8477992
902,2023020007,2023-10-11,Wednesday 10:00 PM,WPG,CGY,2023-10-12 02:00:00,8474593
533,2023020004,2023-10-11,Wednesday 07:00 PM,OTT,CAR,2023-10-11 23:00:00,8475883
...,...,...,...,...,...,...,...
874,2023020480,2023-12-18,Monday 07:00 PM,MIN,PIT,2023-12-19 00:00:00,8477968
631,2023020479,2023-12-18,Monday 07:00 PM,ANA,DET,2023-12-19 00:00:00,8480843
261,2023020481,2023-12-18,Monday 07:30 PM,MTL,WPG,2023-12-19 00:30:00,8474596
178,2023020482,2023-12-18,Monday 08:00 PM,SEA,DAL,2023-12-19 01:00:00,8475809


# Bring in the most recent game logs. Manually adjust date, likely

In [6]:
# file_name = f"data/allG_df_raw-{today.strftime('%Y-%m-%d')}.csv"
file_name = f"data/allG_df_raw-{last_scrape}.csv"
allG_df_raw = pd.read_csv(file_name, index_col=False)
allG_df_raw

# file_name = f"data/all_df_raw-{today.strftime('%Y-%m-%d')}.csv"
file_name = f"data/all_df_raw-{last_scrape}.csv"
all_df_raw = pd.read_csv(file_name, index_col=False)
all_df_raw

Unnamed: 0,playerId,sweaterNumber,name,position,goals,assists,points,plusMinus,pim,hits,...,toi,powerPlayToi,shorthandedToi,team,opponent,secondaryPosition,tertiaryPosition,gameDate,gameTime,gameId
0,8478178,43,D. Raddysh,D,0,0,0,1,0,1,...,1084,5,19,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
1,8475177,44,C. de Haan,D,0,0,0,0,0,2,...,821,0,172,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
2,8480246,48,N. Perbix,D,0,0,0,-1,2,0,...,702,0,0,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
3,8475167,77,V. Hedman,D,0,1,1,-1,0,0,...,1615,386,248,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
4,8478416,81,E. Cernak,D,0,0,0,0,0,6,...,1160,0,276,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17197,8480448,27,P. Kelly,L,0,0,0,-1,0,0,...,707,5,125,OTT,VGK,W,F,2023-12-17 00:00:00,Sunday 08:00 PM,2023020478
17198,8473512,28,C. Giroux,R,0,1,1,-1,0,0,...,1234,230,248,OTT,VGK,W,F,2023-12-17 00:00:00,Sunday 08:00 PM,2023020478
17199,8481065,59,A. Crookshank,L,0,0,0,0,0,3,...,611,7,0,OTT,VGK,W,F,2023-12-17 00:00:00,Sunday 08:00 PM,2023020478
17200,8482092,71,R. Greig,C,1,0,1,-1,2,2,...,1067,238,121,OTT,VGK,C,F,2023-12-17 00:00:00,Sunday 08:00 PM,2023020478


# Show games missing from logs

In [7]:
all_games = homeOnlySked['gameID'].unique().tolist()

games_saved = all_df_raw['gameId'].unique().tolist()

games_done = completedSked['gameID'].unique().tolist()

missing_games = list(set(games_done) - set(games_saved))

missing_games

[2023020480, 2023020481, 2023020482, 2023020483, 2023020479]

# Scrape for nex game logs only

In [8]:
baseURL = 'https://api-web.nhle.com/v1/gamecenter/'
appendix = '/boxscore'

playsBaseURL = 'https://api-web.nhle.com/v1/gamecenter/'
playsAppendix = '/play-by-play'

date_format = "%d-%m-%Y"
eastern_timezone = pytz.timezone('US/Eastern')
today = datetime.utcnow()

awayLogs = {}
awayLogsG = {}
homeLogs = {}
homeLogsG = {}

for Id in missing_games:
    
    gameURL = baseURL + str(Id) + appendix
    
    print(gameURL)
    
    game = requests.get(gameURL).json()

    # pprint(game)

    numPeriods = len(game['boxscore']['linescore']['byPeriod'])
    score = game['boxscore']['linescore']['totals']
    awayTeam = game['awayTeam']
    homeTeam = game['homeTeam']
    awayTeamName = game['awayTeam']['abbrev']
    homeTeamName = game['homeTeam']['abbrev']
    awayTeamId = game['awayTeam']['id']
    homeTeamId = game['homeTeam']['id']
    gameDate = game['startTimeUTC']
    gameOutcome = game['gameOutcome']
    gameID = game['id']

    playerStatsAway = game['boxscore']['playerByGameStats']['awayTeam']
    playerStatsHome = game['boxscore']['playerByGameStats']['homeTeam']

    # pprint(playerStatsHome)

    # GET THE STARTING GOALTENDERS

    
    playsURL = playsBaseURL + str(Id) + playsAppendix
    plays = requests.get(playsURL).json()
    plays = plays['plays']

    homeStartingG = next((play['details']['goalieInNetId'] for play in plays if 
                          ((play['typeDescKey'] == 'shot-on-goal') and (play['details']['awaySOG'] == 1))), None)
    awayStartingG = next((play['details']['goalieInNetId'] for play in plays if 
                          ((play['typeDescKey'] == 'shot-on-goal') and (play['details']['homeSOG'] == 1))), None)

    homeEndingG = next((play['details']['goalieInNetId'] for play in reversed(plays) if 
                        ((play['typeDescKey'] == 'shot-on-goal') and (play['details']['eventOwnerTeamId'] == awayTeamId))), None)
    awayEndingG = next((play['details']['goalieInNetId'] for play in reversed(plays) if 
                        ((play['typeDescKey'] == 'shot-on-goal') and (play['details']['eventOwnerTeamId'] == homeTeamId))), None)

    # Get THE WINNING GOALTENDER


    winningGoalie = completedSked.loc[completedSked['gameID'] == Id]['winningGoalie'].iloc[0]


    # FIGURE OUT THE TYPE OF FINISH

    if numPeriods > 3:
        OT = 1
    else:
        OT = 0

    if numPeriods > 4:
        SO = 1
    else:
        SO = 0

    if score['home'] > score['away']:
        homeTeamWin = 1
        awayTeamWin = 0
    else:
        homeTeamWin = 0
        awayTeamWin = 1

    # FORMAT THE DATES AND TIMES

    datetime_obj = datetime.strptime(gameDate, '%Y-%m-%dT%H:%M:%SZ')
    utc_timezone = pytz.timezone('UTC')
    utc_datetime = utc_timezone.localize(datetime_obj)
    eastern_timezone = pytz.timezone('US/Eastern')
    eastern_datetime = utc_datetime.astimezone(eastern_timezone)
    formatted_date_string = eastern_datetime.strftime('%d-%m-%Y')
    game_date = datetime.strptime(formatted_date_string, date_format)
    game_time = eastern_datetime.strftime("%A %I:%M %p")

    # AWAY LOG ASSEMBLER
    
    awaySOGcheck = 0

    for defender in playerStatsAway['defense']:
        logName = str(gameID) + str(defender['playerId'])
        awayLogs[logName] = defender
        awayLogs[logName]['name'] = awayLogs[logName]['name']['default']
        awayLogs[logName]['team'] = awayTeamName
        awayLogs[logName]['opponent'] = homeTeamName
        awayLogs[logName]['secondaryPosition'] = 'D'
        awayLogs[logName]['tertiaryPosition'] = 'D'
        awayLogs[logName]['gameDate'] = game_date
        awayLogs[logName]['gameTime'] = game_time
        awayLogs[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['toi'].split(':'))))
        awayLogs[logName]['powerPlayToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['powerPlayToi'].split(':'))))
        awayLogs[logName]['shorthandedToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['shorthandedToi'].split(':'))))
        awayLogs[logName]['gameId'] = Id
        
        awaySOGcheck = awaySOGcheck + awayLogs[logName]['shots']

    for forward in playerStatsAway['forwards']:
        logName = str(gameID) + str(forward['playerId'])
        awayLogs[logName] = forward
        awayLogs[logName]['name'] = awayLogs[logName]['name']['default']
        awayLogs[logName]['team'] = awayTeamName
        awayLogs[logName]['opponent'] = homeTeamName

        if forward['position'] == 'C':
            awayLogs[logName]['secondaryPosition'] = 'C'
        else:
            awayLogs[logName]['secondaryPosition'] = 'W'
        awayLogs[logName]['tertiaryPosition'] = 'F'

        awayLogs[logName]['gameDate'] = game_date
        awayLogs[logName]['gameTime'] = game_time
        awayLogs[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['toi'].split(':'))))
        awayLogs[logName]['powerPlayToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['powerPlayToi'].split(':'))))
        awayLogs[logName]['shorthandedToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogs[logName]['shorthandedToi'].split(':'))))
        awayLogs[logName]['gameId'] = Id
        
        awaySOGcheck = awaySOGcheck + awayLogs[logName]['shots']
        
    awayGshots = 0
        
    for goalie in playerStatsAway['goalies']:
        if goalie['toi'] != '00:00':
            logName = str(gameID) + str(goalie['playerId'])
            awayLogsG[logName] = goalie
            awayLogsG[logName]['name'] = awayLogsG[logName]['name']['default']
            awayLogsG[logName]['team'] = awayTeamName
            awayLogsG[logName]['opponent'] = homeTeamName
            awayLogsG[logName]['gameDate'] = game_date
            awayLogsG[logName]['gameTime'] = game_time
            awayLogsG[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(awayLogsG[logName]['toi'].split(':'))))
            awayLogsG[logName]['gameId'] = Id
            
            # FIND THE STARTER

            if goalie['playerId'] == awayStartingG:
                awayLogsG[logName]['start'] = 1
            else:
                awayLogsG[logName]['start'] = 0

            # DETERMINE SHUTOUT

            shots = awayLogsG[logName]['saveShotsAgainst'].split('/')[1]
            saves = awayLogsG[logName]['saveShotsAgainst'].split('/')[0]
            if (shots == saves) & (awayLogsG[logName]['toi'] > 3446):
                awayLogsG[logName]['shutout'] = 1
            else:
                awayLogsG[logName]['shutout'] = 0
                
            awayGshots = awayGshots + int(shots)

            # DETERMINE WIN

            if goalie['playerId'] == winningGoalie:
                awayLogsG[logName]['win'] = 4
            elif (awayTeamWin == 0) & (OT == 1) & (awayEndingG == goalie['playerId']):
                awayLogsG[logName]['win'] = 1
            else:
                awayLogsG[logName]['win'] = 0
                
            # CHECK FOR GOALIE ABBERATION
            
            if int(shots) - int(saves) != awayLogsG[logName]['goalsAgainst']:
                print(f"Error for goalies: {awayLogsG[logName]['name']}")



    # HOME LOG ASSEMBLER
                      
    homeSOGcheck = 0

    for defender in playerStatsHome['defense']:
        logName = str(gameID) + str(defender['playerId'])
        homeLogs[logName] = defender
        homeLogs[logName]['name'] = homeLogs[logName]['name']['default']
        homeLogs[logName]['team'] = homeTeamName
        homeLogs[logName]['opponent'] = awayTeamName
        homeLogs[logName]['secondaryPosition'] = 'D'
        homeLogs[logName]['tertiaryPosition'] = 'D'
        homeLogs[logName]['gameDate'] = game_date
        homeLogs[logName]['gameTime'] = game_time
        homeLogs[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['toi'].split(':'))))
        homeLogs[logName]['powerPlayToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['powerPlayToi'].split(':'))))
        homeLogs[logName]['shorthandedToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['shorthandedToi'].split(':'))))
        homeLogs[logName]['gameId'] = Id
        
        homeSOGcheck = homeSOGcheck + homeLogs[logName]['shots']
                      
    for forward in playerStatsHome['forwards']:
        logName = str(gameID) + str(forward['playerId'])
        homeLogs[logName] = forward
        homeLogs[logName]['name'] = homeLogs[logName]['name']['default']
        homeLogs[logName]['team'] = homeTeamName
        homeLogs[logName]['opponent'] = awayTeamName

        if forward['position'] == 'C':
            homeLogs[logName]['secondaryPosition'] = 'C'
        else:
            homeLogs[logName]['secondaryPosition'] = 'W'
        homeLogs[logName]['tertiaryPosition'] = 'F'

        homeLogs[logName]['gameDate'] = game_date
        homeLogs[logName]['gameTime'] = game_time
        homeLogs[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['toi'].split(':'))))
        homeLogs[logName]['powerPlayToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['powerPlayToi'].split(':'))))
        homeLogs[logName]['shorthandedToi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogs[logName]['shorthandedToi'].split(':'))))
        homeLogs[logName]['gameId'] = Id
        
        homeSOGcheck = homeSOGcheck + homeLogs[logName]['shots']
        
    homeGshots = 0
    
    for goalie in playerStatsHome['goalies']:
        if goalie['toi'] != '00:00':
            logName = str(gameID) + str(goalie['playerId'])
            homeLogsG[logName] = goalie
            homeLogsG[logName]['name'] = homeLogsG[logName]['name']['default']
            homeLogsG[logName]['team'] = homeTeamName
            homeLogsG[logName]['opponent'] = awayTeamName
            homeLogsG[logName]['gameDate'] = game_date
            homeLogsG[logName]['gameTime'] = game_time
            homeLogsG[logName]['toi'] = sum(int(x) * 60**i for i, x in enumerate(reversed(homeLogsG[logName]['toi'].split(':'))))
            homeLogsG[logName]['gameId'] = Id
            
            # FIND THE STARTER

            if goalie['playerId'] == homeStartingG:
                homeLogsG[logName]['start'] = 1
            else:
                homeLogsG[logName]['start'] = 0

            # DETERMINE SHUTOUT

            shots = homeLogsG[logName]['saveShotsAgainst'].split('/')[1]
            saves = homeLogsG[logName]['saveShotsAgainst'].split('/')[0]
            if (shots == saves) & (homeLogsG[logName]['toi'] > 3446):
                homeLogsG[logName]['shutout'] = 1
            else:
                homeLogsG[logName]['shutout'] = 0
                
            homeGshots = homeGshots + int(shots)

            # DETERMINE WIN

            if goalie['playerId'] == winningGoalie:
                homeLogsG[logName]['win'] = 4
            elif (homeTeamWin == 0) & (OT == 1) & (homeEndingG == goalie['playerId']):
                homeLogsG[logName]['win'] = 1
            else:
                homeLogsG[logName]['win'] = 0
                
            # CHECK FOR GOALIE ABBERATION
            
            if int(shots) - int(saves) != homeLogsG[logName]['goalsAgainst']:
                print(f"Error for goalies: {homeLogsG[logName]['name']} vs {homeGshots}")
        

https://api-web.nhle.com/v1/gamecenter/2023020480/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020481/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020482/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020483/boxscore
https://api-web.nhle.com/v1/gamecenter/2023020479/boxscore


# Assemble and append new game logs

In [9]:
homeG_df = pd.DataFrame(homeLogsG)
homeG_df = homeG_df.transpose()

awayG_df = pd.DataFrame(awayLogsG)
awayG_df = awayG_df.transpose()

allG_df_raw = pd.concat([allG_df_raw, homeG_df, awayG_df], axis=0)

file_name = f"data/allG_df_raw-{today.strftime('%Y-%m-%d')}.csv"
allG_df_raw.to_csv(file_name, index=False)

home_df = pd.DataFrame(homeLogs)
home_df = home_df.transpose()

away_df = pd.DataFrame(awayLogs)
away_df = away_df.transpose()

all_df_raw = pd.concat([all_df_raw, home_df, away_df], axis=0)

file_name = f"data/all_df_raw-{today.strftime('%Y-%m-%d')}.csv"
all_df_raw.to_csv(file_name, index=False)

# Clean and add FP to goalies; show summary stats

In [10]:
allG_df = allG_df_raw.copy()

allG_df[['saves', 'shots']] = allG_df['saveShotsAgainst'].str.split('/', expand=True).apply(pd.to_numeric)
allG_df[['evSaves', 'evShots']] = allG_df['evenStrengthShotsAgainst'].str.split('/', expand=True).apply(pd.to_numeric)
allG_df[['ppSaves', 'ppShots']] = allG_df['powerPlayShotsAgainst'].str.split('/', expand=True).apply(pd.to_numeric)

allG_df = allG_df.drop(columns=(['saveShotsAgainst', 'savePctg', 'evenStrengthShotsAgainst',
                                   'powerPlayShotsAgainst', 'shorthandedShotsAgainst', 'evenStrengthGoalsAgainst',
                                  'powerPlayGoalsAgainst', 'shorthandedGoalsAgainst']))

columns_to_convert1 = ['toi', 'start', 'shutout', 'win', 'pim']
allG_df[columns_to_convert1] = allG_df[columns_to_convert1].apply(pd.to_numeric)

for index, row in allG_df.iterrows():
    decisionPoints = 0
    shutoutPoints = 0
    decisionType = 0

    # ADD A GAMES PLAYED TALLY
    
    if row['toi'] > 0:
        allG_df.at[index, 'gamesPlayed'] = 1

    # CALCULATE SHUTOUT POINTS

    if (row['shutout'] == 1):
        shutoutPoints = 3
    else:
        shutoutPoints = 0
        
    # CALCULATE DECISION POINTS
    
    if row['win'] == 4:
        decisionType = 'W'
    elif row['win'] == 1:
        decisionType = 'OTL'
    else:
        decisionType = 'L'
    
    decisionPoints = row['win']

    goalsAgainstPoints = (row['goalsAgainst']) * -2
    
    savesPoints = (row['shots'] - row['goalsAgainst']) * 0.2
    
    # TOTAL FANTASY POINTS
    
    fantasyPoints = decisionPoints + shutoutPoints + goalsAgainstPoints + savesPoints
    
    # ADD TO DATAFRAME
    
    allG_df.at[index, 'fantasyPoints'] = fantasyPoints
    allG_df.at[index, 'decisionType'] = decisionType
    
columns_to_convert3 = ['gamesPlayed']
allG_df[columns_to_convert3] = allG_df[columns_to_convert3].astype(int)

summary_statsG = allG_df.groupby('playerId').agg({
    'name': 'first',
    'team': 'last',
    'position': 'first',
    'toi': 'sum',
    'gamesPlayed': 'sum',
    'saves': 'sum',
    'shots': 'sum',
#     'evenSaves': 'sum',
    'shutout': 'sum',
    'fantasyPoints': 'sum'
}).reset_index()

summary_statsG.sort_values('fantasyPoints', ascending=False).head(25)

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,saves,shots,shutout,fantasyPoints
35,8477967,T. Demko,VAN,G,81696,23,621,674,3,91.2
8,8475660,C. Talbot,LAK,G,71457,20,501,541,2,80.2
25,8476945,C. Hellebuyck,WPG,G,82985,23,610,666,1,75.0
51,8478971,C. Ingram,ARI,G,64166,19,518,561,3,74.6
64,8480280,J. Swayman,BOS,G,52478,15,422,452,2,70.4
48,8478499,A. Hill,VGK,G,50468,15,380,407,2,70.0
9,8475683,S. Bobrovsky,FLA,G,80836,23,572,628,2,65.4
30,8477424,J. Saros,NSH,G,80939,24,629,690,1,62.8
1,8471734,J. Quick,NYR,G,37948,11,277,299,2,54.4
31,8477465,T. Jarry,PIT,G,73102,22,554,607,3,51.8


# Function to caluclate missed games from injury based on sked

In [11]:
def missed_games (team, returnDate):
    tempDate = datetime.strptime(returnDate, '%Y-%m-%d')
#     utc_datetime = datetime.strptime(returnDate, '%Y-%m-%d').replace(tzinfo=timezone.utc)
    temp = homeOnlySked.loc[(homeOnlySked['awayTeam'] == team) | (homeOnlySked['homeTeam'] == team)]
    temp = temp.loc[(temp['gameDT'] >= datetime.utcnow()) & (temp['gameDate'] < tempDate)]
                     
    return len(temp)

# Set all players to 0 missed games; then manually adjust dictionary

In [12]:
summary_statsG['missedGames'] = 0

In [13]:
injuries_listG = {
    'F. Andersen': '2024-01-16',
    'J. Oettinger': '2023-12-27',
    'P. Copley': '2024-01-09',
    'J. Gibson': '2023-12-21',
#     'V. Husso': '2023-11-22',
#     'J. Korpisalo': '2023-11-27',
#     'J. Markstrom': '2023-12-14',
#     'E. Merzlikins': '2023-12-14',
    'P. Grubauer': '2023-12-23',
    'A. Hill': '2023-12-23',
    'J. Woll': '2024-01-09'
#     'I. Samsonov': '2023-12-07'

}

for player, date in injuries_listG.items():
    team = summary_statsG.loc[summary_statsG['name'] == player]['team'].iloc[0]
    missedGames = missed_games(team, date)
    summary_statsG.loc[summary_statsG['name'] == player, 'missedGames'] = missedGames

# Add crease minutes to team_names to calculate share %

In [14]:
for index, row in team_names.iterrows():
    team_names.at[index, 'creaseMins'] = allG_df.loc[allG_df['team'] == row['abbreviation']]['toi'].sum()

team_names['creaseMins'] = team_names['creaseMins'].astype(int)

for index, row in summary_statsG.iterrows():
    
    creaseShare = row['toi'] / team_names.loc[team_names['abbreviation'] == row['team']]['creaseMins'].sum()
    summary_statsG.at[index, 'creaseShare'] = round(creaseShare * 100, 2)
    
    gamesRemaining = team_names.loc[team_names['abbreviation'] == row['team']]['gamesRemaining'].iloc[0] - row['missedGames']
    summary_statsG.at[index, 'gamesRemaining'] = round(gamesRemaining * creaseShare, 2)
    
    FPP60 = row['fantasyPoints'] / row['toi'] * 3600
    summary_statsG.at[index, 'FPP60'] = round(FPP60, 2)
    
    FPPG = row['fantasyPoints'] / row['gamesPlayed']
    summary_statsG.at[index, 'FPPG'] = round(FPPG, 2)
    
    FPremain = FPP60 * gamesRemaining * creaseShare
    summary_statsG.at[index, 'fantasyPointsRemain'] = round(FPremain, 2)

summary_statsG['gamesRemaining'] = summary_statsG['gamesRemaining'].astype(int)

summary_statsG.loc[summary_statsG['team'] == 'VGK']

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,saves,shots,shutout,fantasyPoints,missedGames,creaseShare,gamesRemaining,FPP60,FPPG,fantasyPointsRemain
48,8478499,A. Hill,VGK,G,50468,15,380,407,2,70.0,2,43.23,20,4.99,4.67,103.61
63,8480238,J. Patera,VGK,G,3900,1,34,38,0,2.8,0,3.34,1,2.58,2.8,4.32
65,8480313,L. Thompson,VGK,G,62382,18,442,485,0,45.4,0,53.43,26,2.62,2.52,70.0


In [15]:
# Fix errors

# Georgiev versus Coyotes
# Hill versus Pittsburgh
# deSmith versus ... someone Flordia?

summary_statsG.loc[summary_statsG['team'] == 'COL']

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,saves,shots,shutout,fantasyPoints,missedGames,creaseShare,gamesRemaining,FPP60,FPPG,fantasyPointsRemain
66,8480382,A. Georgiev,COL,G,83910,24,601,669,1,46.0,0,75.03,38,1.97,1.92,75.52
71,8481031,I. Prosvetov,COL,G,27921,10,213,235,0,15.6,0,24.97,12,2.01,1.56,25.61


# Function for summary statistics

In [16]:
def summary_statistics(df):
    temp_build = df.groupby('playerId').agg({
        'name': 'first',
        'team': 'last',
        'position': 'first',
        'toi': 'sum',
        'gamesPlayed': 'sum',
        'goals': 'sum',
        'assists': 'sum',
        'specialTeams': 'sum',
        'shots': 'sum',
        'hits': 'sum',
        'blockedShots': 'sum',
        'powerPlayToi': 'sum',
        'shorthandedToi': 'sum',
        'plusMinus': 'sum',
        'fantasyPoints': 'sum',
        'secondaryPosition': 'first',
        'tertiaryPosition': 'first'
    }).reset_index()


    temp_build['FPP60'] = (temp_build['fantasyPoints'] / temp_build['toi'] * 3600).round(2)
    temp_build['FPPG'] = (temp_build['fantasyPoints'] / temp_build['gamesPlayed']).round(2)

    temp_build = temp_build.sort_values(by='FPPG', ascending=False)
    
    return temp_build

# Clean the skaters; add FP and show summary stats

In [17]:
all_df = all_df_raw.copy()

all_df = all_df.drop(columns=(['faceoffWinningPctg']))

columns_to_convert1 = ['goals', 'assists', 'points', 'plusMinus', 'pim', 'hits', 'blockedShots',
                      'powerPlayGoals', 'powerPlayPoints', 'shorthandedGoals', 'shPoints', 'shots',
                       'toi', 'powerPlayToi', 'shorthandedToi']
all_df[columns_to_convert1] = all_df[columns_to_convert1].apply(pd.to_numeric)


for index, row in all_df.iterrows():
    
    if row['toi'] > 0:
        all_df.at[index, 'gamesPlayed'] = 1
    
    specialTeams = row['powerPlayPoints'] + row['shPoints']
    fantasyPoints = (row['goals']*2) + row['assists'] + (specialTeams * .5) + (row['blockedShots'] * .5) + ((row['hits'] + row['shots']) * .1)
    all_df.at[index, 'fantasyPoints'] = fantasyPoints
    all_df.at[index, 'specialTeams'] = specialTeams
    
columns_to_convert4 = ['specialTeams', 'gamesPlayed']
all_df[columns_to_convert4] = all_df[columns_to_convert4].astype(int)

summary_stats = summary_statistics(all_df).sort_values('fantasyPoints', ascending=False)
all_df

Unnamed: 0,playerId,sweaterNumber,name,position,goals,assists,points,plusMinus,pim,hits,...,team,opponent,secondaryPosition,tertiaryPosition,gameDate,gameTime,gameId,gamesPlayed,fantasyPoints,specialTeams
0,8478178,43,D. Raddysh,D,0,0,0,1,0,1,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,0.9,0
1,8475177,44,C. de Haan,D,0,0,0,0,0,2,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,0.7,0
2,8480246,48,N. Perbix,D,0,0,0,-1,2,0,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,0.6,0
3,8475167,77,V. Hedman,D,0,1,1,-1,0,0,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,1.9,0
4,8478416,81,E. Cernak,D,0,0,0,0,0,6,...,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001,1,1.3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20230204798475842,8475842,39,S. Carrick,C,0,0,0,0,0,1,...,ANA,DET,C,F,2023-12-18 00:00:00,Monday 07:00 PM,2023020479,1,1.2,0
20230204798477527,8477527,44,R. Johnston,L,0,0,0,0,2,1,...,ANA,DET,W,F,2023-12-18 00:00:00,Monday 07:00 PM,2023020479,1,0.6,0
20230204798479368,8479368,49,M. Jones,L,0,0,0,0,0,0,...,ANA,DET,W,F,2023-12-18 00:00:00,Monday 07:00 PM,2023020479,1,0.0,0
20230204798478366,8478366,77,F. Vatrano,R,0,0,0,0,2,1,...,ANA,DET,W,F,2023-12-18 00:00:00,Monday 07:00 PM,2023020479,1,0.8,0


# Set missed games as 0 and manually adjust injury dictionary

In [18]:
summary_stats['missedGames'] = 0

In [19]:
injuries_list = {
    
#     ANAHEIM
    'T. Zegras': '2023-12-24',
    'J. Drysdale': '2023-12-24',
    'M. McTavish': '2023-12-19',   
    
#     ARIZONA
    'T. Boyd': '2024-04-20',
    'S. Durzi': '2023-12-21',
#     'J. McBain': '2023-12-15',
    'B. Hayton': '2023-12-22',
#     'J. Valimaki': '2023-11-28',
#     'T. Dermott': '2023-12-04',
#     'M. Dumba': '2023-11-28',   
    
#     BOSTON
    'M. Lucic': '2024-01-16',
    'D. Forbort': '2024-01-02',
    'C. McAvoy': '2023-12-19',
    'P. Zacha': '2023-12-19',
#     'M. Grzelcyk': '2023-11-25',  
    
#     BUFFALO
#     'T. Thompson': '2023-12-05',
    'Z. Girgensons': '2023-12-21',
    'J. Skinner': '2023-12-21',
    'J. Greenway': '2023-12-19',
#     'A. Tuch': '2023-12-15',
#     'Z. Benson': '2023-11-17',  
    
#     CAROLINA
    'A. Svechnikov': '2023-12-19',   
    
#     CHICAGO
    'T. Hall': '2024-04-20',
    'A. Athanasiou': '2023-12-24',
    'K. Korchinski': '2023-12-19',
    'A. Vlasic': '2023-12-19',
    'S. Jones': '2023-12-19',
    'C. Perry': '2024-04-20',   
    
#     COLORADO
    'A. Lehkonen': '2024-01-02',
    'S. Girard': '2024-01-31',   
    'C. Makar': '2023-12-19',
    
#     COLUMBUS
    'B. Jenner': '2024-01-19',
    'A. Boqvist': '2024-01-04',
    'D. Severson': '2024-01-02',
    'J. Roslovic': '2023-12-21',
    'P. Laine': '2024-01-27',
    
#     DALLAS
    
#     DETROIT
#     'D. Larkin': '2023-12-18',
#     'J. Compher': '2023-12-14',
    'R. Fabbri': '2023-12-23',
    
#     EDMONTON
#     'M. Janmark': '2023-11-22',
    'D. Holloway': '2023-12-28',
    
#     FLORIDA
#     'A. Barkov': '2023-11-22',
#     'J. Mahura': '2023-11-27',
    'A. Ekblad': '2023-12-21',
    'A. Lundell': '2023-12-21',
    
#     LOS ANGELES
    'V. Gavrikov': '2023-12-20',
    
#     MINNESOTA
    'J. Brodin': '2024-01-02',
    
#     MONTREAL
    'T. Pearson': '2024-01-10',
    'R. Harvey-Pinard': '2024-01-17',
    'A. Newhook': '2024-02-17',
#     'D. Savard': '2023-12-07',
    'J. Harris': '2023-12-28',
#     'A. Xhekaj': '2023-12-07',
    'K. Dach': '2024-04-20',
#     'C. Wideman': '2023-12-23',
    
#     NASHVILLE
#     'L. Schenn': '2023-11-26',
#     'T. Novak': '2023-12-12',
#     'A. Carrier': '2023-12-16',
    'K. Sherwood': '2023-12-21',
    
#     NEW JERSEY
#     'N. Hischier': '2023-11-22',
#     'T. Meier': '2023-11-28',
    'D. Hamilton': '2024-02-15',
    'T. Nosek': '2023-12-30',
#     'J. Hughes': '2023-11-14',
    
#     NY ISLANDERS
    'A. Pelech': '2023-12-27',
    'R. Pulock': '2023-12-27',
    'S. Mayfield': '2023-12-19',
    
#     NY RANGERS
    'F. Chytil': '2023-10-19',
#     'A. Fox': '2023-11-29',
    'K. Kakko': '2023-12-22',
    
#     OTTAWA
#     'R. Greig': '2023-12-05',
    'T. Chabot': '2024-01-04',
    'M. Joseph': '2023-12-23',
    
#     PHILADELPHIA
    'N. Cates': '2024-01-20',
    'T. Sanheim': '2023-12-21',
    
#     PITTSBURGH
    'B. Rust': '2024-01-02',
#     'R. Rakell': '2023-12-21',
#     'N. Acciari': '2023-12-18',
    'M. Nieto': '2023-12-27',
    
#     SAN JOSE
#     'A. Barabanov': '2023-12-05',
#     'L. Kunin': '2023-12-12',
#     'W. Eklund': '2023-12-12',
#     'J. Rutta': '2023-12-05',
    'A. Duclair': '2023-12-19',
    'N. Sturm': '2023-12-21',
    
#     SEATTLE
    'A. Burakovsky': '2023-12-27',
    'J. Schwartz': '2024-01-11',
    'J. Schultz': '2023-12-21',
#     'J. Eberle': '2023-12-21',
    
#     ST. LOUIS
    
#     TAMPA BAY
#     'L. Brown': '2023-12-06',
    
#     TORONTO
#     'T. Liljegren': '2023-12-12',
    'M. Giordano': '2023-12-19',
    'J. Klingberg': '2024-04-20',
    'R. Reaves': '2024-01-09',
    
#     VANCOUVER
    'C. Soucy': '2023-12-28',
#     'P. Suter': '2023-12-30',
#     'G. Brisebois': '2023-12-23',
    
#     VEGAS
    'S. Theodore': '2023-12-21',
#     'A. Martinez': '2023-12-23',
    'K. Korczak': '2023-12-27',
    
#     WASHINGTON
#     'M. Fehervary': '2023-11-27',
#     'T. Oshie': '2023-12-07',
    'N. Backstrom': '2024-04-20',
    'S. Milano': '2023-12-20',
#     'A. Mantha': '2023-11-18',
    
#     WINNIPEG
#     'G. Vilardi': '2023-11-28',
    'K. Connor': '2024-02-06',
    'R. Kupari': '2023-12-27'
    
}

for player, date in injuries_list.items():
    print(player)
    team = summary_stats.loc[summary_stats['name'] == player]['team'].iloc[0]
    missedGames = missed_games(team, date)
    summary_stats.loc[summary_stats['name'] == player, 'missedGames'] = missedGames

T. Zegras
J. Drysdale
M. McTavish
T. Boyd
S. Durzi
B. Hayton
M. Lucic
D. Forbort
C. McAvoy
P. Zacha
Z. Girgensons
J. Skinner
J. Greenway
A. Svechnikov
T. Hall
A. Athanasiou
K. Korchinski
A. Vlasic
S. Jones
C. Perry
A. Lehkonen
S. Girard
C. Makar
B. Jenner
A. Boqvist
D. Severson
J. Roslovic
P. Laine
R. Fabbri
D. Holloway
A. Ekblad
A. Lundell
V. Gavrikov
J. Brodin
T. Pearson
R. Harvey-Pinard
A. Newhook
J. Harris
K. Dach
K. Sherwood
D. Hamilton
T. Nosek
A. Pelech
R. Pulock
S. Mayfield
F. Chytil
K. Kakko
T. Chabot
M. Joseph
N. Cates
T. Sanheim
B. Rust
M. Nieto
A. Duclair
N. Sturm
A. Burakovsky
J. Schwartz
J. Schultz
M. Giordano
J. Klingberg
R. Reaves
C. Soucy
S. Theodore
K. Korczak
N. Backstrom
S. Milano
K. Connor
R. Kupari


# Add fantasy stats and missing games

In [20]:
for index, row in summary_stats.iterrows():
    
    gamesRemaining = team_names.loc[team_names['abbreviation'] == row['team']]['gamesRemaining']

    summary_stats.loc[index, 'gamesRemaining'] = int(gamesRemaining) - row['missedGames']

    summary_stats.loc[index, 'fantasyPointsRemain'] = row['FPPG'] * (int(gamesRemaining) - row['missedGames'])
    
summary_stats['gamesRemaining'] = summary_stats['gamesRemaining'].astype(int)


summary_stats.loc[summary_stats['team'] == 'DET']

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,goals,assists,specialTeams,shots,...,shorthandedToi,plusMinus,fantasyPoints,secondaryPosition,tertiaryPosition,FPP60,FPPG,missedGames,gamesRemaining,fantasyPointsRemain
642,8481542,M. Seider,DET,D,40906,31,5,13,11,62,...,5598,-1,76.7,D,D,6.75,2.47,0,51,125.97
326,8478013,J. Walman,DET,D,34448,29,6,7,5,52,...,4687,5,68.3,D,D,7.14,2.36,0,51,120.36
432,8479337,A. DeBrincat,DET,R,33979,31,15,13,11,101,...,83,-3,64.6,W,F,6.84,2.08,0,51,106.08
304,8477946,D. Larkin,DET,C,29203,25,11,15,15,87,...,2454,3,59.0,C,F,7.27,2.36,0,51,120.36
206,8476906,S. Gostisbehere,DET,D,35112,30,5,18,13,51,...,1492,-6,52.7,D,D,5.4,1.76,0,51,89.76
96,8475279,B. Chiarot,DET,D,35859,31,2,7,0,46,...,4077,8,52.1,D,D,5.23,1.68,0,51,85.68
688,8482078,L. Raymond,DET,L,33000,31,10,14,6,68,...,223,-2,49.9,W,F,5.44,1.61,0,51,82.11
497,8479992,M. Rasmussen,DET,C,28707,31,7,7,3,48,...,3988,9,47.5,C,F,5.96,1.53,0,51,78.03
26,8473507,J. Petry,DET,D,26793,24,1,8,0,32,...,2466,3,40.1,D,D,5.39,1.67,0,51,85.17
258,8477456,J. Compher,DET,L,29809,26,6,13,5,32,...,4524,5,40.0,W,F,4.83,1.54,0,51,78.54


# Single Frame to add bio details

In [21]:
trim_stats = summary_stats.drop(columns=['goals', 'assists', 'specialTeams', 'shots', 'hits', 'blockedShots', 'powerPlayToi',
                                        'shorthandedToi', 'plusMinus', 'secondaryPosition', 'tertiaryPosition'])

trim_statsG = summary_statsG.drop(columns=['saves', 'shots', 'shutout'])

column_order = ['playerId', 'name', 'team', 'position', 'toi', 'gamesPlayed', 'fantasyPoints', 'FPP60', 'FPPG', 'missedGames', 'gamesRemaining', 'fantasyPointsRemain']

trim_stats = trim_stats[column_order]
trim_statsG = trim_statsG[column_order]

print(trim_statsG.columns.to_list())
print(trim_stats.columns.to_list())

trimmed_stats = pd.concat([trim_stats, trim_statsG])
trimmed_stats = trimmed_stats.sort_values('fantasyPointsRemain', ascending = False)
trimmed_stats

['playerId', 'name', 'team', 'position', 'toi', 'gamesPlayed', 'fantasyPoints', 'FPP60', 'FPPG', 'missedGames', 'gamesRemaining', 'fantasyPointsRemain']
['playerId', 'name', 'team', 'position', 'toi', 'gamesPlayed', 'fantasyPoints', 'FPP60', 'FPPG', 'missedGames', 'gamesRemaining', 'fantasyPointsRemain']


Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,fantasyPoints,FPP60,FPPG,missedGames,gamesRemaining,fantasyPointsRemain
424,8479318,A. Matthews,TOR,C,35008,27,95.9,9.86,3.55,0,54,191.70
159,8476453,N. Kucherov,TBL,R,40470,31,106.1,9.44,3.42,0,50,171.00
649,8481559,J. Hughes,NJD,C,29942,24,76.5,9.20,3.19,0,53,169.07
356,8478402,C. McDavid,EDM,C,33567,26,80.0,8.58,3.08,0,54,166.32
266,8477492,N. MacKinnon,COL,C,42087,31,99.4,8.50,3.21,0,51,163.71
...,...,...,...,...,...,...,...,...,...,...,...,...
70,8480992,M. Chrona,SJS,G,1830,1,-5.4,-10.62,-5.40,0,0,-8.90
28,8477293,A. Raanta,CAR,G,44928,14,-9.2,-0.74,-0.66,0,20,-15.11
11,8475789,J. Campbell,EDM,G,16007,5,-8.6,-1.93,-1.72,0,8,-16.67
32,8477480,E. Comrie,BUF,G,20642,7,-13.2,-2.30,-1.89,0,9,-20.75


# Get ESPN ownership %

In [22]:
rosters = requests.get(espn_link).json()

# pprint(rosters[9])

rostership = pd.DataFrame()

for player in rosters:
    n = player['fullName']
    try:
        percent = player['ownership']['percentOwned']
    except:
        percent = 0
    pos = player['eligibleSlots']
    dpos = player['defaultPositionId']
    if (n == 'Sebastian Aho') & (dpos == 4):
        n = 'Sebastian Aho (D)'
#     print(n)
    temp = pd.DataFrame({'fullName': [n], 'rostered': [percent], 'pos': [dpos]})
    rostership = pd.concat([rostership, temp], ignore_index=True)
    
rostership.head()

fixes = {
    'Tim Stutzle': 'Tim Stützle',
    'Alex Barre-Boulet': 'Alex Barré-Boulet',
    'Jani Hakanpaa': 'Jani Hakanpää',
    'Jesse Ylonen': 'Jesse Ylönen',
    'Alexis Lafreniere': 'Alexis Lafrenière',
    'Gustav Lindstrom': 'Gustav Lindström',
    'Alexander Kerfoot': 'Alex Kerfoot',
    'Johnny Beecher': 'John Beecher',
    'Samuel Walker': 'Sammy Walker',
    'Maxime Lajoie': 'Max Lajoie'
}

rostership['fullName'].replace(fixes, inplace=True)

pos_fixes = {
    1: 'C',
    2: 'LW',
    3: 'RW',
    4: 'D',
    5: 'G'
}

rostership['pos'].replace(pos_fixes, inplace=True)

rostership.loc[rostership['fullName'] == 'Sebastian Aho (D)']
rostership.loc[rostership['fullName'] == 'Andrei Vasilevskiy']

Unnamed: 0,fullName,rostered,pos
1365,Andrei Vasilevskiy,94.346229,G


# Read in saved player bios

In [23]:
player_bios = pd.read_csv('data/playerbios.csv', encoding='utf-8')
player_bios = player_bios.rename(columns={'id': 'playerId', 'name': 'fullName'})
player_bios

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight
0,8477967,Thatcher Demko,1995-12-08,L,76,192
1,8481559,Jack Hughes,2001-05-14,L,71,175
2,8479318,Auston Matthews,1997-09-17,L,75,215
3,8476453,Nikita Kucherov,1993-06-17,L,71,182
4,8480012,Elias Pettersson,1998-11-12,L,74,176
...,...,...,...,...,...,...
853,8476907,Mackenzie MacEachern,1994-03-09,L,74,193
854,8482070,Mitchell Chaffee,1998-01-26,R,73,201
855,8479421,Jacob Moverare,1998-08-31,L,75,210
856,8481726,Adam Edstrom,2000-10-12,L,79,234


# Function for getting bios of missing players

In [24]:
def add_player_bio (missing_id_list):
    
    new_player_bios = pd.DataFrame()
    
    for player in missing_id_list:
        URL = 'https://api-web.nhle.com/v1/player/' + str(player) + '/landing'
        data = requests.get(URL).json()

        first = data['firstName']['default']
        last = data['lastName']['default']
        birthDate = data['birthDate']
        hand = data['shootsCatches']
        height = data['heightInInches']
        weight = data['weightInPounds']

        temp_df = pd.DataFrame({'playerId': [player], 
                                'fullName': first + ' ' + last, 
                                'birthDate': [birthDate], 
                                'shootsCatches': [hand],
                                'height': [height],
                                'weight': [weight]
                               })

#         print(temp_df['name'])

        new_player_bios = pd.concat([new_player_bios, temp_df])
        
    return new_player_bios

# Get missing players bios

In [25]:
player_bios_list = player_bios['playerId'].to_list()
rankings_list = trimmed_stats['playerId'].to_list()

ids_not_in_bios = set(rankings_list) - set(player_bios_list)

list(ids_not_in_bios)

[]

In [26]:
missing_players = add_player_bio(list(ids_not_in_bios))
missing_players

# Add missing bios and re-save bios file

In [27]:
updated_player_bios = pd.concat([player_bios, missing_players], axis=0)
updated_player_bios['playerId'] = updated_player_bios['playerId'].astype(int)

updated_player_bios.tail(18)

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight
840,8483401,Marc Johnstone,1996-06-19,R,72,181
841,8483565,Nick Blankenburg,1998-05-12,R,69,177
842,8481070,Hugh McGing,1998-07-11,L,68,176
843,8474062,Riley Nash,1989-05-09,R,73,187
844,8480238,Jiri Patera,1999-02-24,L,75,212
845,8483570,Ben Meyers,1998-11-15,L,71,194
846,8480245,Olle Lycksell,1999-08-24,L,70,163
847,8484314,Jiri Smejkal,1996-11-05,L,76,209
848,8479419,Brett Murray,1998-07-20,L,77,228
849,8474141,Patrick Kane,1988-11-19,L,70,177


In [28]:
file_name = f"data/playerbios.csv"
updated_player_bios.to_csv(file_name, index=False)

# Get ESPN Ids

In [29]:
espn_ids = pd.read_csv('data/espn_ids.csv', encoding='utf-8')
# espn_ids = espn_ids.rename(columns={'id': 'espnId'})

for index, row in espn_ids.iterrows():
    playerId = row['id,fullName'].split(',')[0]
    fullName = row['id,fullName'].split(',')[1]
    
    espn_ids.at[index, 'espnId'] = playerId
    espn_ids.at[index, 'fullName'] = fullName

    
fixes = {
    'Tim Stutzle': 'Tim Stützle',
    'Jani Hakanpaa': 'Jani Hakanpää',
    'Benoit-Olivier Groulx': 'Bo Groulx',
    'Jesse Ylonen': 'Jesse Ylönen',
    'Alexis Lafreniere': 'Alexis Lafrenière',
    'Gustav Lindstrom': 'Gustav Lindström',
    'Alexander Kerfoot': 'Alex Kerfoot',
#     'Johnny Beecher': 'John Beecher',
    'Samuel Walker': 'Sammy Walker',
    'Alex Barre-Boulet': 'Alex Barré-Boulet'
}

inverse_fixes = {value: key for key, value in fixes.items()}

espn_ids['fullName'].replace(fixes, inplace=True)
    
espn_ids.loc[espn_ids['fullName'] == 'Sebastian Aho (D)']
espn_ids.loc[espn_ids['fullName'] == 'Andrei Vasilevskiy']

Unnamed: 0,"id,fullName",espnId,fullName
440,"2976847,Andrei Vasilevskiy",2976847,Andrei Vasilevskiy


# Add Roster Percent to bios

In [30]:
bios_on_date = updated_player_bios.copy()

for index, row in bios_on_date.iterrows():
    try:
        roster_percent = rostership.loc[rostership['fullName'] == row['fullName']]['rostered'].iloc[0]
        default_pos = rostership.loc[rostership['fullName'] == row['fullName']]['pos'].iloc[0]
    except:
        roster_percent = -2
        default_pos = 'S'
        
    bios_on_date.at[index, 'roster_percent'] = roster_percent
    bios_on_date.at[index, 'default_pos'] = default_pos
    
bios_on_date.sort_values('roster_percent')

bios_on_date.loc[bios_on_date['fullName'] == 'Sebastian Aho (D)']
bios_on_date.loc[bios_on_date['fullName'] == 'Andrei Vasilevskiy']

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight,roster_percent,default_pos
816,8476883,Andrei Vasilevskiy,1994-07-25,L,76,220,94.346229,G


# Add ESPNID to Bios

In [31]:
for index, row in bios_on_date.iterrows():
    try:
        espnId = espn_ids.loc[espn_ids['fullName'] == row['fullName']]['espnId'].iloc[0]
    
        bios_on_date.at[index, 'espnId'] = espnId
    except:
        bios_on_date.at[index, 'espnId'] = 0
    
bios_on_date['espnId'] = bios_on_date['espnId'].astype(int)

bios_on_date.loc[bios_on_date['espnId'] == 0]

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight,roster_percent,default_pos,espnId
633,8479320,Max Lajoie,1997-11-05,L,73,191,0.015636,D,0
641,8482411,Hunter Shepard,1995-11-07,L,72,215,0.082558,G,0
651,8483482,Tristan Luneau,2004-01-12,R,73,195,0.058283,D,0
654,8483489,Fraser Minten,2004-07-05,L,74,192,0.055432,C,0
656,8482470,Ilya Solovyov,2000-07-20,L,75,208,0.021323,D,0
663,8483512,Matt Savoie,2004-01-01,R,69,179,0.170561,C,0
672,8482511,Mason Lohrei,2001-01-17,L,77,211,0.274335,D,0
677,8480992,Magnus Chrona,2000-08-28,L,76,194,0.018506,G,0
693,8481534,Raphael Lavoie,2000-09-25,R,76,215,0.022743,C,0
697,8481028,Martin Pospisil,1999-11-19,L,74,173,0.690832,LW,0


# Create summary snapshot file and save

In [32]:
summary_stats_snapshot = pd.merge(trimmed_stats, bios_on_date, how='left', on='playerId')
summary_stats_snapshot

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,fantasyPoints,FPP60,FPPG,missedGames,gamesRemaining,fantasyPointsRemain,fullName,birthDate,shootsCatches,height,weight,roster_percent,default_pos,espnId
0,8479318,A. Matthews,TOR,C,35008,27,95.9,9.86,3.55,0,54,191.70,Auston Matthews,1997-09-17,L,75,215,99.913298,C,4024123
1,8476453,N. Kucherov,TBL,R,40470,31,106.1,9.44,3.42,0,50,171.00,Nikita Kucherov,1993-06-17,L,71,182,99.880607,RW,2563060
2,8481559,J. Hughes,NJD,C,29942,24,76.5,9.20,3.19,0,53,169.07,Jack Hughes,2001-05-14,L,71,175,99.779692,C,4565222
3,8478402,C. McDavid,EDM,C,33567,26,80.0,8.58,3.08,0,54,166.32,Connor McDavid,1997-01-13,L,73,194,99.891969,C,3895074
4,8477492,N. MacKinnon,COL,C,42087,31,99.4,8.50,3.21,0,51,163.71,Nathan MacKinnon,1995-09-01,R,72,200,99.914712,C,3041969
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
853,8480992,M. Chrona,SJS,G,1830,1,-5.4,-10.62,-5.40,0,0,-8.90,Magnus Chrona,2000-08-28,L,76,194,0.018506,G,0
854,8477293,A. Raanta,CAR,G,44928,14,-9.2,-0.74,-0.66,0,20,-15.11,Antti Raanta,1989-05-12,L,72,195,4.378398,G,3037703
855,8475789,J. Campbell,EDM,G,16007,5,-8.6,-1.93,-1.72,0,8,-16.67,Jack Campbell,1992-01-09,L,75,200,2.193657,G,5473
856,8477480,E. Comrie,BUF,G,20642,7,-13.2,-2.30,-1.89,0,9,-20.75,Eric Comrie,1995-07-06,L,73,183,0.219205,G,3042057


In [33]:
fileName = f"data/summary_stats-{today.strftime('%Y-%m-%d')}.csv"
summary_stats_snapshot.to_csv(fileName, index=False)

In [34]:
inverse_fixes

{'Tim Stützle': 'Tim Stutzle',
 'Jani Hakanpää': 'Jani Hakanpaa',
 'Bo Groulx': 'Benoit-Olivier Groulx',
 'Jesse Ylönen': 'Jesse Ylonen',
 'Alexis Lafrenière': 'Alexis Lafreniere',
 'Gustav Lindström': 'Gustav Lindstrom',
 'Alex Kerfoot': 'Alexander Kerfoot',
 'Sammy Walker': 'Samuel Walker',
 'Alex Barré-Boulet': 'Alex Barre-Boulet'}

In [35]:
file_name = f"data/allG_df_fp-{today.strftime('%Y-%m-%d')}.csv"
allG_df.to_csv(file_name, index=False)

file_name = f"data/all_df_fp-{today.strftime('%Y-%m-%d')}.csv"
all_df.to_csv(file_name, index=False)

In [36]:
file_name = f"data/summary_statsG-{today.strftime('%Y-%m-%d')}.csv"
summary_statsG.to_csv(file_name, index=False)

file_name = f"data/summary_statsS-{today.strftime('%Y-%m-%d')}.csv"
summary_stats.to_csv(file_name, index=False)

In [None]:

# file_name = f"data/goaliesSummary-{today.strftime('%Y-%m-%d')}.csv"
# summary_statsG.to_csv(file_name, encoding='utf-8')

# file_name = f"data/skatersSummary-{today.strftime('%Y-%m-%d')}.csv"
# summary_stats.to_csv(file_name, encoding='utf-8')

# file_name = f"data/goaliesLog-{today.strftime('%Y-%m-%d')}.csv"
# allG_df.to_csv(file_name, encoding='utf-8')

# file_name = f"data/skatersLog-{today.strftime('%Y-%m-%d')}.csv"
# all_df.to_csv(file_name, encoding='utf-8')