In [15]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pprint import pprint
from datetime import datetime, timedelta, timezone
import pytz
import scipy.stats
from dateutil.relativedelta import relativedelta

import warnings

# Suppress the specific warnings
warnings.filterwarnings("ignore")

today = datetime.utcnow()

from api_keys import espn_link

In [16]:
# Pull the current league standings and flatten them to one row per club.
teamURL = 'https://api-web.nhle.com/v1/standings/now'

# Number of games in the schedule that `gamesRemaining` is measured against.
REGULAR_SEASON_GAMES = 82

response = requests.get(teamURL, timeout=30)
# Fail on an HTTP error here rather than on a confusing KeyError further down.
response.raise_for_status()
teams = response.json()['standings']

team_columns = ['abbreviation', 'name', 'gamesPlayed', 'gamesRemaining', 'goalsFor',
                'goalsAgainst', 'logo', 'conference', 'division']

# Collect plain records and build the frame once; growing a DataFrame with
# pd.concat inside the loop copies every earlier row on each iteration.
team_records = [
    {
        'abbreviation': entry['teamAbbrev']['default'],
        'name': entry['teamName']['default'],
        'gamesPlayed': entry['gamesPlayed'],
        'gamesRemaining': REGULAR_SEASON_GAMES - entry['gamesPlayed'],
        'goalsFor': entry['goalFor'],
        'goalsAgainst': entry['goalAgainst'],
        'logo': entry['teamLogo'],
        'conference': entry['conferenceName'],
        'division': entry['divisionName'],
    }
    for entry in teams
]

# Passing `columns` keeps the expected schema even if the API returns no teams.
team_names = pd.DataFrame(team_records, columns=team_columns)

# Club abbreviations drive the per-team schedule requests in the next cell.
teamList = team_names['abbreviation'].tolist()

team_names.to_csv('data/team_names.csv', index=False)

In [20]:
# Download every club's season schedule and build two tables:
#   completeSked - one row per (club, game), so each game appears once per participant
#   homeOnlySked - one row per game, taken from the home club's schedule
baseURL = 'https://api-web.nhle.com/v1/club-schedule-season/'
season = '/20232024'

date_format = "%d-%m-%Y"
eastern_timezone = pytz.timezone('US/Eastern')
today = datetime.utcnow()

sked_columns = ['gameID', 'gameDate', 'gameTime', 'awayTeam', 'homeTeam', 'gameDT', 'winningGoalie']

# Rows are accumulated in lists and turned into frames once at the end;
# concatenating a one-row frame per game is quadratic in the number of games.
complete_rows = []
home_rows = []

for team in teamList:

    skedURL = baseURL + team + season
    response = requests.get(skedURL, timeout=30)
    response.raise_for_status()

    # Keep only gameType 2 entries (presumably regular-season games - TODO confirm against the API).
    sked = [entry for entry in response.json()['games'] if entry.get('gameType') == 2]

    for game in sked:
        homeTeam = game['homeTeam']['abbrev']

        # A winning goalie is only read for games in state 'OFF'; every other
        # game (and an 'OFF' game without the field) is recorded with 0.
        if game['gameState'] == 'OFF':
            winningGoalie = game.get('winningGoalie', {}).get('playerId', 0)
        else:
            winningGoalie = 0

        # startTimeUTC is parsed as a naive UTC datetime, then shifted to US/Eastern
        # to derive the calendar date and the display time of the game.
        datetime_obj = datetime.strptime(game['startTimeUTC'], '%Y-%m-%dT%H:%M:%SZ')
        eastern_datetime = pytz.utc.localize(datetime_obj).astimezone(eastern_timezone)

        game_row = {
            'gameID': game['id'],
            # Naive midnight of the Eastern calendar date.
            'gameDate': eastern_datetime.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=None),
            'gameTime': eastern_datetime.strftime("%A %I:%M %p"),
            'awayTeam': game['awayTeam']['abbrev'],
            'homeTeam': homeTeam,
            # Naive UTC start time; later cells compare it with datetime.utcnow().
            'gameDT': datetime_obj,
            'winningGoalie': winningGoalie,
        }

        complete_rows.append(game_row)
        if homeTeam == team:
            home_rows.append(game_row)

completeSked = pd.DataFrame(complete_rows, columns=sked_columns)
homeOnlySked = pd.DataFrame(home_rows, columns=sked_columns)

homeOnlySked.to_csv('data/sked.csv', index=False)

completeSked.to_csv('data/sked_full.csv', index=False)

In [45]:
# Games still to be played: home-schedule rows whose UTC start time is now or later.
# (A variant that looked half a day ahead, `datetime.utcnow() + timedelta(days=0.5)`,
# was tried previously and is disabled.)
not_started = homeOnlySked['gameDT'] >= datetime.utcnow()
remainSked = homeOnlySked.loc[not_started]

# Displayed in date order; the stored frame itself is left unsorted.
remainSked.sort_values(by='gameDate')

Unnamed: 0,gameID,gameDate,gameTime,awayTeam,homeTeam,gameDT,winningGoalie
1193,2023020246,2023-11-16,Thursday 08:00 PM,TBL,CHI,2023-11-17 01:00:00,0
663,2023020245,2023-11-16,Thursday 07:00 PM,NJD,PIT,2023-11-17 00:00:00,0
950,2023020248,2023-11-16,Thursday 10:00 PM,NYI,SEA,2023-11-17 03:00:00,0
294,2023020249,2023-11-16,Thursday 10:30 PM,FLA,LAK,2023-11-17 03:30:00,0
993,2023020242,2023-11-16,Thursday 02:00 PM,DET,OTT,2023-11-16 19:00:00,0
...,...,...,...,...,...,...,...
1106,2023021307,2024-04-18,Thursday 07:00 PM,SEA,MIN,2024-04-18 23:00:00,0
122,2023021311,2024-04-18,Thursday 10:00 PM,ANA,VGK,2024-04-19 02:00:00,0
286,2023021310,2024-04-18,Thursday 09:30 PM,EDM,COL,2024-04-19 01:30:00,0
327,2023021312,2024-04-18,Thursday 10:30 PM,CHI,LAK,2024-04-19 02:30:00,0


In [46]:
# Games that are finished AND have an official result.
#
# A start time in the past is not enough: a game that is still in progress (or
# finished but not yet in state 'OFF') has winningGoalie == 0 in the schedule.
# Treating it as completed would let the box-score cell fetch partial stats,
# credit no goalie with the win, and cache that log under the game id, after
# which the game is never re-fetched because it counts as already saved.
has_started = homeOnlySked['gameDT'] <= datetime.utcnow()
has_result = homeOnlySked['winningGoalie'] != 0
completedSked = homeOnlySked.loc[has_started & has_result]

# Displayed in date order; the stored frame itself is left unsorted.
completedSked.sort_values('gameDate')

Unnamed: 0,gameID,gameDate,gameTime,awayTeam,homeTeam,gameDT,winningGoalie
656,2023020002,2023-10-10,Tuesday 08:00 PM,CHI,PIT,2023-10-11 00:00:00,8475852
738,2023020001,2023-10-10,Tuesday 05:30 PM,NSH,TBL,2023-10-10 21:30:00,8477992
82,2023020003,2023-10-10,Tuesday 10:30 PM,SEA,VGK,2023-10-11 02:30:00,8478499
1025,2023020007,2023-10-11,Wednesday 10:00 PM,WPG,CGY,2023-10-12 02:00:00,8474593
533,2023020004,2023-10-11,Wednesday 07:00 PM,OTT,CAR,2023-10-11 23:00:00,8475883
...,...,...,...,...,...,...,...
1279,2023020237,2023-11-14,Tuesday 10:30 PM,FLA,SJS,2023-11-15 03:30:00,8476932
1155,2023020239,2023-11-15,Wednesday 08:30 PM,SEA,EDM,2023-11-16 01:30:00,8479973
537,2023020238,2023-11-15,Wednesday 07:30 PM,PHI,CAR,2023-11-16 00:30:00,8479394
252,2023020240,2023-11-15,Wednesday 09:00 PM,ANA,COL,2023-11-16 02:00:00,8480382


In [52]:
# Reload the raw per-game logs cached under today's (UTC) date stamp.
# NOTE(review): both files must already exist for this date, otherwise
# read_csv raises FileNotFoundError - confirm how the first file of a day is created.

# Goalie logs.
file_name = f"data/allG_df_raw-{today.strftime('%Y-%m-%d')}.csv"
allG_df_raw = pd.read_csv(filepath_or_buffer=file_name, index_col=False)

# Skater logs.
file_name = f"data/all_df_raw-{today.strftime('%Y-%m-%d')}.csv"
all_df_raw = pd.read_csv(filepath_or_buffer=file_name, index_col=False)

# Only the last expression of a cell is rendered, so just the skater frame is shown.
all_df_raw

Unnamed: 0,playerId,sweaterNumber,name,position,goals,assists,points,plusMinus,pim,hits,...,toi,powerPlayToi,shorthandedToi,team,opponent,secondaryPosition,tertiaryPosition,gameDate,gameTime,gameId
0,8478178,43,D. Raddysh,D,0,0,0,1,0,1,...,1084,5,19,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
1,8475177,44,C. de Haan,D,0,0,0,0,0,2,...,821,0,172,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
2,8480246,48,N. Perbix,D,0,0,0,-1,2,0,...,702,0,0,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
3,8475167,77,V. Hedman,D,0,1,1,-1,0,0,...,1615,386,248,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
4,8478416,81,E. Cernak,D,0,0,0,0,0,6,...,1160,0,276,TBL,NSH,D,D,2023-10-10,Tuesday 05:30 PM,2023020001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8665,8480789,26,O. Wahlstrom,R,0,1,1,0,0,1,...,696,29,0,NYI,VAN,W,F,2023-11-15 00:00:00,Wednesday 10:00 PM,2023020241
8666,8475314,27,A. Lee,L,0,0,0,0,2,2,...,726,29,0,NYI,VAN,W,F,2023-11-15 00:00:00,Wednesday 10:00 PM,2023020241
8667,8475754,29,B. Nelson,C,1,0,1,0,0,1,...,889,15,0,NYI,VAN,C,F,2023-11-15 00:00:00,Wednesday 10:00 PM,2023020241
8668,8476419,44,J. Pageau,C,0,0,0,0,2,3,...,937,29,123,NYI,VAN,C,F,2023-11-15 00:00:00,Wednesday 10:00 PM,2023020241


In [53]:
# Work out which completed games have no cached skater logs yet.

# Every game id on the home schedule.
all_games = homeOnlySked['gameID'].drop_duplicates().tolist()

# Game ids already present in the cached skater logs.
games_saved = all_df_raw['gameId'].drop_duplicates().tolist()

# Game ids the schedule considers completed.
games_done = completedSked['gameID'].drop_duplicates().tolist()

# Completed but not cached: these are the games the box-score cell downloads.
missing_games = list(set(games_done) - set(games_saved))

missing_games

[]

In [54]:
# Download the box score and play-by-play of every missing game and collect one
# log per skater (awayLogs / homeLogs) and per goalie (awayLogsG / homeLogsG),
# keyed by "<game id><player id>".
baseURL = 'https://api-web.nhle.com/v1/gamecenter/'
appendix = '/boxscore'

playsBaseURL = 'https://api-web.nhle.com/v1/gamecenter/'
playsAppendix = '/play-by-play'

date_format = "%d-%m-%Y"
eastern_timezone = pytz.timezone('US/Eastern')
today = datetime.utcnow()

# A goalie needs more than this many seconds in net, with every shot saved, to be
# credited with a shutout. NOTE(review): threshold carried over unchanged - confirm its origin.
SHUTOUT_MIN_TOI_SECONDS = 3446

awayLogs = {}
awayLogsG = {}
homeLogs = {}
homeLogsG = {}


def _clock_to_seconds(clock):
    """Convert a colon-separated clock string such as '18:04' into total seconds."""
    return sum(int(part) * 60**power for power, part in enumerate(reversed(clock.split(':'))))


def _goalie_facing(plays, shooting_team_id, last=False):
    """Return `goalieInNetId` of the first (or, with `last=True`, the final)
    'shot-on-goal' play owned by `shooting_team_id`, or None if there is none."""
    ordered = reversed(plays) if last else plays
    return next((play['details']['goalieInNetId'] for play in ordered
                 if play['typeDescKey'] == 'shot-on-goal'
                 and play['details']['eventOwnerTeamId'] == shooting_team_id), None)


def _log_skaters(logs, skaters, is_defense, log_prefix, game_id, team, opponent, game_date, game_time):
    """Enrich each skater dict in place, store it in `logs`, and return the summed 'shots'.

    Defenders get secondary/tertiary position 'D'/'D'; forwards get 'C' or 'W'
    (anything that is not a centre) and tertiary position 'F'.
    """
    shots_total = 0
    for skater in skaters:
        if is_defense:
            secondary, tertiary = 'D', 'D'
        else:
            secondary = 'C' if skater['position'] == 'C' else 'W'
            tertiary = 'F'

        skater['name'] = skater['name']['default']
        skater['team'] = team
        skater['opponent'] = opponent
        skater['secondaryPosition'] = secondary
        skater['tertiaryPosition'] = tertiary
        skater['gameDate'] = game_date
        skater['gameTime'] = game_time
        # Ice-time clocks are stored as seconds so they can be summed later.
        for clock_field in ('toi', 'powerPlayToi', 'shorthandedToi'):
            skater[clock_field] = _clock_to_seconds(skater[clock_field])
        skater['gameId'] = game_id

        logs[log_prefix + str(skater['playerId'])] = skater
        shots_total += skater['shots']
    return shots_total


def _log_goalies(logs, goalies, log_prefix, game_id, team, opponent, game_date, game_time,
                 starter_id, finisher_id, winner_id, team_won, overtime):
    """Enrich each goalie who played, store it in `logs`, and return total shots faced.

    Adds 'start' (1 for the starter), 'shutout' (1/0) and 'win': 4 for the winning
    goalie, 1 for the goalie in net at the end of a loss that went past regulation,
    otherwise 0. The fantasy-points cell later adds 'win' directly as decision points.
    """
    shots_total = 0
    for goalie in goalies:
        # Dressed goalies who never entered the game are skipped entirely.
        if goalie['toi'] == '00:00':
            continue

        goalie['name'] = goalie['name']['default']
        goalie['team'] = team
        goalie['opponent'] = opponent
        goalie['gameDate'] = game_date
        goalie['gameTime'] = game_time
        goalie['toi'] = _clock_to_seconds(goalie['toi'])
        goalie['gameId'] = game_id

        goalie['start'] = 1 if goalie['playerId'] == starter_id else 0

        # 'saveShotsAgainst' is formatted as "<saves>/<shots>".
        save_parts = goalie['saveShotsAgainst'].split('/')
        saves, shots = save_parts[0], save_parts[1]
        clean_sheet = shots == saves and goalie['toi'] > SHUTOUT_MIN_TOI_SECONDS
        goalie['shutout'] = 1 if clean_sheet else 0

        shots_total += int(shots)

        if goalie['playerId'] == winner_id:
            goalie['win'] = 4
        elif not team_won and overtime and finisher_id == goalie['playerId']:
            goalie['win'] = 1
        else:
            goalie['win'] = 0

        # Sanity check: shots minus saves should equal the reported goals against.
        if int(shots) - int(saves) != goalie['goalsAgainst']:
            print(f"Error for goalies: {goalie['name']} "
                  f"({saves}/{shots} saves, {goalie['goalsAgainst']} goals against)")

        logs[log_prefix + str(goalie['playerId'])] = goalie
    return shots_total


for Id in missing_games:

    gameURL = baseURL + str(Id) + appendix

    print(gameURL)

    response = requests.get(gameURL, timeout=30)
    response.raise_for_status()
    game = response.json()

    numPeriods = len(game['boxscore']['linescore']['byPeriod'])
    score = game['boxscore']['linescore']['totals']
    awayTeam = game['awayTeam']
    homeTeam = game['homeTeam']
    awayTeamName = awayTeam['abbrev']
    homeTeamName = homeTeam['abbrev']
    awayTeamId = awayTeam['id']
    homeTeamId = homeTeam['id']
    gameDate = game['startTimeUTC']
    gameOutcome = game['gameOutcome']  # read from the payload but not used further in this cell
    gameID = game['id']

    playerStatsAway = game['boxscore']['playerByGameStats']['awayTeam']
    playerStatsHome = game['boxscore']['playerByGameStats']['homeTeam']

    # GET THE STARTING AND ENDING GOALTENDERS
    #
    # A team's goalie is identified from the shots the OPPONENT put on goal.
    # Starters used to be found by matching the running counters awaySOG == 1 /
    # homeSOG == 1; that can match a shot by the other team (and so the other
    # team's goalie) when the first shot was recorded as a goal rather than a
    # 'shot-on-goal' play. Filtering on eventOwnerTeamId, as the ending-goalie
    # lookup already does, avoids that.
    # NOTE(review): assumes the SOG counters include goals - confirm against the API.

    playsURL = playsBaseURL + str(Id) + playsAppendix
    plays_response = requests.get(playsURL, timeout=30)
    plays_response.raise_for_status()
    plays = plays_response.json()['plays']

    homeStartingG = _goalie_facing(plays, awayTeamId)
    awayStartingG = _goalie_facing(plays, homeTeamId)

    homeEndingG = _goalie_facing(plays, awayTeamId, last=True)
    awayEndingG = _goalie_facing(plays, homeTeamId, last=True)

    # GET THE WINNING GOALTENDER (recorded on the schedule when it was downloaded)

    winningGoalie = completedSked.loc[completedSked['gameID'] == Id, 'winningGoalie'].iloc[0]

    # FIGURE OUT THE TYPE OF FINISH
    # More than 3 periods means the game went past regulation; more than 4 is
    # flagged as SO (presumably a shootout - TODO confirm).

    OT = 1 if numPeriods > 3 else 0
    SO = 1 if numPeriods > 4 else 0

    homeTeamWin = 1 if score['home'] > score['away'] else 0
    awayTeamWin = 1 - homeTeamWin

    # FORMAT THE DATES AND TIMES
    # The UTC start is shifted to US/Eastern; game_date is the naive midnight of
    # that Eastern calendar date and game_time a display string.

    datetime_obj = datetime.strptime(gameDate, '%Y-%m-%dT%H:%M:%SZ')
    eastern_datetime = pytz.utc.localize(datetime_obj).astimezone(eastern_timezone)
    game_date = eastern_datetime.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=None)
    game_time = eastern_datetime.strftime("%A %I:%M %p")

    log_prefix = str(gameID)

    # AWAY LOG ASSEMBLER

    awaySOGcheck = (
        _log_skaters(awayLogs, playerStatsAway['defense'], True, log_prefix, Id,
                     awayTeamName, homeTeamName, game_date, game_time)
        + _log_skaters(awayLogs, playerStatsAway['forwards'], False, log_prefix, Id,
                       awayTeamName, homeTeamName, game_date, game_time)
    )

    awayGshots = _log_goalies(awayLogsG, playerStatsAway['goalies'], log_prefix, Id,
                              awayTeamName, homeTeamName, game_date, game_time,
                              awayStartingG, awayEndingG, winningGoalie, awayTeamWin, OT)

    # HOME LOG ASSEMBLER

    homeSOGcheck = (
        _log_skaters(homeLogs, playerStatsHome['defense'], True, log_prefix, Id,
                     homeTeamName, awayTeamName, game_date, game_time)
        + _log_skaters(homeLogs, playerStatsHome['forwards'], False, log_prefix, Id,
                       homeTeamName, awayTeamName, game_date, game_time)
    )

    homeGshots = _log_goalies(homeLogsG, playerStatsHome['goalies'], log_prefix, Id,
                              homeTeamName, awayTeamName, game_date, game_time,
                              homeStartingG, homeEndingG, winningGoalie, homeTeamWin, OT)
        

In [55]:
# Append the freshly downloaded logs to the cached raw tables and write them back
# under today's date stamp.
#
# Two safeguards are applied to both tables before saving:
#   * one row per (gameId, playerId) is kept, so re-running this cell (or the
#     download cell) does not append the same logs twice;
#   * gameDate is normalised to 'YYYY-MM-DD' text. New rows carry datetime
#     objects while rows reloaded from CSV carry strings, which otherwise ends up
#     as a mix of '2023-10-10' and '2023-11-15 00:00:00' in the saved file.

# Goalies: each log dict becomes one row (dict keys -> columns after the transpose).
homeG_df = pd.DataFrame(homeLogsG).transpose()
awayG_df = pd.DataFrame(awayLogsG).transpose()

allG_df_raw = pd.concat([allG_df_raw, homeG_df, awayG_df], axis=0, ignore_index=True)
allG_df_raw = allG_df_raw.drop_duplicates(subset=['gameId', 'playerId'], keep='last', ignore_index=True)
allG_df_raw['gameDate'] = allG_df_raw['gameDate'].astype(str).str[:10]

file_name = f"data/allG_df_raw-{today.strftime('%Y-%m-%d')}.csv"
allG_df_raw.to_csv(file_name, index=False)

# Skaters: same treatment.
home_df = pd.DataFrame(homeLogs).transpose()
away_df = pd.DataFrame(awayLogs).transpose()

all_df_raw = pd.concat([all_df_raw, home_df, away_df], axis=0, ignore_index=True)
all_df_raw = all_df_raw.drop_duplicates(subset=['gameId', 'playerId'], keep='last', ignore_index=True)
all_df_raw['gameDate'] = all_df_raw['gameDate'].astype(str).str[:10]

file_name = f"data/all_df_raw-{today.strftime('%Y-%m-%d')}.csv"
all_df_raw.to_csv(file_name, index=False)

In [59]:
# Score every goalie appearance and roll the logs up to one row per goalie.
allG_df = allG_df_raw.copy()

# The "<saves>/<shots>" text columns are split into numeric pairs.
shot_columns = {
    'saveShotsAgainst': ['saves', 'shots'],
    'evenStrengthShotsAgainst': ['evSaves', 'evShots'],
    'powerPlayShotsAgainst': ['ppSaves', 'ppShots'],
}
for source_column, target_columns in shot_columns.items():
    allG_df[target_columns] = allG_df[source_column].str.split('/', expand=True).apply(pd.to_numeric)

allG_df = allG_df.drop(columns=(['saveShotsAgainst', 'savePctg', 'evenStrengthShotsAgainst',
                                   'powerPlayShotsAgainst', 'shorthandedShotsAgainst', 'evenStrengthGoalsAgainst',
                                  'powerPlayGoalsAgainst', 'shorthandedGoalsAgainst']))

# goalsAgainst is included because it feeds the arithmetic below; rows appended in
# this session arrive as generic objects rather than numbers.
columns_to_convert1 = ['toi', 'start', 'shutout', 'win', 'pim', 'goalsAgainst']
allG_df[columns_to_convert1] = allG_df[columns_to_convert1].apply(pd.to_numeric)

# The scoring is computed column-wise instead of row by row with iterrows/.at.

# GAMES PLAYED TALLY: 1 for any appearance with ice time, otherwise 0 (never NaN,
# so the column is a proper integer from the start).
allG_df['gamesPlayed'] = (allG_df['toi'] > 0).astype(int)

# FANTASY POINTS
#   decision : the stored 'win' value (4 = win, 1 = overtime loss, 0 = otherwise)
#   shutout  : +3
#   goals    : -2 per goal against
#   saves    : +0.2 per shot that did not become a goal
shutout_points = allG_df['shutout'].eq(1) * 3
goals_against_points = allG_df['goalsAgainst'] * -2
saves_points = (allG_df['shots'] - allG_df['goalsAgainst']) * 0.2
allG_df['fantasyPoints'] = (
    allG_df['win'] + shutout_points + goals_against_points + saves_points
).astype(float)

# DECISION LABEL: anything that is neither a win nor an overtime loss is labelled 'L'.
allG_df['decisionType'] = np.select(
    [allG_df['win'] == 4, allG_df['win'] == 1],
    ['W', 'OTL'],
    default='L',
)

# One row per goalie. 'team' takes the last logged club, the other labels the first.
summary_statsG = allG_df.groupby('playerId').agg({
    'name': 'first',
    'team': 'last',
    'position': 'first',
    'toi': 'sum',
    'gamesPlayed': 'sum',
    'saves': 'sum',
    'shots': 'sum',
    'shutout': 'sum',
    'fantasyPoints': 'sum'
}).reset_index()

summary_statsG.sort_values('fantasyPoints', ascending=False).head(25)

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,saves,shots,shutout,fantasyPoints
32,8477967,T. Demko,VAN,G,38734,11,303,325,2,54.6
45,8478499,A. Hill,VGK,G,32897,9,246,262,2,52.2
55,8479979,J. Oettinger,DAL,G,39819,11,311,336,0,45.2
58,8480280,J. Swayman,BOS,G,25534,7,204,216,1,44.8
7,8475660,C. Talbot,LAK,G,39153,11,288,312,1,41.6
17,8476412,J. Binnington,STL,G,36463,10,305,328,1,39.0
28,8477465,T. Jarry,PIT,G,35848,11,257,280,3,38.4
23,8476999,L. Ullmark,BOS,G,29045,8,233,251,0,35.6
8,8475683,S. Bobrovsky,FLA,G,43025,12,305,337,1,33.0
52,8479394,C. Hart,PHI,G,29289,9,220,239,1,29.0


In [62]:
def missed_games(team, returnDate, schedule=None, now=None):
    """Count a team's upcoming games that fall before a player's return date.

    Parameters
    ----------
    team : str
        Team abbreviation, matched against the 'awayTeam' and 'homeTeam' columns.
    returnDate : str
        Expected return date as 'YYYY-MM-DD'. Games whose 'gameDate' is on or
        after this date are not counted.
    schedule : pandas.DataFrame, optional
        Schedule with one row per game and the columns 'awayTeam', 'homeTeam',
        'gameDT' (naive UTC start time) and 'gameDate'. Defaults to the
        notebook-level `homeOnlySked`.
    now : datetime.datetime, optional
        Naive UTC "current time"; games that started before it are ignored.
        Defaults to the current UTC time.

    Returns
    -------
    int
        Number of matching games.

    Raises
    ------
    ValueError
        If `returnDate` is not in 'YYYY-MM-DD' format.
    """
    if schedule is None:
        schedule = homeOnlySked
    if now is None:
        # Same naive-UTC value datetime.utcnow() produced, without the deprecated call.
        now = datetime.now(timezone.utc).replace(tzinfo=None)

    return_date = datetime.strptime(returnDate, '%Y-%m-%d')

    involves_team = (schedule['awayTeam'] == team) | (schedule['homeTeam'] == team)
    not_yet_played = schedule['gameDT'] >= now
    before_return = schedule['gameDate'] < return_date

    return int((involves_team & not_yet_played & before_return).sum())

In [63]:
# Start every goalie at zero missed games; the injury list in the next cell
# overrides the players it names.
summary_statsG = summary_statsG.assign(missedGames=0)

In [64]:
# Injured goalies mapped to their expected return date ('YYYY-MM-DD').
injuries_listG = {
    'F. Andersen': '2023-12-02',
    'I. Shesterkin': '2023-11-18'

}

# Record how many upcoming team games each injured goalie is expected to miss.
for player, date in injuries_listG.items():
    is_player = summary_statsG['name'] == player

    # A listed goalie with no logged appearance has no summary row; skip him with
    # a message instead of failing on .iloc[0] and aborting the rest of the list.
    if not is_player.any():
        print(f"Skipping {player}: not found in summary_statsG")
        continue

    team = summary_statsG.loc[is_player, 'team'].iloc[0]
    missedGames = missed_games(team, date)
    summary_statsG.loc[is_player, 'missedGames'] = missedGames

In [66]:
# Project each goalie's remaining fantasy points from his share of the team's crease.
# Everything is computed column-wise instead of looping with iterrows/.at.

# Total goalie ice time logged per club. Despite the column name, 'toi' is stored
# in seconds, so 'creaseMins' holds seconds as well. Clubs with no goalie logs get 0.
crease_by_team = allG_df.groupby('team')['toi'].sum()
team_names['creaseMins'] = team_names['abbreviation'].map(crease_by_team).fillna(0).astype(int)

# Per-club lookups keyed by abbreviation.
team_games_left = team_names.groupby('abbreviation')['gamesRemaining'].first()
team_crease = team_names.groupby('abbreviation')['creaseMins'].sum()

# Team games left, minus the games the goalie is expected to miss through injury.
games_left = summary_statsG['team'].map(team_games_left) - summary_statsG['missedGames']

# Fraction (0-1) of the club's goalie ice time played by this goalie.
crease_share = summary_statsG['toi'] / summary_statsG['team'].map(team_crease)

# Fantasy points per 3600 seconds of ice time.
fpp60 = summary_statsG['fantasyPoints'] / summary_statsG['toi'] * 3600

summary_statsG['gamesRemaining'] = games_left
summary_statsG['creaseShare'] = (crease_share * 100).round(2)  # stored as a percentage
summary_statsG['FPP60'] = fpp60.round(2)
summary_statsG['FPPG'] = (summary_statsG['fantasyPoints'] / summary_statsG['gamesPlayed']).round(2)

# Projection uses the unrounded rate and share: points per 3600 s x games left x crease share.
summary_statsG['fantasyPointsRemain'] = (fpp60 * games_left * crease_share).round(2)

# Raises if a goalie's team is missing from team_names (the lookup yields NaN).
summary_statsG['gamesRemaining'] = summary_statsG['gamesRemaining'].astype(int)

summary_statsG.loc[summary_statsG['team'] == 'NYR']

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,saves,shots,shutout,fantasyPoints,missedGames,gamesRemaining,creaseShare,FPP60,FPPG,fantasyPointsRemain
1,8471734,J. Quick,NYR,G,20028,6,141,152,1,26.2,0,68,39.22,4.71,4.37,125.6
12,8475839,L. Domingue,NYR,G,3600,1,25,26,0,7.0,0,68,7.05,7.0,7.0,33.56
40,8478048,I. Shesterkin,NYR,G,27435,8,188,206,0,25.6,0,68,53.73,3.36,3.2,122.73


In [67]:
def summary_statistics(df):
    """Collapse per-game skater logs into one row per player.

    Identity columns keep the first logged value, except 'team', which keeps the
    last one; counting stats and ice times are summed. Two rates are then added,
    each rounded to 2 decimals:

    * FPP60 - fantasy points per 3600 units of 'toi'
    * FPPG  - fantasy points per game played

    The result is returned sorted by FPPG, highest first.
    """
    summed_columns = ['toi', 'gamesPlayed', 'goals', 'assists', 'specialTeams', 'shots',
                      'hits', 'blockedShots', 'powerPlayToi', 'shorthandedToi',
                      'plusMinus', 'fantasyPoints']

    # Insertion order of this mapping fixes the column order of the output.
    aggregations = {'name': 'first', 'team': 'last', 'position': 'first'}
    aggregations.update(dict.fromkeys(summed_columns, 'sum'))
    aggregations.update({'secondaryPosition': 'first', 'tertiaryPosition': 'first'})

    per_player = df.groupby('playerId').agg(aggregations).reset_index()

    points = per_player['fantasyPoints']
    per_player['FPP60'] = (points / per_player['toi'] * 3600).round(2)
    per_player['FPPG'] = (points / per_player['gamesPlayed']).round(2)

    return per_player.sort_values(by='FPPG', ascending=False)

In [68]:
# Score every skater appearance and roll the logs up to one row per skater.
all_df = all_df_raw.copy()

all_df = all_df.drop(columns=(['faceoffWinningPctg']))

# Rows appended in this session arrive as generic objects; force the stat columns numeric.
columns_to_convert1 = ['goals', 'assists', 'points', 'plusMinus', 'pim', 'hits', 'blockedShots',
                      'powerPlayGoals', 'powerPlayPoints', 'shorthandedGoals', 'shPoints', 'shots',
                       'toi', 'powerPlayToi', 'shorthandedToi']
all_df[columns_to_convert1] = all_df[columns_to_convert1].apply(pd.to_numeric)

# GAMES PLAYED TALLY: 1 for any appearance with ice time, otherwise 0. A skater
# with zero ice time previously left this cell empty (NaN), which made the
# integer conversion of the column fail.
all_df['gamesPlayed'] = (all_df['toi'] > 0).astype(int)

# Special-teams points are power-play points plus short-handed points.
special_teams = all_df['powerPlayPoints'] + all_df['shPoints']

# FANTASY POINTS: 2 per goal, 1 per assist, 0.5 per special-teams point,
# 0.5 per blocked shot, 0.1 per hit and per shot.
all_df['fantasyPoints'] = (
    (all_df['goals'] * 2)
    + all_df['assists']
    + (special_teams * .5)
    + (all_df['blockedShots'] * .5)
    + ((all_df['hits'] + all_df['shots']) * .1)
)
all_df['specialTeams'] = special_teams.astype(int)

summary_stats = summary_statistics(all_df).sort_values('fantasyPoints', ascending=False)

In [69]:
# Start every skater at zero missed games; the injury list in the next cell
# overrides the players it names.
summary_stats = summary_stats.assign(missedGames=0)

In [70]:
# Injured skaters mapped to their expected return date ('YYYY-MM-DD').
injuries_list = {
    'J. McBain': '2023-11-18',
    'D. Savard': '2023-12-07',
    'M. Grzelcyk': '2023-11-25',
    'Z. Benson': '2023-11-17',
    'T. Hall': '2023-11-16',
    'A. Athanasiou': '2023-11-16',
    'A. Lehkonen': '2023-11-25',
    'L. Schenn': '2023-11-24',
    'N. Hischier': '2023-11-14',
    'J. Hughes': '2023-11-14',
    'F. Chytil': '2023-11-18',
    'A. Fox': '2023-11-29',
    'R. Greig': '2023-11-24',
    'T. Chabot': '2023-12-01',
    'A. Barabanov': '2023-12-05',
    'A. Burakovsky': '2023-12-02',
    'T. Liljegren': '2023-11-30',
    'M. Fehervary': '2023-11-18',
    'A. Mantha': '2023-11-18',
    'G. Vilardi': '2023-11-22'

}

# Record how many upcoming team games each injured skater is expected to miss.
# NOTE(review): players are matched on the abbreviated display name, so two
# skaters sharing a name would both be updated using the first one's team.
for player, date in injuries_list.items():
    is_player = summary_stats['name'] == player

    # A listed skater with no logged appearance has no summary row; skip him with
    # a message instead of failing on .iloc[0] and aborting the rest of the list.
    if not is_player.any():
        print(f"Skipping {player}: not found in summary_stats")
        continue

    team = summary_stats.loc[is_player, 'team'].iloc[0]
    missedGames = missed_games(team, date)
    summary_stats.loc[is_player, 'missedGames'] = missedGames

In [71]:
# Project each skater's remaining fantasy points: points per game x games he can still play.

# Games left per club, keyed by abbreviation. This replaces int(<Series>), which is
# deprecated in pandas and fails unless exactly one team row matches.
team_games_left = team_names.groupby('abbreviation')['gamesRemaining'].first()

# Team games left, minus the games the skater is expected to miss through injury.
games_left = summary_stats['team'].map(team_games_left) - summary_stats['missedGames']

summary_stats['gamesRemaining'] = games_left
summary_stats['fantasyPointsRemain'] = summary_stats['FPPG'] * games_left

# Raises if a skater's team is missing from team_names (the lookup yields NaN).
summary_stats['gamesRemaining'] = summary_stats['gamesRemaining'].astype(int)


summary_stats.loc[summary_stats['team'] == 'NYR']

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,goals,assists,specialTeams,shots,...,shorthandedToi,plusMinus,fantasyPoints,secondaryPosition,tertiaryPosition,FPP60,FPPG,missedGames,gamesRemaining,fantasyPointsRemain
386,8478550,A. Panarin,NYR,L,16359,14,8,16,10,55,...,0,5,43.1,W,F,9.48,3.08,0,68,209.44
192,8476885,J. Trouba,NYR,D,18751,14,1,4,0,28,...,2695,3,37.6,D,D,7.22,2.69,0,68,182.92
81,8475184,C. Kreider,NYR,L,15682,14,10,4,8,37,...,1239,4,35.2,W,F,8.08,2.51,0,68,170.68
144,8476389,V. Trocheck,NYR,C,16669,14,4,7,4,25,...,1154,2,28.0,C,F,6.05,2.0,0,68,136.0
411,8479323,A. Fox,NYR,D,12639,10,3,8,9,15,...,839,1,26.4,D,D,7.52,2.64,6,62,163.68
160,8476459,M. Zibanejad,NYR,C,16637,14,2,8,7,39,...,1697,6,24.8,C,F,5.37,1.77,0,68,120.36
215,8476979,E. Gustafsson,NYR,D,15817,14,3,7,2,22,...,336,7,24.7,D,D,5.62,1.76,0,68,119.68
655,8482109,A. Lafrenière,NYR,L,13890,14,7,4,2,24,...,11,3,22.9,W,F,5.94,1.64,0,68,111.52
546,8480817,K. Miller,NYR,D,18900,14,1,6,1,11,...,1781,6,20.7,D,D,3.94,1.48,0,68,100.64
641,8482073,B. Schneider,NYR,D,13304,14,1,2,0,16,...,933,-3,20.0,D,D,5.41,1.43,0,68,97.24


In [None]:

# file_name = f"data/goaliesSummary-{today.strftime('%Y-%m-%d')}.csv"
# summary_statsG.to_csv(file_name, encoding='utf-8')

# file_name = f"data/skatersSummary-{today.strftime('%Y-%m-%d')}.csv"
# summary_stats.to_csv(file_name, encoding='utf-8')

# file_name = f"data/goaliesLog-{today.strftime('%Y-%m-%d')}.csv"
# allG_df.to_csv(file_name, encoding='utf-8')

# file_name = f"data/skatersLog-{today.strftime('%Y-%m-%d')}.csv"
# all_df.to_csv(file_name, encoding='utf-8')