In [33]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pprint import pprint
from datetime import datetime, timedelta, timezone
import pytz
import scipy.stats
from dateutil.relativedelta import relativedelta

import warnings

# Silence every warning category for the rest of the session.
# NOTE(review): this is a blanket filter, not a targeted one, so deprecation
# and pandas future-behaviour notices raised by later cells are hidden as well.
warnings.filterwarnings("ignore")

# Naive UTC timestamp used to date-stamp the CSV files read and written below.
# datetime.utcnow() is deprecated since Python 3.12; this is the documented
# replacement and still yields a naive datetime, so existing uses are unaffected.
today = datetime.now(timezone.utc).replace(tzinfo=None)

from api_keys import espn_link

# Number of most recent game dates kept per team; it is also embedded in the
# name of the CSV written at the end of the notebook.
x = 10

In [34]:
# --- Static reference tables -------------------------------------------------
homeOnlySked = pd.read_csv('data/sked.csv', index_col=False)
completeSked = pd.read_csv('data/sked_full.csv', index_col=False)
team_names = pd.read_csv('data/team_names.csv', index_col=False)

# Two-column view used when iterating over the teams.
team_names_only = team_names[['abbreviation', 'name']]

# --- Date-stamped extracts (file names carry today's UTC date) ----------------
allG_df = pd.read_csv(f"data/allG_df_raw-{today.strftime('%Y-%m-%d')}.csv", index_col=False)
all_df = pd.read_csv(f"data/all_df_raw-{today.strftime('%Y-%m-%d')}.csv", index_col=False)

# NOTE(review): this frame is read from the summary-stats file but bound to the
# name `player_bios`, which a later cell reassigns from data/playerbios.csv.
file_name = f"data/summary_stats-{today.strftime('%Y-%m-%d')}.csv"
player_bios = pd.read_csv(file_name, index_col=False)

In [35]:
def _last_n_team_games(games, abbreviations, n):
    """Return the rows of ``games`` that fall on each team's ``n`` latest game dates.

    Parameters
    ----------
    games : pandas.DataFrame
        Per-player, per-game rows with at least ``team`` and ``gameDate`` columns.
    abbreviations : iterable
        Team abbreviations to select, in the order the result should be stacked.
    n : int
        How many distinct ``gameDate`` values to keep per team.

    Returns
    -------
    pandas.DataFrame
        The selected rows, grouped by team in the given order and sorted by
        ``gameDate`` descending within each team. Empty if no teams are given.
    """
    per_team = []
    for abbreviation in abbreviations:
        team_games = games.loc[games['team'] == abbreviation].sort_values('gameDate', ascending=False)
        # unique() preserves order of appearance, so after the descending sort
        # the first n entries are the n latest dates.
        recent_dates = team_games['gameDate'].unique()[:n]
        per_team.append(team_games[team_games['gameDate'].isin(recent_dates)])

    # Concatenate once instead of growing a frame inside the loop (quadratic copying).
    return pd.concat(per_team) if per_team else pd.DataFrame()


# Skater rows and goalie rows restricted to each team's last `x` game dates.
last_10_df_team = _last_n_team_games(all_df, team_names_only['abbreviation'], x)
last_10_g_df_team = _last_n_team_games(allG_df, team_names_only['abbreviation'], x)

last_10_g_df_team

Unnamed: 0,playerId,sweaterNumber,name,position,evenStrengthShotsAgainst,powerPlayShotsAgainst,shorthandedShotsAgainst,saveShotsAgainst,savePctg,evenStrengthGoalsAgainst,...,goalsAgainst,toi,team,opponent,gameDate,gameTime,gameId,start,shutout,win
595,8471734,32,J. Quick,G,22/25,3/4,2/2,27/31,0.871,3,...,4,3568,NYR,BOS,2023-11-25 00:00:00,Saturday 01:00 PM,2023020308,1,0,4
629,8478048,31,I. Shesterkin,G,23/24,13/13,0/0,36/37,0.973,1,...,1,3593,NYR,PHI,2023-11-24 00:00:00,Friday 01:00 PM,2023020294,1,0,4
665,8471734,32,J. Quick,G,26/26,5/5,1/1,32/32,1.000,0,...,0,3600,NYR,PIT,2023-11-22 00:00:00,Wednesday 07:00 PM,2023020283,1,1,4
654,8478048,31,I. Shesterkin,G,26/29,3/4,1/1,30/34,0.882,3,...,4,3537,NYR,DAL,2023-11-20 00:00:00,Monday 08:00 PM,2023020274,1,0,0
559,8478048,31,I. Shesterkin,G,28/30,0/1,2/2,30/33,0.909,2,...,3,3600,NYR,NJD,2023-11-18 00:00:00,Saturday 07:00 PM,2023020261,1,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
493,8478406,29,M. Blackwood,G,21/23,4/6,1/1,26/30,0.867,2,...,4,3434,SJS,FLA,2023-11-14 00:00:00,Tuesday 10:30 PM,2023020237,1,0,0
481,8478406,29,M. Blackwood,G,27/29,12/14,1/1,40/44,0.909,2,...,4,3600,SJS,ANA,2023-11-12,Sunday 08:00 PM,2023020226,1,0,0
464,8478039,36,K. Kahkonen,G,33/37,0/0,1/2,34/39,0.872,4,...,5,3600,SJS,VGK,2023-11-10,Friday 10:00 PM,2023020209,1,0,0
216,8478406,29,M. Blackwood,G,31/33,6/6,2/2,39/41,0.951,2,...,2,3600,SJS,EDM,2023-11-09,Thursday 10:30 PM,2023020203,1,0,4


In [36]:
def summary_statistics(df):
    """Aggregate per-game skater rows into one summary row per player.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per player per game. Must contain ``playerId`` and every column
        named in the aggregation below. When a ``gameDate`` column is present it
        is used to pick each player's team from their latest game; its values
        must sort chronologically (e.g. ISO date strings or datetimes).

    Returns
    -------
    pandas.DataFrame
        One row per ``playerId`` with summed counting stats plus two rates:
        ``FPP60`` (fantasy points per 3600 units of ``toi``) and ``FPPG``
        (fantasy points per game played), both rounded to 2 decimals. Rows are
        sorted by ``FPPG`` descending. A rate is NaN when its denominator is 0.
    """
    temp_build = df.groupby('playerId').agg({
        'name': 'first',
        'team': 'last',
        'position': 'first',
        'toi': 'sum',
        'gamesPlayed': 'sum',
        'goals': 'sum',
        'assists': 'sum',
        'specialTeams': 'sum',
        'shots': 'sum',
        'hits': 'sum',
        'blockedShots': 'sum',
        'powerPlayToi': 'sum',
        'shorthandedToi': 'sum',
        'plusMinus': 'sum',
        'fantasyPoints': 'sum',
        'secondaryPosition': 'first',
        'tertiaryPosition': 'first'
    }).reset_index()

    # 'last' above depends on the incoming row order, which is not chronological
    # when the frame is stacked team by team. Resolve the team from the latest
    # game explicitly so a traded player is reported with the current club.
    if 'gameDate' in df.columns:
        latest_team = (
            df.sort_values('gameDate', kind='stable', na_position='first')
              .groupby('playerId')['team']
              .last()
        )
        temp_build['team'] = temp_build['playerId'].map(latest_team)

    # Mask zero denominators so the rates come out as NaN instead of +/-inf,
    # which would otherwise float to the top of the descending sort.
    ice_time = temp_build['toi'].where(temp_build['toi'] > 0)
    games = temp_build['gamesPlayed'].where(temp_build['gamesPlayed'] > 0)

    temp_build['FPP60'] = (temp_build['fantasyPoints'] / ice_time * 3600).round(2)
    temp_build['FPPG'] = (temp_build['fantasyPoints'] / games).round(2)

    temp_build = temp_build.sort_values(by='FPPG', ascending=False)

    return temp_build

# Summary Stats Skaters

In [37]:
# drop() returns a new frame, so the per-team selection itself is left untouched.
last_10_df = last_10_df_team.drop(columns=['faceoffWinningPctg'])

columns_to_convert1 = ['goals', 'assists', 'points', 'plusMinus', 'pim', 'hits', 'blockedShots',
                       'powerPlayGoals', 'powerPlayPoints', 'shorthandedGoals', 'shPoints', 'shots',
                       'toi', 'powerPlayToi', 'shorthandedToi']
last_10_df[columns_to_convert1] = last_10_df[columns_to_convert1].apply(pd.to_numeric)

# A row counts as a game played only when the skater logged ice time. Writing an
# explicit 0 for the other rows matters: the previous row-by-row version left
# them unset (NaN when the column was not already present), which makes the
# integer cast below raise.
last_10_df['gamesPlayed'] = (last_10_df['toi'] > 0).astype(int)

# Special-teams points = power-play points + short-handed points.
special_teams = last_10_df['powerPlayPoints'] + last_10_df['shPoints']

# Scoring: goal 2, assist 1, special-teams point 0.5, blocked shot 0.5,
# hit 0.1, shot 0.1 -- computed column-wise instead of with iterrows().
last_10_df['fantasyPoints'] = (
    (last_10_df['goals'] * 2)
    + last_10_df['assists']
    + (special_teams * .5)
    + (last_10_df['blockedShots'] * .5)
    + ((last_10_df['hits'] + last_10_df['shots']) * .1)
)
last_10_df['specialTeams'] = special_teams

columns_to_convert4 = ['specialTeams', 'gamesPlayed']
last_10_df[columns_to_convert4] = last_10_df[columns_to_convert4].astype(int)

summary_stats = summary_statistics(last_10_df).sort_values('fantasyPoints', ascending=False)
summary_stats

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,goals,assists,specialTeams,shots,hits,blockedShots,powerPlayToi,shorthandedToi,plusMinus,fantasyPoints,secondaryPosition,tertiaryPosition,FPP60,FPPG
152,8476453,N. Kucherov,TBL,R,11978,9,8,11,11,44,10,1,2232,1,5,38.4,W,F,11.54,4.27
499,8480069,C. Makar,COL,D,14689,10,2,17,7,26,5,19,2662,1897,14,37.1,D,D,9.09,3.71
191,8476887,F. Forsberg,NSH,L,11439,10,10,6,4,46,14,3,2623,20,4,35.5,W,F,11.17,3.55
163,8476468,J. Miller,VAN,C,11803,10,6,9,7,23,12,11,2369,825,1,33.5,C,F,10.22,3.35
298,8477956,D. Pastrnak,BOS,R,11590,10,5,12,10,51,3,2,2242,24,-2,33.4,W,F,10.37,3.34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
300,8477962,B. Lemieux,CAR,L,484,1,0,0,0,0,1,0,0,0,0,0.1,W,F,0.74,0.10
27,8473563,N. Backstrom,WSH,C,744,1,0,0,0,0,1,0,58,0,0,0.1,C,F,0.48,0.10
689,8482765,I. Rosen,BUF,R,780,1,0,0,0,0,0,0,0,0,-2,0.0,W,F,0.00,0.00
698,8483512,M. Savoie,BUF,C,235,1,0,0,0,0,0,0,0,0,0,0.0,C,F,0.00,0.00


# Goalies


In [38]:
last_10_g_df = last_10_g_df_team.copy()

# The raw columns hold "saves/shots" strings; split each into two numeric columns.
last_10_g_df[['saves', 'shots']] = last_10_g_df['saveShotsAgainst'].str.split('/', expand=True).apply(pd.to_numeric)
last_10_g_df[['evSaves', 'evShots']] = last_10_g_df['evenStrengthShotsAgainst'].str.split('/', expand=True).apply(pd.to_numeric)
last_10_g_df[['ppSaves', 'ppShots']] = last_10_g_df['powerPlayShotsAgainst'].str.split('/', expand=True).apply(pd.to_numeric)

last_10_g_df = last_10_g_df.drop(columns=(['saveShotsAgainst', 'savePctg', 'evenStrengthShotsAgainst',
                                   'powerPlayShotsAgainst', 'shorthandedShotsAgainst', 'evenStrengthGoalsAgainst',
                                  'powerPlayGoalsAgainst', 'shorthandedGoalsAgainst']))

columns_to_convert1 = ['toi', 'start', 'shutout', 'win', 'pim']
last_10_g_df[columns_to_convert1] = last_10_g_df[columns_to_convert1].apply(pd.to_numeric)

# GAMES PLAYED TALLY
# 1 when the goalie logged ice time, otherwise an explicit 0. The previous
# row-by-row version left zero-TOI rows unset (NaN when the column was not
# already present), which makes the integer cast below raise.
last_10_g_df['gamesPlayed'] = (last_10_g_df['toi'] > 0).astype(int)

# SHUTOUT POINTS: 3 for a shutout, else 0.
shutout_points = (last_10_g_df['shutout'] == 1) * 3

# DECISION POINTS: the `win` column already carries the point value
# (4 is labelled a win and 1 an overtime loss below; anything else a loss).
decision_points = last_10_g_df['win']

# GOALS AGAINST: -2 each.  SAVES: 0.2 each, counted as shots minus goals against.
goals_against_points = last_10_g_df['goalsAgainst'] * -2
saves_points = (last_10_g_df['shots'] - last_10_g_df['goalsAgainst']) * 0.2

# TOTAL FANTASY POINTS
last_10_g_df['fantasyPoints'] = decision_points + shutout_points + goals_against_points + saves_points
last_10_g_df['decisionType'] = last_10_g_df['win'].map({4: 'W', 1: 'OTL'}).fillna('L')

columns_to_convert3 = ['gamesPlayed']
last_10_g_df[columns_to_convert3] = last_10_g_df[columns_to_convert3].astype(int)

# One row per goalie; `team` is taken from the goalie's last row in frame order.
summary_statsG = last_10_g_df.groupby('playerId').agg({
    'name': 'first',
    'team': 'last',
    'position': 'first',
    'toi': 'sum',
    'gamesPlayed': 'sum',
    'saves': 'sum',
    'shots': 'sum',
    'shutout': 'sum',
    'fantasyPoints': 'sum'
}).reset_index()

summary_statsG.sort_values('fantasyPoints', ascending=False).head(25)

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,saves,shots,shutout,fantasyPoints
7,8475660,C. Talbot,LAK,G,25172,7,195,206,1,44.0
47,8478499,A. Hill,VGK,G,21614,6,174,185,2,34.8
30,8477465,T. Jarry,PIT,G,23822,7,204,218,1,31.8
24,8476945,C. Hellebuyck,WPG,G,28737,8,210,229,1,31.0
52,8479292,C. Lindgren,WSH,G,17888,5,156,167,1,28.2
50,8478971,C. Ingram,ARI,G,20335,6,184,200,1,23.8
22,8476914,J. Korpisalo,OTT,G,23292,7,217,236,0,21.4
1,8471734,J. Quick,NYR,G,18444,5,144,158,1,20.8
69,8481035,S. Ersson,PHI,G,14608,4,97,104,1,20.4
39,8478009,I. Sorokin,NYI,G,22221,6,206,223,1,20.2


In [39]:
# Total goalie ice time per team over the window; teams without goalie rows get 0.
team_goalie_toi = last_10_g_df.groupby('team')['toi'].sum()
team_names['creaseMins'] = team_names['abbreviation'].map(team_goalie_toi).fillna(0).astype(int)

# Denominator for each goalie: the crease time of the team they are listed under.
crease_by_team = team_names.groupby('abbreviation')['creaseMins'].sum()
team_crease = summary_statsG['team'].map(crease_by_team).fillna(0)

# Zero denominators are masked to NaN so the ratios come out as NaN rather than
# raising ZeroDivisionError or producing inf.
goalie_toi = summary_statsG['toi'].where(summary_statsG['toi'] > 0)
goalie_games = summary_statsG['gamesPlayed'].where(summary_statsG['gamesPlayed'] > 0)

# Share of the team's goalie ice time, as a percentage.
summary_statsG['creaseShare'] = (summary_statsG['toi'] / team_crease.where(team_crease > 0) * 100).round(2)
# Fantasy points per 3600 units of toi, and per game played.
summary_statsG['FPP60'] = (summary_statsG['fantasyPoints'] / goalie_toi * 3600).round(2)
summary_statsG['FPPG'] = (summary_statsG['fantasyPoints'] / goalie_games).round(2)

summary_statsG.loc[summary_statsG['team'] == 'VGK']

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,saves,shots,shutout,fantasyPoints,creaseShare,FPP60,FPPG
47,8478499,A. Hill,VGK,G,21614,6,174,185,2,34.8,60.27,5.8,5.8
62,8480313,L. Thompson,VGK,G,14245,4,97,107,0,0.4,39.73,0.1,0.1


In [40]:
# Columns shared by skaters and goalies, in the order used for the combined table.
column_order = ['playerId', 'name', 'team', 'position', 'toi', 'gamesPlayed', 'fantasyPoints', 'FPP60', 'FPPG']

# Position-specific columns that are discarded before stacking the two tables.
skater_only_columns = ['goals', 'assists', 'specialTeams', 'shots', 'hits', 'blockedShots', 'powerPlayToi',
                       'shorthandedToi', 'plusMinus', 'secondaryPosition', 'tertiaryPosition']
goalie_only_columns = ['saves', 'shots', 'shutout']

trim_stats = summary_stats.drop(columns=skater_only_columns)[column_order]
trim_statsG = summary_statsG.drop(columns=goalie_only_columns)[column_order]

print(trim_statsG.columns.to_list())
print(trim_stats.columns.to_list())

# Skaters and goalies in one table, best fantasy total first.
trimmed_stats = pd.concat([trim_stats, trim_statsG]).sort_values('fantasyPoints', ascending = False)
trimmed_stats

['playerId', 'name', 'team', 'position', 'toi', 'gamesPlayed', 'fantasyPoints', 'FPP60', 'FPPG']
['playerId', 'name', 'team', 'position', 'toi', 'gamesPlayed', 'fantasyPoints', 'FPP60', 'FPPG']


Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,fantasyPoints,FPP60,FPPG
7,8475660,C. Talbot,LAK,G,25172,7,44.0,6.29,6.29
152,8476453,N. Kucherov,TBL,R,11978,9,38.4,11.54,4.27
499,8480069,C. Makar,COL,D,14689,10,37.1,9.09,3.71
191,8476887,F. Forsberg,NSH,L,11439,10,35.5,11.17,3.55
47,8478499,A. Hill,VGK,G,21614,6,34.8,5.80,5.80
...,...,...,...,...,...,...,...,...,...
17,8476341,A. Forsberg,OTT,G,12973,4,-10.0,-2.77,-2.50
74,8482821,A. Soderblom,CHI,G,18093,6,-10.6,-2.11,-1.77
5,8474596,J. Allen,MTL,G,11563,4,-11.6,-3.61,-2.90
64,8480843,L. Dostal,ANA,G,7050,2,-14.0,-7.15,-7.00


In [41]:
# Fetch the ESPN player list. A timeout keeps the notebook from hanging on a
# stalled connection, and raise_for_status() surfaces HTTP errors directly
# instead of as a confusing JSON-decoding failure.
response = requests.get(espn_link, timeout=30)
response.raise_for_status()
rosters = response.json()

# Collect plain records and build the frame once (no concat inside the loop).
roster_records = []
for player in rosters:
    n = player['fullName']
    dpos = player['defaultPositionId']
    # Two players share this name; tag the one whose default position id is 4
    # (mapped to 'D' below) so the names stay distinct.
    if n == 'Sebastian Aho' and dpos == 4:
        n = 'Sebastian Aho (D)'
    roster_records.append({
        'fullName': n,
        'rostered': player['ownership']['percentOwned'],
        'pos': dpos,
    })

rostership = pd.DataFrame(roster_records, columns=['fullName', 'rostered', 'pos'])

# ESPN spelling -> spelling used in the player bios.
fixes = {
    'Tim Stutzle': 'Tim Stützle',
    'Alex Barre-Boulet': 'Alex Barré-Boulet',
    'Jani Hakanpaa': 'Jani Hakanpää',
    'Jesse Ylonen': 'Jesse Ylönen',
    'Alexis Lafreniere': 'Alexis Lafrenière',
    'Gustav Lindstrom': 'Gustav Lindström',
    'Alexander Kerfoot': 'Alex Kerfoot',
    'Johnny Beecher': 'John Beecher',
    'Samuel Walker': 'Sammy Walker',
    'Maxime Lajoie': 'Max Lajoie'
}

# Assign the result back: `df[col].replace(..., inplace=True)` acts on a
# temporary under pandas Copy-on-Write and would leave the frame unchanged.
rostership['fullName'] = rostership['fullName'].replace(fixes)

# ESPN default position id -> position label.
pos_fixes = {
    1: 'C',
    2: 'LW',
    3: 'RW',
    4: 'D',
    5: 'G'
}

rostership['pos'] = rostership['pos'].replace(pos_fixes)

rostership.loc[rostership['fullName'] == 'Sebastian Aho (D)']

Unnamed: 0,fullName,rostered,pos
1510,Sebastian Aho (D),0.834676,D


In [42]:
# Player bios keyed the same way as the stats tables: id -> playerId, name -> fullName.
player_bios = (
    pd.read_csv('data/playerbios.csv', encoding='utf-8')
    .rename(columns={'id': 'playerId', 'name': 'fullName'})
)

In [43]:
espn_ids = pd.read_csv('data/espn_ids.csv', encoding='utf-8')

# The file is read as a single 'id,fullName' column. Split it at the FIRST comma
# only, so a name that itself contains a comma is kept whole rather than
# truncated. partition() always yields three columns: id, separator, remainder.
id_and_name = espn_ids['id,fullName'].str.partition(',')
espn_ids['espnId'] = id_and_name[0]
espn_ids['fullName'] = id_and_name[2]

# ESPN spelling -> spelling used in the player bios.
fixes = {
    'Tim Stutzle': 'Tim Stützle',
    'Jani Hakanpaa': 'Jani Hakanpää',
    'Benoit-Olivier Groulx': 'Bo Groulx',
    'Jesse Ylonen': 'Jesse Ylönen',
    'Alexis Lafreniere': 'Alexis Lafrenière',
    'Gustav Lindstrom': 'Gustav Lindström',
    'Alexander Kerfoot': 'Alex Kerfoot',
#     'Johnny Beecher': 'John Beecher',
    'Samuel Walker': 'Sammy Walker',
    'Alex Barre-Boulet': 'Alex Barré-Boulet'
}

# Reverse mapping (bios spelling -> ESPN spelling).
inverse_fixes = {value: key for key, value in fixes.items()}

# Assign the result back: `df[col].replace(..., inplace=True)` acts on a
# temporary under pandas Copy-on-Write and would leave the frame unchanged.
espn_ids['fullName'] = espn_ids['fullName'].replace(fixes)

espn_ids.loc[espn_ids['fullName'] == 'Sebastian Aho (D)']

Unnamed: 0,"id,fullName",espnId,fullName
1083,"4272688,Sebastian Aho (D)",4272688,Sebastian Aho (D)


In [44]:
bios_on_date = player_bios.copy()

# One lookup row per name (first occurrence wins, as with the former .iloc[0]).
roster_lookup = rostership.drop_duplicates('fullName').set_index('fullName')
has_roster_entry = bios_on_date['fullName'].isin(roster_lookup.index)

# Players without a roster entry get the sentinels -2 / 'S'. Only the
# "name not found" case falls back; unlike the former bare `except:`, a missing
# column or other unexpected problem is no longer silently swallowed.
bios_on_date['roster_percent'] = (
    bios_on_date['fullName'].map(roster_lookup['rostered']).where(has_roster_entry, -2)
)
bios_on_date['default_pos'] = (
    bios_on_date['fullName'].map(roster_lookup['pos']).where(has_roster_entry, 'S')
)

bios_on_date.loc[bios_on_date['fullName'] == 'Sebastian Aho (D)']

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight,roster_percent,default_pos
445,8480222,Sebastian Aho (D),1996-02-17,L,70,180,0.834676,D


In [45]:
# One ESPN id per name (first occurrence wins, as with the former .iloc[0]).
espn_id_by_name = espn_ids.drop_duplicates('fullName').set_index('fullName')['espnId']
has_espn_id = bios_on_date['fullName'].isin(espn_id_by_name.index)

# Names with no ESPN match get 0. Only the "name not found" case falls back;
# unlike the former bare `except:`, other errors are no longer hidden.
bios_on_date['espnId'] = (
    bios_on_date['fullName'].map(espn_id_by_name).where(has_espn_id, 0).astype(int)
)

# Players still lacking an ESPN id.
bios_on_date.loc[bios_on_date['espnId'] == 0]

Unnamed: 0,playerId,fullName,birthDate,shootsCatches,height,weight,roster_percent,default_pos,espnId
633,8479320,Max Lajoie,1997-11-05,L,73,191,0.013179,D,0
641,8482411,Hunter Shepard,1995-11-07,L,72,215,0.054259,G,0
651,8483482,Tristan Luneau,2004-01-12,R,73,195,0.030754,D,0
654,8483489,Fraser Minten,2004-07-05,L,74,192,0.060036,C,0
656,8482470,Ilya Solovyov,2000-07-20,L,75,208,0.007322,D,0
663,8483512,Matt Savoie,2004-01-01,R,69,179,0.188895,C,0
672,8482511,Mason Lohrei,2001-01-17,L,77,211,0.383658,D,0
677,8480992,Magnus Chrona,2000-08-28,L,76,194,0.016133,G,0
693,8481534,Raphael Lavoie,2000-09-25,R,76,215,0.02636,C,0
697,8481028,Martin Pospisil,1999-11-19,L,74,173,2.671119,LW,0


In [46]:
# Attach bio, roster and ESPN-id columns to every stats row; the left join keeps
# players that have no bio entry.
summary_stats_snapshot = trimmed_stats.merge(bios_on_date, how='left', on='playerId')
summary_stats_snapshot

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,fantasyPoints,FPP60,FPPG,fullName,birthDate,shootsCatches,height,weight,roster_percent,default_pos,espnId
0,8475660,C. Talbot,LAK,G,25172,7,44.0,6.29,6.29,Cam Talbot,1987-07-05,L,76,200,80.380712,G,5734
1,8476453,N. Kucherov,TBL,R,11978,9,38.4,11.54,4.27,Nikita Kucherov,1993-06-17,L,71,182,99.876999,RW,2563060
2,8480069,C. Makar,COL,D,14689,10,37.1,9.09,3.71,Cale Makar,1998-10-30,R,71,187,99.907738,D,4233563
3,8476887,F. Forsberg,NSH,L,11439,10,35.5,11.17,3.55,Filip Forsberg,1994-08-13,R,73,205,89.145652,LW,2968772
4,8478499,A. Hill,VGK,G,21614,6,34.8,5.80,5.80,Adin Hill,1996-05-11,L,76,215,92.994060,G,3939714
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
783,8476341,A. Forsberg,OTT,G,12973,4,-10.0,-2.77,-2.50,Anton Forsberg,1992-11-27,L,75,195,1.529527,G,3036851
784,8482821,A. Soderblom,CHI,G,18093,6,-10.6,-2.11,-1.77,Arvid Soderblom,1999-08-19,L,75,180,0.813962,G,4894729
785,8474596,J. Allen,MTL,G,11563,4,-11.6,-3.61,-2.90,Jake Allen,1990-08-07,L,74,197,6.464196,G,5111
786,8480843,L. Dostal,ANA,G,7050,2,-14.0,-7.15,-7.00,Lukas Dostal,2000-06-22,L,74,188,9.886339,G,4588165


In [47]:
# Persist the snapshot; the file name records the window size and today's date.
fileName = f"data/summary_stats_last{x}-{today.strftime('%Y-%m-%d')}.csv"
summary_stats_snapshot.to_csv(path_or_buf=fileName, index=False)