In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pprint import pprint
from datetime import datetime, timedelta, timezone
import pytz
import scipy.stats
from dateutil.relativedelta import relativedelta

import warnings

# NOTE: this silences every warning category for the rest of the session,
# not one specific warning.
warnings.filterwarnings("ignore")

# Naive UTC timestamp; its date part is used to build the date-stamped CSV
# file names read in the loading cell. datetime.utcnow() is deprecated since
# Python 3.12, so the aware "now" is taken and the tzinfo stripped, which
# yields the same naive value.
today = datetime.now(timezone.utc).replace(tzinfo=None)

from api_keys import espn_link

In [2]:
# Date part of `today`, shared by every date-stamped snapshot file name below.
date_stamp = today.strftime('%Y-%m-%d')

# Reference tables with fixed file names.
homeOnlySked = pd.read_csv('data/sked.csv', index_col=False)
completeSked = pd.read_csv('data/sked_full.csv', index_col=False)
team_names = pd.read_csv('data/team_names.csv', index_col=False)

# Snapshots whose file names carry today's date stamp; a missing file for
# today's date makes read_csv raise.
allG_df = pd.read_csv(f"data/allG_df_fp-{date_stamp}.csv", index_col=False)
all_df = pd.read_csv(f"data/all_df_fp-{date_stamp}.csv", index_col=False)
summary_statsG = pd.read_csv(f"data/summary_statsG-{date_stamp}.csv", index_col=False)
summary_statsS = pd.read_csv(f"data/summary_statsS-{date_stamp}.csv", index_col=False)

# `file_name` ends up holding the path of the last snapshot read.
file_name = f"data/summary_stats-{date_stamp}.csv"
player_bios = pd.read_csv(file_name, index_col=False)

# Explicit copy: a later cell adds a column to this frame, and it should not be
# treated as a slice of `team_names`.
team_names_only = team_names[['abbreviation', 'name']].copy()

# Row subsets of `all_df` by the literal position codes in the data.
all_df_forwards = all_df.loc[all_df['position'] != 'D']
all_df_defense = all_df.loc[all_df['position'] == 'D']

all_df_centers = all_df.loc[all_df['secondaryPosition'] == 'C']
all_df_wingers = all_df.loc[all_df['secondaryPosition'] == 'W']

In [3]:
# Maps the team abbreviations used in `team_names` to the short codes stored
# in the `espn_name` column (presumably ESPN's own abbreviations - confirm).
espn_teams = {
    'BOS': 'Bos',
    'VGK': 'Vgk',
    'NYR': 'NYR',
    'VAN': 'Van',
    'LAK': 'LA',
    'DAL': 'Dal',
    'FLA': 'Fla',
    'COL': 'Col',
    'WPG': 'Wpg',
    'TOR': 'Tor',
    'WSH': 'Wsh',
    'CAR': 'Car',
    'TBL': 'TB',
    'PHI': 'Phi',
    'DET': 'Det',
    'ANA': 'Ana',
    'ARI': 'Ari',
    'SEA': 'Sea',
    'NJD': 'NJ',
    'STL': 'StL',
    'NYI': 'NYI',
    'OTT': 'Ott',
    'PIT': 'Pit',
    'MTL': 'Mon',
    'BUF': 'Buf',
    'CGY': 'Cgy',
    'MIN': 'Min',
    'NSH': 'Nsh',
    'CBJ': 'Cls',
    'EDM': 'Edm',
    'CHI': 'Chi',
    'SJS': 'SJ'
}

# Build a new frame instead of assigning a column onto a frame that was derived
# by slicing `team_names`; re-running the cell gives the same result.
# Abbreviations that are not keys of `espn_teams` get NaN in `espn_name`.
team_names_only = team_names_only.assign(
    espn_name=team_names_only['abbreviation'].map(espn_teams)
)
team_names_only

Unnamed: 0,abbreviation,name,espn_name
0,NYR,New York Rangers,NYR
1,BOS,Boston Bruins,Bos
2,VGK,Vegas Golden Knights,Vgk
3,COL,Colorado Avalanche,Col
4,LAK,Los Angeles Kings,LA
5,VAN,Vancouver Canucks,Van
6,FLA,Florida Panthers,Fla
7,DAL,Dallas Stars,Dal
8,WPG,Winnipeg Jets,Wpg
9,DET,Detroit Red Wings,Det


In [4]:
# Summary statistics of the `fantasyPoints` column over every row of allG_df.
allG_df.loc[:, 'fantasyPoints'].describe()


count    695.000000
mean       1.720863
std        5.177196
min      -14.800000
25%       -2.400000
50%        1.400000
75%        5.500000
max       15.200000
Name: fantasyPoints, dtype: float64

In [5]:
# Bio rows for goalies only.
goalies = player_bios.loc[player_bios['position'] == 'G']

# One roster_percent per playerId; if an id appears more than once in
# `goalies`, the first row wins.
roster_percent_by_id = (
    goalies
    .drop_duplicates('playerId')
    .set_index('playerId')['roster_percent']
)

# Fail with a clear message when a game row references a goalie that has no
# bio row, instead of an opaque positional-index error.
unknown_ids = allG_df.loc[
    ~allG_df['playerId'].isin(roster_percent_by_id.index), 'playerId'
].unique()
if len(unknown_ids):
    raise ValueError(
        f"No goalie bio row for playerId(s): {sorted(unknown_ids.tolist())}"
    )

# available = 100 - roster_percent, rounded to two decimals, computed for all
# rows at once rather than one boolean scan of `goalies` per game row.
allG_df['available'] = (100 - allG_df['playerId'].map(roster_percent_by_id)).round(2)

allG_df

Unnamed: 0,playerId,sweaterNumber,name,position,pim,goalsAgainst,toi,team,opponent,gameDate,...,saves,shots,evSaves,evShots,ppSaves,ppShots,gamesPlayed,fantasyPoints,decisionType,available
0,8477992,31,J. Johansson,G,0,3,3600,TBL,NSH,2023-10-10,...,28,31,21,23,6,7,1,3.6,W,61.94
1,8477465,35,T. Jarry,G,0,3,3450,PIT,CHI,2023-10-10,...,32,35,26,29,5,5,1,0.4,L,16.39
2,8478499,33,A. Hill,G,0,1,3600,VGK,SEA,2023-10-10,...,32,33,22,23,10,10,1,8.4,W,6.91
3,8475883,31,F. Andersen,G,0,3,3600,CAR,OTT,2023-10-11,...,27,30,17,19,6,6,1,3.4,W,53.03
4,8478492,35,I. Samsonov,G,0,5,3726,TOR,MTL,2023-10-11,...,19,24,16,20,3,4,1,-2.2,W,34.22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
690,8480045,1,U. Luukkonen,G,0,1,3600,BUF,NYR,2023-11-27 00:00:00,...,25,26,22,22,3,4,1,7.0,W,93.63
691,8475683,72,S. Bobrovsky,G,0,0,3600,FLA,OTT,2023-11-27 00:00:00,...,20,20,15,15,4,4,1,11.0,W,29.30
692,8476883,88,A. Vasilevskiy,G,0,3,3512,TBL,COL,2023-11-27 00:00:00,...,19,22,16,18,3,4,1,-2.2,L,6.12
693,8478499,33,A. Hill,G,0,2,3875,VGK,CGY,2023-11-27 00:00:00,...,32,34,25,27,4,4,1,3.4,OTL,6.91


In [23]:
# The 24 bio rows with the highest `fantasyPoints`, and the game rows in
# allG_df whose `name` matches one of them.
# NOTE(review): the match is on `name`, not `playerId`; two goalies sharing a
# name string would be merged - confirm this is acceptable.
top_goalies = goalies.sort_values(by='fantasyPoints', ascending=False).iloc[:24]
top_goalies_list = top_goalies['name'].tolist()
filtered_box = allG_df.loc[allG_df['name'].isin(top_goalies_list)]

# Sanity check: number of distinct names that actually matched.
filtered_box['name'].nunique()


24

In [24]:
# Summary statistics of `fantasyPoints` for the game rows in filtered_box.
filtered_box.loc[:, 'fantasyPoints'].describe()

count    299.000000
mean       3.022074
std        5.203424
min       -8.600000
25%       -0.800000
50%        3.200000
75%        6.700000
max       15.200000
Name: fantasyPoints, dtype: float64

In [6]:
# Game rows whose `available` value is at most 60, and the distinct goalie
# names among them.
owned_goalies = allG_df.loc[allG_df['available'].le(60)]
goalie_list = owned_goalies['name'].unique()

# Number of distinct names in the group.
owned_goalies['name'].nunique()

24

In [7]:
# Summary statistics of `fantasyPoints` for the rows in owned_goalies.
owned_goalies.loc[:, 'fantasyPoints'].describe()

count    303.000000
mean       2.385479
std        5.187535
min       -8.800000
25%       -1.700000
50%        2.600000
75%        6.400000
max       15.200000
Name: fantasyPoints, dtype: float64

In [8]:
# Game rows whose `available` value is above 60 (the complement of the
# `<= 60` group, apart from rows where `available` is missing).
notowned_goalies = allG_df.loc[allG_df['available'].gt(60)]

# Number of distinct names in the group.
notowned_goalies['name'].nunique()

52

In [9]:
# Summary statistics of `fantasyPoints` for the rows in notowned_goalies.
notowned_goalies.loc[:, 'fantasyPoints'].describe()

count    392.000000
mean       1.207143
std        5.116859
min      -14.800000
25%       -2.600000
50%        1.000000
75%        4.800000
max       14.600000
Name: fantasyPoints, dtype: float64

In [10]:
# Mean per-game `fantasyPoints` of the owned_goalies rows; the report loops
# compare each appearance against this value.
average = owned_goalies.loc[:, 'fantasyPoints'].mean()

In [11]:
# Split the goalie bio rows by whether their `name` is in `goalie_list`, then
# order each group by roster_percent, highest first. The sorts return new
# frames instead of sorting a filtered slice in place.
is_listed = goalies['name'].isin(goalie_list)

filtered_goalies = goalies[is_listed].sort_values('roster_percent', ascending=False)
remaining_goalies = goalies[~is_listed].sort_values('roster_percent', ascending=False)

In [31]:
# Print one HTML snippet per goalie in `filtered_goalies`: how many appearances
# scored above `average`, split by start (`start == 1`) versus relief, the
# count and sum of negative outings, the goalie's `creaseShare`, and the
# `fantasyPoints` value from the bio row.
# NOTE(review): the cell that reports on `remaining_goalies` repeats this loop;
# a shared helper would remove the duplication.
for _, goalie in filtered_goalies.iterrows():
    games = allG_df.loc[allG_df['playerId'] == goalie['playerId']]
    if games.empty:
        # No appearances to summarise; the percentage below would otherwise
        # divide by zero.
        continue

    # Full team name (the `name` column), looked up by abbreviation.
    team_name = team_names_only.loc[team_names_only['abbreviation'] == goalie['team'], 'name'].iloc[0]
    crease = summary_statsG.loc[summary_statsG['playerId'] == goalie['playerId'], 'creaseShare'].iloc[0]

    started = games['start'] == 1
    above = games['fantasyPoints'] > average
    negative = games['fantasyPoints'] < 0

    starts_above = int((started & above).sum())
    starts_below = int((started & ~above).sum())
    relief_above = int((~started & above).sum())
    relief_below = int((~started & ~above).sum())

    negative_count = int(negative.sum())
    # Stays the integer 0 when there are no negative outings so that it is
    # printed as "0".
    negative_total = games.loc[negative, 'fantasyPoints'].sum() if negative_count else 0

    pct_above = round((starts_above + relief_above) / len(games) * 100, 1)

    print(f"<p><b>{goalie['fullName']}, G, {team_name}</b> (rostered in: {round(goalie['roster_percent'], 1)}%) <br />")
    print(f"Above average: {starts_above}; Below average: {starts_below}; Relief (above/below): {relief_above}/{relief_below}; Minus appearances: {negative_count} ({round(negative_total, 1)} total) <br />")
    print(f"{pct_above}% of appearances above average; {round(crease, 1)}% crease share; {round(goalie['fantasyPoints'], 1)} fantasy points</p>")
    print()
    # An opening <p> is emitted after each goalie, as before.
    print("<p>")
    print()

<p><b>Igor Shesterkin, G, New York Rangers</b> (rostered in: 99.6%) <br />
Above average: 8; Below average: 4; Relief (above/below): 0/0; Minus appearances: 4 (-15.2 total) <br />
66.7% of appearances above average; 57.5% crease share; 35.6 fantasy points</p>

<p>

<p><b>Connor Hellebuyck, G, Winnipeg Jets</b> (rostered in: 99.2%) <br />
Above average: 9; Below average: 6; Relief (above/below): 0/0; Minus appearances: 5 (-14.8 total) <br />
60.0% of appearances above average; 74.9% crease share; 42.8 fantasy points</p>

<p>

<p><b>Jake Oettinger, G, Dallas Stars</b> (rostered in: 98.9%) <br />
Above average: 9; Below average: 5; Relief (above/below): 0/0; Minus appearances: 3 (-12.4 total) <br />
64.3% of appearances above average; 73.8% crease share; 35.4 fantasy points</p>

<p>

<p><b>Linus Ullmark, G, Boston Bruins</b> (rostered in: 98.2%) <br />
Above average: 7; Below average: 3; Relief (above/below): 0/1; Minus appearances: 4 (-12.2 total) <br />
63.6% of appearances above averag

In [50]:
# Print one HTML snippet per goalie in `remaining_goalies`: how many
# appearances scored above `average`, split by start (`start == 1`) versus
# relief, the count and sum of negative outings, the goalie's `creaseShare`,
# and the `fantasyPoints` value from the bio row.
# NOTE(review): the cell that reports on `filtered_goalies` contains the same
# loop; a shared helper would remove the duplication.
for _, goalie in remaining_goalies.iterrows():
    games = allG_df.loc[allG_df['playerId'] == goalie['playerId']]
    if games.empty:
        # A goalie with a bio row but no appearances has nothing to summarise;
        # the percentage below would otherwise divide by zero.
        continue

    # Full team name (the `name` column), looked up by abbreviation.
    team_name = team_names_only.loc[team_names_only['abbreviation'] == goalie['team'], 'name'].iloc[0]
    crease = summary_statsG.loc[summary_statsG['playerId'] == goalie['playerId'], 'creaseShare'].iloc[0]

    started = games['start'] == 1
    above = games['fantasyPoints'] > average
    negative = games['fantasyPoints'] < 0

    starts_above = int((started & above).sum())
    starts_below = int((started & ~above).sum())
    relief_above = int((~started & above).sum())
    relief_below = int((~started & ~above).sum())

    negative_count = int(negative.sum())
    # Stays the integer 0 when there are no negative outings so that it is
    # printed as "0".
    negative_total = games.loc[negative, 'fantasyPoints'].sum() if negative_count else 0

    pct_above = round((starts_above + relief_above) / len(games) * 100, 1)

    print(f"<p><b>{goalie['fullName']}, G, {team_name}</b> (rostered in: {round(goalie['roster_percent'], 1)}%) <br />")
    print(f"Above average: {starts_above}; Below average: {starts_below}; Relief (above/below): {relief_above}/{relief_below}; Minus appearances: {negative_count} ({round(negative_total, 1)} total) <br />")
    print(f"{pct_above}% of appearances above average; {round(crease, 1)}% crease share; {round(goalie['fantasyPoints'], 1)} fantasy points</p>")
    print()
    # An opening <p> is emitted after each goalie, as before.
    print("<p>")
    print()

<p><b>Jonas Johansson, G, Tampa Bay Lightning</b> (rostered in: 38.1%) <br />
Above average: 7; Below average: 10; Relief (above/below): 0/0; Minus appearances: 5 (-19.0 total) <br />
41.2% of appearances above average; 77.6% crease share; 25.2 fantasy points</p>

<p>

<p><b>Jonathan Quick, G, New York Rangers</b> (rostered in: 32.3%) <br />
Above average: 5; Below average: 2; Relief (above/below): 0/1; Minus appearances: 0 (0 total) <br />
62.5% of appearances above average; 37.5% crease share; 41.0 fantasy points</p>

<p>

<p><b>Joonas Korpisalo, G, Ottawa Senators</b> (rostered in: 31.3%) <br />
Above average: 5; Below average: 6; Relief (above/below): 0/1; Minus appearances: 4 (-14.8 total) <br />
41.7% of appearances above average; 63.2% crease share; 14.4 fantasy points</p>

<p>

<p><b>Pyotr Kochetkov, G, Carolina Hurricanes</b> (rostered in: 29.0%) <br />
Above average: 2; Below average: 3; Relief (above/below): 0/2; Minus appearances: 5 (-14.0 total) <br />
28.6% of appearances

In [46]:
# Starts (`start == 1`) that finished with negative fantasy points.
is_negative_start = (allG_df['fantasyPoints'] < 0) & (allG_df['start'] == 1)
negative_starts = allG_df.loc[is_negative_start]

# Per-opponent totals. Summing `start` (always 1 here) counts the negative
# starts. numeric_only=True keeps the aggregation to numeric columns, so text
# columns such as names and dates are not concatenated or rejected.
negatives = (
    negative_starts
    .groupby('opponent')
    .sum(numeric_only=True)
    .sort_values('start', ascending=False)
)

# One "<abbreviation>: <count> negatives; <points> fantasy points" line per
# opponent, wrapped in a single paragraph.
print("<p>")
for opponent, totals in negatives.iterrows():
    print(f"{opponent}: {int(totals['start'])} negatives; {round(totals['fantasyPoints'], 1)} fantasy points <br />")
print("</p>")

<p>
COL: 14 negatives; -50.8 fantasy points <br />
TBL: 12 negatives; -56.4 fantasy points <br />
NYR: 12 negatives; -36.2 fantasy points <br />
LAK: 12 negatives; -44.6 fantasy points <br />
DAL: 11 negatives; -33.8 fantasy points <br />
DET: 11 negatives; -40.6 fantasy points <br />
WPG: 10 negatives; -29.0 fantasy points <br />
VAN: 10 negatives; -52.2 fantasy points <br />
NJD: 9 negatives; -23.2 fantasy points <br />
BOS: 9 negatives; -19.4 fantasy points <br />
VGK: 9 negatives; -34.6 fantasy points <br />
TOR: 9 negatives; -34.0 fantasy points <br />
NSH: 8 negatives; -24.8 fantasy points <br />
PHI: 8 negatives; -29.2 fantasy points <br />
ANA: 8 negatives; -26.4 fantasy points <br />
FLA: 8 negatives; -23.4 fantasy points <br />
MTL: 7 negatives; -10.4 fantasy points <br />
BUF: 7 negatives; -20.2 fantasy points <br />
CAR: 7 negatives; -22.8 fantasy points <br />
OTT: 7 negatives; -29.8 fantasy points <br />
EDM: 7 negatives; -30.4 fantasy points <br />
PIT: 7 negatives; -24.

In [51]:
# Game rows for 'A. Hill' with gameDate on or after 2023-11-12 (gameDate is
# compared against the date string as given).
# NOTE(review): the variable is called `skinner` although the filter selects
# 'A. Hill' - confirm which goalie is intended.
skinner = allG_df.loc[
    (allG_df['name'] == 'A. Hill') & (allG_df['gameDate'] >= '2023-11-12')
]
skinner

Unnamed: 0,playerId,sweaterNumber,name,position,pim,goalsAgainst,toi,team,opponent,gameDate,...,saves,shots,evSaves,evShots,ppSaves,ppShots,gamesPlayed,fantasyPoints,decisionType,available
541,8478499,33,A. Hill,G,0,5,3600,VGK,MTL,2023-11-16 00:00:00,...,23,28,17,22,4,4,1,-1.4,W,6.91
571,8478499,33,A. Hill,G,0,2,3594,VGK,PIT,2023-11-19 00:00:00,...,30,32,26,28,4,4,1,2.0,L,6.91
623,8478499,33,A. Hill,G,0,1,3717,VGK,DAL,2023-11-22 00:00:00,...,31,32,27,28,3,3,1,8.2,W,6.91
693,8478499,33,A. Hill,G,0,2,3875,VGK,CGY,2023-11-27 00:00:00,...,32,34,25,27,4,4,1,3.4,OTL,6.91


In [55]:
# Goalie summary rows ordered from lowest to highest `fantasyPoints`.
summary_statsG.sort_values(by='fantasyPoints', ascending=True)

Unnamed: 0,playerId,name,team,position,toi,gamesPlayed,saves,shots,shutout,fantasyPoints,missedGames,creaseShare,gamesRemaining,FPP60,FPPG,fantasyPointsRemain
75,8482821,A. Soderblom,CHI,G,32396,10,255,289,0,-9.0,0,47.67,30,-1.00,-0.90,-30.04
10,8475789,J. Campbell,EDM,G,16007,5,137,157,0,-8.6,0,22.37,13,-1.93,-1.72,-26.83
56,8479406,F. Gustavsson,MIN,G,36534,11,296,336,1,-7.8,0,53.09,33,-0.77,-0.71,-25.71
17,8476341,A. Forsberg,OTT,G,22557,7,125,147,0,-7.0,0,36.77,23,-1.12,-1.00,-26.70
67,8480992,M. Chrona,SJS,G,1830,1,13,17,0,-5.4,0,2.32,1,-10.62,-5.40,-14.76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30,8477465,T. Jarry,PIT,G,50175,15,379,413,3,44.8,0,70.00,43,3.21,2.99,139.51
8,8475683,S. Bobrovsky,FLA,G,60661,17,426,468,2,48.2,0,81.05,49,2.86,2.84,141.42
34,8477967,T. Demko,VAN,G,52944,15,395,427,2,61.0,0,67.19,40,4.15,4.07,167.21
7,8475660,C. Talbot,LAK,G,49933,14,376,404,1,63.2,0,72.50,45,4.56,4.51,208.11
