In [31]:
import pandas as pd
from tqdm import tqdm
import requests
from sqlite3 import connect
import time as tm

In [32]:
# Load data

shots = pd.read_csv('./shots/2024_shots_g.csv', encoding='latin-1')

In [34]:
# Create dictionaries for goalie statistics
goalies = {}

for i in tqdm(range(0, len(shots))):
    shot = shots.iloc[i]
    if str(shot['goalie_id']) == 'nan':
        continue
    goalie_id = str(int(shot['goalie_id']))[:7]
    goalies[goalie_id] = {}

100%|██████████| 57413/57413 [00:02<00:00, 23558.18it/s]


In [35]:
# Update Dictionaries with other statistics for goalies
shootout = requests.get("https://api.nhle.com/stats/rest/en/goalie/shootout?isAggregate=false&isGame=false&sort=%5B%7B%22property%22:%22shootoutWins%22,%22direction%22:%22DESC%22%7D,%7B%22property%22:%22shootoutSavePct%22,%22direction%22:%22DESC%22%7D,%7B%22property%22:%22playerId%22,%22direction%22:%22ASC%22%7D%5D&start=0&limit=500&factCayenneExp=gamesPlayed%3E=1&cayenneExp=gameTypeId=2%20and%20seasonId%3C=20232024%20and%20seasonId%3E=20232024").json();
for player_id, stats in tqdm(goalies.items()):
    player = requests.get(f'https://api-web.nhle.com/v1/player/{player_id}/landing').json()
    stats['position'] = 'G'

    # Populate Dictionary with relevant data
    stats['firstName'] = player['firstName']["default"]
    stats['lastName'] = player['lastName']["default"]
    stats['birthDate'] = player['birthDate']
    stats['nationality'] = player['birthCountry']
    stats['position'] = player['position']
    stats['height'] = player['heightInInches']
    stats['weight'] = player['weightInPounds']
    stats['team'] = player["fullTeamName"]["default"]
    stats['handedness'] = player['shootsCatches']

    # TOI
    nhl_seasons = list(filter(lambda x: x['leagueAbbrev'] == 'NHL' and x["season"] == 20232024, player['seasonTotals']))
    total_toi =0 
    for nhl_seas in nhl_seasons:
        seas = nhl_seas["timeOnIce"]
        m, s = map(int, seas.split(':'))
        total_toi += m + s/60

    stats['TOI'] = total_toi
    stats['starts'] = nhl_seasons[-1]["gamesStarted"]

    shootout_stats = list(filter(lambda x: int(x["playerId"]) == int(player_id), shootout["data"]))

    if len(shootout_stats) == 0:
        stats['shootout'] = 0
    else:
        stats['shootout'] = shootout_stats[0]["shootoutSavePct"]

    goalies[player_id] = stats

100%|██████████| 92/92 [00:19<00:00,  4.82it/s]


In [36]:
# Other stats

for player_id, stats in tqdm(goalies.items()):
    if stats['TOI'] == 0:
        continue
    # Low Danger Shots
    xG = shots[(shots['goalie_id'].astype(str).str[:7] == player_id) & (shots['xGF'] < 0.05)]['xGF'].sum()
    goals = shots[(shots['goalie_id'].astype(str).str[:7] == player_id) & (shots['xGF'] < 0.05)]['isGoal'].sum()

    stats['low_danger'] = (xG - goals) /stats['TOI']

    # Medium Danger Shots
    xG = shots[(shots['goalie_id'].astype(str).str[:7] == player_id) & (shots['xGF'] > 0.05) & (shots['xGF'] < 0.15)]['xGF'].sum()
    goals = shots[(shots['goalie_id'].astype(str).str[:7] == player_id) & (shots['xGF'] > 0.05) & (shots['xGF'] < 0.15)]['isGoal'].sum()
    freq = len(shots[(shots['goalie_id'].astype(str).str[:7] == player_id) & (shots['xGF'] > 0.05) & (shots['xGF'] < 0.15)]['xGF'])


    stats['medium_danger'] = (xG - goals) /stats['TOI']
    stats['medium_danger_freq'] = (freq / stats['TOI']) * 60 if stats['TOI'] != 0 else 0

    # High Danger Shots
    xG = shots[(shots['goalie_id'].astype(str).str[:7] == player_id) & (shots['xGF'] > 0.15)]['xGF'].sum()
    goals = shots[(shots['goalie_id'].astype(str).str[:7] == player_id) & (shots['xGF'] > 0.15)]['isGoal'].sum()
    freq = len(shots[(shots['goalie_id'].astype(str).str[:7] == player_id) & (shots['xGF'] > 0.15)]['xGF'])

    stats['high_danger'] = (xG - goals) /stats['TOI']
    stats['high_danger_freq'] = (freq / stats['TOI']) * 60 if stats['TOI'] != 0 else 0
    
    # PK
    xG = shots[(shots['goalie_id'].astype(str).str[:7] == player_id) & (shots['strength'] == 'Powerplay')]['xGF'].sum()
    goals = sum(shots[(shots['goalie_id'].astype(str).str[:7] == player_id) & (shots['strength'] == 'PP')]['isGoal'])


    stats['pk'] = (xG - goals) /stats['TOI']

    # EV
    xG = sum(shots[(shots['goalie_id'].astype(str).str[:7] == player_id) & (shots['strength'] == 'EV')]['xGF'])
    goals = sum(shots[(shots['goalie_id'].astype(str).str[:7] == player_id) & (shots['strength'] == 'EV')]['isGoal'])

    stats['ev'] = (xG - goals) /stats['TOI']

goalies[player_id] = stats


100%|██████████| 92/92 [00:30<00:00,  3.01it/s]


In [37]:
# Get Shots Per Player
shots_goals = shots[shots['isGoal'] == 1]
for i in tqdm(range(0, len(shots_goals))):
    shot = shots_goals.iloc[i]
    if str(shot['event']) != 'goal' or str(shot['goalie_id']) == 'nan':
        continue
    stats = goalies[str(int(shot['goalie_id']))[:7]]
    goalie_id = str(int(shot['goalie_id']))[:7]

    player_shots = stats.get('shots', [])

    event_id = str(shot['shot_id'])
    game = shot['gameID']
    link = ['No link found.']

    # Grab media (shoutout trusty)
    if event_id != 'nan':
        failed = True
        while failed:
            try:
                media = requests.get(f'https://forge-dapi.d3.nhle.com/v2/content/en-US/videos?context.slug=nhl&tags.slug=highlight&tags.slug=gameid-{game}').json()
                failed = False
            except (requests.exceptions.RequestException,
                ConnectionResetError) as err:
                tm.sleep(20)
                print('Taking a break...')
        highlights =  media["items"]
        shot_list = list(filter(lambda x: "fields" in x.keys(), highlights))
        shot_list = list(map(lambda x: x["fields"], shot_list))
        shot_list = list(filter(lambda x: "statsEventId" in x.keys() and x["statsEventId"] == event_id, shot_list))
        if len(shot_list) != 0:
            shot_id = shot_list[0]['statsEventId']
            if str(event_id) == shot_id:
                link = f'https://players.brightcove.net/6415718365001/EXtG1xJ7H_default/index.html?videoId={shot_list[0]["brightcoveId"]}'


    shooter = shot['shooter']


    player_shots.append({'date': shot['date'], 'shooter': shooter, 'goalie': shot['goalie'], 'strength': shot['strength'], 'away_goals': shot['away_goals'], 'home_goals': shot['home_goals'], 'home_team': shot['home'], 'away_team': shot['away'], 'x': shot['x'], 'y': shot['y'], 'prev_event': shot['prev_event'], 'type': shot['shotType'], 'xG': shot['xGF'], 'Outcome': shot['event'], 'Link': link})

    stats['shots'] = player_shots
    goalies[goalie_id] = stats

100%|██████████| 5486/5486 [10:56<00:00,  8.36it/s]


In [38]:
conn = connect("player_stats.db")
curr = conn.cursor()

# create a table 
curr.execute('CREATE TABLE IF NOT EXISTS goalies_2024 (TOI NUMBER, starts NUMBER, shootout NUMBER, shots TEXT, low_danger NUMBER, medium_danger NUMBER, medium_danger_freq NUMBER, high_danger NUMBER, high_danger_freq NUMBER,  pk NUMBER, ev NUMBER  firstName TEXT, lastName TEXT, birthDate TEXT, nationality TEXT, position TEXT, height TEXT, weight NUMBER, team TEXT, handedness TEXT, player_id TEXT)')
 
# commit the query
conn.commit()

In [39]:
# conn = connect("player_stats.db")
# curr = conn.cursor()
# conn.execute("VACUUM")
# conn.close()

In [40]:
# Convert Dict to Pandas

players = []

for player_id, stats in goalies.items():
    stats['player_id'] = str(player_id)
    players.append(pd.DataFrame.from_dict([stats]))

df = pd.concat(players)
    

In [41]:
# Make sure types match up
df['firstName'] = df['firstName'].astype(str)
df['lastName'] = df['lastName'].astype(str)
df['birthDate'] = df['birthDate'].astype(str)
df['nationality'] = df['nationality'].astype(str)
df['height'] = df['height'].astype(str)
df['team'] = df['team'].astype(str)
df['handedness'] = df['handedness'].astype(str)
df['player_id'] = df['player_id'].astype(str)
df['shots'] = df['shots'].astype(str)

In [42]:
df = df[df['TOI'] >= 300]

In [43]:
df['TOI'] = df.TOI.rank(pct = True)
df['starts'] = df.starts.rank(pct = True)
df['shootout'] = df.shootout.rank(pct = True)
df['low_danger'] = df.low_danger.rank(pct = True)
df['medium_danger'] = df.medium_danger.rank(pct = True)
df['medium_danger_freq'] = df.medium_danger_freq.rank(pct = True)
df['high_danger'] = df.high_danger.rank(pct = True)
df['high_danger_freq'] = df.high_danger_freq.rank(pct = True)
df['pk'] = df.pk.rank(pct = True)
df['ev'] = df.ev.rank(pct = True)

In [44]:
# Dump to SQL
df.to_sql('goalies_2024', conn, if_exists='replace')

74

In [45]:
conn.close()