
# NHL Team Game Stats Extraction

This notebook collects per-game team statistics for the entire 2024‑25 NHL regular season.

It assembles a comprehensive dataset that captures each team's performance in every game, including basic boxscore metrics, special teams efficiency, goaltending efficiency, possession proxies (Corsi and Fenwick), discipline metrics, and contextual information such as home/away status and opponent.

The resulting DataFrame will allow you to calculate moving averages, identify trends, and build predictive models for the upcoming season.

> **Note:** The functions defined below use the NHL's public `api-web.nhle.com` endpoints. They require an active internet connection and issue one schedule request per calendar day plus one request per game, so a full season may take several minutes to run.


In [None]:

import requests
import pandas as pd
from datetime import datetime, timedelta
import time

# Base URL templates
SCHEDULE_URL_TEMPLATE = "https://api-web.nhle.com/v1/schedule/{date}"
GAME_FEED_URL_TEMPLATE = "https://api-web.nhle.com/v1/gamecenter/{game_pk}/feed/live"


def get_daily_schedule(date_str):
    """Return the regular season games (gameType == 2) scheduled on one date.

    Args:
        date_str: Date in ``YYYY-MM-DD`` form, substituted into
            ``SCHEDULE_URL_TEMPLATE``.

    Returns:
        A list of raw game dicts from the schedule response. The list is
        empty when the date has no regular season games or when every
        attempt to fetch the schedule failed (a warning is printed in the
        latter case).
    """
    url = SCHEDULE_URL_TEMPLATE.format(date=date_str)
    max_attempts = 3
    for attempt in range(max_attempts):
        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            resp = requests.get(url, timeout=10)
            if resp.status_code == 200:
                data = resp.json()
                if 'games' in data:
                    day_games = data.get('games') or []
                else:
                    # NOTE(review): the schedule endpoint is understood to
                    # return a week of days under 'gameWeek', each with its
                    # own 'date' and 'games' -- confirm against a live
                    # response.  Only the requested day is kept so that
                    # day-by-day callers do not collect duplicates.
                    day_games = []
                    for day in data.get('gameWeek', []):
                        if day.get('date') == date_str:
                            day_games.extend(day.get('games', []))
                return [game for game in day_games if game.get('gameType') == 2]
        except (requests.RequestException, ValueError):
            # Network failure or undecodable JSON: fall through and retry.
            pass
        # Back off before retrying, whether the failure was an exception or
        # a non-200 status; no point sleeping after the final attempt.
        if attempt < max_attempts - 1:
            time.sleep(1)
    print(f"Warning: Could not fetch schedule for {date_str}")
    return []


def _team_display_name(team):
    """Return a printable name for a schedule team entry, or None if absent."""
    # 'name' is what the original code read (either a plain string or a
    # {'default': ...} mapping).  The remaining keys are fallbacks --
    # presumably present on schedule entries; verify against a live response.
    for key in ('name', 'commonName', 'placeName'):
        value = team.get(key)
        if isinstance(value, dict):
            value = value.get('default')
        if value:
            return value
    return team.get('abbrev')


def get_full_schedule(start_date, end_date):
    """Collect every regular season game between two dates into a DataFrame.

    Args:
        start_date: First date to query (anything ``pd.date_range`` accepts).
        end_date: Last date to query, inclusive.

    Returns:
        A DataFrame with one row per game and the columns ``gamePk``,
        ``gameDate``, ``homeTeamId``, ``homeTeamName``, ``awayTeamId`` and
        ``awayTeamName``.  The columns are present even when no games were
        found.
    """
    columns = [
        'gamePk', 'gameDate',
        'homeTeamId', 'homeTeamName',
        'awayTeamId', 'awayTeamName',
    ]
    games_list = []
    seen_game_ids = set()
    for date in pd.date_range(start_date, end_date):
        date_str = date.strftime('%Y-%m-%d')
        for game in get_daily_schedule(date_str):
            game_id = game['id']
            # Guard against the same game being reported under several dates.
            if game_id in seen_game_ids:
                continue
            seen_game_ids.add(game_id)
            home_team = game['homeTeam']
            away_team = game['awayTeam']
            games_list.append({
                'gamePk': game_id,
                # Use the schedule date that was queried: truncating the UTC
                # start time rolls evening North American games over to the
                # following calendar day.
                'gameDate': date_str,
                'homeTeamId': home_team['id'],
                'homeTeamName': _team_display_name(home_team),
                'awayTeamId': away_team['id'],
                'awayTeamName': _team_display_name(away_team),
            })
    return pd.DataFrame(games_list, columns=columns)


def get_game_feed(game_pk):
    """Fetch the game feed JSON for a single game.

    Args:
        game_pk: Game identifier substituted into ``GAME_FEED_URL_TEMPLATE``.

    Returns:
        The decoded JSON body, or ``None`` if all three attempts failed.
    """
    # NOTE(review): compute_team_game_stats reads a top-level 'liveData'
    # object from this payload; confirm this endpoint still returns that
    # layout.
    url = GAME_FEED_URL_TEMPLATE.format(game_pk=game_pk)
    max_attempts = 3
    for attempt in range(max_attempts):
        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            resp = requests.get(url, timeout=10)
            if resp.status_code == 200:
                return resp.json()
        except (requests.RequestException, ValueError):
            # Network failure or undecodable JSON: fall through and retry.
            pass
        # Back off before retrying, whether the failure was an exception or
        # a non-200 status; no point sleeping after the final attempt.
        if attempt < max_attempts - 1:
            time.sleep(1)
    return None


In [None]:

def _went_to_overtime(linescore):
    """Return True if any period in the linescore is flagged as overtime."""
    for period in linescore.get('periods', []):
        period_type = period.get('periodType') or period.get('ordinalNum')
        if period_type in ('OVERTIME', 'OT'):
            return True
    return False


def _count_shot_attempts(all_plays):
    """Tally Corsi and Fenwick events per team id; returns (corsi, fenwick) dicts."""
    corsi_event_types = {'SHOT', 'GOAL', 'MISSED_SHOT', 'BLOCKED_SHOT'}
    fenwick_event_types = {'SHOT', 'GOAL', 'MISSED_SHOT'}
    corsi_counts = {}
    fenwick_counts = {}
    for play in all_plays:
        event_type = play.get('result', {}).get('eventTypeId')
        team_id = (play.get('team') or {}).get('id')
        if not team_id or not event_type:
            continue
        # NOTE(review): every event is credited to the team recorded on the
        # play.  If the feed attributes BLOCKED_SHOT to the blocking team,
        # those attempts land on the wrong side of Corsi -- confirm against a
        # live feed.
        if event_type in corsi_event_types:
            corsi_counts[team_id] = corsi_counts.get(team_id, 0) + 1
        if event_type in fenwick_event_types:
            fenwick_counts[team_id] = fenwick_counts.get(team_id, 0) + 1
    return corsi_counts, fenwick_counts


def _find_primary_goalie(players):
    """Return (id, full name) of the goalie with the most time on ice, or (None, None)."""
    goalie_id = None
    goalie_name = None
    max_toi = 0
    for player_data in players.values():
        stats = player_data.get('stats')
        if not stats or 'goalieStats' not in stats:
            continue
        toi_str = stats['goalieStats'].get('timeOnIce')
        if not toi_str:
            continue
        try:
            minutes, seconds = map(int, toi_str.split(':'))
            total_seconds = minutes * 60 + seconds
        except (ValueError, AttributeError):
            # Unparseable time on ice counts as zero.
            total_seconds = 0
        if total_seconds > max_toi:
            max_toi = total_seconds
            person = player_data.get('person', {})
            goalie_id = person.get('id')
            goalie_name = person.get('fullName')
    return goalie_id, goalie_name


def _share_pct(part, total):
    """Return part / total as a percentage, or None when total is not positive."""
    return (part / total * 100) if total > 0 else None


def compute_team_game_stats(game_json):
    """Compute per-team statistics for a single game from its feed JSON.

    Args:
        game_json: Decoded game feed.  The function reads
            ``liveData.linescore``, ``liveData.boxscore.teams`` and
            ``liveData.plays.allPlays``, plus ``gamePk`` and
            ``gameData.datetime.dateTime``.

    Returns:
        A list with one dict per side ('home', 'away') present in the
        boxscore; empty when ``game_json`` is falsy or has no teams.  Stats
        that cannot be derived because an input is missing are reported as
        ``None`` instead of raising.
    """
    if not game_json:
        return []

    live_data = game_json.get('liveData', {})
    linescore = live_data.get('linescore', {})
    has_shootout = linescore.get('hasShootout', False)
    overtime = _went_to_overtime(linescore)

    boxscore_teams = live_data.get('boxscore', {}).get('teams', {})
    all_plays = live_data.get('plays', {}).get('allPlays', [])
    corsi_counts, fenwick_counts = _count_shot_attempts(all_plays)

    # Game length is the same for both teams, so work it out once.  Each
    # period beyond the third is counted as a full five minutes.
    num_periods = len(linescore.get('periods', []))
    minutes_played = 60 + max(0, num_periods - 3) * 5

    game_id = game_json.get('gamePk')
    game_date = game_json.get('gameData', {}).get('datetime', {}).get('dateTime')
    ot_game = 1 if overtime else 0
    so_game = 1 if has_shootout else 0

    stats_list = []
    for side in ['home', 'away']:
        team_data = boxscore_teams.get(side)
        if not team_data:
            continue
        team_info = team_data.get('team', {})
        team_id = team_info.get('id')
        team_name = team_info.get('name')
        team_stats = team_data.get('teamStats', {}).get('teamSkaterStats', {})

        goals_for = team_stats.get('goals')
        shots_for = team_stats.get('shots')
        pim = team_stats.get('pim')
        pp_goals = team_stats.get('powerPlayGoals')
        pp_opps = team_stats.get('powerPlayOpportunities')
        faceoff_win_pct = team_stats.get('faceOffWinPercentage')
        blocked = team_stats.get('blocked')
        takeaways = team_stats.get('takeaways')
        giveaways = team_stats.get('giveaways')
        hits = team_stats.get('hits')

        # The opponent entry may be absent from a partial feed; fall back to
        # empty mappings so the row is still emitted with None placeholders.
        opponent_side = 'away' if side == 'home' else 'home'
        opponent_data = boxscore_teams.get(opponent_side) or {}
        opponent_info = opponent_data.get('team', {})
        opp_team_id = opponent_info.get('id')
        opp_team_name = opponent_info.get('name')
        opp_stats = opponent_data.get('teamStats', {}).get('teamSkaterStats', {})
        goals_against = opp_stats.get('goals')
        shots_against = opp_stats.get('shots')
        opp_pp_goals = opp_stats.get('powerPlayGoals')
        opp_pp_opps = opp_stats.get('powerPlayOpportunities')

        # Power play: 0.0 with no opportunities, None when goals are unknown.
        if pp_opps in (None, 0):
            pp_pct = 0.0
        elif pp_goals is None:
            pp_pct = None
        else:
            pp_pct = pp_goals / pp_opps * 100

        # Penalty kill: 100.0 when the opponent had no opportunities.
        if opp_pp_opps in (None, 0):
            pk_pct = 100.0
        elif opp_pp_goals is None:
            pk_pct = None
        else:
            pk_pct = (1 - (opp_pp_goals / opp_pp_opps)) * 100

        # Save percentage and the decimal form used for PDO.  With no shots
        # against, PDO treats the save rate as perfect (1).
        if shots_against in (None, 0):
            save_pct = None
            save_pct_decimal = 1
        elif goals_against is None:
            save_pct = None
            save_pct_decimal = None
        else:
            save_pct_decimal = (shots_against - goals_against) / shots_against
            save_pct = save_pct_decimal * 100

        # Goals against average, scaled to 60 minutes.
        gaa = (goals_against * 60 / minutes_played) if goals_against is not None else None

        # Win/loss logic: a loss in overtime or a shootout still earns a point.
        score_known = goals_for is not None and goals_against is not None
        win = 1 if (score_known and goals_for > goals_against) else 0
        loss = 1 if (score_known and goals_for < goals_against) else 0
        points = 0
        if win:
            points = 2
        elif loss and (overtime or has_shootout):
            points = 1

        one_goal_game = 1 if (score_known and abs(goals_for - goals_against) == 1) else 0
        shutout = 1 if (goals_against == 0) else 0

        # Faceoff percentage may arrive as a string.
        try:
            faceoff_win_pct_float = float(faceoff_win_pct) if faceoff_win_pct not in (None, '') else None
        except (TypeError, ValueError):
            faceoff_win_pct_float = None

        # Corsi / Fenwick
        corsi_for = corsi_counts.get(team_id, 0)
        corsi_against = corsi_counts.get(opp_team_id, 0)
        corsi_for_pct = _share_pct(corsi_for, corsi_for + corsi_against)

        fenwick_for = fenwick_counts.get(team_id, 0)
        fenwick_against = fenwick_counts.get(opp_team_id, 0)
        fenwick_for_pct = _share_pct(fenwick_for, fenwick_for + fenwick_against)

        # Shooting % and PDO (both on a 0-100 scale, so PDO centres on 100).
        if shots_for in (None, 0):
            shooting_pct = 0
        elif goals_for is None:
            shooting_pct = None
        else:
            shooting_pct = goals_for / shots_for * 100
        if shooting_pct is None or save_pct_decimal is None:
            pdo = None
        else:
            pdo = shooting_pct + save_pct_decimal * 100

        # Goalie with the most time on ice (not necessarily the starter).
        goalie_id, goalie_name = _find_primary_goalie(team_data.get('players', {}))

        scratch_count = len(team_data.get('scratches', []) or [])

        stats_list.append({
            'game_id': game_id,
            'game_date': game_date,
            'team_id': team_id,
            'team_name': team_name,
            'opponent_team_id': opp_team_id,
            'opponent_team_name': opp_team_name,
            'home_away': 'home' if side == 'home' else 'away',
            'goals_for': goals_for,
            'goals_against': goals_against,
            'goal_diff': (goals_for or 0) - (goals_against or 0),
            'shots_for': shots_for,
            'shots_against': shots_against,
            'penalty_minutes': pim,
            'power_play_goals': pp_goals,
            'power_play_opportunities': pp_opps,
            'power_play_percentage': pp_pct,
            'penalty_kill_percentage': pk_pct,
            'faceoff_win_percentage': faceoff_win_pct_float,
            'blocked': blocked,
            'takeaways': takeaways,
            'giveaways': giveaways,
            'hits': hits,
            'save_percentage': save_pct,
            'goals_against_average': gaa,
            'shutout': shutout,
            'win': win,
            'loss': loss,
            'ot_game': ot_game,
            'shootout_game': so_game,
            'points': points,
            'one_goal_game': one_goal_game,
            'corsi_for': corsi_for,
            'corsi_against': corsi_against,
            'corsi_for_percentage': corsi_for_pct,
            'fenwick_for': fenwick_for,
            'fenwick_against': fenwick_against,
            'fenwick_for_percentage': fenwick_for_pct,
            'shooting_percentage': shooting_pct,
            'pdo': pdo,
            'goalie_id': goalie_id,
            'goalie_name': goalie_name,
            'scratch_count': scratch_count,
        })

    return stats_list


In [None]:

# Define the date range for the 2024-25 regular season.  Adjust if the season dates differ.
# The range is deliberately wide: get_daily_schedule keeps only regular season games.
season_start = '2024-09-01'
season_end = '2025-06-30'

# Retrieve the schedule of regular season games
schedule_df = get_full_schedule(season_start, season_end)
print(f"Found {len(schedule_df)} regular season games between {season_start} and {season_end}.")

# Collect team-level stats for each game.  Only the game id is needed, so
# iterate that column directly; .get() tolerates an empty schedule frame that
# has no columns at all.
team_stats_records = []
for game_pk in schedule_df.get('gamePk', []):
    game_data = get_game_feed(game_pk)
    if not game_data:
        print(f"Warning: Could not fetch game feed for game {game_pk}")
        continue
    team_stats_records.extend(compute_team_game_stats(game_data))

team_stats_df = pd.DataFrame(team_stats_records)

# Display a preview of the resulting data.  A bare expression is only rendered
# when it is the last statement of a notebook cell, so print it explicitly.
print(team_stats_df.head())

# Save to CSV for further analysis or model training
output_csv = 'team_game_stats_2024_25.csv'
team_stats_df.to_csv(output_csv, index=False)
print(f"Saved team-level game stats to {output_csv}")
