## Setup

In [27]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.float_format', lambda x: '%.2f' % x)

## Find a Recent Completed Game

Before we can perform advanced analytics, we need to find a completed game to analyze. We'll search the schedule for games with `gameState == 'OFF'` (finished games).

In [28]:
from scrapernhl.scrapers.schedule import scrapeSchedule
from datetime import datetime

# Get recent games from any team
schedule = scrapeSchedule("MTL", "20252026")
completed = schedule[schedule['gameState'] == 'OFF']

if len(completed) > 0:
    game_id = completed.iloc[0]['id']
    game_info = completed.iloc[0]
    print(f"Using game: {game_info['awayTeam.abbrev']} @ {game_info['homeTeam.abbrev']}")
    print(f"Date: {game_info['gameDate']}")
    print(f"Game ID: {game_id}")
else:
    print("No completed games found. Using a known game ID...")
    game_id = 2024020001

Using game: MTL @ TOR
Date: 2025-10-08
Game ID: 2025020004


## 1. Complete Game Data with Metadata

The `scrape_game()` function returns comprehensive game data including play-by-play events and roster information in a tuple format.

In [None]:
from scrapernhl import scrape_game

# Get comprehensive game data
game_tuple = scrape_game(game_id=game_id, include_tuple=True)

pbp = game_tuple.data

print(f"Game: {game_tuple.awayTeam} @ {game_tuple.homeTeam}")
print(f"Total events: {len(pbp)}")

print("\nRosters:")
display(game_tuple.rosters)

Game: MTL @ TOR
Total events: 1887
Rosters : 


Unnamed: 0,teamId,playerId,sweaterNumber,positionCode,headshot,firstName.default,lastName.default,firstName.cs,firstName.de,firstName.es,firstName.fi,firstName.sk,firstName.sv,lastName.cs,lastName.fi,lastName.sk,isHome,fullName
0,10,8475166,91,C,https://assets.nhle.com/mugs/nhl/20252026/TOR/...,John,Tavares,,,,,,,,,,1,John Tavares
1,10,8475171,95,D,https://assets.nhle.com/mugs/nhl/20252026/TOR/...,Oliver,Ekman-Larsson,,,,,,,,,,1,Oliver Ekman-Larsson
2,10,8475690,8,D,https://assets.nhle.com/mugs/nhl/20252026/TOR/...,Chris,Tanev,Christopher,Christopher,Christopher,Christopher,Christopher,Christopher,,,,1,Chris Tanev
3,10,8475714,19,C,https://assets.nhle.com/mugs/nhl/20252026/TOR/...,Calle,Jarnkrok,,,,,,,Järnkrok,Järnkrok,Järnkrok,1,Calle Jarnkrok
4,8,8475848,11,R,https://assets.nhle.com/mugs/nhl/20252026/MTL/...,Brendan,Gallagher,,,,,,,,,,0,Brendan Gallagher
5,10,8476853,44,D,https://assets.nhle.com/mugs/nhl/20252026/TOR/...,Morgan,Rielly,,,,,,,,,,1,Morgan Rielly
6,8,8476875,8,D,https://assets.nhle.com/mugs/nhl/20252026/MTL/...,Mike,Matheson,,,,,,,,,,0,Mike Matheson
7,10,8476931,22,D,https://assets.nhle.com/mugs/nhl/20252026/TOR/...,Jake,McCabe,,,,,,,,,,1,Jake McCabe
8,10,8476932,41,G,https://assets.nhle.com/mugs/nhl/20252026/TOR/...,Anthony,Stolarz,,,,,,,,,,1,Anthony Stolarz
9,8,8476981,17,R,https://assets.nhle.com/mugs/nhl/20252026/MTL/...,Josh,Anderson,,,,,,,,,,0,Josh Anderson


## 2. Expected Goals (xG) Analysis

Expected Goals (xG) is a metric that estimates the probability of a shot resulting in a goal based on factors like shot location, type, and game situation.

In [None]:
from scrapernhl import engineer_xg_features, predict_xg_for_pbp

# Engineer xG features
pbp_with_features = engineer_xg_features(pbp)

# Predict xG
pbp_with_xg = predict_xg_for_pbp(pbp_with_features)

# Get shots and goals
shots = pbp_with_xg[pbp_with_xg['Event'].isin(['SHOT', 'GOAL', 'MISS'])].copy()

print(f"Total shot attempts: {len(shots)}")

print("\nShot attempts with xG:")
display(shots[['period', 'timeInPeriod', 'Event', 'eventTeam', 'player1Id', 'xG', 'distanceFromGoal']].head(10))

Total shot attempts: 82

Shot attempts with xG:


Unnamed: 0,period,timeInPeriod,Event,eventTeam,player1Id,xG,distanceFromGoal
19,1,00:27,SHOT,TOR,8476931,0.06,34.23
40,1,00:54,SHOT,TOR,8476853,0.06,22.67
49,1,01:00,GOAL,TOR,8482259,0.13,18.11
66,1,01:23,SHOT,MTL,8482964,0.02,53.71
105,1,02:16,SHOT,MTL,8483515,0.02,14.04
108,1,02:33,SHOT,MTL,8483515,0.08,16.16
158,1,04:07,SHOT,TOR,8481582,0.02,44.05
199,1,05:22,SHOT,MTL,8476981,0.1,24.0
206,1,05:39,GOAL,MTL,8482775,0.04,38.28
226,1,05:50,SHOT,TOR,8477503,0.03,39.82


In [None]:
# Calculate team xG totals
home_team = game_tuple.homeTeam
away_team = game_tuple.awayTeam

home_shots = shots[shots['eventTeam'] == home_team]
away_shots = shots[shots['eventTeam'] == away_team]

home_xg = home_shots['xG'].sum()
away_xg = away_shots['xG'].sum()

home_goals = len(pbp[(pbp['Event'] == 'GOAL') & (pbp['eventTeam'] == home_team)])
away_goals = len(pbp[(pbp['Event'] == 'GOAL') & (pbp['eventTeam'] == away_team)])

print(f"{away_team} @ {home_team}")
print(f"Score: {away_goals} - {home_goals}")
print(f"xG: {away_xg:.2f} - {home_xg:.2f}")
print(f"xG differential: {home_xg - away_xg:.2f} (positive favors {home_team})")


MTL @ TOR
Score: 2 - 5
xG: 2.93 - 3.50

xG differential: 0.57 (positive favors TOR)


## 3. Time on Ice (TOI) Analysis

Calculate how much time each player spent on the ice at different game strengths (5v5, power play, penalty kill, etc.).

In [32]:
from scrapernhl import toi_by_strength

# Calculate TOI by strength
toi_df = toi_by_strength(pbp)

print("TOI by strength:")
display(toi_df.head(10))

TOI by strength:


Unnamed: 0,strength,seconds,minutes
2,5v5,3015,50.25
0,4v5,240,4.0
1,5v4,240,4.0
4,6*v5,87,1.45
3,5v6*,18,0.3


In [33]:
from scrapernhl import combo_on_ice_stats_both_teams

# Individual player stats
combo_stats = combo_on_ice_stats_both_teams(
    pbp,
    n_team=1,
    m_opp=0,          # set 0 for "vs ANY"
    min_TOI=0,
    include_goalies=False,
    rates=True,
    player_df=game_tuple.rosters  # DataFrame with ids/teams/positions
)

display(combo_stats[['player1Id', 'player1Name', 'player1Position', 'player1Number', 'team', 'opp', 'strength', 'seconds', 'minutes']])

Unnamed: 0,player1Id,player1Name,player1Position,player1Number,team,opp,strength,seconds,minutes
0,8481618,Alex Newhook,F,15,MTL,TOR,4v5,128.00,2.13
1,8478851,Alexandre Carrier,D,45,MTL,TOR,4v5,131.00,2.18
2,8482964,Arber Xhekaj,D,72,MTL,TOR,4v5,5.00,0.08
3,8478133,Jake Evans,F,71,MTL,TOR,4v5,113.00,1.88
4,8476981,Josh Anderson,F,17,MTL,TOR,4v5,94.00,1.57
...,...,...,...,...,...,...,...,...,...
120,8476853,Morgan Rielly,D,44,TOR,MTL,6*v5,5.00,0.08
121,8478462,Nicolas Roy,F,55,TOR,MTL,6*v5,43.00,0.72
122,8481122,Simon Benoit,D,2,TOR,MTL,6*v5,36.00,0.60
123,8478904,Steven Lorentz,F,18,TOR,MTL,6*v5,65.00,1.08


## 4. Player Combinations Analysis

Analyze which players play together most often by examining 2-player (defensive pairs) and 3-player (forward lines) combinations.

### Defensive Pairs (2-player combinations)

Find the most common defensive pairings at 5v5.

In [34]:
# Get 2-player combinations (defensive pairs)
combo_stats_2 = combo_on_ice_stats_both_teams(
    pbp,
    n_team=2,
    m_opp=0,          # set 0 for "vs ANY"
    min_TOI=60,
    include_goalies=False,
    rates=True,
    player_df=game_tuple.rosters  # DataFrame with ids/teams/positions
)

top_10_pairs_5v5 = (combo_stats_2
                    .query("team_combo_pos == '2D'")  # Get defensive pairs
                    .query("strength == '5v5'")  # Filter for 5v5
                    .nlargest(10, 'seconds')
                    )[['team_combo', 'team_combo_ids', 'team', 'opp', 'strength', 'seconds', 'minutes']]

print("Most common defensive pairs (5v5):")
display(top_10_pairs_5v5)

Most common defensive pairs (5v5):


Unnamed: 0,team_combo,team_combo_ids,team,opp,strength,seconds,minutes
168,Chris Tanev / Jake McCabe,"(8475690, 8476931)",TOR,MTL,5v5,1065.0,17.75
89,Mike Matheson / Noah Dobson,"(8476875, 8480865)",MTL,TOR,5v5,1026.0,17.1
206,Morgan Rielly / Brandon Carlo,"(8476853, 8478443)",TOR,MTL,5v5,793.0,13.22
71,Kaiden Guhle / Lane Hutson,"(8482087, 8483457)",MTL,TOR,5v5,698.0,11.63
229,Oliver Ekman-Larsson / Simon Benoit,"(8475171, 8481122)",TOR,MTL,5v5,654.0,10.9
28,Alexandre Carrier / Arber Xhekaj,"(8478851, 8482964)",MTL,TOR,5v5,400.0,6.67
32,Alexandre Carrier / Kaiden Guhle,"(8478851, 8482087)",MTL,TOR,5v5,335.0,5.58
41,Arber Xhekaj / Lane Hutson,"(8482964, 8483457)",MTL,TOR,5v5,200.0,3.33
81,Mike Matheson / Alexandre Carrier,"(8476875, 8478851)",MTL,TOR,5v5,107.0,1.78
226,Oliver Ekman-Larsson / Morgan Rielly,"(8475171, 8476853)",TOR,MTL,5v5,104.0,1.73


### Forward Lines (3-player combinations)

Find the most common forward line combinations at 5v5.

In [35]:
# Get 3-player combinations (forward lines)
combo_stats_3 = combo_on_ice_stats_both_teams(
    pbp,
    n_team=3,
    m_opp=0,          # set 0 for "vs ANY"
    min_TOI=60,
    include_goalies=False,
    rates=True,
    player_df=game_tuple.rosters  # DataFrame with ids/teams/positions
)

top_10_lines_5v5 = (combo_stats_3
                    .query("team_combo_pos == '3F'")  # Get offensive lines
                    .query("strength == '5v5'")  # Filter for 5v5
                    .nlargest(10, 'seconds')
                    )[['team_combo', 'team_combo_ids', 'team', 'opp', 'strength', 'seconds', 'minutes']]

print("Most common offensive lines (5v5):")
display(top_10_lines_5v5)

Most common offensive lines (5v5):


Unnamed: 0,team_combo,team_combo_ids,team,opp,strength,seconds,minutes
124,Nick Suzuki / Cole Caufield / Juraj Slafkovský,"(8480018, 8481540, 8483515)",MTL,TOR,5v5,822.0,13.7
237,John Tavares / William Nylander / Bobby McMann,"(8475166, 8477939, 8482259)",TOR,MTL,5v5,686.0,11.43
175,Auston Matthews / Matias Maccelli / Matthew Knies,"(8479318, 8481711, 8482720)",TOR,MTL,5v5,632.0,10.53
24,Alex Newhook / Oliver Kapanen / Ivan Demidov,"(8481618, 8482775, 8484984)",MTL,TOR,5v5,589.0,9.82
53,Brendan Gallagher / Kirby Dach / Zachary Bolduc,"(8475848, 8481523, 8482737)",MTL,TOR,5v5,519.0,8.65
83,Josh Anderson / Jake Evans / Patrik Laine,"(8476981, 8478133, 8479339)",MTL,TOR,5v5,513.0,8.55
191,Calle Jarnkrok / Nicolas Roy / Steven Lorentz,"(8475714, 8478462, 8478904)",TOR,MTL,5v5,492.0,8.2
244,Max Domi / Dakota Joshua / Nicholas Robertson,"(8477503, 8478057, 8481582)",TOR,MTL,5v5,475.0,7.92
240,Max Domi / Auston Matthews / Matthew Knies,"(8477503, 8479318, 8482720)",TOR,MTL,5v5,103.0,1.72


## 5. On-Ice Statistics by Player

Calculate advanced metrics for each player including Corsi (shot attempts for/against), expected goals, and actual goals when they're on the ice.

In [36]:
from scrapernhl import on_ice_stats_by_player_strength

# Calculate on-ice stats
player_stats = on_ice_stats_by_player_strength(
    pbp_with_xg,
    include_goalies=False,
    rates=True  # Per-60 rates
)

print("On-ice statistics by player:")
display(player_stats.head(10))

On-ice statistics by player:


Unnamed: 0,player1Id,player1Name,eventTeam,strength,seconds,minutes,CF,CA,FF,FA,SF,SA,GF,GA,xG,xGA,PF,PA,CF_per60,CA_per60,FF_per60,FA_per60,SF_per60,SA_per60,xG_per60,xGA_per60,GF_per60,GA_per60,PF_per60,PA_per60
0,8481618,Alex Newhook,MTL,4v5,128.0,2.13,3.0,0.0,2.0,0.0,2.0,0.0,1.0,0.0,0.1,0.0,0.0,0.0,84.38,0.0,56.25,0.0,56.25,0.0,2.85,0.0,28.12,0.0,0.0,0.0
1,8481618,Alex Newhook,MTL,5v4,12.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,8481618,Alex Newhook,MTL,5v5,775.0,12.92,6.0,15.0,5.0,12.0,2.0,9.0,0.0,0.0,0.12,0.45,0.0,0.0,27.87,69.68,23.23,55.74,9.29,41.81,0.57,2.09,0.0,0.0,0.0,0.0
3,8478851,Alexandre Carrier,MTL,4v5,131.0,2.18,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.03,0.0,0.0,27.48,27.48,0.0,27.48,0.0,27.48,0.0,0.87,0.0,0.0,0.0,0.0
4,8478851,Alexandre Carrier,MTL,5v5,913.0,15.22,14.0,17.0,9.0,11.0,6.0,10.0,0.0,1.0,0.27,0.57,0.0,0.0,55.2,67.03,35.49,43.37,23.66,39.43,1.07,2.26,0.0,3.94,0.0,0.0
5,8482964,Arber Xhekaj,MTL,4v5,5.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,8482964,Arber Xhekaj,MTL,5v5,663.0,11.05,15.0,13.0,9.0,7.0,6.0,6.0,1.0,1.0,0.49,0.43,0.0,0.0,81.45,70.59,48.87,38.01,32.58,32.58,2.65,2.34,5.43,5.43,0.0,0.0
7,8475848,Brendan Gallagher,MTL,5v4,46.0,0.77,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,78.26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,8475848,Brendan Gallagher,MTL,5v5,746.0,12.43,14.0,15.0,7.0,12.0,7.0,8.0,1.0,2.0,0.77,0.82,1.0,0.0,67.56,72.39,33.78,57.91,33.78,38.61,3.69,3.94,4.83,9.65,4.83,0.0
9,8475848,Brendan Gallagher,MTL,5v6*,15.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,240.0


In [None]:
# Best Corsi For % (min 5 min TOI)
qualified = player_stats[player_stats['minutes'] >= 5].copy()
qualified['CF%'] = qualified['CF'] / (qualified['CF'] + qualified['CA']) * 100

print("Best Corsi For % (5+ min TOI):")
display(qualified.nlargest(10, 'CF%')[['player1Name', 'eventTeam', 'strength', 'minutes', 'CF', 'CA', 'CF%']])


Best Corsi For % (5+ min TOI):


Unnamed: 0,player1Name,eventTeam,strength,minutes,CF,CA,CF%
23,Juraj Slafkovský,MTL,5v5,15.32,22.0,9.0,70.97
41,Nick Suzuki,MTL,5v5,15.92,21.0,9.0,70.0
103,Morgan Rielly,TOR,5v5,17.38,24.0,11.0,68.57
115,Simon Benoit,TOR,5v5,14.4,12.0,6.0,66.67
12,Cole Caufield,MTL,5v5,16.0,22.0,12.0,64.71
68,Brandon Carlo,TOR,5v5,16.08,19.0,12.0,61.29
79,Dakota Joshua,TOR,5v5,9.6,11.0,8.0,57.89
86,John Tavares,TOR,5v5,13.58,13.0,10.0,56.52
55,Zachary Bolduc,MTL,5v5,10.1,14.0,11.0,56.0
45,Noah Dobson,MTL,5v5,18.35,19.0,15.0,55.88


In [None]:
# Best xG differential
qualified['xG_diff'] = qualified['xG'] - qualified['xGA']

print("Best xG differential (5+ min TOI):")
display(qualified.nlargest(10, 'xG_diff')[['player1Name', 'eventTeam', 'strength', 'minutes', 'xG', 'xGA', 'xG_diff']])


Best xG differential (5+ min TOI):


Unnamed: 0,player1Name,eventTeam,strength,minutes,xG,xGA,xG_diff
12,Cole Caufield,MTL,5v5,16.0,1.53,0.57,0.97
45,Noah Dobson,MTL,5v5,18.35,1.3,0.52,0.78
23,Juraj Slafkovský,MTL,5v5,15.32,1.27,0.52,0.76
41,Nick Suzuki,MTL,5v5,15.92,1.25,0.52,0.74
103,Morgan Rielly,TOR,5v5,17.38,1.07,0.36,0.7
36,Mike Matheson,MTL,5v5,20.08,1.41,0.85,0.56
122,William Nylander,TOR,5v5,14.5,0.91,0.38,0.53
68,Brandon Carlo,TOR,5v5,16.08,0.9,0.43,0.47
86,John Tavares,TOR,5v5,13.58,0.69,0.26,0.43
115,Simon Benoit,TOR,5v5,14.4,0.46,0.06,0.39


## 6. Team-Level Aggregates

Aggregate statistics to the team level, showing performance at different game strengths (5v5, PP, PK, etc.).

In [39]:
from scrapernhl import team_strength_aggregates

# Calculate team stats by strength
team_stats = team_strength_aggregates(
    pbp_with_xg,
    include_goalies=False,
    rates=True,
    min_TOI=1
)

print("Team statistics by strength:")
display(team_stats[['team', 'minutes', 'CF', 'CA', 'xG', 'xGA', 'GF', 'GA']].sort_values(by=['minutes'], ascending=False))

Team statistics by strength:


Unnamed: 0,team,minutes,CF,CA,xG,xGA,GF,GA
2,MTL,50.25,48.0,48.0,2.31,1.86,1.0,3.0
7,TOR,50.25,48.0,48.0,1.86,2.31,3.0,1.0
0,MTL,4.0,5.0,2.0,0.21,0.09,1.0,0.0
1,MTL,4.0,4.0,3.0,0.21,0.0,0.0,0.0
5,TOR,4.0,2.0,5.0,0.09,0.21,0.0,1.0
6,TOR,4.0,3.0,4.0,0.0,0.21,0.0,0.0
4,MTL,1.45,4.0,3.0,0.21,1.55,0.0,2.0
9,TOR,1.45,3.0,4.0,1.55,0.21,2.0,0.0
3,MTL,0.3,0.0,0.0,0.0,0.0,0.0,0.0
8,TOR,0.3,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# 5v5 stats only
stats_5v5 = team_stats[team_stats['strength'] == '5v5'].copy()

print("5v5 Team Stats:")
display(stats_5v5[['team', 'minutes', 'CF', 'CA', 'xG', 'xGA', 'GF', 'GA']])


5v5 Team Stats:


Unnamed: 0,team,minutes,CF,CA,xG,xGA,GF,GA
2,MTL,50.25,48.0,48.0,2.31,1.86,1.0,3.0
7,TOR,50.25,48.0,48.0,1.86,2.31,3.0,1.0


## 7. Multi-Game Season Analysis (Preview)

Demonstrate how to scrape and analyze multiple games to build season-long statistics. This example processes 3 games for demonstration.

In [None]:
# Scrape multiple games (just 3 for demonstration)
print("Scraping multiple games for season analysis...")

game_ids_to_scrape = completed.head(3)['id'].tolist()
all_team_stats = []

for gid in game_ids_to_scrape:
    try:
        print(f"Processing game {gid}...")
        game_tuple = scrape_game(gid, include_tuple=True)
        pbp = game_tuple.data
        pbp = engineer_xg_features(pbp)
        pbp = predict_xg_for_pbp(pbp)
        
        stats = team_strength_aggregates(pbp)
        stats['game_id'] = gid
        all_team_stats.append(stats)
    except Exception as e:
        print(f"Error with game {gid}: {e}")

# Combine all games
if all_team_stats:
    season_stats = pd.concat(all_team_stats, ignore_index=True)
    
    # Aggregate by team
    team_summary = season_stats.groupby('team').agg({
        'minutes': 'sum',
        'CF': 'sum',
        'CA': 'sum',
        'xG': 'sum',
        'xGA': 'sum',
        'GF': 'sum',
        'GA': 'sum'
    }).reset_index()
    
    team_summary['CF%'] = 100 * team_summary['CF'] / (team_summary['CF'] + team_summary['CA'])
    
    print("\nSeason stats across sampled games:")
    display(team_summary)

Scraping multiple games for season analysis...

Processing game 2025020004...
Processing game 2025020010...
Processing game 2025020031...

Season stats across sampled games:


Unnamed: 0,team,minutes,CF,CA,xG,xGA,GF,GA,CF%
0,CHI,60.0,52.0,53.0,1.72,3.76,2.0,3.0,49.52
1,DET,60.0,57.0,42.0,2.24,1.61,1.0,5.0,57.58
2,MTL,180.0,156.0,165.0,8.3,7.46,10.0,8.0,48.6
3,TOR,60.0,56.0,61.0,3.5,2.93,5.0,2.0,47.86
