## Setup

In [1]:
import pandas as pd
# Notebook-wide pandas display settings, applied in one call as
# (option, value) pairs: no cap on displayed columns, display width left
# as None, and floats rendered with two decimal places.
pd.set_option(
    "display.max_columns", None,
    "display.width", None,
    "display.float_format", "{:.2f}".format,
)

## 🏒 Main Function: scrapeGame()

**`scrapeGame()` is the primary function** of this package - it retrieves complete game data including all play-by-play events, rosters, and metadata in one call. This is what most users will use!

In [4]:
from scrapernhl import scrapeGame

# Pull everything for one game in a single call: play-by-play events,
# rosters, and metadata. The ID below is a known completed game.
game_id = 2024020001
game_data = scrapeGame(game_id, include_tuple=True)

print("Game: {} @ {}".format(game_data.awayTeam, game_data.homeTeam))
print("Total events: {}".format(len(game_data.data)))

# The play-by-play table is exposed as `.data`
pbp = game_data.data
print("\nFirst few events:")
display(pbp.loc[:, ['period', 'timeInPeriod', 'Event', 'eventTeam', 'gameId']].head())

# The roster table is exposed as `.rosters`
print("\nRoster info:")
display(game_data.rosters.head())

Game: NJD @ BUF
Total events: 1985

First few events:


Unnamed: 0,period,timeInPeriod,Event,eventTeam,gameId
0,,,PGSTR,,2024020001
1,,,PGEND,,2024020001
2,,,ANTHEM,,2024020001
3,1.0,00:00,PSTR,NJD,2024020001
4,1.0,0:00,ON,BUF,2024020001



Roster info:


Unnamed: 0,teamId,playerId,sweaterNumber,positionCode,headshot,firstName.default,lastName.default,lastName.cs,lastName.fi,lastName.sk,lastName.sv,firstName.cs,firstName.sk,isHome,fullName
0,1,8474593,25,G,https://assets.nhle.com/mugs/nhl/20242025/NJD/...,Jacob,Markstrom,Markström,Markström,Markström,Markström,,,0,Jacob Markstrom
1,1,8474596,34,G,https://assets.nhle.com/mugs/nhl/20242025/NJD/...,Jake,Allen,,,,,,,0,Jake Allen
2,1,8475193,90,L,https://assets.nhle.com/mugs/nhl/20242025/NJD/...,Tomas,Tatar,,,,,Tomáš,Tomáš,0,Tomas Tatar
3,1,8475287,56,L,https://assets.nhle.com/mugs/nhl/20242025/NJD/...,Erik,Haula,,,,,,,0,Erik Haula
4,1,8475455,5,D,https://assets.nhle.com/mugs/nhl/20242025/NJD/...,Brenden,Dillon,,,,,,,0,Brenden Dillon


## 1. Scraping NHL Teams

Retrieve information about all NHL teams including their names, IDs, and locations.

In [7]:
from scrapernhl import scrapeTeams

# Fetch the team table and report how many rows came back
teams = scrapeTeams()
print("Found {} teams".format(len(teams)))

# Preview the identifying columns for the first ten teams
display(
    teams
    .loc[:, ['fullName', 'triCode', 'id', 'placeName', 'commonName']]
    .head(10)
)

Found 32 teams


Unnamed: 0,fullName,triCode,id,placeName,commonName
0,New Jersey Devils,NJD,1,New Jersey,Devils
1,New York Islanders,NYI,2,NY Islanders,Islanders
2,New York Rangers,NYR,3,NY Rangers,Rangers
3,Philadelphia Flyers,PHI,4,Philadelphia,Flyers
4,Pittsburgh Penguins,PIT,5,Pittsburgh,Penguins
5,Boston Bruins,BOS,6,Boston,Bruins
6,Buffalo Sabres,BUF,7,Buffalo,Sabres
7,Montréal Canadiens,MTL,8,Montréal,Canadiens
8,Ottawa Senators,OTT,9,Ottawa,Senators
9,Toronto Maple Leafs,TOR,10,Toronto,Maple Leafs


## 2. Scraping Team Schedule

Get the complete schedule for a specific team and season, including game dates, opponents, scores, and game states.

In [8]:
from scrapernhl import scrapeSchedule

# Montreal Canadiens schedule for the "20252026" season
schedule = scrapeSchedule("MTL", "20252026")
print("MTL has {} games this season".format(len(schedule)))

# Preview the first five games: date, game type, both teams with their
# scores, the last period type, and the game state
display(
    schedule
    .loc[:, ['gameDate', 'gameType',
             'homeTeam.abbrev', 'homeTeam.score',
             'awayTeam.abbrev', 'awayTeam.score',
             'gameOutcome.lastPeriodType', 'gameState']]
    .head()
)

ℹ Fetching schedule for MTL (20252026)...
MTL has 88 games this season


Unnamed: 0,gameDate,gameType,homeTeam.abbrev,homeTeam.score,awayTeam.abbrev,awayTeam.score,gameOutcome.lastPeriodType,gameState
0,2025-09-22,1,MTL,2.0,PIT,1.0,SO,FINAL
1,2025-09-23,1,MTL,4.0,PHI,2.0,REG,FINAL
2,2025-09-25,1,MTL,2.0,TOR,7.0,REG,FINAL
3,2025-09-27,1,TOR,2.0,MTL,4.0,REG,FINAL
4,2025-09-30,1,OTT,0.0,MTL,5.0,REG,FINAL


## 3. Current Standings

Fetch the league standings for a specific date, including wins, losses, points, and point percentage.

In [9]:
from scrapernhl import scrapeStandings
from datetime import datetime

# Request standings for today's date, formatted as YYYY-MM-DD
today = f"{datetime.now():%Y-%m-%d}"
standings = scrapeStandings(today)

print("Standings as of {}:".format(today))

# Top ten teams by point percentage, highest first
display(
    standings
    .loc[:, ['teamName.default', 'teamAbbrev.default', 'gamesPlayed',
             'wins', 'losses', 'otLosses', 'points', 'pointPctg', 'date']]
    .sort_values(by='pointPctg', ascending=False)
    .head(10)
)

ℹ Fetching standings for 2026-01-06...
Standings as of 2026-01-06:


Unnamed: 0,teamName.default,teamAbbrev.default,gamesPlayed,wins,losses,otLosses,points,pointPctg,date
0,Colorado Avalanche,COL,42,31,4,7,69,0.82,2026-01-06
1,Dallas Stars,DAL,43,25,10,8,58,0.67,2026-01-06
2,Minnesota Wild,MIN,44,25,11,8,58,0.66,2026-01-06
3,Tampa Bay Lightning,TBL,42,26,13,3,55,0.65,2026-01-06
4,Carolina Hurricanes,CAR,43,26,14,3,55,0.64,2026-01-06
8,Philadelphia Flyers,PHI,41,22,12,7,51,0.62,2026-01-06
6,Montréal Canadiens,MTL,42,23,13,6,52,0.62,2026-01-06
5,Detroit Red Wings,DET,44,25,15,4,54,0.61,2026-01-06
7,New York Islanders,NYI,43,24,15,4,52,0.6,2026-01-06
10,Pittsburgh Penguins,PIT,41,20,12,9,49,0.6,2026-01-06


## 4. Team Roster

Get the complete roster for a team, including player names, positions, physical attributes, and biographical information.

In [10]:
from scrapernhl import scrapeRoster

# Montreal Canadiens roster for the "20252026" season
roster = scrapeRoster("MTL", "20252026")

# Split the roster by position code: forwards are C / L / R
# (centers, left wings, right wings), then defense (D) and goalies (G)
forwards = roster.loc[roster['positionCode'].isin(['C', 'L', 'R'])]
defensemen = roster.loc[roster['positionCode'].eq('D')]
goalies = roster.loc[roster['positionCode'].eq('G')]

print("Forwards: {}, Defense: {}, Goalies: {}".format(
    len(forwards), len(defensemen), len(goalies)))

print("\nForwards:")
# First ten forwards with bio columns, tagged with a constant team column
display(
    forwards
    .loc[:, ['id', 'firstName.default', 'lastName.default', 'positionCode',
             'shootsCatches', 'sweaterNumber', 'heightInInches',
             'weightInPounds', 'birthDate', 'birthCountry']]
    .assign(team="MTL")
    .head(10)
)

Forwards: 17, Defense: 8, Goalies: 3

Forwards:


Unnamed: 0,id,firstName.default,lastName.default,positionCode,shootsCatches,sweaterNumber,heightInInches,weightInPounds,birthDate,birthCountry,team
0,8476981,Josh,Anderson,R,R,17,75,226,1994-05-07,CAN,MTL
1,8483424,Owen,Beck,C,R,62,72,199,2004-02-03,CAN,MTL
2,8478104,Sammy,Blais,L,L,27,74,206,1996-06-17,CAN,MTL
3,8482737,Zachary,Bolduc,R,L,76,72,187,2003-02-24,CAN,MTL
4,8481540,Cole,Caufield,R,R,13,68,175,2001-01-02,USA,MTL
5,8481523,Kirby,Dach,C,R,77,76,221,2001-01-21,CAN,MTL
6,8476479,Phillip,Danault,C,L,24,73,200,1993-02-24,CAN,MTL
7,8484984,Ivan,Demidov,R,L,93,73,192,2005-12-10,RUS,MTL
8,8478133,Jake,Evans,C,R,71,72,190,1996-06-02,CAN,MTL
9,8475848,Brendan,Gallagher,R,R,11,69,185,1992-05-06,CAN,MTL


## 5. Player Statistics

Scrape player statistics for both skaters and goalies, including goals, assists, points, wins, and save percentage.

In [11]:
from scrapernhl import scrapeTeamStats

# Skater stats for MTL, season "20252026" (goalies excluded).
# NOTE(review): session=2 presumably selects the regular season — confirm
# against the scrapernhl documentation.
skaters = scrapeTeamStats("MTL", "20252026", session=2, goalies=False)
print("Top 10 scorers:")

# Ten highest point totals, with points-per-game derived on the fly
display(
    skaters
    .nlargest(10, 'points')
    .loc[:, ['playerId', 'firstName.default', 'lastName.default',
             'positionCode', 'gamesPlayed', 'goals', 'assists', 'points']]
    .assign(pointsPerGame=lambda top: top['points'] / top['gamesPlayed'])
)

Top 10 scorers:


Unnamed: 0,playerId,firstName.default,lastName.default,positionCode,gamesPlayed,goals,assists,points,pointsPerGame
8,8480018,Nick,Suzuki,C,42,14,32,46,1.1
22,8483457,Lane,Hutson,D,42,7,35,42,1.0
13,8481540,Cole,Caufield,R,42,20,20,40,0.95
27,8484984,Ivan,Demidov,R,42,10,26,36,0.86
23,8483515,Juraj,Slafkovský,L,42,15,17,32,0.76
11,8480865,Noah,Dobson,D,42,7,20,27,0.64
19,8482775,Oliver,Kapanen,C,42,13,9,22,0.52
2,8476875,Mike,Matheson,D,39,4,14,18,0.46
17,8482737,Zachary,Bolduc,R,42,10,7,17,0.4
0,8475848,Brendan,Gallagher,R,42,3,11,14,0.33


In [12]:
# Same call as for skaters, but with goalies=True to get goalie stats
goalies = scrapeTeamStats("MTL", "20252026", session=2, goalies=True)
print("Goalie statistics:")

# Record (W / L / OTL) plus goals-against average and save percentage
display(
    goalies.loc[:, ['playerId', 'firstName.default', 'lastName.default',
                    'gamesPlayed', 'wins', 'losses', 'overtimeLosses',
                    'goalsAgainstAverage', 'savePercentage']]
)

Goalie statistics:


Unnamed: 0,playerId,firstName.default,lastName.default,gamesPlayed,wins,losses,overtimeLosses,goalsAgainstAverage,savePercentage
0,8478470,Samuel,Montembeault,17,7,6,1,3.48,0.86
1,8482487,Jakub,Dobes,21,13,5,3,3.01,0.89
2,8484170,Jacob,Fowler,7,3,2,2,2.57,0.9


## 6. Play-by-Play Data

Retrieve detailed play-by-play data for a specific game, including all events like shots, goals, hits, and faceoffs.

In [13]:
from scrapernhl import scrapePlays

# Pick the FIRST schedule row whose gameState is 'OFF' and scrape its
# play-by-play.
# NOTE(review): rows whose gameState is 'FINAL' are not matched by this
# filter — confirm that excluding them is intended.
completed_games = schedule.loc[schedule['gameState'].eq('OFF')]

if len(completed_games) == 0:
    print("No completed games found in schedule")
else:
    game_id = completed_games.iloc[0]['id']
    print("Scraping game {}...".format(game_id))

    pbp = scrapePlays(game_id)
    print("Game has {} events".format(len(pbp)))

    # How often each event type occurs
    print("\nEvent counts:")
    display(pbp['typeDescKey'].value_counts())

    # Opening events: period, clock, event type, owning team, game ID
    print("\nFirst 10 events:")
    display(
        pbp
        .loc[:, ['periodDescriptor.number', 'timeInPeriod', 'typeDescKey',
                 'details.eventOwnerTeamId', 'gameId']]
        .head(10)
    )

Scraping game 2025020004...
Game has 328 events

Event counts:


typeDescKey
hit                62
faceoff            59
shot-on-goal       51
stoppage           46
blocked-shot       35
giveaway           25
missed-shot        24
goal                7
takeaway            5
penalty             4
period-start        3
delayed-penalty     3
period-end          3
game-end            1
Name: count, dtype: int64


First 10 events:


Unnamed: 0,periodDescriptor.number,timeInPeriod,typeDescKey,details.eventOwnerTeamId,gameId
0,1,00:00,period-start,,2025020004
1,1,00:00,faceoff,8.0,2025020004
2,1,00:17,giveaway,8.0,2025020004
3,1,00:19,hit,8.0,2025020004
4,1,00:27,shot-on-goal,10.0,2025020004
5,1,00:54,shot-on-goal,10.0,2025020004
6,1,00:55,stoppage,,2025020004
7,1,00:55,faceoff,8.0,2025020004
8,1,01:00,goal,10.0,2025020004
9,1,01:00,faceoff,10.0,2025020004


## 7. Draft Data

Access historical NHL draft data including player information, draft position, and team selections.

In [None]:
from scrapernhl import scrapeDraftData

# Round 1 of the 2025 draft
draft_2025_r1 = scrapeDraftData("2025", 1)
print("2025 Draft - Round 1: {} picks".format(len(draft_2025_r1)))

# First ten picks: pick position, drafting team, player name and measurements
display(
    draft_2025_r1
    .loc[:, ['round', 'pickInRound', 'overallPick', 'teamAbbrev',
             'firstName.default', 'lastName.default', 'positionCode',
             'countryCode', 'height', 'weight', 'year']]
    .head(10)
)

2025 Draft - Round 1: 32 picks


Unnamed: 0,round,pickInRound,overallPick,teamAbbrev,firstName.default,lastName.default,positionCode,countryCode,height,weight,year
0,1,1,1,NYI,Matthew,Schaefer,D,CAN,74,186,2025
1,1,2,2,SJS,Michael,Misa,C,CAN,73,182,2025
2,1,3,3,CHI,Anton,Frondell,C,SWE,74,204,2025
3,1,4,4,UTA,Caleb,Desnoyers,C,CAN,74,182,2025
4,1,5,5,NSH,Brady,Martin,C,CAN,72,186,2025
5,1,6,6,PHI,Porter,Martone,RW,CAN,75,204,2025
6,1,7,7,BOS,James,Hagens,C,USA,71,186,2025
7,1,8,8,SEA,Jake,O'Brien,C,CAN,74,177,2025
8,1,9,9,BUF,Radim,Mrtka,D,CZE,78,218,2025
9,1,10,10,ANA,Roger,McQueen,C,CAN,78,198,2025


## 8. Using Polars (Alternative to Pandas)

Polars is a faster alternative to Pandas for large datasets. The scraper supports both output formats.

In [14]:
# Ask the scraper for a Polars DataFrame instead of the default output
teams_pl = scrapeTeams(output_format="polars")
print("Type: {}".format(type(teams_pl)))
print("Shape: {}".format(teams_pl.shape))

# Column selection uses Polars' `select`, then the first five rows
display(
    teams_pl
    .select(['name', 'abbrev', 'id', 'placeName', 'commonName'])
    .head(5)
)

Type: <class 'polars.dataframe.frame.DataFrame'>
Shape: (32, 12)


name,abbrev,id,placeName,commonName
struct[2],str,i64,struct[2],struct[2]
"{""New Jersey Devils"",""Devils du New Jersey""}","""NJD""",1,"{""New Jersey"",null}","{""Devils"",null}"
"{""New York Islanders"",""Islanders de New York""}","""NYI""",2,"{""NY Islanders"",null}","{""Islanders"",null}"
"{""New York Rangers"",""Rangers de New York""}","""NYR""",3,"{""NY Rangers"",null}","{""Rangers"",null}"
"{""Philadelphia Flyers"",""Flyers de Philadelphie""}","""PHI""",4,"{""Philadelphia"",""Philadelphie""}","{""Flyers"",null}"
"{""Pittsburgh Penguins"",""Penguins de Pittsburgh""}","""PIT""",5,"{""Pittsburgh"",null}","{""Penguins"",null}"


## 9. Backward Compatibility Test

The package maintains backward compatibility with older import styles for ease of migration.

In [None]:
# Import straight from the package root and check that a scrape still works.
# NOTE(review): this is the same import form the earlier cells in this
# notebook use — confirm it is the "old" style this section is meant to cover.
from scrapernhl import scrapeTeams, scrapeSchedule

teams_old_style = scrapeTeams()
print("Old import style works: {} teams scraped".format(len(teams_old_style)))

Old import style works: 32 teams scraped
