# NHL Official Data Preparation
## Collecting NHL Players' Stats from Official NHL Website
1. Collect Metadata(player_name, player_link) from NHL Official Website by Season and Team
2. Collect Stats from Each Player's Page

### Import Libraries

In [1]:
import nhl_scraper_api as nhl_scraper
import pandas as pd
import os
import time
import random
import glob

### Define Valid Teams and Valid Seasons

In [None]:
valid_teams = [
    "bruins", "sabres", "redwings", "panthers", "canadiens",
    "senators", "lightning", "mapleleafs", "hurricanes", "bluejackets",
    "devils", "islanders", "rangers", "flyers", "penguins",
    "capitals", "blackhawks", "avalanche", "stars", "wild",
    "predators", "blues", "jets", "ducks", "flames",
    "oilers", "kings", "sharks", "kraken", "canucks",
    "goldenknights", "utah"
]

# Season labels "2000-2001" through "2024-2025" (YYYY-YYYY), one per starting year
valid_seasons = [f'{start_year}-{start_year + 1}' for start_year in range(2000, 2025)]

### Validate Team Links' Format

In [None]:
nhl_scraper.print_team_links('2024-2025')

### Collect Metadata(player_name, player_link) from NHL Official Website by Season and Team

#### Test API By Collecting 2024-2025 Avalanche Players' Metadata

In [None]:
avalanche_2425_metadata = nhl_scraper.get_player_by_team("avalanche", "2024-2025")

In [None]:
avalanche_2425_metadata

#### Collect All Teams' Metadata for All Seasons

In [None]:
def get_all_teams_metadata(curr_team, driver, wait):
    """Scrape one team's player metadata for every season in ``valid_seasons``.

    Each season is written to
    ``./data/nhl/official/teams/<team>/<team>_<season>.csv``. A season whose
    CSV already exists is skipped, so an interrupted run can be resumed by
    calling the function again. A season for which the scraper returns
    ``None`` is reported and skipped without writing a file.

    Args:
        curr_team: Team identifier used in the output path and passed to the scraper.
        driver: Browser driver passed unchanged to
            ``nhl_scraper.get_player_by_team_with_reusable_driver``.
        wait: Wait helper passed unchanged to the same scraper call.
    """
    # Print a divider before each team
    print("================================================================")

    # Print team name
    print(f"Collecting metadata for {curr_team}")

    # The output folder depends only on the team, so build and create it once
    # instead of on every season iteration
    output_dir = f'./data/nhl/official/teams/{curr_team}'
    os.makedirs(output_dir, exist_ok=True)

    for i, season in enumerate(valid_seasons):
        curr_team_output_path = f'{output_dir}/{curr_team}_{season}.csv'

        # Skip scraping if the file exists (makes the run resumable)
        if os.path.exists(curr_team_output_path):
            print(f'{curr_team_output_path} already exists. Skipping scraping.')
            continue

        # Print a thinner divider on every fifth season index that is actually scraped
        if i % 5 == 0:
            print("----------------------------------------------------------------")

        curr_team_metadata = nhl_scraper.get_player_by_team_with_reusable_driver(curr_team, season, driver, wait)
        if curr_team_metadata is None:
            print(f'Failed to scrape {curr_team} for {season}')
            continue

        # Write to CSV
        curr_team_metadata.to_csv(curr_team_output_path, index=False, encoding='utf-8-sig')
        print(f'Finished scraping {curr_team} for {season}')

        # Add random sleep (10-30 seconds) to prevent getting blocked
        sleep_time = random.uniform(10, 30)
        print(f"Sleep for {sleep_time / 60:.2f} minutes to prevent getting blocked\n")
        time.sleep(sleep_time)

##### Collecting All Teams' Metadata

In [None]:
from selenium.webdriver.support.wait import WebDriverWait
import undetected_chromedriver as uc

# Setup Chrome Driver ONCE
# The same driver/wait pair is reused for every team so Chrome is launched a single time.
chrome_options = uc.ChromeOptions()
# Run Chrome without opening a visible browser window
chrome_options.add_argument("--headless=new")
# NOTE(review): presumably needed for containerised/limited environments — confirm
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

# NOTE(review): version_main is hard-coded to 138; presumably it has to match
# the major version of the locally installed Chrome — confirm before running
driver = uc.Chrome(version_main=138, options=chrome_options)
# Waits created from this object give up after 15 seconds
wait = WebDriverWait(driver, 15)

In [None]:
# Scrape every team with the shared driver. The browser is always shut down
# afterwards, even when a team raises part-way through, so no Chrome process
# is left running in the background.
try:
    for team in valid_teams:
        get_all_teams_metadata(team, driver, wait)
finally:
    driver.quit()

#### Get unique players' metadata from all teams and seasons

In [None]:
# Get unique players' metadata from all teams and seasons

# Read in all csv files in ./data/nhl/official/teams/ and merge them.
# The list is sorted so that the row kept for a duplicated player does not
# depend on the arbitrary order in which glob returns the files.
players_files = sorted(glob.glob('./data/nhl/official/teams/**/*.csv', recursive=True))

# Collect the frames first and concatenate once: concatenating and
# de-duplicating inside the loop re-copies the accumulated data for every file.
team_frames = []
for player_file in players_files:
    team_frames.append(pd.read_csv(player_file))
    print(f'Finished merging {player_file}')

if team_frames:
    # Keep the first occurrence of each player_name.
    # NOTE(review): two different players sharing a name would be collapsed
    # into one row here — confirm that player_name is a safe key.
    nhl_players_metadata = (
        pd.concat(team_frames, ignore_index=True)
        .drop_duplicates(subset=['player_name'])
        .reset_index(drop=True)
    )
else:
    # pd.concat rejects an empty list; fall back to an empty frame
    nhl_players_metadata = pd.DataFrame()

# Write to CSV
nhl_players_metadata.to_csv('./data/nhl/official/nhl_players_metadata.csv', index=False, encoding='utf-8-sig')

In [None]:
# Read in nhl_players_metadata.csv
nhl_players_metadata_official = pd.read_csv('./data/nhl/official/nhl_players_metadata.csv')

In [None]:
len(nhl_players_metadata_official)

##### Exclude Goalies from the nhl_players_metadata_official

In [None]:
# Goalies carry the position code 'G'; every row with any other position is kept
is_skater = nhl_players_metadata_official['player_pos'].ne('G')
nhl_skaters_metadata_official = nhl_players_metadata_official.loc[is_skater]

# Persist the skaters-only metadata
nhl_skaters_metadata_official.to_csv(
    './data/nhl/official/nhl_skaters_metadata_official.csv',
    index=False,
    encoding='utf-8-sig',
)

In [None]:
len(nhl_skaters_metadata_official)

#### Remove Accent Characters from EP Metadata

In [None]:
from unidecode import unidecode

# Load eliteprospects meta
nhl_players_metadata_ep = pd.read_csv('./data/nhl/nhl_players_metadata.csv')

# Transliterate accented characters to ASCII in every text column.
# Only real strings are converted: object columns can also hold NaN for
# missing values, and unidecode cannot be called on those.
for col in nhl_players_metadata_ep.columns:
    if nhl_players_metadata_ep[col].dtype == 'object':
        nhl_players_metadata_ep[col] = nhl_players_metadata_ep[col].map(
            lambda value: unidecode(value) if isinstance(value, str) else value
        )

# The frame that is saved below and merged with the official data must be the
# cleaned one; previously an untouched second copy of the CSV was written out
# under the "accent_cleaned" name.
nhl_skaters_metadata_ep = nhl_players_metadata_ep.copy()

# Save the cleaned CSV
nhl_skaters_metadata_ep.to_csv('./data/nhl/nhl_skaters_metadata_accent_cleaned.csv', index=False, encoding='utf-8-sig')

#### Merge EP Metadata with Official Metadata and Skip the Missing Players

In [None]:
# Left-join the official skaters with the EP metadata on player_name, so every
# official row is kept whether or not an EP match exists
link_renames = {
    'player_link': 'player_link_official',
    'link': 'player_link_ep',
}

# Columns carried forward, in output order
wanted_columns = ['player_name', 'player_pos', 'player_link_official', 'player_link_ep', 'player_image']

nhl_skaters_metadata_official_ep_merge = (
    nhl_skaters_metadata_official
    .merge(nhl_skaters_metadata_ep, how='left', on='player_name')
    .rename(columns=link_renames)[wanted_columns]
)



In [None]:
# Write to CSV
nhl_skaters_metadata_official_ep_merge.to_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge.csv', index=False, encoding='utf-8-sig')

#### Find all players without player_link_ep


In [None]:
# Find all the missing player-rows in official data after merging
players_missing_ep_link = nhl_skaters_metadata_official_ep_merge[nhl_skaters_metadata_official_ep_merge['player_link_ep'].isnull()]

In [None]:
players_missing_ep_link

In [None]:
# Write to CSV
players_missing_ep_link.to_csv('./data/nhl/missing_players_in_official_after_merged.csv', index=False, encoding='utf-8-sig')

#### Manually Add the Missing Players' EP Links
- Later on, we can use the EP Links to match the players' stats from EP to the official data

In [None]:
# Read in manually added missing players' EP links
nhl_skaters_metadata_official_ep_merge_complete = pd.read_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge_complete.csv')

In [None]:
# Find all rows missing player_link_ep
players_missing_ep_link = nhl_skaters_metadata_official_ep_merge_complete[nhl_skaters_metadata_official_ep_merge_complete['player_link_ep'].isnull()]

players_missing_ep_link

##### Find players that are in nhl_players_metadata (EP) but not in nhl_skaters_metadata_official_ep_merge_complete, matched by player_link_ep

In [None]:
# Find EP players (rows of nhl_players_metadata.csv) whose link does not appear
# as a player_link_ep in the manually completed merge file
# Load eliteprospects meta
nhl_players_metadata_ep = pd.read_csv('./data/nhl/nhl_players_metadata.csv')
nhl_skaters_metadata_official_ep_merge_complete = pd.read_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge_complete_2.csv')

# Keep the EP rows whose 'link' is absent from the merge file's player_link_ep column
players_missing_after_manual_merge = nhl_players_metadata_ep[~nhl_players_metadata_ep['link'].isin(nhl_skaters_metadata_official_ep_merge_complete['player_link_ep'])]

In [None]:
print(f"nhl_players_metadata_ep: {len(nhl_players_metadata_ep)}")
print(f"nhl_skaters_metadata_official_ep_merge_complete: {len(nhl_skaters_metadata_official_ep_merge_complete)}")

In [None]:
players_missing_after_manual_merge

In [None]:
# Remove duplicates based on player_link_ep for nhl_skaters_metadata_official_ep_merge_complete_2.
# Only rows that actually have a link are de-duplicated (first occurrence kept):
# drop_duplicates treats missing values as equal to each other, which would
# collapse every player without an EP link into a single row.
_ep_links = nhl_skaters_metadata_official_ep_merge_complete['player_link_ep']
nhl_skaters_metadata_official_ep_merge_complete = nhl_skaters_metadata_official_ep_merge_complete[
    _ep_links.isna() | ~_ep_links.duplicated()
].reset_index(drop=True)

In [None]:
len(nhl_skaters_metadata_official_ep_merge_complete)

In [None]:
# Write to CSV
nhl_skaters_metadata_official_ep_merge_complete.to_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge_complete_final.csv', index=False, encoding='utf-8-sig')

##### Update player_image URL to the latest
- original: https://assets.nhle.com/mugs/nhl/{season}/{team}/{player_id}.png
- latest: https://assets.nhle.com/mugs/nhl/latest/{player_id}.png

In [None]:
# Reload the final merged metadata from disk
nhl_skaters_metadata_official_ep_merge_complete_final = pd.read_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge_complete_final.csv')

# Rebuild every image URL from the last path segment of the player's official link
nhl_skaters_metadata_official_ep_merge_complete_final['player_image'] = [
    f"https://assets.nhle.com/mugs/nhl/latest/{official_link.split('/')[-1]}.png"
    for official_link in nhl_skaters_metadata_official_ep_merge_complete_final['player_link_official']
]

In [None]:
nhl_skaters_metadata_official_ep_merge_complete_final.head(10)

In [None]:
# Write to CSV to update image
nhl_skaters_metadata_official_ep_merge_complete_final.to_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge_complete_final.csv', index=False, encoding='utf-8-sig')

#### Collect Stats from Each Player's Page

##### Initiate the Chrome Driver

In [2]:
from selenium.webdriver.support.wait import WebDriverWait
import undetected_chromedriver as uc

# Setup Chrome Driver ONCE
# The same driver/wait pair is reused by the test cells that follow.
chrome_options = uc.ChromeOptions()
# Run Chrome without opening a visible browser window
chrome_options.add_argument("--headless=new")
# NOTE(review): presumably needed for containerised/limited environments — confirm
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

# NOTE(review): version_main is hard-coded to 138; presumably it has to match
# the major version of the locally installed Chrome — confirm before running
driver = uc.Chrome(version_main=138, options=chrome_options)
# Waits created from this object give up after 15 seconds
wait = WebDriverWait(driver, 15)

##### Test get_player_stats API

In [3]:
# Load CSV File
nhl_skaters_metadata_official_ep_merge_complete_final = pd.read_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge_complete_final.csv')

In [None]:
# Get Cale Makar's Metadata which is at index 233
cale_makar_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[233]
cale_makar_metadata

In [None]:
# Passed in to the get stats function
cale_makar_stats = nhl_scraper.get_player_stats_with_reusable_driver(cale_makar_metadata, driver, wait)

In [None]:
cale_makar_stats

##### Test Player without Playoffs Data in All Leagues Tab but with Data in NHL Tab: Rob Blake

In [4]:
# Get Rob Blake's Metadata which is at index 17
rob_blake_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[17]
rob_blake_metadata

player_name                                                     Rob Blake
player_pos                                                              D
player_link_official                   https://www.nhl.com/player/8445550
player_link_ep          https://www.eliteprospects.com/player/8580/rob...
player_image            https://assets.nhle.com/mugs/nhl/latest/844555...
Name: 17, dtype: object

In [5]:
# Passed in to the get stats function
rob_blake_stats = nhl_scraper.get_player_stats_with_reusable_driver(rob_blake_metadata, driver, wait)

Collecting Rob Blake's stats from https://www.nhl.com/player/8445550
Scraping 'All Leagues' regular season stats for Rob Blake
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Rob Blake
Successfully located game-type dropdown button
Successfully clicked playoffs dropdown button
Failed to scrape playoff stats for Rob Blake in 'All Leagues' Tab
Scraping 'playoff stats' for Rob Blake in 'NHL' Tab
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located NHL option
Successfully clicked NHL option
Successfully located game-type dropdown button
Successfully clicked playoffs dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats in 'NHL' Tab


In [6]:
rob_blake_stats

Unnamed: 0,player_name,season,league,team,gp_regular,g_regular,a_regular,p_regular,plus_minus_regular,pim_regular,...,ppg_playoffs,ppp_playoffs,shg_playoffs,shp_playoffs,toi_per_game_playoffs,gwg_playoffs,otg_playoffs,sog_playoffs,shooting_pct_playoffs,fo_pct_playoffs
0,Rob Blake,1985-86,OHA-B,Brantford,39,3,13,16,,43,...,,,,,,,,,,
1,Rob Blake,1986-87,OHA-B,Stratford,31,11,20,31,,115,...,,,,,,,,,,
2,Rob Blake,1987-88,CCHA,Bowling Green,43,5,8,13,,88,...,,,,,,,,,,
3,Rob Blake,1988-89,CCHA,Bowling Green,46,11,21,32,,140,...,,,,,,,,,,
4,Rob Blake,1989-90,NHL,Los Angeles Kings,4,0,0,0,0.0,4,...,1.0,1.0,0.0,0.0,,0.0,0.0,11.0,9.1,
5,Rob Blake,1989-90,CCHA,Bowling Green,42,23,36,59,,140,...,,,,,,,,,,
6,Rob Blake,1990-91,NHL,Los Angeles Kings,75,12,34,46,3.0,125,...,1.0,3.0,0.0,0.0,,0.0,0.0,19.0,5.3,
7,Rob Blake,1990-91,WEC-A,Canada,2,0,2,2,,0,...,,,,,,,,,,
8,Rob Blake,1991-92,NHL,Los Angeles Kings,57,7,13,20,-5.0,102,...,0.0,1.0,0.0,0.0,,0.0,0.0,12.0,16.7,
9,Rob Blake,1992-93,NHL,Los Angeles Kings,76,16,43,59,18.0,152,...,1.0,3.0,1.0,1.0,,0.0,0.0,60.0,6.7,


##### Test Player without Playoffs Data in Both All Leagues and NHL Tabs: Landon Slaggert

In [4]:
# Get Landon Slaggert's Metadata which is at index 613
landon_slaggert_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[613]

In [5]:
landon_slaggert_metadata

player_name                                               Landon Slaggert
player_pos                                                             LW
player_link_official                   https://www.nhl.com/player/8482172
player_link_ep          https://www.eliteprospects.com/player/418245/l...
player_image            https://assets.nhle.com/mugs/nhl/latest/848217...
Name: 613, dtype: object

In [6]:
# Passed in to the get stats function
landon_slaggert_stats = nhl_scraper.get_player_stats_with_reusable_driver(landon_slaggert_metadata, driver, wait)

Collecting Landon Slaggert's stats from https://www.nhl.com/player/8482172
Scraping 'All Leagues' regular season stats for Landon Slaggert
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Landon Slaggert
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Failed to scrape playoff stats for Landon Slaggert in 'All Leagues' Tab
Scraping 'playoff stats' for Landon Slaggert in 'NHL' Tab
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located NHL option
Successfully clicked NHL option
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Failed to scrape playoff stats for Landon Slaggert in 'NHL' Tab
Failed to scrape playoff stats for Landon Slaggert in both 'All Leagues' and 'NHL' Tabs
No playoff s

In [7]:
landon_slaggert_stats

Unnamed: 0,Player,Season,League,Team,GP,G,A,P,+/-,PIM,PPG,PPP,SHG,SHP,TOI/G,GWG,OTG,S,S%,FO%
0,Landon Slaggert,2016-17,HPHL 14U,Chicago Mission 14U AAA,20,6,10,16,,10.0,,,,,,,,,,
1,Landon Slaggert,2016-17,WSI U15,STX Selects U15,7,4,4,8,,2.0,,,,,,,,,,
2,Landon Slaggert,2017-18,HPHL 16U,Chicago Mission 16U AAA,20,8,12,20,,0.0,,,,,,,,,,
3,Landon Slaggert,2017-18,USA-S15,Team Kelly,3,2,1,3,,,,,,,,,,,,
4,Landon Slaggert,2018-19,NTDP,U.S. National U18 Team,3,0,0,0,,2.0,,,,,,,,,,
5,Landon Slaggert,2018-19,NTDP,U.S. National U17 Team,48,10,14,24,,56.0,,,,,,,,,,
6,Landon Slaggert,2018-19,USHL,USNTDP Juniors,30,6,8,14,-10.0,16.0,,,,,,,,,,
7,Landon Slaggert,2018-19,WHC-17,USA U17,5,2,1,3,,14.0,,,,,,,,,,
8,Landon Slaggert,2019-20,NTDP,U.S. National U18 Team,47,13,11,24,,32.0,,,,,,,,,,
9,Landon Slaggert,2019-20,USHL,USNTDP Juniors,19,6,4,10,-3.0,14.0,,,,,,,,,,


In [3]:
driver.quit()

### Collect Stats for All Players

In [4]:
# Load CSV File
nhl_skaters_metadata_official_ep_merge_complete_final = pd.read_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge_complete_final.csv')

#### Initiate Reusable Driver

In [5]:
from selenium.webdriver.support.wait import WebDriverWait
import undetected_chromedriver as uc

# Set up Chrome Driver ONCE
# The same driver/wait pair is reused for every batch of players scraped below.
chrome_options = uc.ChromeOptions()
# Run Chrome without opening a visible browser window
chrome_options.add_argument("--headless=new")
# NOTE(review): presumably needed for containerised/limited environments — confirm
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

# NOTE(review): version_main is hard-coded to 138; presumably it has to match
# the major version of the locally installed Chrome — confirm before running
driver = uc.Chrome(version_main=138, options=chrome_options)
# Waits created from this object give up after 15 seconds
wait = WebDriverWait(driver, 15)

#### Define Output File

In [6]:
official_stats_output_path = './data/nhl/official/stats/nhl_players_official_stats.csv'

#### Define Function to Get Player Stats in Batches

In [7]:
def get_players_stats_by_batch(players_to_scrape, driver, wait, output_path):
    """Scrape official stats for a batch of players and append them to one CSV.

    Every row of ``players_to_scrape`` is passed to
    ``nhl_scraper.get_player_stats_with_reusable_driver``. The returned frame
    is written to ``output_path``: with a header when the file does not exist
    yet, appended without a header otherwise. A failure for one player is
    printed and counted, and the loop carries on with the next player.

    NOTE(review): because later writes omit the header, the CSV is only
    consistent if every player's stats frame has the same columns in the same
    order — confirm against the scraper.

    Args:
        players_to_scrape: DataFrame with at least the ``player_name`` and
            ``player_link_official`` columns.
        driver: Browser driver passed unchanged to the scraper.
        wait: Wait helper passed unchanged to the scraper.
        output_path: Path of the CSV file that accumulates the scraped stats.
    """
    curr_len = len(players_to_scrape)
    fail_count = 0

    # Make sure the destination folder exists; otherwise every write would
    # fail and each player would be reported as a scraping failure
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for i in range(curr_len):
        player_metadata = players_to_scrape.iloc[i]
        player_name = player_metadata['player_name']
        player_url = player_metadata['player_link_official']
        print(f"\n [{i + 1}] Collecting stats for {player_name} at {player_url}")

        is_last_player = i == curr_len - 1

        try:
            player_stats = nhl_scraper.get_player_stats_with_reusable_driver(player_metadata, driver, wait)

            # Write to CSV file: create it with a header, or append rows only
            file_exists = os.path.exists(output_path)
            player_stats.to_csv(
                output_path,
                mode='a' if file_exists else 'w',
                header=not file_exists,
                index=False,
                encoding='utf-8-sig',
            )
        except Exception as e:
            # Best effort: one bad player must not abort the whole batch
            print(f"Failed to get stats for {player_name}: {e}")

            fail_count += 1

            if not is_last_player:
                # Sleep for 10-60 seconds before trying the next player
                sleep_time = random.uniform(10, 60)
                print(f"Sleeping for {sleep_time:.0f} seconds before trying the next player")
                time.sleep(sleep_time)
        else:
            print(f'Successfully scraped stats for {player_name}')

            # Print Fail Rate (share of failed players so far in this batch)
            print(f'Failed rate: {fail_count / (i + 1):.2f}')

            # Add random sleep (10-120 seconds) to prevent getting blocked
            if not is_last_player:
                sleep_time = random.uniform(10, 120)
                print(f"Sleep for {sleep_time / 60:.2f} minutes to prevent getting blocked")
                time.sleep(sleep_time)

##### Collect Stats for All Players

In [8]:
# Scrape 0 to 10 players' official stats
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[0:10]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for Joe Sakic at https://www.nhl.com/player/8451101
Collecting Joe Sakic's stats from https://www.nhl.com/player/8451101
Scraping 'All Leagues' regular season stats for Joe Sakic
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Joe Sakic
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Joe Sakic
Failed rate: 0.00
Sleep for 0.64 minutes to prevent getting blocked

 [2] Collecting stats for Peter Forsberg at https://www.nhl.com/player/8458520
Collecting Peter Forsberg's stats from https://www.nhl.com/player/8458520
Scraping 'All Leagues' regular season stats for Peter Forsberg
Successfully located dropdown button
Successfully clicked dropdown b

In [9]:
# Scrape 10 to 40 players' official stats
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[10:40]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for Greg De Vries at https://www.nhl.com/player/8460254
Collecting Greg De Vries's stats from https://www.nhl.com/player/8460254
Scraping 'All Leagues' regular season stats for Greg De Vries
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Greg De Vries
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Greg De Vries
Failed rate: 0.00
Sleep for 1.95 minutes to prevent getting blocked

 [2] Collecting stats for Dan Hinote at https://www.nhl.com/player/8465175
Collecting Dan Hinote's stats from https://www.nhl.com/player/8465175
Scraping 'All Leagues' regular season stats for Dan Hinote
Successfully located dropdown button
Successfully clicked dr

In [10]:
# Scrape 40 to 50 players' official stats
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[40:50]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for Jaroslav Obsut at https://www.nhl.com/player/8462220
Collecting Jaroslav Obsut's stats from https://www.nhl.com/player/8462220
Scraping 'All Leagues' regular season stats for Jaroslav Obsut
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Jaroslav Obsut
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Jaroslav Obsut
Failed rate: 0.00
Sleep for 0.79 minutes to prevent getting blocked

 [2] Collecting stats for Steve Moore at https://www.nhl.com/player/8467379
Collecting Steve Moore's stats from https://www.nhl.com/player/8467379
Scraping 'All Leagues' regular season stats for Steve Moore
Successfully located dropdown button
Successfully cl

In [11]:
# Scrape 50 to 100 players' official stats
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[50:100]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for Chris McAllister at https://www.nhl.com/player/8462068
Collecting Chris McAllister's stats from https://www.nhl.com/player/8462068
Scraping 'All Leagues' regular season stats for Chris McAllister
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Chris McAllister
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Chris McAllister
Failed rate: 0.00
Sleep for 0.33 minutes to prevent getting blocked

 [2] Collecting stats for Steve Brule at https://www.nhl.com/player/8459566
Collecting Steve Brule's stats from https://www.nhl.com/player/8459566
Scraping 'All Leagues' regular season stats for Steve Brule
Successfully located dropdown button
Succe

In [12]:
# Scrape 100 to 200 players' official stats
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[100:200]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for David Jones at https://www.nhl.com/player/8470877
Collecting David Jones's stats from https://www.nhl.com/player/8470877
Scraping 'All Leagues' regular season stats for David Jones
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for David Jones
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for David Jones
Failed rate: 0.00
Sleep for 0.78 minutes to prevent getting blocked

 [2] Collecting stats for Wyatt Smith at https://www.nhl.com/player/8466357
Collecting Wyatt Smith's stats from https://www.nhl.com/player/8466357
Scraping 'All Leagues' regular season stats for Wyatt Smith
Successfully located dropdown button
Successfully clicked dropdown 

In [13]:
# Scrape 200 to 300 players' official stats
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[200:300]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for Rene Bourque at https://www.nhl.com/player/8471504
Collecting Rene Bourque's stats from https://www.nhl.com/player/8471504
Scraping 'All Leagues' regular season stats for Rene Bourque
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Rene Bourque
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Rene Bourque
Failed rate: 0.00
Sleep for 1.70 minutes to prevent getting blocked

 [2] Collecting stats for Sven Andrighetto at https://www.nhl.com/player/8477413
Collecting Sven Andrighetto's stats from https://www.nhl.com/player/8477413
Scraping 'All Leagues' regular season stats for Sven Andrighetto
Successfully located dropdown button
Successful

In [14]:
# Scrape 300 to 400 players' official stats
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[300:400]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for Caleb Jones at https://www.nhl.com/player/8478452
Collecting Caleb Jones's stats from https://www.nhl.com/player/8478452
Scraping 'All Leagues' regular season stats for Caleb Jones
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Caleb Jones
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Caleb Jones
Failed rate: 0.00
Sleep for 0.35 minutes to prevent getting blocked

 [2] Collecting stats for Yakov Trenin at https://www.nhl.com/player/8478508
Collecting Yakov Trenin's stats from https://www.nhl.com/player/8478508
Scraping 'All Leagues' regular season stats for Yakov Trenin
Successfully located dropdown button
Successfully clicked dropdo

In [15]:
# Scrape 400 to 500 players' official stats
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[400:500]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for Dustin Byfuglien at https://www.nhl.com/player/8470834
Collecting Dustin Byfuglien's stats from https://www.nhl.com/player/8470834
Scraping 'All Leagues' regular season stats for Dustin Byfuglien
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Dustin Byfuglien
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Dustin Byfuglien
Failed rate: 0.00
Sleep for 1.72 minutes to prevent getting blocked

 [2] Collecting stats for Brandon Bochenski at https://www.nhl.com/player/8469666
Collecting Brandon Bochenski's stats from https://www.nhl.com/player/8469666
Scraping 'All Leagues' regular season stats for Brandon Bochenski
Successfully located dro

In [16]:
# Scrape 500 to 600 players' official stats
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[500:600]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for Klas Dahlbeck at https://www.nhl.com/player/8476403
Collecting Klas Dahlbeck's stats from https://www.nhl.com/player/8476403
Scraping 'All Leagues' regular season stats for Klas Dahlbeck
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Klas Dahlbeck
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Klas Dahlbeck
Failed rate: 0.00
Sleep for 1.59 minutes to prevent getting blocked

 [2] Collecting stats for Michael Paliotta at https://www.nhl.com/player/8476394
Collecting Michael Paliotta's stats from https://www.nhl.com/player/8476394
Scraping 'All Leagues' regular season stats for Michael Paliotta
Successfully located dropdown button
Succe

In [17]:
# Scrape 600 to 700 players' official stats
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[600:700]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for Nikita Zaitsev at https://www.nhl.com/player/8479458
Collecting Nikita Zaitsev's stats from https://www.nhl.com/player/8479458
Scraping 'All Leagues' regular season stats for Nikita Zaitsev
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Nikita Zaitsev
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Nikita Zaitsev
Failed rate: 0.00
Sleep for 1.15 minutes to prevent getting blocked

 [2] Collecting stats for Wyatt Kaiser at https://www.nhl.com/player/8482176
Collecting Wyatt Kaiser's stats from https://www.nhl.com/player/8482176
Scraping 'All Leagues' regular season stats for Wyatt Kaiser
Successfully located dropdown button
Successfully

In [18]:
# Scrape 700 to 800 players' official stats
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[700:800]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for Jason Chimera at https://www.nhl.com/player/8466251
Collecting Jason Chimera's stats from https://www.nhl.com/player/8466251
Scraping 'All Leagues' regular season stats for Jason Chimera
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Jason Chimera
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Jason Chimera
Failed rate: 0.00
Sleep for 0.71 minutes to prevent getting blocked

 [2] Collecting stats for Jaroslav Balastik at https://www.nhl.com/player/8470185
Collecting Jaroslav Balastik's stats from https://www.nhl.com/player/8470185
Scraping 'All Leagues' regular season stats for Jaroslav Balastik
Successfully located dropdown button
Su

In [19]:
# Official-stats batch covering positions 800-899 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[800:900]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Gregory Campbell at https://www.nhl.com/player/8470169
Collecting Gregory Campbell's stats from https://www.nhl.com/player/8470169
Scraping 'All Leagues' regular season stats for Gregory Campbell
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Gregory Campbell
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Gregory Campbell
Failed rate: 0.00
Sleep for 1.34 minutes to prevent getting blocked

 [2] Collecting stats for Oliver Bjorkstrand at https://www.nhl.com/player/8477416
Collecting Oliver Bjorkstrand's stats from https://www.nhl.com/player/8477416
Scraping 'All Leagues' regular season stats for Oliver Bjorkstrand
Successfully located 

In [20]:
# Official-stats batch covering positions 900-999 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[900:1000]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Jeff Finley at https://www.nhl.com/player/8446826
Collecting Jeff Finley's stats from https://www.nhl.com/player/8446826
Scraping 'All Leagues' regular season stats for Jeff Finley
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Jeff Finley
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Jeff Finley
Failed rate: 0.00
Sleep for 0.99 minutes to prevent getting blocked

 [2] Collecting stats for Bryce Salvador at https://www.nhl.com/player/8460626
Collecting Bryce Salvador's stats from https://www.nhl.com/player/8460626
Scraping 'All Leagues' regular season stats for Bryce Salvador
Successfully located dropdown button
Successfully clicked 

In [21]:
# Official-stats batch covering positions 1000-1099 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[1000:1100]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Jaden Schwartz at https://www.nhl.com/player/8475768
Collecting Jaden Schwartz's stats from https://www.nhl.com/player/8475768
Scraping 'All Leagues' regular season stats for Jaden Schwartz
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Jaden Schwartz
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Jaden Schwartz
Failed rate: 0.00
Sleep for 0.88 minutes to prevent getting blocked

 [2] Collecting stats for Cade Fairchild at https://www.nhl.com/player/8474029
Collecting Cade Fairchild's stats from https://www.nhl.com/player/8474029
Scraping 'All Leagues' regular season stats for Cade Fairchild
Successfully located dropdown button
Succes

In [22]:
# Official-stats batch covering positions 1100-1199 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[1100:1200]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Jonathan Girard at https://www.nhl.com/player/8467374
Collecting Jonathan Girard's stats from https://www.nhl.com/player/8467374
Scraping 'All Leagues' regular season stats for Jonathan Girard
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Jonathan Girard
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Jonathan Girard
Failed rate: 0.00
Sleep for 0.79 minutes to prevent getting blocked

 [2] Collecting stats for Don Sweeney at https://www.nhl.com/player/8451819
Collecting Don Sweeney's stats from https://www.nhl.com/player/8451819
Scraping 'All Leagues' regular season stats for Don Sweeney
Successfully located dropdown button
Successful

In [23]:
# Official-stats batch covering positions 1200-1299 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[1200:1300]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Zach Hamill at https://www.nhl.com/player/8474045
Collecting Zach Hamill's stats from https://www.nhl.com/player/8474045
Scraping 'All Leagues' regular season stats for Zach Hamill
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Zach Hamill
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Zach Hamill
Failed rate: 0.00
Sleep for 1.31 minutes to prevent getting blocked

 [2] Collecting stats for Brad Marchand at https://www.nhl.com/player/8473419
Collecting Brad Marchand's stats from https://www.nhl.com/player/8473419
Scraping 'All Leagues' regular season stats for Brad Marchand
Successfully located dropdown button
Successfully clicked dro

In [24]:
# Official-stats batch covering positions 1300-1399 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[1300:1400]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Jakub Lauko at https://www.nhl.com/player/8480880
Collecting Jakub Lauko's stats from https://www.nhl.com/player/8480880
Scraping 'All Leagues' regular season stats for Jakub Lauko
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Jakub Lauko
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Jakub Lauko
Failed rate: 0.00
Sleep for 1.67 minutes to prevent getting blocked

 [2] Collecting stats for Garnet Hathaway at https://www.nhl.com/player/8477903
Collecting Garnet Hathaway's stats from https://www.nhl.com/player/8477903
Scraping 'All Leagues' regular season stats for Garnet Hathaway
Successfully located dropdown button
Successfully click

In [25]:
# Official-stats batch covering positions 1400-1499 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[1400:1500]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Georges Laraque at https://www.nhl.com/player/8462060
Collecting Georges Laraque's stats from https://www.nhl.com/player/8462060
Scraping 'All Leagues' regular season stats for Georges Laraque
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Georges Laraque
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Georges Laraque
Failed rate: 0.00
Sleep for 0.48 minutes to prevent getting blocked

 [2] Collecting stats for Yannick Weber at https://www.nhl.com/player/8474134
Collecting Yannick Weber's stats from https://www.nhl.com/player/8474134
Scraping 'All Leagues' regular season stats for Yannick Weber
Successfully located dropdown button
Succ

In [26]:
# Official-stats batch covering positions 1500-1599 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[1500:1600]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Christian Dvorak at https://www.nhl.com/player/8477989
Collecting Christian Dvorak's stats from https://www.nhl.com/player/8477989
Scraping 'All Leagues' regular season stats for Christian Dvorak
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Christian Dvorak
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Christian Dvorak
Failed rate: 0.00
Sleep for 1.16 minutes to prevent getting blocked

 [2] Collecting stats for Chris Wideman at https://www.nhl.com/player/8475227
Collecting Chris Wideman's stats from https://www.nhl.com/player/8475227
Scraping 'All Leagues' regular season stats for Chris Wideman
Successfully located dropdown button

In [27]:
# Official-stats batch covering positions 1600-1699 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[1600:1700]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Luc Bourdon at https://www.nhl.com/player/8471684
Collecting Luc Bourdon's stats from https://www.nhl.com/player/8471684
Scraping 'All Leagues' regular season stats for Luc Bourdon
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Luc Bourdon
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Luc Bourdon
Failed rate: 0.00
Sleep for 1.80 minutes to prevent getting blocked

 [2] Collecting stats for Mason Raymond at https://www.nhl.com/player/8471664
Collecting Mason Raymond's stats from https://www.nhl.com/player/8471664
Scraping 'All Leagues' regular season stats for Mason Raymond
Successfully located dropdown button
Successfully clicked dro

In [28]:
# Official-stats batch covering positions 1700-1799 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[1700:1800]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for J.T. Miller at https://www.nhl.com/player/8476468
Collecting J.T. Miller's stats from https://www.nhl.com/player/8476468
Scraping 'All Leagues' regular season stats for J.T. Miller
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for J.T. Miller
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for J.T. Miller
Failed rate: 0.00
Sleep for 0.34 minutes to prevent getting blocked

 [2] Collecting stats for Tyler Myers at https://www.nhl.com/player/8474574
Collecting Tyler Myers's stats from https://www.nhl.com/player/8474574
Scraping 'All Leagues' regular season stats for Tyler Myers
Successfully located dropdown button
Successfully clicked dropdown 

In [29]:
# Official-stats batch covering positions 1800-1899 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[1800:1900]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Chris Hajt at https://www.nhl.com/player/8464986
Collecting Chris Hajt's stats from https://www.nhl.com/player/8464986
Scraping 'All Leagues' regular season stats for Chris Hajt
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Chris Hajt
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Chris Hajt
Failed rate: 0.00
Sleep for 1.51 minutes to prevent getting blocked

 [2] Collecting stats for Alex Ovechkin at https://www.nhl.com/player/8471214
Collecting Alex Ovechkin's stats from https://www.nhl.com/player/8471214
Scraping 'All Leagues' regular season stats for Alex Ovechkin
Successfully located dropdown button
Successfully clicked dropdown

In [30]:
# Official-stats batch covering positions 1900-1999 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[1900:2000]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Jakob Chychrun at https://www.nhl.com/player/8479345
Collecting Jakob Chychrun's stats from https://www.nhl.com/player/8479345
Scraping 'All Leagues' regular season stats for Jakob Chychrun
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Jakob Chychrun
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Jakob Chychrun
Failed rate: 0.00
Sleep for 0.62 minutes to prevent getting blocked

 [2] Collecting stats for Matt Roy at https://www.nhl.com/player/8478911
Collecting Matt Roy's stats from https://www.nhl.com/player/8478911
Scraping 'All Leagues' regular season stats for Matt Roy
Successfully located dropdown button
Successfully clicked dro

In [31]:
# Official-stats batch covering positions 2000-2099 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[2000:2100]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Mike Sislo at https://www.nhl.com/player/8476209
Collecting Mike Sislo's stats from https://www.nhl.com/player/8476209
Scraping 'All Leagues' regular season stats for Mike Sislo
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Mike Sislo
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Mike Sislo
Failed rate: 0.00
Sleep for 1.40 minutes to prevent getting blocked

 [2] Collecting stats for Seth Helgeson at https://www.nhl.com/player/8475274
Collecting Seth Helgeson's stats from https://www.nhl.com/player/8475274
Scraping 'All Leagues' regular season stats for Seth Helgeson
Successfully located dropdown button
Successfully clicked dropdown

In [32]:
# Official-stats batch covering positions 2100-2199 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[2100:2200]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Sandis Ozolinsh at https://www.nhl.com/player/8458544
Collecting Sandis Ozolinsh's stats from https://www.nhl.com/player/8458544
Scraping 'All Leagues' regular season stats for Sandis Ozolinsh
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Sandis Ozolinsh
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Sandis Ozolinsh
Failed rate: 0.00
Sleep for 1.75 minutes to prevent getting blocked

 [2] Collecting stats for Fredrik Olausson at https://www.nhl.com/player/8449971
Collecting Fredrik Olausson's stats from https://www.nhl.com/player/8449971
Scraping 'All Leagues' regular season stats for Fredrik Olausson
Successfully located dropdown bu

In [33]:
# Official-stats batch covering positions 2200-2299 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[2200:2300]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Brayden Tracey at https://www.nhl.com/player/8481530
Collecting Brayden Tracey's stats from https://www.nhl.com/player/8481530
Scraping 'All Leagues' regular season stats for Brayden Tracey
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Brayden Tracey
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Brayden Tracey
Failed rate: 0.00
Sleep for 1.64 minutes to prevent getting blocked

 [2] Collecting stats for Bryce Kindopp at https://www.nhl.com/player/8481815
Collecting Bryce Kindopp's stats from https://www.nhl.com/player/8481815
Scraping 'All Leagues' regular season stats for Bryce Kindopp
Successfully located dropdown button
Successfu

In [34]:
# Official-stats batch covering positions 2300-2399 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[2300:2400]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Jiri Hudler at https://www.nhl.com/player/8470201
Collecting Jiri Hudler's stats from https://www.nhl.com/player/8470201
Scraping 'All Leagues' regular season stats for Jiri Hudler
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Jiri Hudler
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Jiri Hudler
Failed rate: 0.00
Sleep for 0.69 minutes to prevent getting blocked

 [2] Collecting stats for Roman Cervenka at https://www.nhl.com/player/8476834
Collecting Roman Cervenka's stats from https://www.nhl.com/player/8476834
Scraping 'All Leagues' regular season stats for Roman Cervenka
Successfully located dropdown button
Successfully clicked 

In [35]:
# Official-stats batch covering positions 2400-2499 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[2400:2500]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Claude Lapointe at https://www.nhl.com/player/8448674
Collecting Claude Lapointe's stats from https://www.nhl.com/player/8448674
Scraping 'All Leagues' regular season stats for Claude Lapointe
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Claude Lapointe
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Claude Lapointe
Failed rate: 0.00
Sleep for 1.13 minutes to prevent getting blocked

 [2] Collecting stats for Dmitry Yushkevich at https://www.nhl.com/player/8458636
Collecting Dmitry Yushkevich's stats from https://www.nhl.com/player/8458636
Scraping 'All Leagues' regular season stats for Dmitry Yushkevich
Successfully located dropdown

In [36]:
# Official-stats batch covering positions 2500-2599 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[2500:2600]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Owen Tippett at https://www.nhl.com/player/8480015
Collecting Owen Tippett's stats from https://www.nhl.com/player/8480015
Scraping 'All Leagues' regular season stats for Owen Tippett
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Owen Tippett
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Owen Tippett
Failed rate: 0.00
Sleep for 1.15 minutes to prevent getting blocked

 [2] Collecting stats for Ryan Ellis at https://www.nhl.com/player/8475176
Collecting Ryan Ellis's stats from https://www.nhl.com/player/8475176
Scraping 'All Leagues' regular season stats for Ryan Ellis
Successfully located dropdown button
Successfully clicked dropdow

In [37]:
# Official-stats batch covering positions 2600-2699 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[2600:2700]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Dwight Helminen at https://www.nhl.com/player/8470131
Collecting Dwight Helminen's stats from https://www.nhl.com/player/8470131
Scraping 'All Leagues' regular season stats for Dwight Helminen
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Dwight Helminen
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Dwight Helminen
Failed rate: 0.00
Sleep for 0.86 minutes to prevent getting blocked

 [2] Collecting stats for Bryan Rodney at https://www.nhl.com/player/8470988
Collecting Bryan Rodney's stats from https://www.nhl.com/player/8470988
Scraping 'All Leagues' regular season stats for Bryan Rodney
Successfully located dropdown button
Success

In [38]:
# Official-stats batch covering positions 2700-2799 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[2700:2800]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Derek Bekar at https://www.nhl.com/player/8462239
Collecting Derek Bekar's stats from https://www.nhl.com/player/8462239
Scraping 'All Leagues' regular season stats for Derek Bekar
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Derek Bekar
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Derek Bekar
Failed rate: 0.00
Sleep for 1.94 minutes to prevent getting blocked

 [2] Collecting stats for Robert Nilsson at https://www.nhl.com/player/8470608
Collecting Robert Nilsson's stats from https://www.nhl.com/player/8470608
Scraping 'All Leagues' regular season stats for Robert Nilsson
Successfully located dropdown button
Successfully clicked 

In [39]:
# Official-stats batch covering positions 2800-2899 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[2800:2900]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Zach Bogosian at https://www.nhl.com/player/8474567
Collecting Zach Bogosian's stats from https://www.nhl.com/player/8474567
Scraping 'All Leagues' regular season stats for Zach Bogosian
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Zach Bogosian
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Zach Bogosian
Failed rate: 0.00
Sleep for 0.83 minutes to prevent getting blocked

 [2] Collecting stats for Tim Stapleton at https://www.nhl.com/player/8472379
Collecting Tim Stapleton's stats from https://www.nhl.com/player/8472379
Scraping 'All Leagues' regular season stats for Tim Stapleton
Successfully located dropdown button
Successfully c

In [40]:
# Official-stats batch covering positions 2900-2999 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[2900:3000]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Oscar Moller at https://www.nhl.com/player/8474114
Collecting Oscar Moller's stats from https://www.nhl.com/player/8474114
Scraping 'All Leagues' regular season stats for Oscar Moller
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Oscar Moller
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Oscar Moller
Failed rate: 0.00
Sleep for 0.75 minutes to prevent getting blocked

 [2] Collecting stats for Matt Greene at https://www.nhl.com/player/8470121
Collecting Matt Greene's stats from https://www.nhl.com/player/8470121
Scraping 'All Leagues' regular season stats for Matt Greene
Successfully located dropdown button
Successfully clicked drop

In [41]:
# Official-stats batch covering positions 3000-3099 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[3000:3100]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Steven Stamkos at https://www.nhl.com/player/8474564
Collecting Steven Stamkos's stats from https://www.nhl.com/player/8474564
Scraping 'All Leagues' regular season stats for Steven Stamkos
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Steven Stamkos
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Steven Stamkos
Failed rate: 0.00
Sleep for 1.12 minutes to prevent getting blocked

 [2] Collecting stats for Ryan Malone at https://www.nhl.com/player/8467988
Collecting Ryan Malone's stats from https://www.nhl.com/player/8467988
Scraping 'All Leagues' regular season stats for Ryan Malone
Successfully located dropdown button
Successfully cl

In [42]:
# Official-stats batch covering positions 3100-3199 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[3100:3200]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Frazer McLaren at https://www.nhl.com/player/8473712
Collecting Frazer McLaren's stats from https://www.nhl.com/player/8473712
Scraping 'All Leagues' regular season stats for Frazer McLaren
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Frazer McLaren
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Frazer McLaren
Failed rate: 0.00
Sleep for 0.61 minutes to prevent getting blocked

 [2] Collecting stats for Morgan Rielly at https://www.nhl.com/player/8476853
Collecting Morgan Rielly's stats from https://www.nhl.com/player/8476853
Scraping 'All Leagues' regular season stats for Morgan Rielly
Successfully located dropdown button
Successfu

In [43]:
# Official-stats batch covering positions 3200-3299 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[3200:3300]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Darnell Nurse at https://www.nhl.com/player/8477498
Collecting Darnell Nurse's stats from https://www.nhl.com/player/8477498
Scraping 'All Leagues' regular season stats for Darnell Nurse
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Darnell Nurse
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Darnell Nurse
Failed rate: 0.00
Sleep for 1.56 minutes to prevent getting blocked

 [2] Collecting stats for Connor McDavid at https://www.nhl.com/player/8478402
Collecting Connor McDavid's stats from https://www.nhl.com/player/8478402
Scraping 'All Leagues' regular season stats for Connor McDavid
Successfully located dropdown button
Successfull

In [44]:
# Official-stats batch covering positions 3300-3399 (slice end is exclusive)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[3300:3400]
get_players_stats_by_batch(
    curr_players_metadata,
    driver,
    wait,
    official_stats_output_path,
)


 [1] Collecting stats for Anton Lundell at https://www.nhl.com/player/8482113
Collecting Anton Lundell's stats from https://www.nhl.com/player/8482113
Scraping 'All Leagues' regular season stats for Anton Lundell
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Anton Lundell
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Anton Lundell
Failed rate: 0.00
Sleep for 0.40 minutes to prevent getting blocked

 [2] Collecting stats for Chase Priskie at https://www.nhl.com/player/8479597
Collecting Chase Priskie's stats from https://www.nhl.com/player/8479597
Scraping 'All Leagues' regular season stats for Chase Priskie
Successfully located dropdown button
Successfully c

In [45]:
# Batch 3400-3500: scrape official stats for rows 3400 up to (not including) 3500
batch_rows = slice(3400, 3500)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[batch_rows]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for Andreas Thuresson at https://www.nhl.com/player/8474154
Collecting Andreas Thuresson's stats from https://www.nhl.com/player/8474154
Scraping 'All Leagues' regular season stats for Andreas Thuresson
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Andreas Thuresson
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Andreas Thuresson
Failed rate: 0.00
Sleep for 0.87 minutes to prevent getting blocked

 [2] Collecting stats for Peter Olvecky at https://www.nhl.com/player/8471290
Collecting Peter Olvecky's stats from https://www.nhl.com/player/8471290
Scraping 'All Leagues' regular season stats for Peter Olvecky
Successfully located dropdown b

In [46]:
# Batch 3500-3600: scrape official stats for rows 3500 up to (not including) 3600
batch_rows = slice(3500, 3600)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[batch_rows]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for Brennan Othmann at https://www.nhl.com/player/8482747
Collecting Brennan Othmann's stats from https://www.nhl.com/player/8482747
Scraping 'All Leagues' regular season stats for Brennan Othmann
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Brennan Othmann
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Brennan Othmann
Failed rate: 0.00
Sleep for 1.81 minutes to prevent getting blocked

 [2] Collecting stats for Brett Berard at https://www.nhl.com/player/8482132
Collecting Brett Berard's stats from https://www.nhl.com/player/8482132
Scraping 'All Leagues' regular season stats for Brett Berard
Successfully located dropdown button
Success

In [47]:
# Batch 3600-3700: scrape official stats for rows 3600 up to (not including) 3700
batch_rows = slice(3600, 3700)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[batch_rows]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for Jacob Bryson at https://www.nhl.com/player/8480196
Collecting Jacob Bryson's stats from https://www.nhl.com/player/8480196
Scraping 'All Leagues' regular season stats for Jacob Bryson
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Jacob Bryson
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Jacob Bryson
Failed rate: 0.00
Sleep for 1.99 minutes to prevent getting blocked

 [2] Collecting stats for Arttu Ruotsalainen at https://www.nhl.com/player/8481626
Collecting Arttu Ruotsalainen's stats from https://www.nhl.com/player/8481626
Scraping 'All Leagues' regular season stats for Arttu Ruotsalainen
Successfully located dropdown button
Succ

In [48]:
# Batch 3700-3800: scrape official stats for rows 3700 up to (not including) 3800
batch_rows = slice(3700, 3800)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[batch_rows]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for John McCarthy at https://www.nhl.com/player/8473562
Collecting John McCarthy's stats from https://www.nhl.com/player/8473562
Scraping 'All Leagues' regular season stats for John McCarthy
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for John McCarthy
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for John McCarthy
Failed rate: 0.00
Sleep for 0.83 minutes to prevent getting blocked

 [2] Collecting stats for Mike Moore at https://www.nhl.com/player/8474531
Collecting Mike Moore's stats from https://www.nhl.com/player/8474531
Scraping 'All Leagues' regular season stats for Mike Moore
Successfully located dropdown button
Successfully clicked dr

In [49]:
# Final batch: scrape official stats for every remaining row from 3800 to the end
batch_rows = slice(3800, None)
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final[batch_rows]
get_players_stats_by_batch(curr_players_metadata, driver, wait, official_stats_output_path)


 [1] Collecting stats for Barrett Hayton at https://www.nhl.com/player/8480849
Collecting Barrett Hayton's stats from https://www.nhl.com/player/8480849
Scraping 'All Leagues' regular season stats for Barrett Hayton
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Barrett Hayton
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats
Successfully scraped stats for Barrett Hayton
Failed rate: 0.00
Sleep for 0.24 minutes to prevent getting blocked

 [2] Collecting stats for Jack McBain at https://www.nhl.com/player/8480855
Collecting Jack McBain's stats from https://www.nhl.com/player/8480855
Scraping 'All Leagues' regular season stats for Jack McBain
Successfully located dropdown button
Successfully cl

### Fetch Missing Players' Stats

In [59]:
# Load the merged skater metadata that the missing-player check compares against
skaters_metadata_path = './data/nhl/nhl_skaters_metadata_official_ep_merge_complete_final.csv'
nhl_skaters_metadata_official_ep_merge_complete_final = pd.read_csv(skaters_metadata_path)

In [60]:
# Collect the names of players that already have rows in the stats file.
# Start from an empty set so a missing file simply means "nothing scraped yet".
unique_players = set()
if os.path.exists(official_stats_output_path):
    # low_memory=False: parse the file in one pass instead of in chunks
    nhl_players_official_stats = pd.read_csv(official_stats_output_path, low_memory=False)
    scraped_names = nhl_players_official_stats['player_name'].unique()
    unique_players = set(scraped_names)

In [61]:
# Keep only the metadata rows whose player_name is not yet in unique_players.
# NOTE(review): matching is by name only, so two distinct players sharing a
# name would be treated as one -- confirm this cannot happen in the metadata.
subset = nhl_skaters_metadata_official_ep_merge_complete_final[0:]
already_scraped = subset['player_name'].isin(unique_players)
players_to_scrape = subset[~already_scraped].reset_index(drop=True)
players_to_scrape

Unnamed: 0,player_name,player_pos,player_link_official,player_link_ep,player_image


In [62]:
# Fetch Missing Players' Stats
# Runs the same batch scraper used for the numbered batches, but only on the
# rows in players_to_scrape (metadata players whose name is absent from the
# stats file).
get_players_stats_by_batch(players_to_scrape, driver, wait, official_stats_output_path)

In [63]:
# Calculate the unique players in official_stats_output_path.
# Re-read the file first: the in-memory nhl_players_official_stats was loaded
# before the missing players were fetched, so counting from it would ignore
# anything the fetch step presumably wrote to the file (and the name would not
# exist at all if the file was missing when it was first checked).
nhl_players_official_stats = pd.read_csv(official_stats_output_path, low_memory=False)
unique_players = set(nhl_players_official_stats['player_name'].unique())
len(unique_players)

3928

### Close the Driver

In [64]:
# Quit the driver that was passed to the scraping calls above; it is not
# used again after this point in the notebook.
driver.quit()

## Check if we have already gathered every player's official stats
- Player Source: ./data/nhl/final/nhl_players_metadata_facts_merged_final.csv
- Stats Output: ./data/nhl/official/stats/nhl_players_official_stats.csv

In [10]:
# Load the player source table (one row per player, including player_id)
metadata_facts_path = './data/nhl/final/nhl_players_metadata_facts_merged_final.csv'
nhl_players_metadata_facts = pd.read_csv(metadata_facts_path)

In [11]:
# Display the loaded player source table as the cell output
nhl_players_metadata_facts

Unnamed: 0,player_id,player_name_official,player_name_ep,player_pos_official,player_pos_ep,player_link_official,player_link_ep,player_image_official,date_of_birth,nation,height_cm,weight_kg,shoots,player_type,nhl_rights,draft,highlights,description
0,1,Joe Sakic,Joe Sakic,C,C,https://www.nhl.com/player/8451101,https://www.eliteprospects.com/player/8862/joe...,https://assets.nhle.com/mugs/nhl/latest/845110...,7/7/1969,Canada,180,88,L,,,"('1', '15', '1987')","['1-time U20 WJC Gold Medal', '1-time World Ch...",Brother: Brian Sakic\r\nSons: Chase Sakic & Mi...
1,2,Peter Forsberg,Peter Forsberg,C,C,https://www.nhl.com/player/8458520,https://www.eliteprospects.com/player/710/pete...,https://assets.nhle.com/mugs/nhl/latest/845852...,7/20/1973,Sweden,185,93,L,"['Cerebral Tactician', 'Playmaker', 'Two-Way C...",,"('1', '6', '1991')","['1-time TV-Pucken Gold Medal', '1-time J18 SM...",Arguably the greatest Swedish forward to ever ...
2,3,Milan Hejduk,Milan Hejduk,RW,RW,https://www.nhl.com/player/8460577,https://www.eliteprospects.com/player/8603/mil...,https://assets.nhle.com/mugs/nhl/latest/846057...,2/14/1976,Czechia / USA,183,87,R,['Sniper'],,"('4', '87', '1994')","['1-time Olympic Gold Medal', '1-time World Ch...",Father: Milan Hejduk\r\nCousin: Tomas Hejduk\r...
3,4,Alex Tanguay,Alex Tanguay,LW,LW,https://www.nhl.com/player/8467338,https://www.eliteprospects.com/player/8779/ale...,https://assets.nhle.com/mugs/nhl/latest/846733...,11/21/1979,Canada,185,88,L,['Playmaker'],,"('1', '12', '1998')","['1-time CHL All-Rookie Team', '1-time NHL Sta...",Brother: Maxime Tanguay\r\nCousin: Tyler Tangu...
4,5,Chris Drury,Chris Drury,C,C,https://www.nhl.com/player/8460562,https://www.eliteprospects.com/player/8804/chr...,https://assets.nhle.com/mugs/nhl/latest/846056...,8/20/1976,USA,178,86,R,,,"('3', '72', '1994')","['2-time Olympic Silver Medal', '1-time World ...",Son: Luke Drury\r\nBrothers: Ted Drury & Jim D...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3923,3924,Alexander Nikishin,Alexander Nikishin,D,D,https://www.nhl.com/hurricanes/player/alexande...,https://www.eliteprospects.com/player/514652/a...,https://assets.nhle.com/mugs/nhl/latest/alexan...,10/2/2001,Russia,192,98,L,,Carolina Hurricanes / Signed,"('3', '69', '2020')","['1-time Olympic Silver Medal', '1-time KHL Mo...",His size is a real advantage at the MHL level....
3924,3925,Daniil Misyul,Daniil Misyul,D,D,https://www.nhl.com/player/daniil-misyul-8481536,https://www.eliteprospects.com/player/395127/d...,https://assets.nhle.com/mugs/nhl/latest/daniil...,10/20/2000,Russia / Belarus,191,89,L,,,"('3', '70', '2019')","['1-time U20 WJC Silver Medal', '1-time U17 WH...",Brother: Oleg Misyul
3925,3926,Nate Clurman,Nate Clurman,D,D,https://www.nhl.com/canadiens/player/nathan-cl...,https://www.eliteprospects.com/player/353810/n...,https://assets.nhle.com/mugs/nhl/latest/nathan...,5/8/1998,USA,188,93,R,['Defensive Defenseman'],Montréal Canadiens / Signed,"('6', '161', '2016')",['1-time NCAA (B1G) Champion'],
3926,3927,Jacob Gaucher,Jacob Gaucher,C,C,https://www.nhl.com/flyers/player/jacob-gauche...,https://www.eliteprospects.com/player/290162/j...,https://assets.nhle.com/mugs/nhl/latest/jacob-...,3/9/2001,Canada,191,84,R,,Philadelphia Flyers / Signed,,[],Father: Yannick Gaucher Brother: Nathan Gaucher


In [28]:
# Load the scraped stats table; low_memory=False parses the file in one pass
official_stats_csv_path = './data/nhl/official/stats/nhl_players_official_stats.csv'
nhl_players_official_stats = pd.read_csv(official_stats_csv_path, low_memory=False)

In [29]:
# Display the stats table as loaded, before player_id is filled in
nhl_players_official_stats

Unnamed: 0,player_id,player_name_official,season,league,team,gp_regular,g_regular,a_regular,p_regular,plus_minus_regular,...,ppg_playoffs,ppp_playoffs,shg_playoffs,shp_playoffs,toi_per_game_playoffs,gwg_playoffs,otg_playoffs,sog_playoffs,shooting_pct_playoffs,fo_pct_playoffs
0,,Joe Sakic,1985-86,Minor-BC,Burnaby,80.0,83.0,73.0,156.0,,...,,,,,,,,,,
1,,Joe Sakic,1985-86,WHL,Lethbridge,3.0,0.0,0.0,0.0,,...,,,,,,,,,,
2,,Joe Sakic,1986-87,WHL,Swift Current,72.0,60.0,73.0,133.0,,...,,,,,,,,,,
3,,Joe Sakic,1986-87,Nat-Tm,Canada,1.0,0.0,0.0,0.0,,...,,,,,,,,,,
4,,Joe Sakic,1987-88,WHL,Swift Current,64.0,78.0,82.0,160.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106411,,Cole Guttman,2022-23,NHL,Chicago Blackhawks,14.0,4.0,2.0,6.0,1.0,...,,,,,,,,,,
106412,,Cole Guttman,2022-23,AHL,Rockford IceHogs,39.0,16.0,14.0,30.0,6.0,...,,,,,,,,,,
106413,,Cole Guttman,2023-24,NHL,Chicago Blackhawks,27.0,4.0,4.0,8.0,-17.0,...,,,,,,,,,,
106414,,Cole Guttman,2023-24,AHL,Rockford IceHogs,39.0,16.0,24.0,40.0,20.0,...,,,,,,,,,,


In [30]:
# Fill in player_id
# Build a player_name_official -> player_id lookup from the metadata.
# Rows without a name cannot be matched, and when a name occurs more than once
# the first occurrence wins (the same outcome the per-player loop produced).
# NOTE(review): ids are assigned by name only, so distinct players who share a
# name would receive the same player_id -- confirm names are unique.
id_by_name = (
    nhl_players_metadata_facts
    .dropna(subset=['player_name_official'])
    .drop_duplicates(subset='player_name_official', keep='first')
    .set_index('player_name_official')['player_id']
)

# Look up every stats row's id in one vectorised pass. Names that are absent
# from the metadata map to NaN, and metadata players with no stats rows are
# simply never looked up (previously that case raised IndexError).
mapped_ids = nhl_players_official_stats['player_name_official'].map(id_by_name)

# Only rows whose player_id is still NaN are filled; existing ids are kept.
nhl_players_official_stats['player_id'] = nhl_players_official_stats['player_id'].fillna(mapped_ids)

In [31]:
# Display the stats table again to inspect the filled-in player_id column
nhl_players_official_stats

Unnamed: 0,player_id,player_name_official,season,league,team,gp_regular,g_regular,a_regular,p_regular,plus_minus_regular,...,ppg_playoffs,ppp_playoffs,shg_playoffs,shp_playoffs,toi_per_game_playoffs,gwg_playoffs,otg_playoffs,sog_playoffs,shooting_pct_playoffs,fo_pct_playoffs
0,1.0,Joe Sakic,1985-86,Minor-BC,Burnaby,80.0,83.0,73.0,156.0,,...,,,,,,,,,,
1,1.0,Joe Sakic,1985-86,WHL,Lethbridge,3.0,0.0,0.0,0.0,,...,,,,,,,,,,
2,1.0,Joe Sakic,1986-87,WHL,Swift Current,72.0,60.0,73.0,133.0,,...,,,,,,,,,,
3,1.0,Joe Sakic,1986-87,Nat-Tm,Canada,1.0,0.0,0.0,0.0,,...,,,,,,,,,,
4,1.0,Joe Sakic,1987-88,WHL,Swift Current,64.0,78.0,82.0,160.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106411,597.0,Cole Guttman,2022-23,NHL,Chicago Blackhawks,14.0,4.0,2.0,6.0,1.0,...,,,,,,,,,,
106412,597.0,Cole Guttman,2022-23,AHL,Rockford IceHogs,39.0,16.0,14.0,30.0,6.0,...,,,,,,,,,,
106413,597.0,Cole Guttman,2023-24,NHL,Chicago Blackhawks,27.0,4.0,4.0,8.0,-17.0,...,,,,,,,,,,
106414,597.0,Cole Guttman,2023-24,AHL,Rockford IceHogs,39.0,16.0,24.0,40.0,20.0,...,,,,,,,,,,


In [32]:
# Number of stats rows for each (player_id, player_name_official) pair;
# the resulting row count equals the number of distinct pairs.
(
    nhl_players_official_stats
    .groupby(['player_id', 'player_name_official'])
    .size()
    .reset_index(name='counts')
)

Unnamed: 0,player_id,player_name_official,counts
0,1.0,Joe Sakic,35
1,2.0,Peter Forsberg,41
2,3.0,Milan Hejduk,30
3,4.0,Alex Tanguay,25
4,5.0,Chris Drury,28
...,...,...,...
3923,3924.0,Alexander Nikishin,15
3924,3925.0,Daniil Misyul,22
3925,3926.0,Nate Clurman,22
3926,3927.0,Jacob Gaucher,19


In [33]:
# Find the missing players by player_id.
# The expected ids come from the metadata table rather than a hard-coded
# range, so the check stays correct when the number of players changes.
expected_ids = set(nhl_players_metadata_facts['player_id'].dropna().astype(int))
# player_id is stored as float in the stats table (it held NaN), so normalise
# it to int before comparing.
present_ids = set(nhl_players_official_stats['player_id'].dropna().astype(int))

# Print every metadata id that has no stats rows; no output means full coverage.
for missing_id in sorted(expected_ids - present_ids):
    print(missing_id)

## Write to CSV

In [34]:
# Persist the stats table with player_id filled in, without the index column.
# The utf-8-sig encoding prefixes the file with a byte-order mark.
nhl_players_official_stats.to_csv(
    './data/nhl/final/nhl_players_official_stats_with_id.csv',
    encoding='utf-8-sig',
    index=False,
)


### Sort nhl_players_official_stats by player_id and season

In [35]:
# Order rows by player_id, then by season (both ascending), and renumber the
# index from 0 so it follows the new row order.
sort_keys = ['player_id', 'season']
nhl_players_official_stats = (
    nhl_players_official_stats
    .sort_values(by=sort_keys, ascending=True)
    .reset_index(drop=True)
)

### Write to CSV

In [36]:
# Persist the sorted stats table, without the index column.
# The utf-8-sig encoding prefixes the file with a byte-order mark.
nhl_players_official_stats.to_csv(
    './data/nhl/final/nhl_players_official_stats_with_id_sorted.csv',
    encoding='utf-8-sig',
    index=False,
)