# NHL Official Data Preparation
## Collecting NHL Players' Stats from Official NHL Website
1. Collect Metadata(player_name, player_link) from NHL Official Website by Season and Team
2. Collect Stats from Each Player's Page

### Import Libraries

In [1]:
import nhl_scraper_api as nhl_scraper
import pandas as pd
import os
import time
import random
import glob

### Define Valid Teams and Valid Seasons

In [None]:
# Team identifiers to scrape (32 entries, order preserved)
valid_teams = (
    "bruins sabres redwings panthers canadiens "
    "senators lightning mapleleafs hurricanes bluejackets "
    "devils islanders rangers flyers penguins "
    "capitals blackhawks avalanche stars wild "
    "predators blues jets ducks flames "
    "oilers kings sharks kraken canucks "
    "goldenknights utah"
).split()

# Every season from 2000-2001 through 2024-2025, formatted as YYYY-YYYY
valid_seasons = [f'{start_year}-{start_year + 1}' for start_year in range(2000, 2025)]

### Validate Team Links' Format

In [None]:
# Show the team links for the 2024-2025 season so their format can be checked by eye
# (presumably one link per team — confirm against nhl_scraper_api)
nhl_scraper.print_team_links('2024-2025')

### Collect Metadata(player_name, player_link) from NHL Official Website by Season and Team

#### Test API By Collecting 2024-2025 Avalanche Players' Metadata

In [None]:
# Smoke-test the scraper on a single team and season (Avalanche, 2024-2025)
avalanche_2425_metadata = nhl_scraper.get_player_by_team("avalanche", "2024-2025")

In [None]:
avalanche_2425_metadata

#### Collect All Teams' Metadata for All Seasons

In [None]:
def get_all_teams_metadata(curr_team, driver, wait):
    """Scrape and save one team's player metadata for every season in ``valid_seasons``.

    One CSV per season is written to
    ``./data/nhl/official/teams/<team>/<team>_<season>.csv``. A season whose
    CSV already exists is skipped without any request, so the function can be
    re-run to resume an interrupted collection.

    Args:
        curr_team: Team identifier, passed to the scraper and used in paths.
        driver: Reusable browser driver handed through to ``nhl_scraper``.
        wait: Wait helper for ``driver``, handed through to ``nhl_scraper``.

    Returns:
        None. Progress is printed and results are written to disk.
    """
    # Print a divider before each team, then the team name
    print("================================================================")
    print(f"Collecting metadata for {curr_team}")

    # The output folder depends only on the team, so create it once
    output_dir = f'./data/nhl/official/teams/{curr_team}'
    os.makedirs(output_dir, exist_ok=True)

    for i, season in enumerate(valid_seasons):
        curr_team_output_path = f'{output_dir}/{curr_team}_{season}.csv'

        # Skip scraping if the file exists
        if os.path.exists(curr_team_output_path):
            print(f'{curr_team_output_path} already exists. Skipping scraping.')
            continue

        # Print a lighter divider on every fifth season index
        if i % 5 == 0:
            print("----------------------------------------------------------------")

        curr_team_metadata = nhl_scraper.get_player_by_team_with_reusable_driver(curr_team, season, driver, wait)
        if curr_team_metadata is None:
            print(f'Failed to scrape {curr_team} for {season}')
        else:
            # Write to CSV
            curr_team_metadata.to_csv(curr_team_output_path, index=False, encoding='utf-8-sig')
            print(f'Finished scraping {curr_team} for {season}')

        # Pause after every request, successful or not: a failure may itself
        # be a sign of throttling, so the next request should not follow
        # immediately (get_players_stats_by_batch backs off the same way).
        sleep_time = random.uniform(10, 30)
        print(f"Sleep for {sleep_time / 60:.2f} minutes to prevent getting blocked\n")
        time.sleep(sleep_time)

##### Collecting All Teams' Metadata

In [None]:
from selenium.webdriver.support.wait import WebDriverWait
import undetected_chromedriver as uc

# Create one Chrome driver (and its wait helper) to be shared by all metadata requests
chrome_options = uc.ChromeOptions()
for chrome_flag in ("--headless=new", "--no-sandbox", "--disable-dev-shm-usage"):
    chrome_options.add_argument(chrome_flag)

driver = uc.Chrome(options=chrome_options, version_main=138)
wait = WebDriverWait(driver, 15)

In [None]:
# Scrape every team. The browser is always released, even if a scrape raises
# or the run is interrupted, so no headless Chrome process is left behind.
try:
    for team in valid_teams:
        get_all_teams_metadata(team, driver, wait)
finally:
    driver.quit()

#### Get unique players' metadata from all teams and seasons

In [None]:
# Build one table of unique players from every per-team, per-season CSV
# under ./data/nhl/official/teams/.
# Sorting the paths makes the "first occurrence wins" de-duplication
# reproducible; glob itself returns files in arbitrary order.
players_files = sorted(glob.glob('./data/nhl/official/teams/**/*.csv', recursive=True))

team_frames = []
for player_file in players_files:
    team_frames.append(pd.read_csv(player_file))
    print(f'Finished merging {player_file}')

if team_frames:
    # Concatenate once and de-duplicate once, instead of re-copying the
    # accumulated frame for every file.
    # NOTE(review): de-duplicating on player_name collapses distinct players
    # who share a name — confirm this is acceptable.
    nhl_players_metadata = (
        pd.concat(team_frames, ignore_index=True)
        .drop_duplicates(subset=['player_name'])
        .reset_index(drop=True)
    )
else:
    # No per-team files found: keep the empty table, as before
    nhl_players_metadata = pd.DataFrame()

# Write to CSV
nhl_players_metadata.to_csv('./data/nhl/official/nhl_players_metadata.csv', index=False, encoding='utf-8-sig')

In [None]:
# Read in nhl_players_metadata.csv
nhl_players_metadata_official = pd.read_csv('./data/nhl/official/nhl_players_metadata.csv')

In [None]:
len(nhl_players_metadata_official)

##### Exclude Goalies from the nhl_players_metadata_official

In [None]:
# Keep skaters only: select every row whose position is not 'G' (goalie)
nhl_skaters_metadata_official = nhl_players_metadata_official.loc[
    nhl_players_metadata_official['player_pos'].ne('G')
]

# Persist the skater-only table
nhl_skaters_metadata_official.to_csv(
    './data/nhl/official/nhl_skaters_metadata_official.csv',
    index=False,
    encoding='utf-8-sig',
)

In [None]:
len(nhl_skaters_metadata_official)

#### Remove Accent Characters from EP Metadata

In [None]:
from unidecode import unidecode

# Load the EliteProspects (EP) metadata
nhl_players_metadata_ep = pd.read_csv('./data/nhl/nhl_players_metadata.csv')

# Transliterate accented characters in every text column. Non-string cells
# (e.g. NaN for missing values) are left untouched, because unidecode only
# accepts strings.
for col in nhl_players_metadata_ep.columns:
    if nhl_players_metadata_ep[col].dtype == 'object':
        nhl_players_metadata_ep[col] = nhl_players_metadata_ep[col].apply(
            lambda value: unidecode(value) if isinstance(value, str) else value
        )

# The accent-cleaned table is the one that is saved and used by the merge
# with the official metadata; previously the uncleaned copy was written out.
nhl_skaters_metadata_ep = nhl_players_metadata_ep.copy()

# Save the cleaned CSV
nhl_skaters_metadata_ep.to_csv('./data/nhl/nhl_skaters_metadata_accent_cleaned.csv', index=False, encoding='utf-8-sig')

#### Merge EP Metadata with Official Metadata and Skip the Missing Players

In [None]:
# Left-join the EP metadata onto the official skaters by player_name (every
# official skater is kept, matched or not), give the two link columns
# source-specific names, and keep only the columns needed downstream.
nhl_skaters_metadata_official_ep_merge = (
    nhl_skaters_metadata_official
    .merge(nhl_skaters_metadata_ep, on='player_name', how='left')
    .rename(columns={'player_link': 'player_link_official', 'link': 'player_link_ep'})
    [['player_name', 'player_pos', 'player_link_official', 'player_link_ep', 'player_image']]
)


In [None]:
# Write to CSV
nhl_skaters_metadata_official_ep_merge.to_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge.csv', index=False, encoding='utf-8-sig')

#### Find all players without player_link_ep


In [None]:
# Rows of the merged table with no EP link, i.e. official skaters left unmatched by the merge
players_missing_ep_link = nhl_skaters_metadata_official_ep_merge.loc[
    nhl_skaters_metadata_official_ep_merge['player_link_ep'].isna()
]

In [None]:
players_missing_ep_link

In [None]:
# Write to CSV
players_missing_ep_link.to_csv('./data/nhl/missing_players_in_official_after_merged.csv', index=False, encoding='utf-8-sig')

#### Manually Add the Missing Players' EP Links
- Later on, we can use the EP Links to match the players' stats from EP to the official data

In [None]:
# Read in manually added missing players' EP links
nhl_skaters_metadata_official_ep_merge_complete = pd.read_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge_complete.csv')

In [None]:
# Re-check the manually completed table: which rows still have no EP link?
players_missing_ep_link = nhl_skaters_metadata_official_ep_merge_complete.loc[
    nhl_skaters_metadata_official_ep_merge_complete['player_link_ep'].isna()
]

players_missing_ep_link

##### Find players that are in nhl_skaters_metadata_official_ep_merge_complete, but not in nhl_players_metadata, by player_link_ep

In [None]:
# Compare the EP metadata against the manually completed merge, matching on the EP link
# Load EliteProspects (EP) metadata
nhl_players_metadata_ep = pd.read_csv('./data/nhl/nhl_players_metadata.csv')
nhl_skaters_metadata_official_ep_merge_complete = pd.read_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge_complete_2.csv')

# Keep the EP rows whose 'link' does not appear as player_link_ep in the completed merge,
# i.e. EP players that are absent from the merged table.
# NOTE(review): the previous comment described the opposite direction (merged players
# missing from the EP metadata) — confirm which comparison is intended.
players_missing_after_manual_merge = nhl_players_metadata_ep[~nhl_players_metadata_ep['link'].isin(nhl_skaters_metadata_official_ep_merge_complete['player_link_ep'])]

In [None]:
print(f"nhl_players_metadata_ep: {len(nhl_players_metadata_ep)}")
print(f"nhl_skaters_metadata_official_ep_merge_complete: {len(nhl_skaters_metadata_official_ep_merge_complete)}")

In [None]:
players_missing_after_manual_merge

In [None]:
# Keep only the first row for each player_link_ep in the completed merge and
# renumber the index. Rows with a missing link count as duplicates of one
# another, so at most one of them survives.
nhl_skaters_metadata_official_ep_merge_complete = (
    nhl_skaters_metadata_official_ep_merge_complete
    .drop_duplicates(subset=['player_link_ep'], keep='first')
    .reset_index(drop=True)
)

In [None]:
len(nhl_skaters_metadata_official_ep_merge_complete)

In [None]:
# Write to CSV
nhl_skaters_metadata_official_ep_merge_complete.to_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge_complete_final.csv', index=False, encoding='utf-8-sig')

##### Update player_image URL to the latest
- original: https://assets.nhle.com/mugs/nhl/{season}/{team}/{player_id}.png
- latest: https://assets.nhle.com/mugs/nhl/latest/{player_id}

In [None]:
# Reload the final merged metadata from disk
nhl_skaters_metadata_official_ep_merge_complete_final = pd.read_csv(
    './data/nhl/nhl_skaters_metadata_official_ep_merge_complete_final.csv'
)

# Rebuild every image URL from the last path segment of the official player link
nhl_skaters_metadata_official_ep_merge_complete_final['player_image'] = [
    f"https://assets.nhle.com/mugs/nhl/latest/{official_link.split('/')[-1]}.png"
    for official_link in nhl_skaters_metadata_official_ep_merge_complete_final['player_link_official']
]

In [None]:
nhl_skaters_metadata_official_ep_merge_complete_final.head(10)

In [None]:
# Write to CSV to update image
nhl_skaters_metadata_official_ep_merge_complete_final.to_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge_complete_final.csv', index=False, encoding='utf-8-sig')

#### Collect Stats from Each Player's Page

##### Initiate the Chrome Driver

In [2]:
from selenium.webdriver.support.wait import WebDriverWait
import undetected_chromedriver as uc

# Create one Chrome driver (and its wait helper) to be shared by the test cells below
chrome_options = uc.ChromeOptions()
for chrome_flag in ("--headless=new", "--no-sandbox", "--disable-dev-shm-usage"):
    chrome_options.add_argument(chrome_flag)

driver = uc.Chrome(options=chrome_options, version_main=138)
wait = WebDriverWait(driver, 15)

##### Test get_player_stats API

In [3]:
# Load CSV File
nhl_skaters_metadata_official_ep_merge_complete_final = pd.read_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge_complete_final.csv')

In [None]:
# Get Cale Makar's Metadata which is at index 233
cale_makar_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[233]
cale_makar_metadata

In [None]:
# Passed in to the get stats function
cale_makar_stats = nhl_scraper.get_player_stats_with_reusable_driver(cale_makar_metadata, driver, wait)

In [None]:
cale_makar_stats

##### Test Player without Playoffs Data in All Leagues Tab but with Data in NHL Tab: Rob Blake

In [4]:
# Get Rob Blake's metadata, which is at positional index 17
rob_blake_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[17]
rob_blake_metadata

player_name                                                     Rob Blake
player_pos                                                              D
player_link_official                   https://www.nhl.com/player/8445550
player_link_ep          https://www.eliteprospects.com/player/8580/rob...
player_image            https://assets.nhle.com/mugs/nhl/latest/844555...
Name: 17, dtype: object

In [5]:
# Passed in to the get stats function
rob_blake_stats = nhl_scraper.get_player_stats_with_reusable_driver(rob_blake_metadata, driver, wait)

Collecting Rob Blake's stats from https://www.nhl.com/player/8445550
Scraping 'All Leagues' regular season stats for Rob Blake
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Rob Blake
Successfully located game-type dropdown button
Successfully clicked playoffs dropdown button
Failed to scrape playoff stats for Rob Blake in 'All Leagues' Tab
Scraping 'playoff stats' for Rob Blake in 'NHL' Tab
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located NHL option
Successfully clicked NHL option
Successfully located game-type dropdown button
Successfully clicked playoffs dropdown button
Successfully selected 'Playoffs' option
Successfully scraped playoff stats in 'NHL' Tab


In [6]:
rob_blake_stats

Unnamed: 0,player_name,season,league,team,gp_regular,g_regular,a_regular,p_regular,plus_minus_regular,pim_regular,...,ppg_playoffs,ppp_playoffs,shg_playoffs,shp_playoffs,toi_per_game_playoffs,gwg_playoffs,otg_playoffs,sog_playoffs,shooting_pct_playoffs,fo_pct_playoffs
0,Rob Blake,1985-86,OHA-B,Brantford,39,3,13,16,,43,...,,,,,,,,,,
1,Rob Blake,1986-87,OHA-B,Stratford,31,11,20,31,,115,...,,,,,,,,,,
2,Rob Blake,1987-88,CCHA,Bowling Green,43,5,8,13,,88,...,,,,,,,,,,
3,Rob Blake,1988-89,CCHA,Bowling Green,46,11,21,32,,140,...,,,,,,,,,,
4,Rob Blake,1989-90,NHL,Los Angeles Kings,4,0,0,0,0.0,4,...,1.0,1.0,0.0,0.0,,0.0,0.0,11.0,9.1,
5,Rob Blake,1989-90,CCHA,Bowling Green,42,23,36,59,,140,...,,,,,,,,,,
6,Rob Blake,1990-91,NHL,Los Angeles Kings,75,12,34,46,3.0,125,...,1.0,3.0,0.0,0.0,,0.0,0.0,19.0,5.3,
7,Rob Blake,1990-91,WEC-A,Canada,2,0,2,2,,0,...,,,,,,,,,,
8,Rob Blake,1991-92,NHL,Los Angeles Kings,57,7,13,20,-5.0,102,...,0.0,1.0,0.0,0.0,,0.0,0.0,12.0,16.7,
9,Rob Blake,1992-93,NHL,Los Angeles Kings,76,16,43,59,18.0,152,...,1.0,3.0,1.0,1.0,,0.0,0.0,60.0,6.7,


##### Test Player without Playoffs Data in Both All Leagues and NHL Tabs: Landon Slaggert

In [4]:
# Get Landon Slaggert's metadata, which is at positional index 613
landon_slaggert_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[613]

In [5]:
landon_slaggert_metadata

player_name                                               Landon Slaggert
player_pos                                                             LW
player_link_official                   https://www.nhl.com/player/8482172
player_link_ep          https://www.eliteprospects.com/player/418245/l...
player_image            https://assets.nhle.com/mugs/nhl/latest/848217...
Name: 613, dtype: object

In [6]:
# Passed in to the get stats function
landon_slaggert_stats = nhl_scraper.get_player_stats_with_reusable_driver(landon_slaggert_metadata, driver, wait)

Collecting Landon Slaggert's stats from https://www.nhl.com/player/8482172
Scraping 'All Leagues' regular season stats for Landon Slaggert
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located all leagues option
Successfully clicked all leagues option
Successfully scraped regular season stats
Scraping 'playoff stats' for Landon Slaggert
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Failed to scrape playoff stats for Landon Slaggert in 'All Leagues' Tab
Scraping 'playoff stats' for Landon Slaggert in 'NHL' Tab
Successfully located dropdown button
Successfully clicked dropdown button
Successfully located NHL option
Successfully clicked NHL option
Successfully located game-type dropdown button
Successfully clicked game-type dropdown button
Failed to scrape playoff stats for Landon Slaggert in 'NHL' Tab
Failed to scrape playoff stats for Landon Slaggert in both 'All Leagues' and 'NHL' Tabs
No playoff s

In [7]:
landon_slaggert_stats

Unnamed: 0,Player,Season,League,Team,GP,G,A,P,+/-,PIM,PPG,PPP,SHG,SHP,TOI/G,GWG,OTG,S,S%,FO%
0,Landon Slaggert,2016-17,HPHL 14U,Chicago Mission 14U AAA,20,6,10,16,,10.0,,,,,,,,,,
1,Landon Slaggert,2016-17,WSI U15,STX Selects U15,7,4,4,8,,2.0,,,,,,,,,,
2,Landon Slaggert,2017-18,HPHL 16U,Chicago Mission 16U AAA,20,8,12,20,,0.0,,,,,,,,,,
3,Landon Slaggert,2017-18,USA-S15,Team Kelly,3,2,1,3,,,,,,,,,,,,
4,Landon Slaggert,2018-19,NTDP,U.S. National U18 Team,3,0,0,0,,2.0,,,,,,,,,,
5,Landon Slaggert,2018-19,NTDP,U.S. National U17 Team,48,10,14,24,,56.0,,,,,,,,,,
6,Landon Slaggert,2018-19,USHL,USNTDP Juniors,30,6,8,14,-10.0,16.0,,,,,,,,,,
7,Landon Slaggert,2018-19,WHC-17,USA U17,5,2,1,3,,14.0,,,,,,,,,,
8,Landon Slaggert,2019-20,NTDP,U.S. National U18 Team,47,13,11,24,,32.0,,,,,,,,,,
9,Landon Slaggert,2019-20,USHL,USNTDP Juniors,19,6,4,10,-3.0,14.0,,,,,,,,,,


In [8]:
# Shut down the browser session used by the test cells above
driver.quit()

### Collect Stats for All Players

In [None]:
# Load CSV File
nhl_skaters_metadata_official_ep_merge_complete_final = pd.read_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge_complete_final.csv')

#### Initiate Reusable Driver

In [None]:
from selenium.webdriver.support.wait import WebDriverWait
import undetected_chromedriver as uc

# Create one Chrome driver (and its wait helper) to be reused for every player
chrome_options = uc.ChromeOptions()
for chrome_flag in ("--headless=new", "--no-sandbox", "--disable-dev-shm-usage"):
    chrome_options.add_argument(chrome_flag)

driver = uc.Chrome(options=chrome_options, version_main=138)
wait = WebDriverWait(driver, 15)

#### Define Output File

In [None]:
# Single CSV that accumulates every player's scraped stats (batches append to it)
official_stats_output_path = './data/nhl/official/stats/nhl_players_official_stats.csv'

#### Define Function to Get Player Stats in Batches

In [None]:
def get_players_stats_by_batch(players_to_scrape, driver, wait, output_path):
    """Scrape stats for each player in a batch and append them to one CSV.

    Players are processed in row order. A failure for one player is printed
    and counted, and the batch continues with the next player. A random pause
    follows every player except the last one.

    Args:
        players_to_scrape: DataFrame with one row per player; the
            'player_name' and 'player_link_official' columns are read here,
            and the whole row is handed to the scraper.
        driver: Reusable browser driver handed through to ``nhl_scraper``.
        wait: Wait helper for ``driver``, handed through to ``nhl_scraper``.
        output_path: CSV file to write. It is created with a header row if it
            does not exist; otherwise rows are appended without a header.

    Returns:
        None. Progress is printed and results are written to ``output_path``.
    """
    curr_len = len(players_to_scrape)
    fail_count = 0

    # Make sure the destination folder exists; otherwise every write would
    # raise and be reported as a scraping failure.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for i in range(curr_len):
        player_metadata = players_to_scrape.iloc[i]
        player_name = player_metadata['player_name']
        player_url = player_metadata['player_link_official']
        print(f"\n [{i + 1}] Collecting stats for {player_name} at {player_url}")

        try:
            player_stats = nhl_scraper.get_player_stats_with_reusable_driver(player_metadata, driver, wait)

            # Append to the CSV; the header is written only when the file is new
            write_header = not os.path.exists(output_path)
            player_stats.to_csv(output_path, mode='a', header=write_header, index=False, encoding='utf-8-sig')
            print(f'Successfully scraped stats for {player_name}')

            # Print Fail Rate
            print(f'Failed rate: {fail_count / (i + 1):.2f}')

            # Add random sleep to prevent getting blocked
            if i < curr_len - 1:
                sleep_time = random.uniform(10, 120)
                print(f"Sleep for {sleep_time / 60:.2f} minutes to prevent getting blocked")
                time.sleep(sleep_time)
        except Exception as e:
            # Deliberately broad: one bad player must not abort the whole batch
            print(f"Failed to get stats for {player_name}: {e}")

            fail_count += 1

            if i < curr_len - 1:
                # Sleep for 15-60 seconds before trying the next player
                sleep_time = random.uniform(15, 60)
                print(f"Sleeping for {sleep_time:.2f} seconds before trying the next player")
                time.sleep(sleep_time)

##### Collect Stats for All Players

In [None]:
# Batch: players at positions 0-9
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[0:10]
get_players_stats_by_batch(
    players_to_scrape=curr_players_metadata,
    driver=driver,
    wait=wait,
    output_path=official_stats_output_path,
)

In [None]:
# Batch: players at positions 10-39
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[10:40]
get_players_stats_by_batch(
    players_to_scrape=curr_players_metadata,
    driver=driver,
    wait=wait,
    output_path=official_stats_output_path,
)

In [None]:
# Batch: players at positions 40-44
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[40:45]
get_players_stats_by_batch(
    players_to_scrape=curr_players_metadata,
    driver=driver,
    wait=wait,
    output_path=official_stats_output_path,
)

In [None]:
# Batch: players at positions 45-49
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[45:50]
get_players_stats_by_batch(
    players_to_scrape=curr_players_metadata,
    driver=driver,
    wait=wait,
    output_path=official_stats_output_path,
)

In [None]:
# Batch: players at positions 50-99
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[50:100]
get_players_stats_by_batch(
    players_to_scrape=curr_players_metadata,
    driver=driver,
    wait=wait,
    output_path=official_stats_output_path,
)

In [None]:
# Batch: players at positions 100-199
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[100:200]
get_players_stats_by_batch(
    players_to_scrape=curr_players_metadata,
    driver=driver,
    wait=wait,
    output_path=official_stats_output_path,
)

In [None]:
# Batch: players at positions 200-299
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[200:300]
get_players_stats_by_batch(
    players_to_scrape=curr_players_metadata,
    driver=driver,
    wait=wait,
    output_path=official_stats_output_path,
)

In [None]:
# Batch: players at positions 300-399
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[300:400]
get_players_stats_by_batch(
    players_to_scrape=curr_players_metadata,
    driver=driver,
    wait=wait,
    output_path=official_stats_output_path,
)

In [None]:
# Batch: players at positions 400-499
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[400:500]
get_players_stats_by_batch(
    players_to_scrape=curr_players_metadata,
    driver=driver,
    wait=wait,
    output_path=official_stats_output_path,
)

In [None]:
# Batch: players at positions 500-599
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[500:600]
get_players_stats_by_batch(
    players_to_scrape=curr_players_metadata,
    driver=driver,
    wait=wait,
    output_path=official_stats_output_path,
)

In [None]:
# Batch: players at positions 600-699
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[600:700]
get_players_stats_by_batch(
    players_to_scrape=curr_players_metadata,
    driver=driver,
    wait=wait,
    output_path=official_stats_output_path,
)

##### Failed Cases:
- Corey Perry
- Landon Slaggert

In [None]:
# Batch: players at positions 700-799
curr_players_metadata = nhl_skaters_metadata_official_ep_merge_complete_final.iloc[700:800]
get_players_stats_by_batch(
    players_to_scrape=curr_players_metadata,
    driver=driver,
    wait=wait,
    output_path=official_stats_output_path,
)

### Fetch Missing Players' Stats

In [None]:
# Read in nhl_skaters_metadata_official_ep_merge_complete_final.csv
nhl_skaters_metadata_official_ep_merge_complete_final = pd.read_csv('./data/nhl/nhl_skaters_metadata_official_ep_merge_complete_final.csv')

In [None]:
# Names of the players that already have rows in the stats CSV.
# Start from an empty set and fill it only if the file is there.
unique_players = set()
if os.path.exists(official_stats_output_path):
    # low_memory=False: read the whole file in one pass
    nhl_players_official_stats = pd.read_csv(official_stats_output_path, low_memory=False)
    unique_players = set(nhl_players_official_stats['player_name'].unique())

In [None]:
# Find players among the first 700 metadata rows (positions 0-699) whose names are not yet in unique_players
subset = nhl_skaters_metadata_official_ep_merge_complete_final[0:700]
players_to_scrape = subset[~subset['player_name'].isin(unique_players)].reset_index(drop=True)
players_to_scrape

In [None]:
# Fetch Missing Players' Stats
get_players_stats_by_batch(players_to_scrape, driver, wait, official_stats_output_path)

In [None]:
# Calculate the unique players in official_stats_output_path.
# Re-read the file first: the in-memory frame was loaded before the
# missing-player fetch appended new rows (and is undefined if the file did
# not exist at that point), so counting or re-saving it would drop them.
nhl_players_official_stats = pd.read_csv(official_stats_output_path, low_memory=False)
unique_players = set(nhl_players_official_stats['player_name'].unique())
len(unique_players)

In [None]:
# Remove rows that repeat an earlier row on the identifying columns plus every
# regular-season and playoff stat column; the first occurrence is kept.
nhl_players_official_stats = nhl_players_official_stats.drop_duplicates(
    subset=['player_name', 'season', 'team', 'league'] + [
        f'{stat}_{game_type}'
        for game_type in ('regular', 'playoffs')
        for stat in (
            'gp', 'g', 'a', 'p', 'plus_minus', 'pim', 'ppg', 'ppp',
            'shg', 'shp', 'toi_per_game', 'gwg', 'otg', 'sog',
            'shooting_pct', 'fo_pct',
        )
    ]
).reset_index(drop=True)

In [None]:
# Write to CSV
nhl_players_official_stats.to_csv(official_stats_output_path, index=False, encoding='utf-8-sig')

### Close the Driver

In [None]:
# Shut down the shared browser session now that all scraping is finished
driver.quit()