## WNBA Player Stats 2025

RUN THIS PROGRAM BEFORE EACH WORKSHOP TO GET THE LATEST DATA.

THE PROGRAM PAUSES SO YOU CAN PRESS "SHOW MORE" TWICE IN THE BROWSER WINDOW; THEN PRESS ENTER TO CONTINUE.

In [2]:
"""
Program: WNBA Player Stats Scraper
Author: Peter Beens
Date: 2024-05-19
Description: This program scrapes WNBA player statistics from ESPN's website using Selenium and Pandas.
             It initializes a WebDriver, navigates to the stats page, and allows the user to manually
             interact with the page to load additional data. The program then extracts player names and
             their statistics, merges the data into a single DataFrame, and saves it to a CSV file.
Usage: Run the script in an environment with Selenium, Pandas, and a WebDriver installed. Ensure geckodriver
       is installed and in your PATH for Firefox.
Dependencies: 
    - selenium
    - pandas (pd.read_html also needs an HTML parser installed, e.g. lxml)
    - time (standard library)
Notes: 
    - Ensure you have the necessary WebDriver installed (geckodriver for Firefox).
    - Manual interaction is required to load additional data by pressing the 'Show More' button twice.
"""

import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By

# Local to this cell so it runs on a fresh kernel: pd.read_html expects a
# file-like object, and passing a literal HTML string is deprecated.
from io import StringIO

# Season scraped by this cell. It drives the URL, the 'Year' column and the
# output file name so the three can never disagree.
SEASON = 2025

# Define the URL to scrape
url = f"https://www.espn.com/wnba/stats/player/_/season/{SEASON}/seasontype/2"

# Initialize the WebDriver for Firefox. Ensure geckodriver is installed and in your PATH.
wd = webdriver.Firefox()

try:
    # Open the URL in the WebDriver
    wd.get(url)

    # Pause to allow manual interaction
    input("Press Enter after pressing 'Show More' button twice...")

    # Extract the player names using Selenium by locating elements that match the given XPath
    player_elements = wd.find_elements(By.XPATH, "//tr[contains(@class, 'Table__TR')]//td[2]//a")

    # Read .text once per element (each access is a WebDriver round trip) and
    # keep only the non-empty names.
    names = [text for text in (element.text for element in player_elements) if text]

    # Grab the rendered page while the browser is still open
    html_source = wd.page_source
finally:
    # Always close the browser, even if scraping failed part-way through
    wd.quit()

# Print the number of player names extracted and the player names themselves for verification
print(f"Number of player names extracted: {len(names)}")
print(names)

# Use pandas to read the HTML tables from the page source fetched by Selenium
tables = pd.read_html(StringIO(html_source))
if len(tables) < 2:
    raise ValueError(f"Expected at least 2 tables on the stats page, found {len(tables)}.")

# Assuming the first table contains 'RK' and 'NAME' columns, and the second table contains the rest of the statistics
players = tables[0].drop(columns=['RK'])
stats = tables[1]

# The names are assigned positionally, so a count mismatch would either fail
# with an obscure pandas error or misalign players and stats.
if len(names) != len(players):
    raise ValueError(
        f"Extracted {len(names)} player names but the table has {len(players)} rows."
    )

# Add the extracted player names to the players DataFrame
players['Name'] = names

# Merge the players and stats DataFrames on their index to create a complete dataset
df = pd.concat([players, stats], axis=1)

# Tag every row with its season. The combine step selects a 'Year' column, and
# the all-years scraper writes one too; without it these rows end up with NaN.
df['Year'] = SEASON

# Display the final DataFrame to ensure correctness
print(df)

# Save the DataFrame to a CSV file named 'wnba_player_stats_<SEASON>.csv'
df.to_csv(f'wnba_player_stats_{SEASON}.csv', index=False)


Number of player names extracted: 50
["A'ja Wilson", 'Napheesa Collier', 'Kelsey Mitchell', 'Kelsey Plum', 'Paige Bueckers', 'Dearica Hamby', 'Allisha Gray', 'Breanna Stewart', 'Nneka Ogwumike', 'Sabrina Ionescu', 'Rhyne Howard', 'Jackie Young', 'Caitlin Clark', 'Satou Sabally', 'Tina Charles', 'Kahleah Copper', 'Arike Ogunbowale', 'Skylar Diggins', 'Brittney Sykes', 'Alyssa Thomas', 'Aliyah Boston', 'Sonia Citron', 'Rickea Jackson', 'Angel Reese', 'Marina Mabrey', 'Kayla McBride', 'Kayla Thornton', 'Jonquel Jones', 'Courtney Williams', 'Kamilla Cardoso', 'Emma Meesseman', 'Kiki Iriafen', 'Ariel Atkins', 'Brionna Jones', 'Azura Stevens', 'Maddy Siegrist', 'Shakira Austin', 'Veronica Burton', 'Brittney Sykes', 'Tiffany Hayes', 'Gabby Williams', 'Natasha Howard', 'Janelle Salaun', 'Jordin Canada', 'Chelsea Gray', 'Jewell Loyd', 'Amy Okonkwo', 'DeWanna Bonner', 'Courtney Vandersloot', 'Cecilia Zandalasini']
                    Name POS  GP   MIN   PTS  FGM   FGA   FG%  3PM  3PA   3P%  \
0

  tables = pd.read_html(html_source)


## Combine All Years of WNBA Player Stats

RUN THIS PROGRAM BEFORE EACH WORKSHOP, AFTER THE 2025 SCRAPER ABOVE, TO ADD THE LATEST DATA TO THE COMBINED FILE.

In [None]:
import pandas as pd
import glob
import os

# Define the column order to ensure consistency in the final output
column_order = ['Name', 'Year', 'POS', 'GP', 'MIN', 'PTS', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%', 'FTM', 'FTA', 'FT%', 'REB', 'AST', 'STL', 'BLK', 'TO']

# Name of the combined output file
output_file = 'wnba_player_stats_all.csv'

# Define the file pattern for matching CSV files
file_pattern = "wnba_player_stats_*.csv"

# Find the per-year files. The pattern also matches the combined output file,
# so it is filtered out here instead of being deleted up front; that way a
# failure below leaves the previous combined file intact. Sorting makes the row
# order deterministic (glob order is unspecified).
files = sorted(
    path for path in glob.glob(file_pattern)
    if os.path.basename(path) != output_file
)
if not files:
    raise FileNotFoundError(
        f"No files matching '{file_pattern}' were found in {os.getcwd()}."
    )

# Initialize an empty list to hold DataFrames from each CSV file
all_data = []

# Iterate over each file in the list
for file in files:
    # Read each CSV file into a DataFrame
    df = pd.read_csv(file)

    # A file saved without a 'Year' column would contribute NaN years to the
    # combined data; recover the year from the 'wnba_player_stats_<year>.csv' name.
    if 'Year' not in df.columns:
        suffix = os.path.splitext(os.path.basename(file))[0].rsplit('_', 1)[-1]
        if not suffix.isdigit():
            raise ValueError(f"'{file}' has no 'Year' column and no year in its file name.")
        df['Year'] = int(suffix)

    # Append the DataFrame to the all_data list for concatenation
    all_data.append(df)

# Concatenate all the DataFrames in the list into one combined DataFrame
combined_df = pd.concat(all_data, ignore_index=True)

# Fail with a readable message if an expected column is absent from every file
missing_columns = [col for col in column_order if col not in combined_df.columns]
if missing_columns:
    raise KeyError(f"Columns missing from the input files: {missing_columns}")

# Reorder the columns to ensure the final output matches the specified column order
combined_df = combined_df[column_order]

# Save the combined DataFrame to 'wnba_player_stats_all.csv' (overwrites any previous version)
combined_df.to_csv(output_file, index=False)

# Confirm successful completion
print("Files have been successfully combined and saved to 'wnba_player_stats_all.csv'.")


## Get All Years of WNBA Player Stats

ONLY RUN ONCE! THIS SCRAPES EVERY SEASON FROM 2007 TO 2025 AND PAUSES FOR MANUAL INTERACTION ON EACH ONE.

In [None]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By

# Initialize the WebDriver for Firefox. Ensure geckodriver is installed and in your PATH.
# `wd` is a module-level global: scrape_wnba_stats() below reads it directly (it is
# not passed as an argument), so one browser session is reused for every year, and
# it is closed by the wd.quit() call at the end of this cell.
wd = webdriver.Firefox()

# Define a function to scrape data for a given year
def scrape_wnba_stats(year):
    """Scrape the ESPN WNBA player stats page for one season.

    Uses the module-level WebDriver ``wd`` to open the page, then blocks on
    ``input()`` so the user can press 'Show More' in the browser before the
    page is read.

    Args:
        year: Season to scrape; interpolated into the ESPN stats URL.

    Returns:
        A DataFrame with the player columns (minus 'RK', with 'Name' replaced
        by the link text scraped via Selenium) followed by the stats columns.

    Raises:
        ValueError: If fewer than two tables are found on the page, or if the
            number of scraped names differs from the number of table rows.
    """
    # Function-scope import keeps this block self-contained; pd.read_html
    # expects a file-like object (literal HTML strings are deprecated).
    from io import StringIO

    # Define the URL to scrape for the specific year
    url = f"https://www.espn.com/wnba/stats/player/_/season/{year}/seasontype/2"

    # Open the URL in the WebDriver
    wd.get(url)

    # Pause to allow manual interaction for pressing the 'Show More' button twice
    input(f"Press Enter after pressing 'Show More' button twice for the year {year}...")

    # Extract the player names using Selenium by locating elements that match the given XPath
    player_elements = wd.find_elements(By.XPATH, "//tr[contains(@class, 'Table__TR')]//td[2]//a")

    # Read .text once per element (each access is a WebDriver round trip) and
    # keep only the non-empty names.
    names = [text for text in (element.text for element in player_elements) if text]

    # Use pandas to read the HTML tables from the page source fetched by Selenium
    tables = pd.read_html(StringIO(wd.page_source))
    if len(tables) < 2:
        raise ValueError(
            f"Expected at least 2 tables for {year}, found {len(tables)}."
        )

    # Assuming the first table contains 'RK' and 'NAME' columns, and the second table contains the rest of the statistics
    players = tables[0].drop(columns=['RK'])
    stats = tables[1]

    # Names are assigned positionally, so the counts must agree or players and
    # stats would be misaligned.
    if len(names) != len(players):
        raise ValueError(
            f"Extracted {len(names)} player names for {year} but the table has {len(players)} rows."
        )

    # Add the extracted player names to the players DataFrame
    players['Name'] = names

    # Merge the players and stats DataFrames on their index to create a complete dataset
    return pd.concat([players, stats], axis=1)

# Iterate through the years from 2007 to 2025. The loop is wrapped in
# try/finally so the Firefox session is closed even if one year fails or the
# run is interrupted; otherwise the browser process would be left running.
try:
    for year in range(2007, 2025 + 1):
        print(f"Scraping data for the year {year}...")
        df_year = scrape_wnba_stats(year)
        df_year['Year'] = year  # Add a column to identify the year of the data

        # Save the DataFrame to a CSV file named 'wnba_player_stats_{year}.csv'
        df_year.to_csv(f'wnba_player_stats_{year}.csv', index=False)
finally:
    # Close the WebDriver
    wd.quit()
