## WNBA Player Stats 2024

RUN THIS PROGRAM BEFORE EACH WORKSHOP TO GET THE LATEST DATA.

In [None]:
"""
Program: WNBA Player Stats Scraper
Author: Peter Beens
Date: 2024-05-19
Description: This program scrapes WNBA player statistics from ESPN's website using Selenium and Pandas.
             It initializes a WebDriver, navigates to the stats page, and allows the user to manually
             interact with the page to load additional data. The program then extracts player names and
             their statistics, merges the data into a single DataFrame, and saves it to a CSV file.
Usage: Run the script in an environment with Selenium, Pandas, and a WebDriver installed. Ensure geckodriver
       is installed and in your PATH for Firefox.
Dependencies:
    - selenium
    - pandas
    - time (standard library; no installation needed)
Notes: 
    - Ensure you have the necessary WebDriver installed (geckodriver for Firefox).
    - Manual interaction is required to load additional data by pressing the 'Show More' button twice.
"""

import time
from io import StringIO

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By

# Define the URL to scrape
url = "https://www.espn.com/wnba/stats/player"

# Initialize the WebDriver for Firefox. Ensure geckodriver is installed and in your PATH.
wd = webdriver.Firefox()

# Everything that needs the live browser runs inside try/finally so the
# browser session is always shut down, even if one of the steps below raises.
try:
    # Open the URL in the WebDriver
    wd.get(url)

    # Pause to allow manual interaction
    input("Press Enter after pressing 'Show More' button twice...")

    # Locate the link elements in the second cell of each table row
    player_elements = wd.find_elements(By.XPATH, "//tr[contains(@class, 'Table__TR')]//td[2]//a")

    # Read .text once per element (each read is a separate WebDriver call),
    # then keep only the non-empty strings.
    element_texts = [element.text for element in player_elements]
    names = [text for text in element_texts if text]

    # Capture the rendered page while the session is still alive
    html_source = wd.page_source
finally:
    # Close the WebDriver
    wd.quit()

# Print the number of player names extracted and the player names themselves for verification
print(f"Number of player names extracted: {len(names)}")
print(names)

# Parse the HTML tables from the captured page source. The markup is wrapped
# in StringIO because passing literal HTML to read_html is deprecated in
# recent pandas versions.
tables = pd.read_html(StringIO(html_source))

# Fail with a clear message instead of a bare "list index out of range"
if len(tables) < 2:
    raise IndexError(
        f"Expected at least 2 tables in the page source, found {len(tables)}; "
        "the page layout may have changed."
    )

# Assuming the first table contains 'RK' and 'NAME' columns, and the second table contains the rest of the statistics
players = tables[0]
stats = tables[1]

# Drop the 'RK' column from the players DataFrame
players = players.drop(columns=['RK'])

# The column assignment below requires exactly one name per row; check it
# explicitly so a mismatch is reported with both counts.
if len(names) != len(players):
    raise ValueError(
        f"Extracted {len(names)} player names but the players table has "
        f"{len(players)} rows; the XPath or the page layout may need updating."
    )

# Store the extracted link text as the 'Name' column of the players DataFrame
players['Name'] = names

# Place the players and stats DataFrames side by side (aligned on index) to create a complete dataset
df = pd.concat([players, stats], axis=1)

# Display the final DataFrame to ensure correctness
print(df)

# Save the DataFrame to a CSV file named 'wnba_player_stats_2024.csv'
df.to_csv('wnba_player_stats_2024.csv', index=False)
