In [1]:
import time
from io import StringIO

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    ElementClickInterceptedException,
    NoSuchElementException,
    StaleElementReferenceException,
)
from selenium.webdriver.common.by import By

# --- Scrape configuration -------------------------------------------------
# `end_year` is the calendar year in which the season ends; edit it to pick
# another season. The output file name spans both years of that season.
end_year = 2025  # Change this to desired season
url = f"https://www.espn.com/nba/stats/player/_/season/{end_year}/seasontype/2"
filename = f'nba_player_stats_{end_year-1}-{end_year}.csv'

# --- Browser session ------------------------------------------------------
# Firefox is driven through geckodriver, which must be installed and
# discoverable on PATH.
wd = webdriver.Firefox()

# Navigate to the stats page, then pause for a fixed 3 seconds so the page
# has time to load before anything is read from it.
wd.get(url)
time.sleep(3)

# Function to click "Show More" until all data is loaded
def click_show_more(max_clicks=100, max_retries=5):
    """Click the page's "Show More" link repeatedly until it is gone.

    Uses the module-level WebDriver ``wd``. On every pass the link is looked
    up again, scrolled into view and clicked through JavaScript, with fixed
    pauses after the scroll and after the click.

    Args:
        max_clicks: Safety cap on the number of successful clicks. Guards
            against looping forever if the link never leaves the DOM.
        max_retries: How many consecutive recoverable failures (intercepted
            click or stale element reference) are tolerated before giving up.

    Returns:
        None. Progress and the reason for stopping are printed.
    """
    link_xpath = "//div[contains(@class, 'loadMore')]//a[contains(@class, 'loadMore__link')]"
    clicks = 0
    failures = 0  # consecutive recoverable failures since the last good click

    while clicks < max_clicks:
        try:
            # Look the link up on every pass: it is the lookup failing that
            # signals there is nothing left to expand.
            show_more_link = wd.find_element(By.XPATH, link_xpath)

            # Scroll into view
            wd.execute_script("arguments[0].scrollIntoView();", show_more_link)
            time.sleep(1)  # Allow scrolling time

            # Click using JavaScript rather than a native WebDriver click
            wd.execute_script("arguments[0].click();", show_more_link)
            time.sleep(2)  # Allow content to load
        except NoSuchElementException:
            print("No more 'Show More' button found. Page is fully loaded.")
            return
        except (ElementClickInterceptedException, StaleElementReferenceException) as err:
            # Both are transient: retry with a fresh lookup, but only a
            # bounded number of times in a row.
            failures += 1
            if failures > max_retries:
                print(f"Giving up after {max_retries} consecutive failed click attempts.")
                return
            if isinstance(err, ElementClickInterceptedException):
                print("Click intercepted. Retrying after a short wait...")
            else:
                print("'Show More' link went stale. Retrying after a short wait...")
            time.sleep(2)
        else:
            clicks += 1
            failures = 0

    print(f"Stopped after {max_clicks} 'Show More' clicks; the table may be incomplete.")

# Everything that needs the live browser happens inside this try block. The
# browser is closed in `finally`, so a failure while expanding or reading the
# page no longer leaves a Firefox process behind. Nothing after `page_source`
# touches the driver, so it is safe to close it at that point.
try:
    # Click "Show More" until everything is loaded
    click_show_more()

    # Extract player names (best effort: on failure fall back to an empty
    # list and let the table-based data stand on its own).
    try:
        player_elements = wd.find_elements(By.XPATH, "//tr[contains(@class, 'Table__TR')]//td[2]//a")
        # Read each element's text once; every `.text` access is a separate
        # request to the browser.
        element_texts = (element.text for element in player_elements)
        names = [text for text in element_texts if text]
        print(f"Number of player names extracted: {len(names)}")
    except Exception as e:
        print("Error extracting player names:", e)
        names = []

    html_source = wd.page_source
finally:
    # Close WebDriver
    wd.quit()

# Extract tables using pandas. Wrapping the HTML in StringIO passes it as a
# file-like object instead of a literal string.
try:
    tables = pd.read_html(StringIO(html_source))
except ValueError as err:
    # read_html raises instead of returning an empty list when it finds no
    # tables, so the length check below would never see that case.
    raise RuntimeError("Error: Expected at least 2 tables but found none") from err

# Ensure tables exist. Raise instead of calling exit(): exit() is an
# interactive-shell helper and is not a reliable way to abort a notebook cell.
if len(tables) < 2:
    raise RuntimeError(f"Error: Expected at least 2 tables but found {len(tables)}")

# Extract player data: the first table is combined column-wise with the second.
players = tables[0]
stats = tables[1]

# Drop 'RK' column if it exists
if 'RK' in players.columns:
    players = players.drop(columns=['RK'])

# Add extracted player names, but only when there is exactly one per row.
# Assigning a list of the wrong length raises ValueError, which used to abort
# the run whenever the name scrape above had failed or come up short.
if len(names) == len(players):
    players = players.assign(Name=names)
else:
    print(
        f"Warning: extracted {len(names)} names for {len(players)} table rows; "
        "leaving the first table's columns unchanged."
    )

# Merge player and stats DataFrames side by side (aligned on the row index)
df = pd.concat([players, stats], axis=1)

# Display the final DataFrame
print(df.head())

# Save to CSV
df.to_csv(filename, index=False)
print(f"Data saved to {filename}")


No more 'Show More' button found. Page is fully loaded.
Number of player names extracted: 302
                      Name POS  GP   MIN   PTS   FGM   FGA   FG%  3PM   3PA  \
0  Shai Gilgeous-Alexander  PG  61  34.2  32.8  11.3  21.4  52.6  2.1   5.7   
1    Giannis Antetokounmpo  PF  49  33.9  30.8  12.2  20.2  60.5  0.2   0.9   
2             Nikola Jokic   C  56  36.1  28.8  11.0  19.1  57.6  1.9   4.4   
3          Anthony Edwards  SG  61  36.5  27.5   9.0  20.6  43.9  4.1  10.2   
4             Kevin Durant  PF  49  36.8  26.9   9.8  18.5  52.7  2.4   6.1   

   ...  FTM   FTA   FT%   REB   AST  STL  BLK   TO  DD2  TD3  
0  ...  8.0   9.0  89.8   5.1   6.2  1.8  1.0  2.6    5    0  
1  ...  6.2  10.3  59.6  12.1   5.9  0.8  1.2  3.3   42    6  
2  ...  4.9   6.0  81.9  12.8  10.4  1.8  0.6  3.3   48   28  
3  ...  5.2   6.2  83.7   5.9   4.6  1.2  0.6  3.3    5    0  
4  ...  4.9   6.0  82.5   6.0   4.2  0.9  1.3  3.0    5    0  

[5 rows x 21 columns]
Data saved to nba_player_stats