In [1]:
import time
from io import StringIO

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    ElementClickInterceptedException,
    NoSuchElementException,
    StaleElementReferenceException,
)
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Season to scrape, named by the year in which it ends (the CSV name below
# spans end_year-1 to end_year).
end_year = 2026  # Change this to desired season
url = f"https://www.espn.com/nba/stats/player/_/season/{end_year}/seasontype/2"
filename = f'nba_player_stats_{end_year-1}-{end_year}.csv'

# Longest time (seconds) to wait for the stats table before giving up
load_timeout = 15

# Initialize WebDriver (Ensure geckodriver is installed and in your PATH)
wd = webdriver.Firefox()

# Open the URL and block until at least one table row is in the DOM. A fixed
# sleep can expire before a slow page has rendered, in which case the later
# "Show More" lookup finds nothing and the run silently saves partial data.
try:
    wd.get(url)
    WebDriverWait(wd, load_timeout).until(
        EC.presence_of_element_located((By.XPATH, "//tr[contains(@class, 'Table__TR')]"))
    )
except Exception:
    # Do not leave an orphaned Firefox process behind when setup fails
    wd.quit()
    raise

# Function to click "Show More" until all data is loaded
def click_show_more(max_attempts=200):
    """Click the page's "Show More" link until it is no longer in the DOM.

    Uses the module-level WebDriver ``wd``, which must already be on the
    stats page. Each iteration looks the link up again, scrolls it into view
    and clicks it through JavaScript, then pauses so new rows can load.

    Args:
        max_attempts: Upper bound on loop iterations (clicks and retries
            combined). Prevents an endless loop when the link never
            disappears, e.g. because clicking stopped having any effect.

    Returns:
        None. Progress and the final state are reported with ``print``.
    """
    link_xpath = "//div[contains(@class, 'loadMore')]//a[contains(@class, 'loadMore__link')]"

    for _ in range(max_attempts):
        try:
            # Locate the "Show More" link inside the div
            show_more_link = wd.find_element(By.XPATH, link_xpath)

            # Scroll into view
            wd.execute_script("arguments[0].scrollIntoView();", show_more_link)
            time.sleep(1)  # Allow scrolling time

            # Click using JavaScript (ensures it works)
            wd.execute_script("arguments[0].click();", show_more_link)
            time.sleep(2)  # Allow content to load
        except NoSuchElementException:
            print("No more 'Show More' button found. Page is fully loaded.")
            break
        except ElementClickInterceptedException:
            print("Click intercepted. Retrying after a short wait...")
            time.sleep(2)
        except StaleElementReferenceException:
            # The link was replaced between lookup and use; look it up again
            print("'Show More' link went stale. Retrying after a short wait...")
            time.sleep(2)
    else:
        # Loop ran out of attempts without the link ever disappearing
        print(f"Stopped after {max_attempts} attempts; 'Show More' is still present, so data may be incomplete.")

# Everything that needs the live browser happens inside this try block. The
# finally clause closes Firefox even when scraping fails, so an error does not
# leave an orphaned browser process behind.
try:
    # Click "Show More" until everything is loaded
    click_show_more()

    # Extract player names from the link in each row's second cell
    try:
        player_elements = wd.find_elements(By.XPATH, "//tr[contains(@class, 'Table__TR')]//td[2]//a")
        # Read .text once per element instead of twice (filter + value)
        names = [text for text in (element.text for element in player_elements) if text]
        print(f"Number of player names extracted: {len(names)}")
    except Exception as e:
        # Best effort: report here; the length check below stops the run with
        # a clear message if the names are actually needed.
        print("Error extracting player names:", e)
        names = []

    # Snapshot the rendered HTML so the tables can be parsed after the
    # browser is closed
    html_source = wd.page_source
finally:
    # Close WebDriver
    wd.quit()

# Extract tables using pandas (wrapping in StringIO avoids the warning pandas
# emits for literal HTML strings)
tables = pd.read_html(StringIO(html_source))

# Ensure tables exist. Raise instead of calling exit(): exit() is meant for
# the interactive shell and can shut down the kernel when run in a notebook.
if len(tables) < 2:
    raise RuntimeError(f"Expected at least 2 tables but found {len(tables)}")

# Extract player data, dropping the 'RK' column if it exists
players = tables[0].drop(columns=['RK'], errors='ignore')
stats = tables[1]

# The scraped names are matched to table rows purely by position, so the two
# counts must agree before the column is replaced.
if len(names) != len(players):
    raise ValueError(
        f"Extracted {len(names)} player names but the player table has "
        f"{len(players)} rows; refusing to write misaligned data."
    )

# Add extracted player names
players = players.assign(Name=names)

# Merge player and stats DataFrames side by side
df = pd.concat([players, stats], axis=1)

# Display the final DataFrame
print(df.head())

# Save to CSV
df.to_csv(filename, index=False)
print(f"Data saved to {filename}")

No more 'Show More' button found. Page is fully loaded.
Number of player names extracted: 318
                      Name POS  GP   MIN   PTS   FGM   FGA   FG%  3PM   3PA  \
0              Luka Doncic   G  11  36.8  34.6  10.6  22.5  47.2  3.5  10.9   
1             Tyrese Maxey   G  15  40.7  33.4  11.1  23.4  47.3  4.1   9.7   
2  Shai Gilgeous-Alexander   G  16  33.4  32.0  10.8  20.4  52.9  2.2   5.7   
3    Giannis Antetokounmpo   F  13  31.8  31.2  12.0  19.1  62.9  0.7   1.4   
4          Lauri Markkanen   F  14  35.9  30.6  10.4  21.5  48.5  3.7   9.6   

   ...  FTM   FTA   FT%   REB  AST  STL  BLK   TO  DD2  TD3  
0  ...  9.9  12.5  79.0   8.5  9.0  1.9  0.5  4.1    8    1  
1  ...  7.2   8.2  87.8   4.7  7.9  1.6  0.9  2.8    2    0  
2  ...  8.2   9.1  89.7   5.1  6.5  1.5  0.8  1.8    2    0  
3  ...  6.5  10.2  63.6  10.8  6.8  0.9  1.2  3.5    8    0  
4  ...  6.1   6.9  88.5   6.1  2.1  1.0  0.5  1.4    1    0  

[5 rows x 21 columns]
Data saved to nba_player_stats_2025-