# Scraping Player Salaries from ESPN

In [1]:
# Import necessary packages
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [2]:
# Function to scrape player salaries from ESPN
def scrape_espn_salaries(year, page, timeout=10):
    """
    Scrape one page of ESPN's NBA salary table for a given season.

    Args:
        year: Season value inserted into the URL path (e.g. 2025).
        page: Page number inserted into the URL path.
        timeout: Seconds to wait for the HTTP response before giving up.
            Without a timeout, a stalled connection would hang forever.

    Returns:
        A list of [rank, player, team, salary, year] rows. rank, player and
        team are the cell text as scraped; salary is a float with "$" and ","
        removed; year is the argument passed in. An empty list is returned
        when the request fails, the status code is not 200, or no table with
        class "tablehead" is present on the page.
    """
    url = f"https://www.espn.com/nba/salaries/_/year/{year}/page/{page}/seasontype/4"
    # Browser-like User-Agent; presumably the default client UA is rejected
    # by the site (not verified here).
    headers = {"User-Agent": "Mozilla/5.0"}

    # Treat network-level failures (timeouts, connection errors) the same way
    # as a bad status code: report the problem and return no rows.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Page {page} failed with error: {exc}")
        return []

    if response.status_code != 200:
        print(f"Page {page} failed with status {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', class_='tablehead')
    if not table:
        return []

    # Only rows carrying the "oddrow"/"evenrow" classes are read.
    rows = table.find_all('tr', class_=['oddrow', 'evenrow'])

    data = []
    for row in rows:
        cols = [td.get_text(strip=True) for td in row.find_all('td')]
        # Rows that do not have exactly four cells are skipped.
        if len(cols) == 4:
            rank, name, team, salary = cols
            salary = salary.replace("$", "").replace(",", "")
            data.append([rank, name, team, float(salary), year])

    return data


In [75]:
# Walk the paginated 2024-2025 salary table and gather every row
year = 2025
all_data = []

for page in range(1, 20):
    print(f"Scraping page {page}...")
    page_rows = scrape_espn_salaries(year, page)

    if page_rows:
        # Keep the rows, then pause before requesting the next page
        all_data += page_rows
        time.sleep(2)
    else:
        # An empty page is taken as the end of the table
        print("No more data found. Stopping.")
        break

# Build a DataFrame from the accumulated rows
column_names = ["Rank", "Player", "Team", "Salary", "Season"]
df = pd.DataFrame(all_data, columns=column_names)
print(f"Total players scraped: {len(df)}")
df.head()

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
No more data found. Stopping.
Total players scraped: 491


Unnamed: 0,Rank,Player,Team,Salary,Season
0,1,"Stephen Curry, PG",Golden State Warriors,55761216.0,2025
1,2,"Joel Embiid, C",Philadelphia 76ers,51415938.0,2025
2,3,"Nikola Jokic, C",Denver Nuggets,51415938.0,2025
3,4,"Kevin Durant, PF",Phoenix Suns,51179021.0,2025
4,5,"Bradley Beal, SG",Phoenix Suns,50203930.0,2025


In [76]:
# Save as a CSV named after the season in `year`, so repeating the scrape for
# another season writes its own file instead of overwriting this one
df.to_csv(f'nba_salaries_{year}.csv', index=False)

Repeat the two steps above (scrape, then save to a season-specific CSV) for each of the 10 earlier seasons, from 2014-15 (`year = 2015`) through 2023-24 (`year = 2024`).

**Note**: Because the website is occasionally unstable, the full scrape was not run as a single loop over all seasons and pages. Instead, run the scraping cell one season at a time: when a page intermittently fails to load, the function returns no rows, which the loop treats as the end of the data and stops early. If the reported player count looks too low for a season, rerun that season to ensure all player salary data is captured.

In [77]:
# Combine all per-season CSVs (2025 down to 2015) into one dataset.
# ignore_index=True gives the combined frame a fresh 0..n-1 index instead of
# repeating each file's own row labels.
salaries = pd.concat(
    [pd.read_csv(f'nba_salaries_{season}.csv') for season in range(2025, 2014, -1)],
    ignore_index=True,
)

# Create a position column. Player values look like "Stephen Curry, PG";
# split on the LAST comma only, so a name that itself contains a comma still
# yields exactly two parts (name, position).
salaries[['Player', 'Position']] = salaries['Player'].str.rsplit(',', n=1, expand=True)
salaries['Player'] = salaries['Player'].str.strip()
salaries['Position'] = salaries['Position'].str.strip()

# Rename columns to lowercase (reassign rather than mutate in place)
salaries = salaries.rename(columns={
    'Rank': 'rank',
    'Player': 'player',
    'Team': 'team',
    'Salary': 'salary',
    'Season': 'season',
    'Position': 'position',
})
salaries.head()

Unnamed: 0,rank,player,team,salary,season,position
0,1,Stephen Curry,Golden State Warriors,55761216.0,2025,PG
1,2,Joel Embiid,Philadelphia 76ers,51415938.0,2025,C
2,3,Nikola Jokic,Denver Nuggets,51415938.0,2025,C
3,4,Kevin Durant,Phoenix Suns,51179021.0,2025,PF
4,5,Bradley Beal,Phoenix Suns,50203930.0,2025,SG


In [87]:
# Write the combined salary table to disk, leaving out the row index
salaries.to_csv(path_or_buf='salaries.csv', index=False)