In [1]:
import time
import requests
import numpy as np
import pandas as pd

In [2]:
# Lift the column cap so wide DataFrames are rendered with every column visible
pd.options.display.max_columns = None

In [3]:
# Browser-style request headers sent with every call to the NBA stats endpoint.
# NOTE(review): 'br' is advertised in Accept-Encoding — confirm the runtime can
# decode Brotli responses, otherwise consider dropping it.
headers = dict([
    ('Accept', '*/*'),
    ('Accept-Encoding', 'gzip, deflate, br'),
    ('Accept-Language', 'en-GB,en;q=0.9'),
    ('Connection', 'keep-alive'),
    ('Host', 'stats.nba.com'),
    ('Origin', 'https://www.nba.com'),
    ('Referer', 'https://www.nba.com/'),
    ('Sec-Fetch-Dest', 'empty'),
    ('Sec-Fetch-Mode', 'cors'),
    ('Sec-Fetch-Site', 'same-site'),
    ('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15'),
])

In [4]:
# Seasons to scrape, labelled 'YYYY-YY': 2013-14 through 2023-24 (11 seasons).
years = [f'{start}-{(start + 1) % 100:02d}' for start in range(2013, 2024)]
# Each season is fetched once per season type.
season_types = ['Regular Season', 'Playoffs']

In [5]:
# Initialize a list to collect one DataFrame chunk per (year, season type) request
data_list = []

# Seconds to wait on the server before abandoning a request. Without a timeout,
# requests.get can block indefinitely on a stalled connection; a timeout raises
# requests.Timeout, which the RequestException handler below already covers.
REQUEST_TIMEOUT = 30

# Start the timer to monitor runtime
begin_loop = time.time()

# Loop through each year and season type to scrape data
for year in years:
    for season in season_types:
        try:
            # Format the URL with the current year and season type (spaces URL-encoded as %20)
            url = f"https://stats.nba.com/stats/leagueLeaders?LeagueID=00&PerMode=PerGame&Scope=S&Season={year}&SeasonType={season.replace(' ', '%20')}&StatCategory=PTS"
            response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()  # Turn HTTP error statuses into exceptions

            # Parse the JSON response and convert to DataFrame
            data = response.json()
            result_set = data['resultSet']
            temp_df = pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])
        except requests.RequestException as e:
            print(f'Error fetching data for {year} and {season}: {str(e)}')
        except (KeyError, TypeError, ValueError) as e:
            # The request succeeded but the payload is not the expected shape
            # (missing keys, non-JSON body, header/row mismatch). Report it and
            # move on so one bad response does not abort the whole scrape.
            print(f'Unexpected response format for {year} and {season}: {e!r}')
        else:
            if temp_df.empty:
                # An empty chunk contributes no rows; keeping it out of data_list
                # also avoids feeding empty entries to the later pd.concat.
                print(f'No rows returned for {year} and {season}; skipping.')
            else:
                # Create columns for 'YEAR' and 'SEASON_TYPE' and place them at the beginning
                temp_df.insert(0, 'SEASON_TYPE', season)
                temp_df.insert(0, 'YEAR', year)

                data_list.append(temp_df)
                print(f'Finished scraping data for {year} and {season}.')

        # Wait a random 5-15 seconds between requests to avoid server throttling
        lag = np.random.uniform(low=5, high=15)
        time.sleep(lag)

Finished scraping data for 2013-14 and Regular Season.
Finished scraping data for 2013-14 and Playoffs.
Finished scraping data for 2014-15 and Regular Season.
Finished scraping data for 2014-15 and Playoffs.
Finished scraping data for 2015-16 and Regular Season.
Finished scraping data for 2015-16 and Playoffs.
Finished scraping data for 2016-17 and Regular Season.
Finished scraping data for 2016-17 and Playoffs.
Finished scraping data for 2017-18 and Regular Season.
Finished scraping data for 2017-18 and Playoffs.
Finished scraping data for 2018-19 and Regular Season.
Finished scraping data for 2018-19 and Playoffs.
Finished scraping data for 2019-20 and Regular Season.
Finished scraping data for 2019-20 and Playoffs.
Finished scraping data for 2020-21 and Regular Season.
Finished scraping data for 2020-21 and Playoffs.
Finished scraping data for 2021-22 and Regular Season.
Finished scraping data for 2021-22 and Playoffs.
Finished scraping data for 2022-23 and Regular Season.
Finished 

In [6]:
# Minutes elapsed since begin_loop was set, measured at the moment this cell
# executes (so run it right after the scraping loop finishes).
elapsed_minutes = round((time.time() - begin_loop) / 60, 2)
print(f'Process completed! Total runtime: {elapsed_minutes} minutes')

Process completed! Total runtime: 6.02 minutes


In [8]:
# Concatenate all scraped chunks into a single DataFrame.
# Empty chunks are excluded first: they add no rows, and pandas has deprecated
# letting empty entries influence the result dtypes during concatenation.
# If every chunk is empty, fall back to the full list so the result is unchanged.
non_empty_chunks = [chunk for chunk in data_list if not chunk.empty]
df = pd.concat(non_empty_chunks or data_list, ignore_index=True)

  df = pd.concat(data_list, ignore_index=True)


In [9]:
# Display the combined DataFrame as this cell's output
df

Unnamed: 0,YEAR,SEASON_TYPE,PLAYER_ID,RANK,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PTS,EFF
0,2013-14,Regular Season,201142,1,Kevin Durant,1610612760,OKC,81,38.5,10.5,20.8,0.503,2.4,6.1,0.391,8.7,9.9,0.873,0.7,6.7,7.4,5.5,1.3,0.7,3.5,32.0,31.8
1,2013-14,Regular Season,2546,2,Carmelo Anthony,1610612752,NYK,77,38.7,9.6,21.3,0.452,2.2,5.4,0.402,6.0,7.0,0.848,1.9,6.2,8.1,3.1,1.2,0.7,2.6,27.4,25.2
2,2013-14,Regular Season,2544,3,LeBron James,1610612748,MIA,77,37.7,10.0,17.6,0.567,1.5,4.0,0.379,5.7,7.6,0.750,1.1,5.9,6.9,6.3,1.6,0.3,3.5,27.1,29.3
3,2013-14,Regular Season,201567,4,Kevin Love,1610612750,MIN,77,36.3,8.4,18.5,0.457,2.5,6.6,0.376,6.8,8.2,0.821,2.9,9.6,12.5,4.4,0.8,0.5,2.5,26.1,30.2
4,2013-14,Regular Season,201935,5,James Harden,1610612745,HOU,73,38.0,7.5,16.5,0.456,2.4,6.6,0.366,7.9,9.1,0.866,0.8,3.9,4.7,6.1,1.6,0.4,3.6,25.4,24.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4288,2023-24,Regular Season,1630231,236,KJ Martin,1610612755,PHI,60,12.5,1.6,3.0,0.536,0.1,0.5,0.286,0.4,0.7,0.537,0.7,1.5,2.2,0.9,0.4,0.2,0.5,3.7,5.1
4289,2023-24,Regular Season,203939,237,Dwight Powell,1610612742,DAL,63,13.3,1.1,1.7,0.679,0.0,0.0,0.333,1.0,1.4,0.708,1.5,1.9,3.4,1.3,0.4,0.3,0.5,3.3,7.3
4290,2023-24,Regular Season,1630192,238,Zeke Nnaji,1610612743,DEN,58,9.9,1.2,2.6,0.463,0.1,0.4,0.261,0.7,1.1,0.677,1.1,1.1,2.2,0.6,0.3,0.7,0.5,3.2,4.7
4291,2023-24,Regular Season,1630550,239,JT Thor,1610612766,CHA,63,12.4,1.3,2.9,0.437,0.4,1.3,0.346,0.2,0.3,0.550,0.7,1.6,2.3,0.5,0.2,0.4,0.2,3.2,4.5


In [10]:
# Persist the combined DataFrame to CSV; index=False leaves the row index out of the file
df.to_csv(path_or_buf='nba_player_stats.csv', index=False)