# Scraping NBA Leaders' Data

## Importing needed Libraries

In [1]:
import pandas as pd
import requests
pd.set_option('display.max_columns', None)
import time
import numpy as np

In [2]:
# Single-season trial endpoint: league leaders by points per game for the
# 2012-13 regular season. Used once to discover the response layout.
test_url = (
    "https://stats.nba.com/stats/leagueLeaders"
    "?LeagueID=00"
    "&PerMode=PerGame"
    "&Scope=S"
    "&Season=2012-13"
    "&SeasonType=Regular%20Season"
    "&StatCategory=PTS"
)

### Requesting the API response and parsing it as JSON

In [3]:
# Fetch the trial endpoint and decode the JSON payload into `r`.
# A timeout keeps a stalled connection from hanging the kernel forever, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# JSON-decode or KeyError failure in a later cell.
response = requests.get(url=test_url, timeout=30)
response.raise_for_status()
r = response.json()

### Setting up Heading Row for table

In [4]:
# Column names as reported by the API under resultSet -> headers; reused below
# as the column labels for every DataFrame built from a response's row set.
table_headers=r['resultSet']['headers']

### Adding Years and Season for new DataFrame

In [5]:
# temp_df1: the stat rows from the trial response, labelled with the API headers.
temp_df1 = pd.DataFrame(data=r['resultSet']['rowSet'], columns=table_headers)

# temp_df2: two constant columns (season label and season type), one entry per
# stat row, so the two frames can be joined side by side.
temp_df2 = pd.DataFrame({
    'Year': ['2012-13'] * len(temp_df1),
    'Season_type': ['Regular%20Season'] * len(temp_df1),
})

### Concatenating the DataFrames

In [6]:
# Join column-wise: the Year / Season_type columns first, then the stat columns.
temp_df3 = pd.concat(objs=[temp_df2, temp_df1], axis='columns')

In [7]:
# Remove the trial-run scratch frames from the kernel namespace.
del temp_df1, temp_df2, temp_df3

# Putting Everything Together in One Loop

### Request headers so the scraper is not treated as a bot

In [8]:
# Browser-like request headers sent with every stats.nba.com call in the
# scraping loop.
#
# Changes from the values originally captured from the browser:
#   * 'If-Modified-Since' is intentionally omitted. A hard-coded, stale
#     timestamp turns every call into a conditional request, so the server may
#     reply "304 Not Modified" with an empty body, which breaks `.json()`.
#   * 'sec-ch-ua-platform' keeps its literal double quotes, consistent with the
#     quoted items in the 'sec-ch-ua' value.
#
# NOTE(review): 'Accept-Encoding' advertises 'br'; confirm the environment can
# decode Brotli responses, otherwise drop 'br' from the list.
headers = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8,hi;q=0.7,fr;q=0.6',
    'Connection': 'keep-alive',
    'Host': 'stats.nba.com',
    'Origin': 'https://www.nba.com',
    'Referer': 'https://www.nba.com/',
    'sec-ch-ua': '"Google Chrome";v="107", "Chromium";v="107", "Not=A?Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'
}

### Defining DataFrame Columns' Titles

In [9]:
# Final column layout: the two season-identifying columns, then the API's stat columns.
df_cols = ['Year', 'Season_type', *table_headers]

In [10]:
# Build an empty frame with the planned columns; as the cell's last expression
# it is displayed, giving a quick preview of the column layout.
pd.DataFrame(columns=df_cols)

Unnamed: 0,Year,Season_type,PLAYER_ID,RANK,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PTS,EFF


### Defining the seasons and season types to scrape

In [11]:
# Empty frame with the final column layout; the scraping loop fills it.
df = pd.DataFrame(columns=df_cols)

# Season types exactly as they are placed into the request URL
# (the space in "Regular Season" is already percent-encoded).
season_types = ['Regular%20Season', 'Playoffs']

# Season labels '2013-14' through '2022-23': the start year followed by the
# two-digit end year.
years = [f'{start}-{(start + 1) % 100:02d}' for start in range(2013, 2023)]

## Looping through all years and season types

In [12]:


# Scrape the points-per-game leaders for every (year, season type) pair and
# assemble the results into `df`.
#
# Each response's rows are tagged with their Year / Season_type and collected
# in a list; `df` is built once at the end. Compared with concatenating onto
# `df` inside the loop this:
#   * avoids re-copying the accumulated frame on every iteration,
#   * makes the cell idempotent, since re-running it no longer appends
#     duplicate rows to an already filled `df`,
#   * gives `df` a unique 0..N-1 index instead of one that restarts per season.
#
# Season_type keeps the URL-encoded value (e.g. 'Regular%20Season') in the
# data, exactly as before; only the progress message shows the decoded label.
begin_loop = time.time()
season_frames = []

for y in years:
    for s in season_types:
        api_url = (
            'https://stats.nba.com/stats/leagueLeaders?LeagueID=00&PerMode=PerGame&Scope=S&Season='
            + y + '&SeasonType=' + s + '&StatCategory=PTS'
        )

        # Timeout: never hang on a stalled connection.
        # raise_for_status: fail loudly on HTTP errors instead of on a later KeyError.
        response = requests.get(url=api_url, headers=headers, timeout=30)
        response.raise_for_status()
        r = response.json()

        # Stat rows for this season, prefixed with the two identifying columns
        # (inserted in reverse so the order is Year, Season_type, <stats...>).
        season_df = pd.DataFrame(r['resultSet']['rowSet'], columns=table_headers)
        season_df.insert(0, 'Season_type', s)
        season_df.insert(0, 'Year', y)
        season_frames.append(season_df)

        # Human-readable season label for the progress message only.
        s2 = s.replace('%20', ' ')
        print(f'Finished scraping data for year {y} season {s2}.')

        # Random pause between requests to avoid hammering the server.
        lag = np.random.uniform(low=5, high=25)
        print(f'wait for {round(lag,1)} seconds')
        time.sleep(lag)

# Build the combined frame once; fall back to an empty frame with the expected
# columns when nothing was scraped (pd.concat rejects an empty list).
if season_frames:
    df = pd.concat(season_frames, axis=0, ignore_index=True)
else:
    df = pd.DataFrame(columns=df_cols)

print(f'Process Completed ! Total Time Taken: {round(((time.time()-begin_loop)/60),2)}')


Finished scraping data for year 2013-14 season Regular Season.
wait for 24.4 seconds
Finished scraping data for year 2013-14 season Playoffs.
wait for 12.8 seconds
Finished scraping data for year 2014-15 season Regular Season.
wait for 12.6 seconds
Finished scraping data for year 2014-15 season Playoffs.
wait for 8.7 seconds
Finished scraping data for year 2015-16 season Regular Season.
wait for 16.3 seconds
Finished scraping data for year 2015-16 season Playoffs.
wait for 10.2 seconds
Finished scraping data for year 2016-17 season Regular Season.
wait for 16.1 seconds
Finished scraping data for year 2016-17 season Playoffs.
wait for 16.4 seconds
Finished scraping data for year 2017-18 season Regular Season.
wait for 12.3 seconds
Finished scraping data for year 2017-18 season Playoffs.
wait for 10.6 seconds
Finished scraping data for year 2018-19 season Regular Season.
wait for 15.2 seconds
Finished scraping data for year 2018-19 season Playoffs.
wait for 12.9 seconds
Finished scraping

### Writing DataFrame to an excel file

In [13]:
# Export the scraped data to an Excel workbook (single sheet named 'Sheet1').
# The context manager saves and closes the file on exit, replacing the explicit
# `writer.save()` call, which is deprecated and removed in pandas 2.0.
with pd.ExcelWriter('nba_players_data.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='Sheet1')

  writer.save()
