In [4]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd

In [5]:
# NBA season we will be analyzing
def getstats(year):

    # URL page we will scraping (see image above)
    url = "https://www.basketball-reference.com/leagues/NBA_{}_totals.html".format(year)
    # this is the HTML from the given URL
    html = urlopen(url)
    soup = BeautifulSoup(html)

    # use findALL() to get the column headers
    soup.findAll('tr', limit=2)
    # use getText()to extract the text we need into a list
    headers = [th.getText() for th in soup.findAll('tr', limit=2)[0].findAll('th')]
    # exclude the first column as we will not need the ranking order from Basketball Reference for the analysis
    headers = headers[1:]

    # avoid the first header row
    rows = soup.findAll('tr')[1:]
    player_stats = [[td.getText() for td in rows[i].findAll('td')]
                for i in range(len(rows))]

    stats = pd.DataFrame(player_stats, columns = headers)
    stats['Year'] = year
    return stats

In [6]:
years = list(range(2000, 2022))
yearly_stats = []
for year in years:
    yearly_stats.append(getstats(year))
    

In [7]:
yearly_stats

[                  Player Pos Age   Tm   G  GS    MP   FG   FGA   FG%  ...  \
 0      Tariq Abdul-Wahad  SG  25  TOT  61  56  1578  274   646  .424  ...   
 1      Tariq Abdul-Wahad  SG  25  ORL  46  46  1205  223   515  .433  ...   
 2      Tariq Abdul-Wahad  SG  25  DEN  15  10   373   51   131  .389  ...   
 3    Shareef Abdur-Rahim  SF  23  VAN  82  82  3223  594  1277  .465  ...   
 4         Cory Alexander  PG  26  DEN  29   2   329   28    98  .286  ...   
 ..                   ...  ..  ..  ...  ..  ..   ...  ...   ...   ...  ...   
 512     Haywoode Workman  PG  34  MIL  23   1   248   23    62  .371  ...   
 513     Haywoode Workman  PG  34  TOR  13   1   102    8    28  .286  ...   
 514    Metta World Peace  SF  20  CHI  72  63  2238  309   759  .407  ...   
 515      Lorenzen Wright   C  24  ATL  75   0  1205  180   361  .499  ...   
 516            Tim Young   C  23  GSW  25   0   137   13    39  .333  ...   
 
      ORB  DRB  TRB  AST  STL BLK  TOV   PF   PTS  Year  
 0  

In [8]:
df = pd.concat(yearly_stats)

In [9]:
df

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
0,Tariq Abdul-Wahad,SG,25,TOT,61,56,1578,274,646,.424,...,101,190,291,98,59,28,106,147,697,2000
1,Tariq Abdul-Wahad,SG,25,ORL,46,46,1205,223,515,.433,...,77,162,239,72,53,16,87,116,563,2000
2,Tariq Abdul-Wahad,SG,25,DEN,15,10,373,51,131,.389,...,24,28,52,26,6,12,19,31,134,2000
3,Shareef Abdur-Rahim,SF,23,VAN,82,82,3223,594,1277,.465,...,218,607,825,271,89,87,249,244,1663,2000
4,Cory Alexander,PG,26,DEN,29,2,329,28,98,.286,...,8,34,42,58,24,2,28,39,82,2000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
726,Delon Wright,PG,28,SAC,27,8,696,104,225,.462,...,28,77,105,97,43,11,35,30,271,2021
727,Thaddeus Young,PF,32,CHI,68,23,1652,370,662,.559,...,168,255,423,291,74,40,137,152,823,2021
728,Trae Young,PG,22,ATL,63,63,2125,487,1112,.438,...,38,207,245,594,53,12,261,111,1594,2021
729,Cody Zeller,C,28,CHO,48,21,1005,181,324,.559,...,119,209,328,86,27,17,51,121,451,2021


In [10]:
df.to_csv("PlayersStats.csv", index = False)