In [295]:
import unicodedata

import requests
from bs4 import BeautifulSoup
import pandas as pd

Get stats of all NBA players for Season 2020-2021

In [296]:
# Download the season-totals page and parse it into a BeautifulSoup tree.
URL = "https://www.basketball-reference.com/leagues/NBA_2021_totals.html"
# A timeout keeps this cell from hanging indefinitely on a stalled connection.
res = requests.get(URL, timeout=30)
# Stop here on an HTTP error status instead of silently parsing an error page
# and ending up with an empty table further down.
res.raise_for_status()
soup = BeautifulSoup(res.text, 'lxml')

In [297]:
# Count the <tr> elements matched by either class selector
# (705 when this notebook was last run).
len(soup.select('.full_table, .italic_text.partial_table'))

705

In [298]:
# Take the first matched row as a sample to inspect its HTML structure.
player = soup.select('.full_table, .italic_text.partial_table')[0]
player

<tr class="full_table"><th class="right" csk="1" data-stat="ranker" scope="row">1</th><td class="left" csk="Achiuwa,Precious" data-append-csv="achiupr01" data-stat="player"><a href="/players/a/achiupr01.html">Precious Achiuwa</a></td><td class="center" data-stat="pos">PF</td><td class="right" data-stat="age">21</td><td class="left" data-stat="team_id"><a href="/teams/MIA/2021.html">MIA</a></td><td class="right" data-stat="g">61</td><td class="right" data-stat="gs">4</td><td class="right" data-stat="mp">737</td><td class="right" data-stat="fg">124</td><td class="right" data-stat="fga">228</td><td class="right non_qual" data-stat="fg_pct">.544</td><td class="right iz" data-stat="fg3">0</td><td class="right" data-stat="fg3a">1</td><td class="right non_qual iz" data-stat="fg3_pct">.000</td><td class="right" data-stat="fg2">124</td><td class="right" data-stat="fg2a">227</td><td class="right non_qual" data-stat="fg2_pct">.546</td><td class="right non_qual" data-stat="efg_pct">.544</td><td cl

In [299]:
# List only the <td> cells of the sample row; the leading <th> rank cell is not a <td>.
player.select('td')

[<td class="left" csk="Achiuwa,Precious" data-append-csv="achiupr01" data-stat="player"><a href="/players/a/achiupr01.html">Precious Achiuwa</a></td>,
 <td class="center" data-stat="pos">PF</td>,
 <td class="right" data-stat="age">21</td>,
 <td class="left" data-stat="team_id"><a href="/teams/MIA/2021.html">MIA</a></td>,
 <td class="right" data-stat="g">61</td>,
 <td class="right" data-stat="gs">4</td>,
 <td class="right" data-stat="mp">737</td>,
 <td class="right" data-stat="fg">124</td>,
 <td class="right" data-stat="fga">228</td>,
 <td class="right non_qual" data-stat="fg_pct">.544</td>,
 <td class="right iz" data-stat="fg3">0</td>,
 <td class="right" data-stat="fg3a">1</td>,
 <td class="right non_qual iz" data-stat="fg3_pct">.000</td>,
 <td class="right" data-stat="fg2">124</td>,
 <td class="right" data-stat="fg2a">227</td>,
 <td class="right non_qual" data-stat="fg2_pct">.546</td>,
 <td class="right non_qual" data-stat="efg_pct">.544</td>,
 <td class="right" data-stat="ft">56</td>

In [300]:
# Collect the human-readable header text for each stat column we keep.
# `data_stat` lists the `data-stat` attribute values of the wanted columns.
data_stat = ['player', 'pos', 'age', 'team_id', 'g', 'gs', 'mp', 'fg', 'fga', 'fg_pct', 'fg3', 'fg3a', 'fg3_pct', 'fg2', 'fg2a', 'fg2_pct', 'efg_pct', 'ft',
            'fta', 'ft_pct', 'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts']
labels = []
seen_stats = set()
for a in soup.select('tr th'):
    # .get() yields None for a header cell without a data-stat attribute,
    # where a['data-stat'] would raise KeyError.
    stat = a.get('data-stat')
    # Record each stat only once so that a header cell appearing a second time
    # (e.g. in a repeated header row) cannot duplicate a label and shift the rest.
    if stat in data_stat and stat not in seen_stats:
        seen_stats.add(stat)
        labels.append(a.text)
    # Stop as soon as every wanted column has a label.
    if len(labels) == len(data_stat):
        break

In [301]:
# Show the header labels collected for the selected stat columns.
print('Column names of the table: ', labels)

Column names of the table:  ['Player', 'Pos', 'Age', 'Tm', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']


In [302]:
# Build one {column label: cell text} record per matched table row.
all_stats = [
    # Pair the header labels with the row's <td> cells by position.
    {label: cell.text for label, cell in zip(labels, stat_row.select('td'))}
    # Rows are the elements matched by either of the two class selectors.
    for stat_row in soup.select('.full_table, .italic_text.partial_table')
]

In [303]:
# Build a DataFrame with one row per scraped record.
# Every value is still the raw cell text (a string) at this point.
stats_df = pd.DataFrame(all_stats)
stats_df

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,Precious Achiuwa,PF,21,MIA,61,4,737,124,228,.544,...,.509,73,135,208,29,20,28,43,91,304
1,Jaylen Adams,PG,24,MIL,7,0,18,1,8,.125,...,,0,3,3,2,0,0,0,1,2
2,Steven Adams,C,27,NOP,58,58,1605,189,308,.614,...,.444,213,301,514,111,54,38,78,113,438
3,Bam Adebayo,C,23,MIA,64,64,2143,456,800,.570,...,.799,142,431,573,346,75,66,169,145,1197
4,LaMarcus Aldridge,C,35,TOT,26,23,674,140,296,.473,...,.872,19,99,118,49,11,29,27,47,352
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
700,Delon Wright,PG,28,SAC,27,8,696,104,225,.462,...,.833,28,77,105,97,43,11,35,30,271
701,Thaddeus Young,PF,32,CHI,68,23,1652,370,662,.559,...,.628,168,255,423,291,74,40,137,152,823
702,Trae Young,PG,22,ATL,63,63,2125,487,1112,.438,...,.886,38,207,245,594,53,12,261,111,1594
703,Cody Zeller,C,28,CHO,48,21,1005,181,324,.559,...,.714,119,209,328,86,27,17,51,121,451


Now we need to get the salary of each player 

Think of a way to get salary for each player for 2020-2021 season. 
- Note: I searched online to find out whether a player's performance in a season is reflected in that same season's salary, or whether it instead becomes a factor in deciding the salary for the next season. I could not find a clear answer, so this project focuses on whether the salary paid for a season is reasonable given the performance in that season. Thus, for now, I will use the salaries for the 2020-2021 season, since the stats are from that season.

In [304]:
#re = requests.get('https://www.basketball-reference.com/players/a/adamsja01.html')
#sou = BeautifulSoup(re.text, 'lxml')

In [305]:
#sou.find_all('div', {'id':'all_faq'})

In [306]:
#div = sou.find_all('div', {'id':'all_all_salaries'})
#div

In [307]:

#for player in soup.select('.full_table, .italic_text.partial_table'):
 #   url_player = 'https://www.basketball-reference.com{}' #base url for players
  #  a_tags = player.select('a')
   # for a_tag in a_tags: # there might be 2 a-tags, one for player and one for team. Only need player a-tag
     #   if len(a_tag.text) > 4:  # if player's name is longer than 4 letters
      #      url_player = url_player.format(a_tag['href'])
   # player_res = requests.get(url_player)
   # player_soup = BeautifulSoup(player_res.text, 'lxml')
    
    

There was a problem with the HTML of the website, so I could not grab the salary information from Basketball Reference. Therefore, I decided to use a different website called [HOOPSHYPE](https://hoopshype.com/salaries/players/2020-2021/) to get the salary information for each player.

In [308]:
# Download the salary page and parse it into a second BeautifulSoup tree.
# A timeout keeps this cell from hanging indefinitely on a stalled connection.
result2 = requests.get('https://hoopshype.com/salaries/players/2020-2021/', timeout=30)
# Stop here on an HTTP error status instead of parsing an error page.
result2.raise_for_status()
soup2 = BeautifulSoup(result2.text, 'lxml')

In [309]:
# Peek at the raw salary text of the first row after the header row.
# Slicing [1:2] yields at most that one row, so no explicit break is needed.
for first_row in soup2.select('tr')[1:2]:
    black_cells = first_row.find_all('td', {'style':'color:black'})
    print(black_cells[0].text)


							$43,006,362						


In [310]:
# Map each player's name (text of the row's first link) to the text of the
# row's first <td style="color:black"> cell.
# NOTE(review): presumably that cell is the 2020-21 salary column; confirm against the page.
players_salary = {}
for player in soup2.select('tr')[1:]:
    name_links = player.select('a')
    salary_cells = player.find_all('td', {'style':'color:black'})

    # Skip rows that lack a link or a black-styled cell instead of failing
    # with IndexError on the [0] lookups below.
    if not name_links or not salary_cells:
        continue

    player_name = name_links[0].text.strip()

    salary = salary_cells[0].text.strip()

    # A name that appears more than once keeps the value from its last row.
    players_salary[player_name] = salary

In [311]:
# Inspect the name -> salary-string mapping.
players_salary

{'Stephen Curry': '$43,006,362',
 'Chris Paul': '$41,358,814',
 'Russell Westbrook': '$41,358,814',
 'James Harden': '$41,254,920',
 'John Wall': '$41,254,920',
 'Kevin Durant': '$40,108,950',
 'LeBron James': '$39,219,566',
 'Paul George': '$35,450,412',
 'Klay Thompson': '$35,361,360',
 'Mike Conley': '$34,502,132',
 'Kemba Walker': '$34,379,100',
 'Kawhi Leonard': '$34,379,100',
 'Jimmy Butler': '$34,379,100',
 'Tobias Harris': '$34,358,850',
 'Blake Griffin': '$33,900,241',
 'Kyrie Irving': '$33,722,850',
 'Khris Middleton': '$33,051,724',
 'Anthony Davis': '$32,742,000',
 'Damian Lillard': '$31,626,953',
 'Kevin Love': '$31,258,256',
 'Ben Simmons': '$30,559,200',
 'Pascal Siakam': '$30,559,200',
 'Kyle Lowry': '$30,500,000',
 'Steven Adams': '$29,592,695',
 'Nikola Jokic': '$29,542,010',
 'Andrew Wiggins': '$29,542,010',
 'Joel Embiid': '$29,542,010',
 'Kristaps Porzingis': '$29,467,800',
 'Karl-Anthony Towns': '$29,467,800',
 'Devin Booker': '$29,467,800',
 'CJ McCollum': '$29,3

In [312]:
# Collect the 'Player' column values into a plain Python list, in row order.
player_list = [name for name in stats_df['Player']]
player_list

['Precious Achiuwa',
 'Jaylen Adams',
 'Steven Adams',
 'Bam Adebayo',
 'LaMarcus Aldridge',
 'LaMarcus Aldridge',
 'LaMarcus Aldridge',
 'Ty-Shon Alexander',
 'Nickeil Alexander-Walker',
 'Grayson Allen',
 'Jarrett Allen',
 'Jarrett Allen',
 'Jarrett Allen',
 'Al-Farouq Aminu',
 'Al-Farouq Aminu',
 'Al-Farouq Aminu',
 'Kyle Anderson',
 'Giannis Antetokounmpo',
 'Kostas Antetokounmpo',
 'Thanasis Antetokounmpo',
 'Carmelo Anthony',
 'Cole Anthony',
 'OG Anunoby',
 'Ryan Arcidiacono',
 'Trevor Ariza',
 'D.J. Augustin',
 'D.J. Augustin',
 'D.J. Augustin',
 'Deni Avdija',
 'Deandre Ayton',
 'Udoka Azubuike',
 'Dwayne Bacon',
 'Marvin Bagley III',
 'LaMelo Ball',
 'Lonzo Ball',
 'Mo Bamba',
 'Desmond Bane',
 'Harrison Barnes',
 'RJ Barrett',
 'Will Barton',
 'Keita Bates-Diop',
 'Nicolas Batum',
 'Aron Baynes',
 'Kent Bazemore',
 'Darius Bazley',
 'Bradley Beal',
 'Malik Beasley',
 'Jordan Bell',
 'Jordan Bell',
 'Jordan Bell',
 "DeAndre' Bembry",
 'Dāvis Bertāns',
 'Patrick Beverley',
 'S

In [318]:
def _strip_accents(name):
    """Return `name` with combining marks removed, e.g. 'Dāvis Bertāns' -> 'Davis Bertans'."""
    decomposed = unicodedata.normalize('NFD', name)
    return ''.join(ch for ch in decomposed if not unicodedata.combining(ch))


# Fallback lookup keyed by the accent-free spelling of each name.
# The stats table spells names with diacritics ('Dāvis Bertāns') while the
# salary source shows unaccented names ('Nikola Jokic'), so an exact match
# alone would leave such players with 'NA'.
salary_by_plain_name = {}
for salary_name, salary_text in players_salary.items():
    # setdefault keeps the first entry if two names collapse to the same key.
    salary_by_plain_name.setdefault(_strip_accents(salary_name), salary_text)

# Add a 'Salary' entry to every stats record in place ('NA' when no match is found).
for player in all_stats:
    stats_name = player['Player']
    if stats_name in players_salary:
        # Prefer the exact match so existing results are unchanged.
        player['Salary'] = players_salary[stats_name]
    else:
        player['Salary'] = salary_by_plain_name.get(_strip_accents(stats_name), 'NA')

In [319]:
# `all_stats` is the list whose records received the 'Salary' entry; the name
# `all_stats_salary` is never assigned in this notebook and raises NameError
# on a fresh kernel. Show only the first few records to keep the output short.
all_stats[:3]

[{'Player': 'Precious Achiuwa',
  'Pos': 'PF',
  'Age': '21',
  'Tm': 'MIA',
  'G': '61',
  'GS': '4',
  'MP': '737',
  'FG': '124',
  'FGA': '228',
  'FG%': '.544',
  '3P': '0',
  '3PA': '1',
  '3P%': '.000',
  '2P': '124',
  '2PA': '227',
  '2P%': '.546',
  'eFG%': '.544',
  'FT': '56',
  'FTA': '110',
  'FT%': '.509',
  'ORB': '73',
  'DRB': '135',
  'TRB': '208',
  'AST': '29',
  'STL': '20',
  'BLK': '28',
  'TOV': '43',
  'PF': '91',
  'PTS': '304',
  'Salary': '$2,582,160'},
 {'Player': 'Jaylen Adams',
  'Pos': 'PG',
  'Age': '24',
  'Tm': 'MIL',
  'G': '7',
  'GS': '0',
  'MP': '18',
  'FG': '1',
  'FGA': '8',
  'FG%': '.125',
  '3P': '0',
  '3PA': '2',
  '3P%': '.000',
  '2P': '1',
  '2PA': '6',
  '2P%': '.167',
  'eFG%': '.125',
  'FT': '0',
  'FTA': '0',
  'FT%': '',
  'ORB': '0',
  'DRB': '3',
  'TRB': '3',
  'AST': '2',
  'STL': '0',
  'BLK': '0',
  'TOV': '0',
  'PF': '1',
  'PTS': '2',
  'Salary': '$449,115'},
 {'Player': 'Steven Adams',
  'Pos': 'C',
  'Age': '27',
  'T

In [320]:
# Rebuild the DataFrame from all_stats, whose records now carry a 'Salary' entry.
stats_salary_df = pd.DataFrame(all_stats)
stats_salary_df

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Salary
0,Precious Achiuwa,PF,21,MIA,61,4,737,124,228,.544,...,73,135,208,29,20,28,43,91,304,"$2,582,160"
1,Jaylen Adams,PG,24,MIL,7,0,18,1,8,.125,...,0,3,3,2,0,0,0,1,2,"$449,115"
2,Steven Adams,C,27,NOP,58,58,1605,189,308,.614,...,213,301,514,111,54,38,78,113,438,"$29,592,695"
3,Bam Adebayo,C,23,MIA,64,64,2143,456,800,.570,...,142,431,573,346,75,66,169,145,1197,"$5,115,492"
4,LaMarcus Aldridge,C,35,TOT,26,23,674,140,296,.473,...,19,99,118,49,11,29,27,47,352,"$17,628,340"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
700,Delon Wright,PG,28,SAC,27,8,696,104,225,.462,...,28,77,105,97,43,11,35,30,271,"$9,000,000"
701,Thaddeus Young,PF,32,CHI,68,23,1652,370,662,.559,...,168,255,423,291,74,40,137,152,823,"$13,545,000"
702,Trae Young,PG,22,ATL,63,63,2125,487,1112,.438,...,38,207,245,594,53,12,261,111,1594,"$6,571,800"
703,Cody Zeller,C,28,CHO,48,21,1005,181,324,.559,...,119,209,328,86,27,17,51,121,451,"$15,415,730"


In [321]:
# Print each column's dtype and non-null count.
stats_salary_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 705 entries, 0 to 704
Data columns (total 30 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Player  705 non-null    object
 1   Pos     705 non-null    object
 2   Age     705 non-null    object
 3   Tm      705 non-null    object
 4   G       705 non-null    object
 5   GS      705 non-null    object
 6   MP      705 non-null    object
 7   FG      705 non-null    object
 8   FGA     705 non-null    object
 9   FG%     705 non-null    object
 10  3P      705 non-null    object
 11  3PA     705 non-null    object
 12  3P%     705 non-null    object
 13  2P      705 non-null    object
 14  2PA     705 non-null    object
 15  2P%     705 non-null    object
 16  eFG%    705 non-null    object
 17  FT      705 non-null    object
 18  FTA     705 non-null    object
 19  FT%     705 non-null    object
 20  ORB     705 non-null    object
 21  DRB     705 non-null    object
 22  TRB     705 non-null    ob

In [322]:
# Preview the first 20 rows.
stats_salary_df.head(20)

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Salary
0,Precious Achiuwa,PF,21,MIA,61,4,737,124,228,0.544,...,73,135,208,29,20,28,43,91,304,"$2,582,160"
1,Jaylen Adams,PG,24,MIL,7,0,18,1,8,0.125,...,0,3,3,2,0,0,0,1,2,"$449,115"
2,Steven Adams,C,27,NOP,58,58,1605,189,308,0.614,...,213,301,514,111,54,38,78,113,438,"$29,592,695"
3,Bam Adebayo,C,23,MIA,64,64,2143,456,800,0.57,...,142,431,573,346,75,66,169,145,1197,"$5,115,492"
4,LaMarcus Aldridge,C,35,TOT,26,23,674,140,296,0.473,...,19,99,118,49,11,29,27,47,352,"$17,628,340"
5,LaMarcus Aldridge,C,35,SAS,21,18,544,115,248,0.464,...,17,77,94,36,8,18,20,36,288,"$17,628,340"
6,LaMarcus Aldridge,C,35,BRK,5,5,130,25,48,0.521,...,2,22,24,13,3,11,7,11,64,"$17,628,340"
7,Ty-Shon Alexander,SG,22,PHO,15,0,47,3,12,0.25,...,2,8,10,6,0,1,3,2,9,"$449,115"
8,Nickeil Alexander-Walker,SG,22,NOP,46,13,1007,192,458,0.419,...,13,131,144,102,47,22,69,88,508,"$3,113,160"
9,Grayson Allen,SG,25,MEM,50,38,1259,173,414,0.418,...,19,141,160,108,46,8,48,71,532,"$2,545,320"
