In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re

In [2]:
BOS_URL = (f'https://www.basketball-reference.com/teams/BOS/2022.html')

# The requests library can send a GET request to the wiz_url
BOS_RES = requests.get(BOS_URL)

In [3]:
# BeautifulSoup library parses the content of an HTML document, in this case BOS_RES
BOS_SOUP = BeautifulSoup(BOS_RES.content, 'lxml')

# BeautifulSoup's .find() method searches for a tag and specified attributes, 
# returning the first match 
BOS_PER_GAME = BOS_SOUP.find(name = 'table', attrs = {'id' : 'per_game'})

In [4]:
# Creating a list of dictionaries to then convert into a Pandas Dataframe
BOS_STATS = []

for row in BOS_PER_GAME.find_all('tr')[1:]:  # Excluding the first 'tr', since that's the table's title head

    player = {}
    player['Name'] = row.find('a').text.strip()
    player['Age'] = row.find('td', {'data-stat' : 'age'}).text
    player['Min PG'] = row.find('td', {'data-stat' : 'mp_per_g'}).text
    player['Field Goal %'] = row.find('td', {'data-stat' : 'fg_pct'}).text
    player['Rebounds PG'] = row.find('td', {'data-stat' : 'trb_per_g'}).text
    player['Assists PG'] = row.find('td', {'data-stat' : 'ast_per_g'}).text
    player['Steals PG'] = row.find('td', {'data-stat' : 'stl_per_g'}).text
    player['Blocks PG'] = row.find('td', {'data-stat' : 'blk_per_g'}).text
    player['Turnovers PG'] = row.find('td', {'data-stat' : 'tov_per_g'}).text
    player['Points PG'] = row.find('td', {'data-stat' : 'pts_per_g'}).text
    BOS_STATS.append(player)

pd.DataFrame(BOS_STATS)

Unnamed: 0,Name,Age,Min PG,Field Goal %,Rebounds PG,Assists PG,Steals PG,Blocks PG,Turnovers PG,Points PG
0,Jayson Tatum,23,35.9,0.453,8.0,4.4,1.0,0.6,2.9,26.9
1,Jaylen Brown,25,33.6,0.473,6.1,3.5,1.1,0.3,2.7,23.6
2,Marcus Smart,27,32.3,0.418,3.8,5.9,1.7,0.3,2.2,12.1
3,Robert Williams,24,29.6,0.736,9.6,2.0,0.9,2.2,1.0,10.0
4,Dennis Schröder,28,29.2,0.44,3.3,4.2,0.8,0.1,2.1,14.4
5,Al Horford,35,29.1,0.467,7.7,3.4,0.7,1.3,0.9,10.2
6,Derrick White,27,27.4,0.409,3.4,3.5,0.6,0.6,1.2,11.0
7,Josh Richardson,28,24.7,0.443,2.8,1.5,0.8,0.5,0.9,9.7
8,Grant Williams,23,24.4,0.475,3.6,1.0,0.5,0.7,0.8,7.8
9,Daniel Theis,29,18.7,0.598,4.7,1.0,0.4,0.7,0.7,7.9


In [6]:
height_weight_position = []

for row in BOS_PER_GAME.find_all('tr')[1:]:
    
    player = {}
    
    # Parsing html data from each player's specific web page
    player_url = ('https://www.basketball-reference.com/' + row.find('a').attrs['href'])
    player_rest = requests.get(player_url)
    player_soup = BeautifulSoup(player_rest.content, 'lxml')
    player_info = player_soup.find(name = 'div', attrs = {'itemtype' : 'https://schema.org/Person'})
    
    # Adding name for clarity
    player['Name'] = row.find('a').text.strip()
    
    # Using RegEx to extract height, weight, and position from each player's web profile.
    # The '(.*)' regex notation allows the extraction of text from in between two known substrings,
    # which is the text written on either side of '(.*)' in the below code. 
    s = str(player_info.find_all('p'))
    weight = re.search('\"weight\">(.*)lb</span>', s)
    position = re.search('Position:\n  </strong>\n (.*)\n\n', s)
    height = re.search('\"height\">(.*)</span>,\xa0<span itemprop="weight', s)
    player['Height'] = height.group(1).strip()
    player['Weight (Lbs)'] = weight.group(1).strip()
    player['Position'] = position.group(1).strip()

    height_weight_position.append(player)
        
pd.DataFrame(height_weight_position)

AttributeError: 'NoneType' object has no attribute 'find_all'