In [1]:
from selenium.webdriver import Firefox
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup as bs
import pandas as pd
import time

In [2]:
# Three-letter team codes that get interpolated into the
# basketball-reference.com team page URLs, in scraping order.
team_names = """
    ATL BOS BRK CHO CHI CLE DAL DEN
    DET GSW HOU IND LAC LAL MEM MIA
    MIL MIN NOP NYK OKC ORL PHI PHO
    POR SAC SAS TOR UTA WAS
""".split()

In [3]:
def get_team_stats(start, url):
    """Scrape the ``team_misc`` table of a team page into a flat dict.

    The page is loaded through the module-level Selenium ``driver``, which
    must already exist when this function is called.

    Parameters
    ----------
    start : str
        Prefix prepended to every key of the returned dict.
    url : str
        Address of the team page. The team code is taken from fixed
        positions (``url[-13:-10]``), so the URL is assumed to end in
        ``<3-char code>/<4-char year>.html``.

    Returns
    -------
    dict
        Always contains ``start + 'team'``. For every row of the table it
        also holds one entry per statistic, keyed
        ``start + 'Team_' + stat`` when the row header reads ``'Team'`` and
        ``start + 'LgRank_' + stat`` otherwise. If the page could not be
        loaded after three attempts, or the table is not present in the
        page, only the team entry is returned.
    """
    team_code = url[-13:-10]
    data = {start + 'team': team_code}

    # (key suffix, data-stat attribute of the cell, converter), listed in
    # the order the keys must be inserted into the result.
    columns = (
        ('W', 'wins', int),
        ('L', 'losses', int),
        ('MOV', 'mov', float),
        ('SRS', 'srs', float),
        ('ORtg', 'off_rtg', float),
        ('DRtg', 'def_rtg', float),
        ('FTr', 'fta_per_fga_pct', float),
        ('eFG%', 'efg_pct', float),
        ('TOV%', 'tov_pct', float),
        ('ORB%', 'orb_pct', float),
        ('FT/FGA', 'ft_rate', float),
        ('opp_eFG%', 'opp_efg_pct', float),
        ('opp_TOV%', 'opp_tov_pct', float),
        ('DRB%', 'drb_pct', float),
        ('opp_FT/FGA', 'opp_ft_rate', float),
    )

    html = None
    for attempt in range(3):
        try:
            driver.get(url)
            html = driver.page_source
        except TimeoutException:
            # Name the team in the log; the previous slice was always empty.
            print(f'{attempt}: TimeoutError on {team_code}')
            continue
        else:
            break

    if not html:
        return data

    # find() returns None when nothing matches, so check each step instead
    # of chaining the calls and failing with an opaque AttributeError.
    table = bs(html, 'lxml').find('table', {'id': 'team_misc'})
    body = table.find('tbody') if table is not None else None
    if body is None:
        print(f'team_misc table not found for {team_code}')
        return data

    for row in body.find_all('tr'):
        label = row.find('th', {'data-stat': 'player'}).text
        name = 'Team_' if label == 'Team' else 'LgRank_'

        for suffix, stat, convert in columns:
            cell = row.find('td', {'data-stat': stat})
            data[start + name + suffix] = convert(cell.text)

    return data

In [4]:
# Scrape every team page once and keep the statistics under both the
# 'local_' and the 'visit_' prefix.
local_teams = []
visit_teams = []
driver = Firefox()
try:
    for idx, team in enumerate(team_names, start=1):
        print(f'{idx}: {team}')

        url = f'https://www.basketball-reference.com/teams/{team}/2023.html'

        # Both records describe the same page, so load it a single time and
        # derive the second record by swapping the key prefix. Every key
        # produced by get_team_stats starts with the prefix it was given.
        local_team = get_team_stats('local_', url)
        visit_team = {
            'visit_' + key[len('local_'):]: value
            for key, value in local_team.items()
        }

        local_teams.append(local_team)
        visit_teams.append(visit_team)
finally:
    # quit() ends the whole browser session (close() only closes the current
    # window), and the finally clause runs even when scraping raises.
    driver.quit()

1: ATL
2: BOS
3: BRK
4: CHO
5: CHI
6: CLE
7: DAL
8: DEN
9: DET
10: GSW
11: HOU
12: IND
13: LAC
14: LAL
15: MEM
16: MIA
17: MIL
18: MIN
19: NOP
20: NYK
21: OKC
22: ORL
23: PHI
24: PHO
25: POR
26: SAC
27: SAS
28: TOR
29: UTA
30: WAS


In [5]:
# Turn each list of per-team dicts into a DataFrame and write it to its own
# CSV file without the row-index column.
local_df, visit_df = (
    pd.DataFrame(records) for records in (local_teams, visit_teams)
)

for frame, csv_path in ((local_df, 'local_teams.csv'),
                        (visit_df, 'visit_teams.csv')):
    frame.to_csv(csv_path, index=False)