## NBA League Statistics

In [1]:
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd

In [2]:
#each URL is a season page (e.g. .../leagues/NBA_2023.html), and we want to scrape the Eastern Conference standings from it
#we want to create a function that takes in a season URL and returns a dataframe with one row per Eastern Conference team (wins, losses, win/loss pct)
def get_per_season_stats_e(link):
    """Scrape the Eastern Conference standings table from a season page.

    Args:
        link: URL of a season page that contains a ``div`` with id
            ``all_confs_standings_E``.

    Returns:
        A DataFrame with one row per team and the columns ``team_name``,
        ``wins``, ``losses``, ``win_loss_pct``, ``season_number``,
        ``conference`` and ``league_champions``. Every value is the text
        scraped from the page; no numeric conversion is performed.

    Raises:
        requests.exceptions.HTTPError: if the server answers with a 4xx/5xx
            status code.
        ValueError: if the page has no ``all_confs_standings_E`` wrapper.
    """
    # A timeout keeps a stalled connection from hanging the whole scrape.
    res = requests.get(link, timeout=30)
    # Fail fast on an HTTP error instead of printing it and then trying to
    # parse an error page, which only produced an opaque AttributeError later.
    res.raise_for_status()

    soup = BeautifulSoup(res.content, 'html.parser')
    standings = soup.find('div', class_='table_wrapper', id='all_confs_standings_E')
    if standings is None:
        raise ValueError(
            "No 'all_confs_standings_E' standings table found at %s" % link
        )

    # One tuple per team row; the first three <td> cells are taken as
    # wins, losses and win/loss percentage, in that order.
    rows = []
    for team_row in standings.find_all('tr', class_='full_table'):
        cells = team_row.find_all('td')
        rows.append((
            team_row.find_all('th', class_='left')[0].find('a').text,
            cells[0].text,
            cells[1].text,
            cells[2].text,
        ))

    # Page-level values that are repeated on every team row.
    season_number = soup.find('h1').find('span').text
    # The first header cell of the standings wrapper holds the conference label.
    conference = standings.find('th').text
    league_champions = (
        soup.find('div', id='meta').find_all('div')[1].find('p').find('a').text
    )

    df = pd.DataFrame(rows, columns=['team_name', 'wins', 'losses', 'win_loss_pct'])
    df['season_number'] = season_number
    df['conference'] = conference
    df['league_champions'] = league_champions
    return df

In [3]:
#each URL is a season page (e.g. .../leagues/NBA_2023.html), and we want to scrape the Western Conference standings from it
#we want to create a function that takes in a season URL and returns a dataframe with one row per Western Conference team (wins, losses, win/loss pct)
def get_per_season_stats_w(link):
    """Scrape the Western Conference standings table from a season page.

    Args:
        link: URL of a season page that contains a ``div`` with id
            ``all_confs_standings_W``.

    Returns:
        A DataFrame with one row per team and the columns ``team_name``,
        ``wins``, ``losses``, ``win_loss_pct``, ``season_number``,
        ``conference`` and ``league_champions``. Every value is the text
        scraped from the page; no numeric conversion is performed.

    Raises:
        requests.exceptions.HTTPError: if the server answers with a 4xx/5xx
            status code.
        ValueError: if the page has no ``all_confs_standings_W`` wrapper.
    """
    # A timeout keeps a stalled connection from hanging the whole scrape.
    res = requests.get(link, timeout=30)
    # Fail fast on an HTTP error instead of printing it and then trying to
    # parse an error page, which only produced an opaque AttributeError later.
    res.raise_for_status()

    soup = BeautifulSoup(res.content, 'html.parser')
    standings = soup.find('div', class_='table_wrapper', id='all_confs_standings_W')
    if standings is None:
        raise ValueError(
            "No 'all_confs_standings_W' standings table found at %s" % link
        )

    # One tuple per team row; the first three <td> cells are taken as
    # wins, losses and win/loss percentage, in that order.
    rows = []
    for team_row in standings.find_all('tr', class_='full_table'):
        cells = team_row.find_all('td')
        rows.append((
            team_row.find_all('th', class_='left')[0].find('a').text,
            cells[0].text,
            cells[1].text,
            cells[2].text,
        ))

    # Page-level values that are repeated on every team row.
    season_number = soup.find('h1').find('span').text
    # The first header cell of the standings wrapper holds the conference label.
    conference = standings.find('th').text
    league_champions = (
        soup.find('div', id='meta').find_all('div')[1].find('p').find('a').text
    )

    df = pd.DataFrame(rows, columns=['team_name', 'wins', 'losses', 'win_loss_pct'])
    df['season_number'] = season_number
    df['conference'] = conference
    df['league_champions'] = league_champions
    return df

In [4]:
#create function to grab stats from multiple URLs
def get_per_season_stats_multiple_links(links):
    """Scrape both conference standings for every season URL and stack them.

    Args:
        links: iterable of season-page URLs.

    Returns:
        A single DataFrame made by concatenating, for each URL in order, the
        Eastern Conference frame followed by the Western Conference frame.
        Each frame keeps its own row index (the index is not reset).
    """
    frames = []
    for url in links:
        # Eastern first, then Western, so the output order is stable per season.
        frames.extend((get_per_season_stats_e(url), get_per_season_stats_w(url)))
    return pd.concat(frames)

In [7]:
# Scrape the season pages NBA_2023 down to NBA_2019, newest first.
df = get_per_season_stats_multiple_links([
    'https://www.basketball-reference.com/leagues/NBA_%d.html' % year
    for year in range(2023, 2018, -1)
])

In [8]:
# Write the combined standings to disk without the DataFrame index column.
df.to_csv(path_or_buf='nba_season_stats.csv', index=False)

Off to Looker Studio!