In [3]:
from io import StringIO
from urllib.request import urlopen

import pandas as pd
from bs4 import BeautifulSoup

from ipynb.fs.full.UtilCollections import TEAM_TO_ABBR
from ipynb.fs.full.UtilFunctions import format_season

In [8]:
def get_team_stats(seasons, playoffs=False):
    """Scrape the per-game team table for each season and save it as a CSV.

    For every entry in ``seasons`` the ``div_per_game-team`` widget page is
    downloaded, cut off at the "League Average" row, given a
    ``Made_Playoffs`` column (True when the team name contains ``*``), and
    written to a season-specific file under ``DataCollection/``.

    Args:
        seasons: Iterable of season identifiers. Each one is interpolated
            into the ``NBA_{season}.html`` part of the URL and passed to
            ``format_season`` to build the output file name.
        playoffs: When True, read the ``playoffs`` pages (team column ``Tm``)
            and write to ``Team_Stats_Playoffs``; otherwise read the
            ``leagues`` pages (team column ``Team``) and write to
            ``Team_Stats``.

    Raises:
        ValueError: If the downloaded page for a season has no table element.
        KeyError: If a cleaned team name is not a key of ``TEAM_TO_ABBR``
            (assuming it is a plain dict).
    """
    selector = "div_per_game-team"
    if playoffs:
        regular_or_playoffs, team_column = "playoffs", "Tm"
    else:
        regular_or_playoffs, team_column = "leagues", "Team"

    for season in seasons:
        # Read the HTML and turn its first table into a DataFrame.
        url = f'https://widgets.sports-reference.com/wg.fcgi?css=1&site=bbr&url=%2F{regular_or_playoffs}%2FNBA_{season}.html&div={selector}'
        # The context manager closes the HTTP response once it has been parsed.
        with urlopen(url) as response:
            soup = BeautifulSoup(response)
        table = soup.find('table')
        if table is None:
            # Without this guard pd.read_html would be handed the string
            # "None" and fail without saying which season was affected.
            raise ValueError(f"No tables found for season {season} at {url}")
        # Newer pandas versions deprecate passing literal HTML strings.
        df = pd.read_html(StringIO(str(table)))[0]

        # Keep only the rows above the league average row, when there is one.
        league_avg_rows = df.index[df[team_column] == 'League Average']
        if len(league_avg_rows) > 0:
            df = df[:league_avg_rows[0]]
        # Work on an independent frame so the column assignments below do not
        # target a slice of the parsed table.
        df = df.copy()

        # Mark whether each team made the playoffs (name carries an asterisk).
        df["Made_Playoffs"] = df[team_column].str.contains('*', regex=False)

        # Strip the marker, upper-case the name and map it to its abbreviation.
        df[team_column] = df[team_column].apply(
            lambda name: TEAM_TO_ABBR[name.replace('*', '').upper()]
        )

        # Rk becomes the index and is therefore left out of the CSV, because
        # the file is written with index=False.
        df['Rk'] = df['Rk'].astype(int)
        df = df.set_index('Rk')

        first_year, second_year = format_season(season)

        # NOTE(review): the playoffs file name has no underscore before the
        # first year, unlike the regular-season one. Kept byte-for-byte since
        # other code may read these paths.
        if playoffs:
            csv_file_name = "DataCollection/Team_Stats_Playoffs/team_stats_playoffs{0}-{1}.csv".format(first_year, second_year)
        else:
            csv_file_name = "DataCollection/Team_Stats/team_stats_{0}-{1}.csv".format(first_year, second_year)

        df.to_csv(csv_file_name, index=False)

In [5]:
def get_opponent_stats(seasons, playoffs=False):
    """Scrape the per-game opponent table for each season and save it as a CSV.

    For every entry in ``seasons`` the ``div_per_game-opponent`` widget page
    is downloaded, cut off at the "League Average" row, given a playoff flag
    (True when the team name contains ``*``), and written to a
    season-specific file under ``DataCollection/``. Every column except the
    team column, ``G`` and ``MP`` is prefixed with ``OPP_``.

    Args:
        seasons: Iterable of season identifiers. Each one is interpolated
            into the ``NBA_{season}.html`` part of the URL and passed to
            ``format_season`` to build the output file name.
        playoffs: When True, read the ``playoffs`` pages (team column ``Tm``)
            and write to ``Opponent_Stats_Playoffs``; otherwise read the
            ``leagues`` pages (team column ``Team``) and write to
            ``Opponent_Stats``.

    Raises:
        ValueError: If the downloaded page for a season has no table element.
        KeyError: If a cleaned team name is not a key of ``TEAM_TO_ABBR``
            (assuming it is a plain dict).
    """
    selector = "div_per_game-opponent"
    if playoffs:
        regular_or_playoffs, team_column = "playoffs", "Tm"
    else:
        regular_or_playoffs, team_column = "leagues", "Team"

    # Columns that describe the team itself and keep their original names.
    static_columns = {team_column, "G", "MP"}

    for season in seasons:
        url = f'https://widgets.sports-reference.com/wg.fcgi?css=1&site=bbr&url=%2F{regular_or_playoffs}%2FNBA_{season}.html&div={selector}'
        # The context manager closes the HTTP response once it has been parsed.
        with urlopen(url) as response:
            soup = BeautifulSoup(response)
        table = soup.find('table')
        if table is None:
            # Without this guard pd.read_html would be handed the string
            # "None" and fail without saying which season was affected.
            raise ValueError(f"No tables found for season {season} at {url}")
        # Newer pandas versions deprecate passing literal HTML strings.
        df = pd.read_html(StringIO(str(table)))[0]

        # Keep only the rows above the league average row, when there is one.
        league_avg_rows = df.index[df[team_column] == 'League Average']
        if len(league_avg_rows) > 0:
            df = df[:league_avg_rows[0]]
        # Work on an independent frame so the column assignments below do not
        # target a slice of the parsed table.
        df = df.copy()

        # Mark whether each team made the playoffs (name carries an asterisk).
        df["Made_Playoffs"] = df[team_column].str.contains('*', regex=False)

        # Strip the marker, upper-case the name and map it to its abbreviation.
        df[team_column] = df[team_column].apply(
            lambda name: TEAM_TO_ABBR[name.replace('*', '').upper()]
        )

        # Rk becomes the index and is therefore left out of the CSV, because
        # the file is written with index=False.
        df['Rk'] = df['Rk'].astype(int)
        df = df.set_index('Rk')

        # Rename each column in place so a label can never drift onto another
        # column's data, whatever order the table delivers them in.
        # NOTE(review): Made_Playoffs is not in static_columns, so it is
        # written as OPP_Made_Playoffs. Kept because readers of the CSV may
        # depend on that name.
        df.columns = [
            column if column in static_columns else "OPP_" + column
            for column in df.columns
        ]

        first_year, second_year = format_season(season)

        # NOTE(review): "playoffS" (capital S) in the playoffs file name is
        # kept byte-for-byte since other code may read these paths.
        if playoffs:
            csv_file_name = "DataCollection/Opponent_Stats_Playoffs/opponent_stats_playoffS_{0}-{1}.csv".format(first_year, second_year)
        else:
            csv_file_name = "DataCollection/Opponent_Stats/opponent_stats_{0}-{1}.csv".format(first_year, second_year)

        df.to_csv(csv_file_name, index=False)

In [6]:
def get_advanced_team_stats(seasons, playoffs=False):
    """Scrape the advanced team table for each season and save it as a CSV.

    For every entry in ``seasons`` the ``div_advanced-team`` widget page is
    downloaded, its two-level header is flattened to the second level, and
    the rows are cut off at the "League Average" row. A ``Made_Playoffs``
    column is added (True when the team name contains ``*``), "Unnamed"
    columns are dropped, and eight columns are prefixed by position with
    ``Off_`` (first four) or ``Def_`` (last four). The result is written to
    a season-specific file under ``DataCollection/``.

    Args:
        seasons: Iterable of season identifiers. Each one is interpolated
            into the ``NBA_{season}.html`` part of the URL and passed to
            ``format_season`` to build the output file name.
        playoffs: When True, read the ``playoffs`` pages (team column ``Tm``,
            prefixed columns start at position 14) and write to
            ``Advanced_Team_Stats_Playoffs``; otherwise read the ``leagues``
            pages (team column ``Team``, prefixed columns start at position
            16) and write to ``Advanced_Team_Stats``.

    Raises:
        ValueError: If the downloaded page for a season has no table element.
        KeyError: If a cleaned team name is not a key of ``TEAM_TO_ABBR``
            (assuming it is a plain dict).
    """
    selector = "div_advanced-team"
    if playoffs:
        regular_or_playoffs, team_column = "playoffs", "Tm"
        first_prefixed = 14
    else:
        regular_or_playoffs, team_column = "leagues", "Team"
        first_prefixed = 16

    for season in seasons:
        url = f'https://widgets.sports-reference.com/wg.fcgi?css=1&site=bbr&url=%2F{regular_or_playoffs}%2FNBA_{season}.html&div={selector}'
        # Open the URL built above (the previous code passed an undefined
        # name here); the context manager closes the response after parsing.
        with urlopen(url) as response:
            soup = BeautifulSoup(response)
        table = soup.find('table')
        if table is None:
            # Without this guard pd.read_html would be handed the string
            # "None" and fail without saying which season was affected.
            raise ValueError(f"No tables found for season {season} at {url}")
        # Newer pandas versions deprecate passing literal HTML strings.
        df = pd.read_html(StringIO(str(table)))[0]

        # Each column label is indexed with [1], i.e. the table is expected
        # to parse with a two-level header; only the second level is kept.
        df.columns = [label[1] for label in df.columns]

        # Keep only the rows above the league average row, when there is one.
        league_avg_rows = df.index[df[team_column] == 'League Average']
        if len(league_avg_rows) > 0:
            df = df[:league_avg_rows[0]]
        # Work on an independent frame so the column assignments below do not
        # target a slice of the parsed table.
        df = df.copy()

        # Mark whether each team made the playoffs (name carries an asterisk).
        df["Made_Playoffs"] = df[team_column].str.contains('*', regex=False)

        # Strip the marker, upper-case the name and map it to its abbreviation.
        df[team_column] = df[team_column].apply(
            lambda name: TEAM_TO_ABBR[name.replace('*', '').upper()]
        )

        # Rk becomes the index and is therefore left out of the CSV, because
        # the file is written with index=False.
        df['Rk'] = df['Rk'].astype(int)
        df = df.set_index('Rk')
        df = df.loc[:, ~df.columns.str.contains("Unnamed")]

        # Flattening the header leaves some labels duplicated, so the eight
        # columns are disambiguated purely by position.
        # NOTE(review): presumably these are the offensive and defensive
        # "four factors" groups. Confirm the positions against the live table.
        columns = list(df.columns)
        for position in range(first_prefixed, first_prefixed + 8):
            prefix = "Off_" if position < first_prefixed + 4 else "Def_"
            columns[position] = prefix + columns[position]
        df.columns = columns

        first_year, second_year = format_season(season)

        if playoffs:
            csv_file_name = "DataCollection/Advanced_Team_Stats_Playoffs/adv_team_stats_playoffs_{0}-{1}.csv".format(first_year, second_year)
        else:
            csv_file_name = "DataCollection/Advanced_Team_Stats/adv_team_stats_{0}-{1}.csv".format(first_year, second_year)

        df.to_csv(csv_file_name, index=False)

In [7]:
# Page identifiers NBA_2001 through NBA_2022; range() excludes its upper bound.
seasons = range(2001, 2023)

# Regular-season per-game team tables.
get_team_stats(seasons, playoffs=False)

ValueError: No tables found

In [9]:
# Playoff per-game team tables for the same seasons.
get_team_stats(seasons, playoffs=True)

ValueError: No tables found

In [None]:
# Regular-season per-game opponent tables.
get_opponent_stats(seasons, playoffs=False)

In [10]:
# Playoff per-game opponent tables for the same seasons.
get_opponent_stats(seasons, playoffs=True)

In [11]:
# Page identifiers NBA_2001 through NBA_2022; range() excludes its upper bound.
seasons = range(2001, 2023)

# Regular-season advanced team tables.
get_advanced_team_stats(seasons, playoffs=False)

In [12]:
# Playoff advanced team tables for the same seasons.
get_advanced_team_stats(seasons, playoffs=True)