In [1]:
import pandas as pd
import numpy as np

from bs4 import BeautifulSoup
import requests

from collections import defaultdict
import re

# --- Remote sources -------------------------------------------------------
# basketball-reference standings page; `{}` is filled with the season year.
base = "https://www.basketball-reference.com/leagues/NBA_{}_standings.html"
# Wikipedia page whose first table lists team abbreviations and names.
webpage = 'https://en.wikipedia.org/wiki/Wikipedia:WikiProject_National_Basketball_Association/National_Basketball_Association_team_abbreviations'

# --- Local inputs ---------------------------------------------------------
# Per-season statistics CSV; `{}` is filled with the season year.
# NOTE(review): absolute, machine-specific path — will not resolve on other machines.
season_stats = '/Users/pranav/nba_allNBA_predictor/season_statistics/{}_stats.csv'

# Season years to process: 1991 through 2019 inclusive.
seasons = np.arange(1991, 2020)

In [2]:
def getDF(year):
    """Scrape one season's standings page into a single cleaned DataFrame.

    Parameters
    ----------
    year : int
        Season year; substituted into the module-level ``base`` URL template.

    Returns
    -------
    pandas.DataFrame
        All tables found on the page stacked together, with the
        "Eastern Conference"/"Western Conference" column renamed to ``Team``,
        rows whose ``Team`` text contains "Division" removed, disallowed
        characters stripped from ``Team``, duplicate ``Team`` rows dropped
        (first occurrence kept), and the index renumbered from 1.
    """
    url = base.format(year)
    tables = pd.read_html(url)

    # Each table names its team column after the conference; give them a
    # shared "Team" label so the frames line up when stacked.
    tables = [
        table.rename(columns={"Eastern Conference": "Team", "Western Conference": "Team"})
        for table in tables
    ]
    standings = pd.concat(tables)

    # .copy() makes the filtered result an independent frame, so the column
    # assignment below does not write into a slice (SettingWithCopyWarning).
    standings = standings[~standings['Team'].str.contains('Division')].copy()

    # regex=True must be explicit: since pandas 2.0 Series.str.replace treats
    # the pattern as a literal string by default, which would leave the names
    # uncleaned.
    # NOTE(review): the class keeps only digits 0-7, so '8' and '9' are
    # stripped too — possibly '0-9' was intended; confirm before changing.
    standings['Team'] = standings['Team'].str.replace('[^a-zA-Z0-7/ ]', '', regex=True)

    standings = standings.drop_duplicates(subset="Team", keep="first")
    standings.index = np.arange(1, len(standings.index) + 1)
    return standings

In [4]:
def getAbbreviations(url):
    """Build a ``{team name: abbreviation}`` dict from the first table on a page.

    The ``<td>`` cells of the first table's body are read in document order,
    the first two are skipped, and the remainder are consumed in
    (abbreviation, team name) pairs. A trailing unpaired cell is ignored.

    Parameters
    ----------
    url : str
        Address of the page to download.

    Returns
    -------
    dict
        Maps each team-name cell text to the abbreviation cell text that
        precedes it. If a name occurs more than once, the last pair wins.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status.
    ValueError
        If the page contains no ``<table>`` with a ``<tbody>``.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever;
    # raise_for_status stops us from parsing an error page as if it were data.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")

    table = soup.find("table")
    if table is None or table.tbody is None:
        raise ValueError("no table with a <tbody> found at {}".format(url))

    cells = table.tbody.find_all("td")[2:]
    # Strip markup and newlines, leaving only each cell's text.
    texts = [re.sub("<[^>]*>|\n", "", str(cell)) for cell in cells]

    # Even positions hold abbreviations, odd positions the matching names.
    return dict(zip(texts[1::2], texts[::2]))

In [5]:
# Download the team-name -> abbreviation mapping from the Wikipedia page at `webpage`.
# NOTE(review): createCSVs() fetches its own mapping, so no cell visible here
# reads this global — presumably kept for interactive inspection; confirm
# before removing.
d = getAbbreviations(webpage)

In [6]:
def replaceNames(dataframe, d):
    """Swap full team names for abbreviations and round ``W/L%`` to 3 decimals.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``Team`` and ``W/L%`` columns. Both columns are
        overwritten in place, and the same object is returned.
    d : dict
        Maps team name to abbreviation. It is copied internally and never
        modified, so the caller can reuse one mapping across many calls
        without it accumulating entries.

    Returns
    -------
    pandas.DataFrame
        ``dataframe`` itself, after the replacements.

    Notes
    -----
    Earlier versions re-read every season's statistics CSV on each call and
    then discarded the result. That dead work has been removed, so this
    function no longer touches the filesystem.
    """
    lookup = dict(d)

    # Any name in the frame without a known abbreviation is mapped to ''.
    # NOTE(review): this blanks out unrecognised team names in the output;
    # confirm that is wanted rather than leaving the full name in place.
    for team in set(dataframe['Team']):
        lookup.setdefault(team, '')

    # Fixed abbreviations applied last so they take precedence over the
    # entries supplied in `d`.
    lookup.update({
        'New Jersey Nets': "NJN",
        'Seattle SuperSonics': "SEA",
        'Vancouver Grizzlies': "VAN",
        'Charlotte Bobcats': "CHA",
        'Washington Bullets': "WSB",
        'New Orleans/Oklahoma City Hornets': "NOK",
        'New Orleans Hornets': "NOH",
    })

    dataframe['Team'] = dataframe['Team'].replace(lookup)
    dataframe['W/L%'] = [round(x, 3) for x in dataframe['W/L%'].astype(float)]

    return dataframe

In [7]:
def createCSVs(savePath='/Users/pranav/nba_allNBA_predictor/team_records/'):
    """Scrape every season in ``seasons`` and write one team-records CSV each.

    For each season the standings are fetched with ``getDF``, team names are
    abbreviated with ``replaceNames``, and the result is written to
    ``<savePath>/<season>_teamRecords.csv`` without the index column.

    Parameters
    ----------
    savePath : str, optional
        Directory that receives the CSV files. It is created if it does not
        exist. Defaults to the original hard-coded location.
        NOTE(review): the default is an absolute, machine-specific path.
    """
    import os  # local import: keeps this cell runnable without editing the import cell

    os.makedirs(savePath, exist_ok=True)

    # Fetch the abbreviation table once and reuse it for every season.
    d = getAbbreviations(webpage)
    for season in seasons:
        records = replaceNames(getDF(season), d)
        target = os.path.join(savePath, '{}_teamRecords.csv'.format(season))
        records.to_csv(target, index=False)

In [8]:
# Run the whole pipeline: for every year in `seasons`, scrape the standings
# and write that season's team-records CSV (network and disk I/O).
createCSVs()