# Libraries / Shortcuts

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from nba_api.stats.endpoints import commonteamroster
import json
import utils

seasons = [
    '2000-01', '2001-02', '2002-03', '2003-04', '2004-05', '2005-06', '2006-07', 
    '2007-08', '2008-09', '2009-10', '2010-11', '2011-12', '2012-13', '2013-14', 
    '2014-15', '2015-16', '2016-17', '2017-18', '2018-19', '2019-20', '2020-21'
]

# Load Data

In [2]:
# Get team IDs
from nba_api.stats.static import teams
# get_teams returns a list of 30 dictionaries, each an NBA team.
nba_teams_info = teams.get_teams()

# Build dictionary with team IDs as keys and name of team as values
nba_teams_names = {}
for team in nba_teams_info:
    nba_teams_names[team['id']] = team['full_name']

In [34]:
# https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/commonteamroster.md
rosters_data = []
season = seasons[0]

for i, team_id in enumerate(list(nba_teams_names.keys())):
    df_roster = commonteamroster.CommonTeamRoster(
        season=season,
        team_id=team_id
    ).get_data_frames()[0]

    # Drop unnecessary columns
    df_roster.drop(columns=['LeagueID', 'NICKNAME', 'PLAYER_SLUG', 'NUM'], inplace=True)

    # Renaming
    df_roster.rename(
        columns={'TeamID': 'TEAM ID', 'PLAYER_ID': 'PLAYER ID'},
        inplace=True
    )

    # Creating columns
    df_roster['SEASON'] = season
    df_roster['TEAM'] = nba_teams_names[team_id]

    rosters_data.append(df_roster)

    # Print progress once in a while
    if (i+1) % 10 == 0:
        print('Season {} - {} teams processed'.format(season, i+1))

all_rosters = pd.concat(rosters_data, ignore_index=True)

# Change units
all_rosters['WEIGHT'] = all_rosters['WEIGHT'].astype(float) / 2.20462
all_rosters[['HEIGHT (FEET)', 'HEIGHT (INCHES)']] = all_rosters['HEIGHT'].str.split('-', expand=True).astype(float)
# 1 inch is 2.54 cm, 12 inches per foot
all_rosters['HEIGHT'] = 2.54 * (all_rosters['HEIGHT (INCHES)'] + (12*all_rosters['HEIGHT (FEET)']))

# Order columns
new_columns = list(all_rosters.columns[:2][::-1]) + list(all_rosters.columns[-2:][::-1]) + list(all_rosters.columns[2:-2])
all_rosters = all_rosters[new_columns]

all_rosters.sort_values(
    by=['SEASON', 'TEAM ID'],
    inplace=True
)

all_rosters.to_csv('Rosters_{}.csv'.format(season))

Season 2000-01 - 10 teams processed


ReadTimeout: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)