# Libraries / Shortcuts

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import time
from nba_api.stats.endpoints.leaguedashplayerbiostats import LeagueDashPlayerBioStats
from nba_api.stats.endpoints.commonteamroster import CommonTeamRoster
from nba_api.stats.endpoints.draftcombinedrillresults import DraftCombineDrillResults
from nba_api.stats.endpoints.draftcombineplayeranthro import DraftCombinePlayerAnthro

# Season labels used throughout the notebook, one per NBA season from
# '2000-01' through '2020-21': the four-digit starting year, a dash, and the
# last two digits of the following year.
seasons = [
    '{}-{:02d}'.format(start_year, (start_year + 1) % 100)
    for start_year in range(2000, 2021)
]

# Load Data

## LeagueDashPlayerBioStats

In [50]:
# # https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/leaguedashplayerbiostats.md
# player_bios = []

# for i, season in enumerate(seasons):
#     df_player_bios = LeagueDashPlayerBioStats(season=season).get_data_frames()[0]

#     # Drop unnecessary columns
#     df_player_bios.drop(
#         columns=['PLAYER_HEIGHT'] + list(df_player_bios.columns[-10:]),
#         inplace=True
#     )

#     # Remove underscore in column names, use space instead
#     df_player_bios.columns = [col.replace('_', ' ') for col in df_player_bios.columns]

#     # Creating columns
#     df_player_bios['SEASON'] = season

#     # Print and save progress
#     print('Season {} processed'.format(season))
#     df_player_bios.to_csv('./Data/PlayerBios/PlayerBios_{}.csv'.format(season))

#     player_bios.append(df_player_bios)


# all_player_bios = pd.concat(player_bios, ignore_index=True)

# # Change units
# # Pounds to kilograms
# all_player_bios['PLAYER WEIGHT'] /= 2.20462
# # 1 inch is 2.54 cm, 12 inches per foot
# all_player_bios['PLAYER HEIGHT INCHES'] = 2.54 * all_player_bios['PLAYER HEIGHT INCHES'].astype(float)
# all_player_bios.rename(columns={'PLAYER HEIGHT INCHES': 'PLAYER HEIGHT'}, inplace=True)

# # Order columns
# new_columns = ['SEASON'] + list(all_player_bios.columns[:-1])
# all_player_bios = all_player_bios[new_columns]

# all_player_bios.sort_values(
#     by=['SEASON', 'TEAM ID'],
#     inplace=True
# )

# all_player_bios.reset_index(drop=True, inplace=True)
# all_player_bios.to_csv('./Data/PlayerBios/all_player_bios.csv')

In [51]:
# Load the combined player-bio table from the CSV that the commented-out
# download code in this notebook writes. The first CSV column holds the saved
# row index, so it is used as the DataFrame index.
player_bios = pd.read_csv(
    './Data/PlayerBios/all_player_bios.csv',
    index_col=0,
)
player_bios

Unnamed: 0,SEASON,PLAYER ID,PLAYER NAME,TEAM ID,TEAM ABBREVIATION,AGE,PLAYER HEIGHT,PLAYER WEIGHT,COLLEGE,COUNTRY,DRAFT YEAR,DRAFT ROUND,DRAFT NUMBER
0,2000-01,673,Alan Henderson,1610612737,ATL,28.0,205.74,106.594334,Indiana,USA,1995,1,16
1,2000-01,1950,Andy Panko,1610612737,ATL,23.0,205.74,111.130263,Lebanon Valley,USA,Undrafted,Undrafted,Undrafted
2,2000-01,1510,Brevin Knight,1610612737,ATL,25.0,177.80,77.110795,Stanford,USA,1997,1,16
3,2000-01,1898,Cal Bowdler,1610612737,ATL,24.0,208.28,111.130263,Old Dominion,USA,1999,1,17
4,2000-01,1544,Chris Crawford,1610612737,ATL,26.0,205.74,106.594334,Marquette,USA,1997,2,50
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9938,2020-21,1630268,Nate Darling,1610612766,CHA,22.0,198.12,90.718582,Delaware,Canada,Undrafted,Undrafted,Undrafted
9939,2020-21,1630208,Nick Richards,1610612766,CHA,23.0,213.36,111.130263,Kentucky,Jamaica,2020,2,42
9940,2020-21,1629023,P.J. Washington,1610612766,CHA,22.0,200.66,104.326369,Kentucky,USA,2019,1,12
9941,2020-21,1626179,Terry Rozier,1610612766,CHA,27.0,185.42,86.182653,Louisville,USA,2015,1,16


## CommonTeamRoster

In [5]:
# Build the combined roster table (one row per player, team and season).
#
# For each season, one roster is downloaded per team through CommonTeamRoster,
# the teams are concatenated, and the result is saved to a per-season CSV.
# A season whose CSV already exists on disk is loaded from that file instead of
# being downloaded again, so an interrupted run can simply be re-executed and
# the final table still contains every season.

# Get team IDs
import os

from nba_api.stats.static import teams

# get_teams returns a list of 30 dictionaries, each an NBA team.
nba_teams_info = teams.get_teams()

# Build dictionary with team IDs as keys and name of team as values
nba_teams_names = {team['id']: team['full_name'] for team in nba_teams_info}

# https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/commonteamroster.md

# Pause (in seconds) between two freshly downloaded seasons.
# NOTE(review): presumably this avoids being throttled by the stats server -- confirm.
SEASON_PAUSE_SECONDS = 180

all_rosters_by_season = []

# Seasons that already have a per-season CSV are treated as completed.
completed_seasons = [
    season for season in seasons
    if os.path.exists('./Data/Rosters/Rosters_{}.csv'.format(season))
]
seasons_to_fetch = [season for season in seasons if season not in completed_seasons]

for season in seasons:
    season_csv_path = './Data/Rosters/Rosters_{}.csv'.format(season)

    if season in completed_seasons:
        # Re-use the cached download so this season is still part of all_rosters.
        all_rosters_by_season.append(pd.read_csv(season_csv_path, index_col=0))
        continue

    season_rosters = []
    for i, team_id in enumerate(nba_teams_names):
        df_roster = CommonTeamRoster(
            season=season,
            team_id=team_id
        ).get_data_frames()[0]

        # Creating columns
        df_roster['SEASON'] = season
        df_roster['TEAM'] = nba_teams_names[team_id]

        season_rosters.append(df_roster)

        # Print progress once in a while
        if (i+1) % 10 == 0:
            print('Season {} - {} teams processed'.format(season, i+1))

    all_current_season_rosters = pd.concat(season_rosters, ignore_index=True)
    all_current_season_rosters.to_csv(season_csv_path)
    all_rosters_by_season.append(all_current_season_rosters)

    # Only wait when another download follows; no pause after the last one.
    if season != seasons_to_fetch[-1]:
        time.sleep(SEASON_PAUSE_SECONDS)

all_rosters = pd.concat(all_rosters_by_season, ignore_index=True)

# Drop unnecessary columns
all_rosters = all_rosters.drop(
    columns=['LeagueID', 'NICKNAME', 'PLAYER_SLUG', 'NUM']
)

# Renaming
all_rosters = all_rosters.rename(
    columns={'TeamID': 'TEAM ID', 'PLAYER_ID': 'PLAYER ID'}
)

# Change units
# Pounds to kilograms; values that cannot be parsed become NaN instead of raising.
all_rosters['WEIGHT'] = pd.to_numeric(all_rosters['WEIGHT'], errors='coerce') / 2.20462

# HEIGHT is split on '-' into feet and inches. str.extract always returns
# exactly two columns, and rows that do not match the pattern become NaN.
height_parts = all_rosters['HEIGHT'].astype(str).str.extract(r'^(\d+)-(\d+(?:\.\d+)?)$')
height_feet = pd.to_numeric(height_parts[0], errors='coerce')
height_inches = pd.to_numeric(height_parts[1], errors='coerce')
# Convert from total inches to cm (1 inch is 2.54 cm, 12 inches per foot)
all_rosters['HEIGHT'] = 2.54 * (height_inches + (12*height_feet))

# Order columns: the first two columns swapped, then the last two swapped,
# then everything in between. This is purely positional.
# NOTE(review): the resulting order depends on the column layout returned by
# the endpoint -- confirm it is still the intended one.
new_columns = list(all_rosters.columns[:2][::-1]) + list(all_rosters.columns[-2:][::-1]) + list(all_rosters.columns[2:-2])
all_rosters = all_rosters[new_columns]

all_rosters = all_rosters.sort_values(
    by=['SEASON', 'TEAM ID']
).reset_index(drop=True)

all_rosters.to_csv('./Data/Rosters/all_rosters.csv')

Season 2001-02 - 10 teams processed
Season 2001-02 - 20 teams processed
Season 2001-02 - 30 teams processed
Season 2002-03 - 10 teams processed
Season 2002-03 - 20 teams processed
Season 2002-03 - 30 teams processed


## DraftCombineDrillResults

In [5]:
# # https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/draftcombinedrillresults.md
# all_drill_results = []

# for season in seasons:
#     drill_results = DraftCombineDrillResults(season_year=season).get_data_frames()[0]

#     # Creating columns
#     drill_results['SEASON'] = season

#     # Print and save progress
#     print('Season {} processed'.format(season))
#     drill_results.to_csv('./Data/DraftCombine/DrillResults_{}.csv'.format(season))

#     all_drill_results.append(drill_results)


# df_all_drill_results = pd.concat(all_drill_results, ignore_index=True)

# # Drop unnecessary columns
# df_all_drill_results.drop(
#     columns=['FIRST_NAME', 'LAST_NAME'],
#     inplace=True
# )

# # Remove underscore in column names, use space instead
# df_all_drill_results.columns = [col.replace('_', ' ') for col in df_all_drill_results.columns]

# # Order columns
# new_columns = ['SEASON'] + list(df_all_drill_results.columns[:-1])
# df_all_drill_results = df_all_drill_results[new_columns]

# df_all_drill_results.sort_values(
#     by=['SEASON', 'PLAYER NAME'],
#     inplace=True
# )

# df_all_drill_results.reset_index(drop=True, inplace=True)
# df_all_drill_results.to_csv('./Data/DraftCombine/all_drill_results.csv')

In [6]:
# Load the combined draft-combine drill results from the CSV that the
# commented-out download code in this notebook writes. The first CSV column
# holds the saved row index, so it is used as the DataFrame index.
all_drill_results = pd.read_csv(
    './Data/DraftCombine/all_drill_results.csv',
    index_col=0,
)
all_drill_results

Unnamed: 0,SEASON,TEMP PLAYER ID,PLAYER ID,PLAYER NAME,POSITION,STANDING VERTICAL LEAP,MAX VERTICAL LEAP,LANE AGILITY TIME,MODIFIED LANE AGILITY TIME,THREE QUARTER SPRINT,BENCH PRESS
0,2000-01,,12137,A.J. Granger,SF,30.0,34.0,10.73,,3.25,20.0
1,2000-01,,2062,A.J. Guyton,PG-SG,33.0,37.5,10.55,,3.22,9.0
2,2000-01,,2637,Alex Scales,SG-PG,38.5,42.5,10.98,,3.21,10.0
3,2000-01,,2238,Antonis Fotsis,SF,27.0,30.0,11.60,,3.44,2.0
4,2000-01,,12021,Aubrey Reese,PG,31.5,37.5,11.13,,3.25,14.0
...,...,...,...,...,...,...,...,...,...,...,...
1390,2020-21,1630179.0,1630179,Tyrell Terry,PG,30.0,34.0,10.78,2.59,3.28,
1391,2020-21,1628962.0,1628962,Udoka Azubuike,C,37.0,41.0,11.82,3.61,3.23,
1392,2020-21,1630214.0,1630214,Xavier Tillman Sr.,PF-C,26.0,32.5,11.80,3.01,3.49,
1393,2020-21,1630213.0,1630213,Yoeli Childs,PF,34.0,38.5,10.87,2.42,3.22,


## DraftCombinePlayerAnthro

In [8]:
# https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/draftcombineplayeranthro.md
#
# Fetch the DraftCombinePlayerAnthro table for every season, tag each table
# with its season, save it to a per-season CSV, and finally write one combined,
# sorted CSV covering all seasons.
all_player_anthro = []

for season in seasons:
    # First data frame returned by the endpoint, with the season label added
    # as a new trailing column.
    player_anthro = (
        DraftCombinePlayerAnthro(season_year=season)
        .get_data_frames()[0]
        .assign(SEASON=season)
    )

    # Report progress and keep a per-season copy on disk
    print('Season {} processed'.format(season))
    player_anthro.to_csv('./Data/DraftCombine/PlayerAnthro_{}.csv'.format(season))

    all_player_anthro.append(player_anthro)


# Stack all seasons, discard the separate first/last name columns, and replace
# underscores in the column names with spaces.
df_all_player_anthro = (
    pd.concat(all_player_anthro, ignore_index=True)
    .drop(columns=['FIRST_NAME', 'LAST_NAME'])
    .rename(columns=lambda col: col.replace('_', ' '))
)

# SEASON was appended last above; move it to the front.
new_columns = ['SEASON', *df_all_player_anthro.columns[:-1]]

df_all_player_anthro = (
    df_all_player_anthro[new_columns]
    .sort_values(by=['SEASON', 'PLAYER NAME'])
    .reset_index(drop=True)
)

df_all_player_anthro.to_csv('./Data/DraftCombine/all_player_anthro.csv')