In [1]:
import pandas as pd 
import numpy as np
from tqdm import tqdm
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

## Select All Players Who Played Between 2000 and 2020

In [2]:
from nba_api.stats.endpoints import commonallplayers

# Pull the full player index from the NBA stats API, cast TO_YEAR to an
# integer so it can be compared numerically, and keep only the players
# whose TO_YEAR is 2000 or later.
all_players = (
    commonallplayers.CommonAllPlayers()
    .get_data_frames()[0]
    .astype({'TO_YEAR': 'int64'})
    .loc[lambda roster: roster['TO_YEAR'] >= 2000]
)

In [3]:
# Display the filtered player index (only rows with TO_YEAR >= 2000 remain).
all_players

Unnamed: 0,PERSON_ID,DISPLAY_LAST_COMMA_FIRST,DISPLAY_FIRST_LAST,ROSTERSTATUS,FROM_YEAR,TO_YEAR,PLAYERCODE,TEAM_ID,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CODE,GAMES_PLAYED_FLAG,OTHERLEAGUE_EXPERIENCE_CH
3,51,"Abdul-Rauf, Mahmoud",Mahmoud Abdul-Rauf,0,1990,2000,mahmoud_abdul-rauf,0,,,,,Y,00
4,1505,"Abdul-Wahad, Tariq",Tariq Abdul-Wahad,0,1997,2003,tariq_abdul-wahad,0,,,,,Y,00
5,949,"Abdur-Rahim, Shareef",Shareef Abdur-Rahim,0,1996,2007,shareef_abdur-rahim,0,,,,,Y,00
9,203518,"Abrines, Alex",Alex Abrines,0,2016,2018,alex_abrines,0,,,,,Y,00
10,1630173,"Achiuwa, Precious",Precious Achiuwa,1,2020,2020,precious_achiuwa,1610612748,Miami,Heat,MIA,heat,Y,00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4596,2583,"Zimmerman, Derrick",Derrick Zimmerman,0,2005,2005,derrick_zimmerman,0,,,,,Y,01
4597,1627757,"Zimmerman, Stephen",Stephen Zimmerman,0,2016,2016,stephen_zimmerman,0,,,,,Y,01
4598,1627835,"Zipser, Paul",Paul Zipser,0,2016,2017,paul_zipser,0,,,,,Y,01
4599,1627790,"Zizic, Ante",Ante Zizic,0,2017,2019,ante_zizic,0,,,,,Y,01


## Fetch Career Stats for Players Active Between 2000 and 2020

In [4]:
from nba_api.stats.endpoints import playercareerstats
import time

# IDs of every player kept by the TO_YEAR filter
players_ID = all_players['PERSON_ID']

# Per-player frames are gathered in a list and concatenated once after the
# loop; calling pd.concat on every iteration re-copies all accumulated rows
# each time, which is quadratic in the number of players.
career_frames = []
# IDs whose request failed, kept so they can be retried afterwards
error_log = []
for player_id in tqdm(players_ID):
    try:
        time.sleep(1)  # avoid too many queries submitted at the same time
        career = playercareerstats.PlayerCareerStats(player_id=player_id)
        career_frames.append(career.get_data_frames()[0])
    except Exception:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt can still stop this long-running loop.
        error_log.append(player_id)

# Build the combined table in one pass; fall back to an empty frame when no
# request succeeded (pd.concat raises on an empty list).
players_stats = (
    pd.concat(career_frames, axis=0, ignore_index=True)
    if career_frames
    else pd.DataFrame()
)

100%|██████████| 2096/2096 [1:14:47<00:00,  2.14s/it]    


In [5]:
# Player IDs whose career-stats request raised during the bulk fetch.
error_log

[1629109,
 203546,
 1721,
 1629634,
 236,
 1918,
 201961,
 203898,
 203516,
 1915,
 1629234]

In [6]:
# Retry the player IDs recorded in error_log.
# The loop walks a snapshot of the list and records new failures separately:
# appending to the list that is being iterated would keep the loop running
# forever for any ID that fails again.
retry_frames = []
still_failing = []
for player_id in tqdm(list(error_log)):
    try:
        time.sleep(1)  # keep the same request pacing as the bulk fetch
        career = playercareerstats.PlayerCareerStats(player_id=player_id)
        retry_frames.append(career.get_data_frames()[0])
    except Exception:
        # Exception (not a bare except) so the cell can still be interrupted.
        still_failing.append(player_id)

# Append all recovered frames in a single concat.
if retry_frames:
    players_stats = pd.concat([players_stats, *retry_frames], axis=0, ignore_index=True)

# Keep only the IDs that are still missing, so re-running this cell does not
# fetch (and duplicate) rows for players that were already recovered.
error_log = still_failing
if error_log:
    print(f"Still failing after retry: {error_log}")

100%|██████████| 11/11 [00:18<00:00,  1.68s/it]


In [7]:
# Persist the collected career stats to disk as a pickle file.
with open("player_stats.pkl", mode='wb') as stats_file:
    pickle.dump(players_stats, stats_file)

In [8]:
# Display the accumulated career-stats table.
players_stats

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,51,1990-91,00,1610612743,DEN,22.0,67,19,1505.0,417,...,0.857,34,87,121,206,55,4,110,149,942
1,51,1991-92,00,1610612743,DEN,23.0,81,11,1538.0,356,...,0.870,22,92,114,192,44,4,117,130,837
2,51,1992-93,00,1610612743,DEN,24.0,81,81,2710.0,633,...,0.935,51,174,225,344,84,8,187,179,1553
3,51,1993-94,00,1610612743,DEN,25.0,80,78,2617.0,588,...,0.956,27,141,168,362,82,10,151,150,1437
4,51,1994-95,00,1610612743,DEN,26.0,73,43,2082.0,472,...,0.885,32,105,137,263,77,9,119,126,1165
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15085,1915,2001-02,00,1610612742,DAL,27.0,31,6,299.0,21,...,0.606,35,63,98,9,10,9,16,60,62
15086,1915,2002-03,00,1610612742,DAL,28.0,17,3,135.0,7,...,0.750,10,19,29,6,10,7,6,35,17
15087,1629234,2018-19,00,1610612759,SAS,22.0,23,0,113.0,15,...,0.846,6,28,34,7,2,5,8,11,41
15088,1629234,2019-20,00,1610612759,SAS,23.0,22,3,272.0,43,...,0.769,28,57,85,15,5,17,21,37,107


In [9]:
# Rows for a single team: 1610612761 is the TEAM_ID shown next to the TOR
# abbreviation in this table.
players_stats.loc[players_stats['TEAM_ID'] == 1610612761]

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
41,203112,2012-13,00,1610612761,TOR,22.0,29,0,342.0,42,...,0.816,30,47,77,11,13,15,17,53,116
42,203112,2013-14,00,1610612761,TOR,23.0,7,0,61.0,6,...,0.625,5,10,15,4,4,3,2,8,19
53,200801,2008-09,00,1610612761,TOR,25.0,12,0,52.0,4,...,0.500,1,6,7,1,1,1,4,4,11
103,201582,2010-11,00,1610612761,TOR,23.0,24,0,265.0,47,...,0.733,12,49,61,8,8,14,17,60,114
111,202374,2010-11,00,1610612761,TOR,23.0,12,0,59.0,3,...,0.000,7,7,14,2,2,2,3,5,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14851,1626153,2017-18,00,1610612761,TOR,26.0,69,4,1433.0,201,...,0.829,45,153,198,200,72,33,78,81,555
14852,1626153,2018-19,00,1610612761,TOR,27.0,49,2,897.0,127,...,0.869,39,86,125,110,46,15,37,53,337
14873,201153,2010-11,00,1610612761,TOR,24.0,52,6,766.0,82,...,0.512,47,71,118,58,39,20,43,45,188
15017,1721,2000-01,00,1610612761,TOR,26.0,46,0,969.0,167,...,0.585,78,170,248,38,16,110,42,143,413


## Get Team data

In [10]:
from nba_api.stats.endpoints import teamyearbyyearstats
from nba_api.stats.static import teams

In [11]:
# Team records from nba_api's static data; each record is indexed by 'id'
# when the per-team stats are requested below.
nba_teams = teams.get_teams()

In [12]:
# Fetch the year-by-year stats of every team in nba_teams.
# Frames are collected in a list and concatenated once at the end rather
# than re-copying the growing table on every iteration.
team_frames = []
# Team records whose request failed, kept for inspection / retry
error_teams = []
for team_info in tqdm(nba_teams):
    try:
        time.sleep(1)  # pace the requests
        team = teamyearbyyearstats.TeamYearByYearStats(team_id=team_info['id'])
        team_frames.append(team.get_data_frames()[0])
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt is not swallowed.
        error_teams.append(team_info)

# Fall back to an empty frame when no request succeeded (pd.concat raises on
# an empty list).
teams_stats = (
    pd.concat(team_frames, axis=0, ignore_index=True)
    if team_frames
    else pd.DataFrame()
)

100%|██████████| 30/30 [00:47<00:00,  1.58s/it]


In [13]:
# Display the combined year-by-year stats of all teams.
teams_stats

Unnamed: 0,TEAM_ID,TEAM_CITY,TEAM_NAME,YEAR,GP,WINS,LOSSES,WIN_PCT,CONF_RANK,DIV_RANK,...,OREB,DREB,REB,AST,PF,STL,TOV,BLK,PTS,PTS_RANK
0,1610612737,Tri-Cities,Blackhawks,1949-50,64,29,35,0.453,0,3,...,0,0,0,1330,2057,0,0,0,5313,10
1,1610612737,Tri-Cities,Blackhawks,1950-51,68,25,43,0.368,0,5,...,0,0,0,1476,2092,0,0,0,5730,3
2,1610612737,Milwaukee,Hawks,1951-52,66,17,49,0.258,0,5,...,0,0,0,1229,1848,0,0,0,4833,10
3,1610612737,Milwaukee,Hawks,1952-53,71,27,44,0.380,0,5,...,0,0,0,1427,2120,0,0,0,5389,9
4,1610612737,Milwaukee,Hawks,1953-54,72,21,51,0.292,0,4,...,0,0,0,1298,1771,0,0,0,5038,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1532,1610612766,Charlotte,Hornets,2016-17,82,36,46,0.439,11,4,...,721,2853,3574,1891,1360,571,942,390,8601,16
1533,1610612766,Charlotte,Hornets,2017-18,82,36,46,0.439,10,3,...,827,2901,3728,1770,1409,559,1041,373,8874,10
1534,1610612766,Charlotte,Hornets,2018-19,82,39,43,0.476,9,2,...,814,2778,3592,1905,1550,591,1001,405,9081,19
1535,1610612766,Charlotte,Hornets,2019-20,65,23,42,0.354,10,4,...,715,2066,2781,1549,1223,428,949,268,6687,30


In [14]:
# Persist the team year-by-year stats to disk as a pickle file.
with open("teams_stats.pkl", mode="wb") as teams_file:
    pickle.dump(teams_stats, teams_file)

In [15]:
# Seasons recorded for the team whose TEAM_CITY is Toronto.
teams_stats.loc[teams_stats['TEAM_CITY'] == 'Toronto']

Unnamed: 0,TEAM_ID,TEAM_CITY,TEAM_NAME,YEAR,GP,WINS,LOSSES,WIN_PCT,CONF_RANK,DIV_RANK,...,OREB,DREB,REB,AST,PF,STL,TOV,BLK,PTS,PTS_RANK
1274,1610612761,Toronto,Raptors,1995-96,82,21,61,0.256,14,8,...,1071,2213,3284,1927,1987,745,1544,493,7994,21
1275,1610612761,Toronto,Raptors,1996-97,82,30,52,0.366,12,8,...,1135,2254,3389,1714,1883,722,1347,517,7829,18
1276,1610612761,Toronto,Raptors,1997-98,82,16,66,0.195,15,8,...,1187,2149,3336,1746,1851,769,1371,663,7781,17
1277,1610612761,Toronto,Raptors,1998-99,50,23,27,0.46,10,6,...,712,1447,2159,1036,1139,439,799,321,4557,18
1278,1610612761,Toronto,Raptors,1999-00,82,45,37,0.549,6,3,...,1098,2449,3547,1947,1989,666,1137,544,7968,17
1279,1610612761,Toronto,Raptors,2000-01,82,47,35,0.573,5,2,...,1118,2529,3647,2004,1745,599,1080,519,8007,5
1280,1610612761,Toronto,Raptors,2001-02,82,42,40,0.512,7,3,...,1114,2336,3450,1779,1771,688,1174,454,7494,25
1281,1610612761,Toronto,Raptors,2002-03,82,24,58,0.293,14,7,...,1023,2355,3378,1583,1761,609,1181,392,7453,27
1282,1610612761,Toronto,Raptors,2003-04,82,33,49,0.402,10,6,...,830,2419,3249,1574,1748,604,1164,402,7006,29
1283,1610612761,Toronto,Raptors,2004-05,82,33,49,0.402,11,4,...,844,2444,3288,1670,1876,621,1087,317,8178,7


## Awards Data 

In [16]:
from nba_api.stats.endpoints import playerawards

In [17]:
# Fetch the awards table of every selected player.
# Frames are collected in a list and concatenated once at the end rather
# than re-copying the growing table on every iteration.
award_frames = []
# IDs whose request failed, kept so they can be retried afterwards
error_awards = []
for player_id in tqdm(players_ID):
    try:
        time.sleep(1)  # pace the requests
        award = playerawards.PlayerAwards(player_id=player_id)
        # get_data_frames() is called once; the original called it twice and
        # discarded the first result.
        award_frames.append(award.get_data_frames()[0])
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt can still stop
        # this long-running loop.
        error_awards.append(player_id)

# Fall back to an empty frame when no request succeeded (pd.concat raises on
# an empty list).
players_awards = (
    pd.concat(award_frames, axis=0, ignore_index=True)
    if award_frames
    else pd.DataFrame()
)

100%|██████████| 2096/2096 [1:59:01<00:00,  3.41s/it]    


In [18]:
# Player IDs whose awards request raised during the bulk fetch.
error_awards

[1626164, 202344, 200767, 1916, 2430, 2414, 2547, 1934, 101236]

In [19]:
# Retry the player IDs recorded in error_awards.
# Failures are caught and recorded, matching the career-stats retry, so one
# bad request no longer aborts the cell half-way with some rows appended.
retry_award_frames = []
still_failing = []
for player_id in tqdm(list(error_awards)):
    try:
        time.sleep(1)  # keep the same request pacing as the bulk fetch
        award = playerawards.PlayerAwards(player_id=player_id)
        retry_award_frames.append(award.get_data_frames()[0])
    except Exception:
        still_failing.append(player_id)

# Append all recovered frames in a single concat.
if retry_award_frames:
    players_awards = pd.concat([players_awards, *retry_award_frames], axis=0, ignore_index=True)

# Keep only the IDs that are still missing, so re-running this cell does not
# fetch (and duplicate) awards for players that were already recovered.
error_awards = still_failing
if error_awards:
    print(f"Still failing after retry: {error_awards}")

100%|██████████| 9/9 [00:11<00:00,  1.27s/it]


In [20]:
# Persist the collected awards table to disk as a pickle file.
with open("players_awards.pkl", mode="wb") as awards_file:
    pickle.dump(players_awards, awards_file)