# Get Dataset of NBA Players

This notebook will get the dataset of NBA players for our project. We will include all active players of the 2023-2024 season who have played at least 500 minutes. All players who have ever played in the NBA can be identified by a unique player ID.

In [1]:
# Imports
from nba_api.stats.static import players
from nba_api.stats.endpoints import leaguedashplayerstats

In [2]:
# Get all players in the 2023-24 season
player_stats = leaguedashplayerstats.LeagueDashPlayerStats(
    season='2023-24', 
    per_mode_detailed='Totals', 
    season_type_all_star='Regular Season',
)
players_df = player_stats.get_data_frames()[0]
players_df

Unnamed: 0,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,...,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK
0,1630639,A.J. Lawson,A.J.,1610612742,DAL,23.0,42,27,15,0.643,...,428,211,148,415,385,192,400,257,38,405
1,1631260,AJ Green,AJ,1610612749,MIL,24.0,56,35,21,0.625,...,410,121,223,375,313,145,369,257,38,343
2,1631100,AJ Griffin,AJ,1610612737,ATL,20.0,20,8,12,0.400,...,451,121,77,519,450,396,480,257,38,469
3,203932,Aaron Gordon,Aaron,1610612743,DEN,28.0,73,49,24,0.671,...,79,520,457,52,93,13,79,54,38,87
4,1628988,Aaron Holiday,Aaron,1610612745,HOU,27.0,78,39,39,0.500,...,370,335,419,238,216,123,243,257,38,233
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
567,203897,Zach LaVine,Zach,1610612741,CHI,29.0,25,10,15,0.400,...,331,335,249,230,225,451,269,190,38,262
568,1630285,Zavier Simpson,Zavier,1610612763,MEM,27.0,7,1,6,0.143,...,428,199,103,485,460,352,448,257,38,447
569,1630192,Zeke Nnaji,Zeke,1610612743,DEN,23.0,58,41,17,0.707,...,108,358,314,215,351,508,332,257,38,340
570,1630533,Ziaire Williams,Ziaire,1610612763,MEM,22.0,51,20,31,0.392,...,319,382,320,225,246,535,271,190,38,265


In [3]:
print(players_df.columns)
print(players_df['MIN'].dtype)

Index(['PLAYER_ID', 'PLAYER_NAME', 'NICKNAME', 'TEAM_ID', 'TEAM_ABBREVIATION',
       'AGE', 'GP', 'W', 'L', 'W_PCT', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M',
       'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST',
       'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS',
       'NBA_FANTASY_PTS', 'DD2', 'TD3', 'WNBA_FANTASY_PTS', 'GP_RANK',
       'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK', 'FGA_RANK',
       'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK', 'FTM_RANK',
       'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK', 'REB_RANK',
       'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK',
       'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK', 'NBA_FANTASY_PTS_RANK',
       'DD2_RANK', 'TD3_RANK', 'WNBA_FANTASY_PTS_RANK'],
      dtype='object')
float64


In [4]:
# Get the players who have played at least 500 minutes in total during the season
players_with_over_500_min_df = players_df[players_df['MIN'] > 500]
players_with_over_500_min_df = players_with_over_500_min_df.reset_index()
players_with_over_500_min_df

Unnamed: 0,index,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,...,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK
0,1,1631260,AJ Green,AJ,1610612749,MIL,24.0,56,35,21,...,410,121,223,375,313,145,369,257,38,343
1,3,203932,Aaron Gordon,Aaron,1610612743,DEN,28.0,73,49,24,...,79,520,457,52,93,13,79,54,38,87
2,4,1628988,Aaron Holiday,Aaron,1610612745,HOU,27.0,78,39,39,...,370,335,419,238,216,123,243,257,38,233
3,5,1630174,Aaron Nesmith,Aaron,1610612754,IND,24.0,72,41,31,...,61,499,568,103,117,79,133,190,38,133
4,6,1630598,Aaron Wiggins,Aaron,1610612760,OKC,25.0,78,55,23,...,219,421,336,248,202,117,222,257,38,218
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,566,1628380,Zach Collins,Zach,1610612759,SAS,26.0,69,19,50,...,57,446,553,100,146,563,136,135,38,143
356,567,203897,Zach LaVine,Zach,1610612741,CHI,29.0,25,10,15,...,331,335,249,230,225,451,269,190,38,262
357,569,1630192,Zeke Nnaji,Zeke,1610612743,DEN,23.0,58,41,17,...,108,358,314,215,351,508,332,257,38,340
358,570,1630533,Ziaire Williams,Ziaire,1610612763,MEM,22.0,51,20,31,...,319,382,320,225,246,535,271,190,38,265


In [5]:
# Keep only the player name and player id
columns_to_keep = ['PLAYER_ID', 'PLAYER_NAME']
final_list_of_players_df = players_with_over_500_min_df[columns_to_keep]
final_list_of_players_df

Unnamed: 0,PLAYER_ID,PLAYER_NAME
0,1631260,AJ Green
1,203932,Aaron Gordon
2,1628988,Aaron Holiday
3,1630174,Aaron Nesmith
4,1630598,Aaron Wiggins
...,...,...
355,1628380,Zach Collins
356,203897,Zach LaVine
357,1630192,Zeke Nnaji
358,1630533,Ziaire Williams


In [6]:
# Write this dataframe into a csv
path_to_csv = '../../data'
final_list_of_players_df.to_csv(f'{path_to_csv}/players.csv')