API Documentation: https://github.com/swar/nba_api

In [1]:
#Install the nba_api package once from a terminal (version 1.4.1 was used for this notebook).
#pip install nba_api==1.4.1

Collecting nba_api
  Downloading nba_api-1.4.1-py3-none-any.whl (261 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/261.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/261.7 kB[0m [31m2.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.7/261.7 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: nba_api
Successfully installed nba_api-1.4.1


In [2]:
#Import libraries
from nba_api.stats.endpoints import playerprofilev2
from nba_api.stats.static import players
import pandas as pd
import requests
import json
import numpy as np

In [16]:
#Get the list of all NBA players known to nba_api.
#Each entry is a dictionary; the 'full_name' and 'id' keys are the ones used later in this notebook.
#NOTE(review): players comes from nba_api's "static" module, which presumably reads data bundled
#with the package rather than making a live API request - confirm against the nba_api docs.
nba_players = players.get_players()

In [14]:
#Nuggets roster for the 2022-23 season, one full name per entry.
#The order of the names is kept exactly as written, since later cells pair ids with names by position.
playerlist = [
  'Christian Braun',
  'Bruce Brown',
  'Thomas Bryant',
  'Kentavious Caldwell-Pope',
  'Vlatko Cancar',
  'Aaron Gordon',
  'Jeff Green',
  'Bones Hyland',
  'Reggie Jackson',
  'Nikola Jokic',
  'DeAndre Jordan',
  'Jamal Murray',
  'Zeke Nnaji',
  'Michael Porter Jr.',
  'Davon Reed',
  'Ish Smith',
  'Peyton Watson',
  'Jack White',
]

In [17]:
#Look up the nba_api ID of every Nuggets player by name.
#Index the full player list by name once, so each roster name is resolved directly
#instead of relying on nba_players happening to be in the same order as playerlist.
#If two players share a full name, the first one found in nba_players is kept.
ids_by_name = {}
for player in nba_players:
  ids_by_name.setdefault(player['full_name'], player['id'])

#Fail loudly on names that were not found (e.g. a spelling that differs from the API's),
#rather than silently pairing the remaining ids with the wrong names.
missing = [nugget for nugget in playerlist if nugget not in ids_by_name]
if missing:
  raise ValueError(f'No nba_api player found for: {missing}')

#Create list to store all player ids, in the same order as playerlist.
playerids = [ids_by_name[nugget] for nugget in playerlist]

#Create dictionary to have keys (ID) and values (Player Name) for Nuggets team.
#Both lists follow the order of playerlist, so each id is paired with its own player.
nuggets_dict = dict(zip(playerids, playerlist))

In [29]:
#Values used to pick the rows of interest out of each player's statistics table.
#The team ID is the one whose rows show the DEN abbreviation in the output of this section.
NUGGETS_TEAM_ID = 1610612743
SEASON_ID = '2022-23'

#Create list that serves as an array for the Nuggets players (one list of statistics per player).
nuggets_array = []
#Using the player ID from the Nuggets dictionary, we gather the statistics for each player in the dictionary.
for player_id, player_name in nuggets_dict.items():
  #Make a request to the API to gather the per game statistics of the current player.
  stats = playerprofilev2.PlayerProfileV2(player_id, per_mode36='PerGame')
  #Use the first table returned by the request; it is filtered by team and season below.
  season_totals = stats.get_data_frames()[0]
  #Create a filter mask to only get statistics for the 2022-23 season.
  #TEAM_ID was also included since some players were in 2+ teams during the season.
  filter_mask = (season_totals['TEAM_ID'] == NUGGETS_TEAM_ID) & (season_totals['SEASON_ID'] == SEASON_ID)
  #Apply filter mask. The name df is kept because the next section reads its column names.
  df = season_totals[filter_mask]
  #A player without a matching row would add an empty list and break the table built in the
  #next section, so skip that player and report it instead.
  if df.empty:
    print(f'No {SEASON_ID} Nuggets statistics found for {player_name} ({player_id}); skipped.')
    continue
  #Extracting values from dataframe found from https://www.statology.org/pandas-row-to-list/
  #The row of values is stored as a plain list and appended to the array.
  nuggets_array.append(df.values.flatten().tolist())
#Example dataframe (the last player processed) for visual representation. See next section.
df

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,1631298,2022-23,0,1610612743,DEN,25.0,17,0,3.9,0.5,...,0.667,0.4,0.6,1.0,0.2,0.2,0.1,0.1,0.5,1.2


In [30]:
#Columns as list found from https://stackoverflow.com/questions/19482970/get-a-list-from-pandas-dataframe-column-headers
#Column names come from the example dataframe left by the section above.
#They are stored in a variable since df is reassigned below; run this cell once after that section.
columns = list(df)
#Each entry of nuggets_array is already one player's row of values in column order,
#so the dataframe is built directly with one row per player.
#(Zipping the columns with the array and transposing only worked while there were no more
#players than columns; any extra players would have been dropped silently.)
#Empty entries (players with no statistics found) are left out.
rows = [row for row in nuggets_array if row]
df = pd.DataFrame(rows, columns=columns)
#Delete unnecessary columns that give no useful statistics.
df = df.drop(columns=['PLAYER_ID', 'SEASON_ID', 'TEAM_ID', 'LEAGUE_ID', 'TEAM_ABBREVIATION'])
#Make sure any column still held as object type is converted to floats or integers through inference.
#Help from https://www.geeksforgeeks.org/change-data-type-for-one-or-more-columns-in-pandas-dataframe/
df = df.infer_objects()
#Check if the columns have their correct type.
#If so, we can get the proper statistics when using describe.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18 entries, 0 to 17
Data columns (total 22 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   PLAYER_AGE  18 non-null     float64
 1   GP          18 non-null     int64  
 2   GS          18 non-null     int64  
 3   MIN         18 non-null     float64
 4   FGM         18 non-null     float64
 5   FGA         18 non-null     float64
 6   FG_PCT      18 non-null     float64
 7   FG3M        18 non-null     float64
 8   FG3A        18 non-null     float64
 9   FG3_PCT     18 non-null     float64
 10  FTM         18 non-null     float64
 11  FTA         18 non-null     float64
 12  FT_PCT      18 non-null     float64
 13  OREB        18 non-null     float64
 14  DREB        18 non-null     float64
 15  REB         18 non-null     float64
 16  AST         18 non-null     float64
 17  STL         18 non-null     float64
 18  BLK         18 non-null     float64
 19  TOV         18 non-null     flo

In [31]:
#Show the final dataframe of the information gathered:
#one row per player, with the identifier columns already dropped in the section above.
df

Unnamed: 0,PLAYER_AGE,GP,GS,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,22.0,76,6,15.5,1.9,3.8,0.495,0.4,1.3,0.354,...,0.625,0.6,1.8,2.4,0.8,0.5,0.2,0.5,1.3,4.7
1,26.0,80,31,28.5,4.5,9.3,0.483,1.1,3.2,0.358,...,0.758,0.8,3.3,4.1,3.4,1.1,0.6,1.5,2.4,11.5
2,25.0,18,1,11.4,1.8,3.8,0.485,0.2,0.5,0.444,...,0.722,1.1,2.3,3.3,0.1,0.1,0.4,0.6,1.4,4.6
3,30.0,76,76,31.3,3.8,8.3,0.462,1.8,4.2,0.423,...,0.824,0.5,2.3,2.7,2.4,1.5,0.5,1.1,1.9,10.8
4,26.0,60,9,14.8,1.8,3.8,0.476,0.7,1.9,0.374,...,0.927,0.4,1.7,2.1,1.3,0.4,0.2,0.6,1.4,5.0
5,27.0,68,68,30.2,6.3,11.2,0.564,0.9,2.5,0.347,...,0.608,2.4,4.1,6.6,3.0,0.8,0.8,1.4,1.9,16.3
6,36.0,56,4,19.5,2.9,5.9,0.488,0.5,1.9,0.288,...,0.744,0.7,1.9,2.6,1.2,0.3,0.3,0.8,1.8,7.8
7,22.0,42,1,19.5,4.1,10.3,0.399,2.2,5.7,0.378,...,0.866,0.2,1.8,2.0,3.0,0.7,0.3,1.6,1.6,12.1
8,33.0,16,2,19.9,3.1,8.0,0.383,1.2,4.3,0.279,...,0.833,0.2,1.6,1.8,3.1,0.6,0.1,1.2,1.4,7.9
9,28.0,69,69,33.7,9.4,14.8,0.632,0.8,2.2,0.383,...,0.822,2.4,9.4,11.8,9.8,1.3,0.7,3.6,2.5,24.5


In [12]:
#Get the general statistics (count, mean, std, min, quartiles, max) for the dataframe.
#NOTE(review): the saved output below has no GP/GS columns and an older execution count than
#the cells above, so it looks stale - re-run this cell after the sections above to refresh it.
df.describe()

Unnamed: 0,PLAYER_AGE,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
count,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0
mean,27.166667,19.172222,3.411111,6.888889,0.4865,0.927778,2.527778,0.388611,1.277778,1.716667,0.718444,0.811111,2.633333,3.455556,2.222222,0.577778,0.372222,1.111111,1.544444,9.011111
std,4.630462,9.539195,2.497502,4.655533,0.100863,0.90281,2.233121,0.167264,1.214886,1.535176,0.131987,0.679003,2.01757,2.599598,2.449863,0.415233,0.216403,0.807238,0.559645,6.733663
min,20.0,3.9,0.5,1.1,0.313,0.0,0.0,0.167,0.0,0.1,0.458,0.1,0.6,1.0,0.1,0.1,0.1,0.1,0.5,1.2
25%,25.0,11.975,1.8,3.175,0.42925,0.3,0.75,0.3365,0.525,0.8,0.63,0.325,1.45,1.85,0.575,0.3,0.2,0.6,1.325,4.625
50%,26.0,17.5,2.6,4.85,0.484,0.6,1.9,0.369,0.75,1.2,0.747,0.65,1.85,2.6,1.25,0.45,0.35,1.05,1.6,6.5
75%,29.5,28.875,4.4,10.05,0.49425,1.175,3.95,0.41,1.575,1.975,0.8235,1.075,3.275,4.075,3.0,0.775,0.5,1.35,1.9,11.95
max,36.0,33.7,9.4,16.0,0.765,3.0,7.3,1.0,4.9,6.0,0.927,2.4,9.4,11.8,9.8,1.5,0.8,3.6,2.5,24.5


In [32]:
#Transfer the data from the dataframe to a csv file (relative path, so it lands in the current working directory).
#NOTE(review): index=False is not passed, so pandas presumably also writes the row index
#as an unnamed first column - confirm this is wanted.
df.to_csv('DEN2023Roster.csv')