In [1]:
!pip install nba_api

Collecting nba_api
  Downloading nba_api-1.1.11.tar.gz (125 kB)
Building wheels for collected packages: nba-api
  Building wheel for nba-api (setup.py): started
  Building wheel for nba-api (setup.py): finished with status 'done'
  Created wheel for nba-api: filename=nba_api-1.1.11-py3-none-any.whl size=251504 sha256=b0e78107ebd05d2051bbdda6737278acba325de6c878755e6d57214061718059
  Stored in directory: c:\users\marco\appdata\local\pip\cache\wheels\66\c2\3b\c87a243f9e5d2449e7f2c7bd65de4a6b5ce9a24b33978398a7
Successfully built nba-api
Installing collected packages: nba-api
Successfully installed nba-api-1.1.11


In [31]:
from nba_api.stats.endpoints import commonallplayers
players = commonallplayers.CommonAllPlayers(is_only_current_season=1)
players_df = players.get_data_frames()[0]
#players_df.set_index('PERSON_ID', inplace=True) # No need to set index, small dataset
players_df['PERSON_ID'].tolist()

[1630173,
 203500,
 1628389,
 1630583,
 200746,
 1629638,
 1628960,
 1628386,
 1630631,
 1626147,
 203937,
 203507,
 203648,
 2546,
 1630175,
 1628384,
 1627853,
 2772,
 201571,
 1630166,
 1630555,
 1629028,
 1628962,
 1628963,
 1630163,
 1628366,
 1628964,
 1630217,
 1630625,
 1627760,
 203084,
 1630567,
 1629628,
 203115,
 1628238,
 1629646,
 1628966,
 201587,
 203145,
 1629647,
 203078,
 1627736,
 1628395,
 1627761,
 202722,
 201976,
 1630180,
 203920,
 1629048,
 202687,
 202357,
 202339,
 1629833,
 203992,
 202711,
 1629626,
 1630195,
 1629067,
 1626164,
 1630527,
 1628449,
 1630547,
 202340,
 1628396,
 1629649,
 1628969,
 1628970,
 1629052,
 1627763,
 1629717,
 1628415,
 1628971,
 1627759,
 1629650,
 1628425,
 1630535,
 1629718,
 1630602,
 1628972,
 1628973,
 1628418,
 1629783,
 203493,
 203504,
 202692,
 1630215,
 202710,
 1629719,
 203484,
 1630267,
 1628427,
 1629962,
 203991,
 1630176,
 1628975,
 1628976,
 1627936,
 1626161,
 1629958,
 1630551,
 1629597,
 1629185,
 1627737,
 1

In [32]:
players_df = players_df.loc[~(players_df['PERSON_ID'] == 1630597)]
players_df = players_df.loc[(players_df['TEAM_ID'] != 0) & (players_df['GAMES_PLAYED_FLAG'] != 'N')]

In [33]:
len(players_df)

503

In [1]:
!pip install tenacity
#from tenacity import *
import tenacity

"""
Fetch Common Player Info for a single player id.

Handles retry policy with the @tenacity.retry decorator.
The function should be decorated with the @tenacity.retry decorator.

Parameters
----------
player_id_list : list
    List of player ID
fetch_func : func
    Function that fetches the dataframe based on the player's id

Returns
-------
pandas.DataFrame
    Returns all DataFrames from every player merged in a single DataFrame

Examples
--------
These are written in doctest format, and should illustrate how to
use the function.

>>> @tenacity.retry(...)
>>> def fetch_func(id):
>>>   ...
>>>   return single_player_df
>>>   ...
>>> player_id_list = players_df['PERSON_ID'].tolist()
>>> result_df = get_info_from_players(player_id_list, fetch_func)
>>> result_df

"""
#@tenacity.retry(wait=tenacity.wait_fixed(1), stop=tenacity.stop_after_attempt(5))
@tenacity.retry(wait=tenacity.wait_fixed(1))
def fetch_single_player_CommonPlayerInfo_retry(id):
    player_info = commonplayerinfo.CommonPlayerInfo(id)
    return player_info.get_data_frames()[0]



In [None]:
"""
Fetch Player Career Stats for every player id.

Uses the playercareerstats endpoint from nba_api.stats.endpoints.
Handles retry policy from the decorated function passed as argument.
The function should be decorated with the @tenacity.retry decorator.
Delays 1 second between API calls.
Note: The endpoint returns the stats for every season, so there can be more than 1 row for every player

Parameters
----------
id : int
    player ID

Returns
-------
pandas.DataFrame
    Returns a DataFrame with the information the PlayerCareerStats endpoint gives for the player ID

Examples
--------

>>> id = 1495 # Tim Duncan's id
>>> result_df = fetch_single_player_PlayerCareerStats_retry(id)
>>> result_df

"""

from nba_api.stats.endpoints import playercareerstats

@tenacity.retry(wait=tenacity.wait_fixed(1))
def fetch_single_player_PlayerCareerStats_retry(id):
    player_stats = playercareerstats.PlayerCareerStats(player_id=id)
    return player_stats.get_data_frames()[0]

In [3]:
import time
import pandas as pd

class FetchFailureException(Exception): # raised if data conversion fails
    def __init__(self, failureIndex):
        self.failureIndex = failureIndex;
        print("There was a problem fetching player with index: ", self.failureIndex)
    def get_index(self):
        return self.failureIndex

"""
Fetch specified info for every player id and merge in single DataFrame.

Uses a fetch_func parameter to specify which endpoint to use.
Handles retry policy from the decorated function passed as argument.
The function should be decorated with the @tenacity.retry decorator.
Delays 1 second between API calls.

Parameters
----------
player_id_list : list
    List of player ID
fetch_func : func
    Function that fetches the dataframe based on the player's id

Returns
-------
pandas.DataFrame
    Returns all DataFrames from every player merged in a single DataFrame

Examples
--------
These are written in doctest format, and should illustrate how to
use the function.

>>> @tenacity.retry(...)
>>> def fetch_func(id):
>>>   ...
>>>   return single_player_df
>>>   ...
>>> player_id_list = players_df['PERSON_ID'].tolist()
>>> result_df = get_info_from_players(player_id_list, fetch_func)
>>> result_df

"""
def get_info_from_players(player_id_list, fetch_func):
    try: # Try for the whole loop
        result_df = pd.DataFrame() #creates a new dataframe that's empty
        for player_id in player_id_list:
            time.sleep(1) # Delay 1 second to avoid being blocked by the API because of rapid repeating calls
            try: # Try for a single fetch
                # Returns a DataFrame with a single row containing the players personal info
                player_info = fetch_single_player_CommonPlayerInfo_retry(player_id)
            except tenacity.RetryError: # Single fetch failed
            # Tried too many times with timeout. Tenacity decorator launched this exception
            # Now launch an exception containing the information of the last player that was fetched with success
                raise FetchFailureException(index)
            else: # Single fetch succeeded
                # Fetch with success so append new row to result DataFrame
                print("\r", "player_id "+str(player_id)+" successful", end="")
                result_df = result_df.append(player_info, ignore_index = False) # ignoring index is optional
        return result_df

    except FetchFailureException as e: # There was a problem in an iteration of the loop
        # A fetch failed so here we print all the information we need to identify the error
        print('Fetch failed for player in index: ', e.get_index())
        return result_df
    else: # All the rows were fetched correctly
        print('Fetched all rows succesfully!')
        return result_df

In [39]:
player_id_list = players_df['PERSON_ID'].tolist()
result_df = get_info_from_players(player_id_list, fetch_single_player_CommonPlayerInfo_retry)
result_df

 player_id 1627826 successful

Unnamed: 0,PERSON_ID,FIRST_NAME,LAST_NAME,DISPLAY_FIRST_LAST,DISPLAY_LAST_COMMA_FIRST,DISPLAY_FI_LAST,PLAYER_SLUG,BIRTHDATE,SCHOOL,COUNTRY,...,PLAYERCODE,FROM_YEAR,TO_YEAR,DLEAGUE_FLAG,NBA_FLAG,GAMES_PLAYED_FLAG,DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER,GREATEST_75_FLAG
0,1630173,Precious,Achiuwa,Precious Achiuwa,"Achiuwa, Precious",P. Achiuwa,precious-achiuwa,1999-09-19T00:00:00,Memphis,Nigeria,...,precious_achiuwa,2020,2021,N,Y,Y,2020,1,20,N
0,203500,Steven,Adams,Steven Adams,"Adams, Steven",S. Adams,steven-adams,1993-07-20T00:00:00,Pittsburgh,New Zealand,...,steven_adams,2013,2021,N,Y,Y,2013,1,12,N
0,1628389,Bam,Adebayo,Bam Adebayo,"Adebayo, Bam",B. Adebayo,bam-adebayo,1997-07-18T00:00:00,Kentucky,USA,...,bam_adebayo,2017,2021,N,Y,Y,2017,1,14,N
0,1630583,Santi,Aldama,Santi Aldama,"Aldama, Santi",S. Aldama,santi-aldama,2001-01-10T00:00:00,Loyola-Maryland,Spain,...,santi_aldama,2021,2021,Y,Y,Y,2021,1,30,N
0,200746,LaMarcus,Aldridge,LaMarcus Aldridge,"Aldridge, LaMarcus",L. Aldridge,lamarcus-aldridge,1985-07-19T00:00:00,Texas-Austin,USA,...,lamarcus_aldridge,2006,2021,N,Y,Y,2006,1,2,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,1628221,Gabe,York,Gabe York,"York, Gabe",G. York,gabe-york,1993-08-02T00:00:00,Arizona,USA,...,gabe_york,2021,2021,Y,Y,Y,Undrafted,Undrafted,Undrafted,N
0,201152,Thaddeus,Young,Thaddeus Young,"Young, Thaddeus",T. Young,thaddeus-young,1988-06-21T00:00:00,Georgia Tech,USA,...,thaddeus_young,2007,2021,N,Y,Y,2007,1,12,N
0,1629027,Trae,Young,Trae Young,"Young, Trae",T. Young,trae-young,1998-09-19T00:00:00,Oklahoma,USA,...,trae_young,2018,2021,N,Y,Y,2018,1,5,N
0,1630209,Omer,Yurtseven,Omer Yurtseven,"Yurtseven, Omer",O. Yurtseven,omer-yurtseven,1998-06-19T00:00:00,Georgetown,Turkey,...,tmp_omer_yurtseven,2020,2021,Y,Y,Y,Undrafted,Undrafted,Undrafted,N


In [46]:
result_df.columns

Index(['PERSON_ID', 'FIRST_NAME', 'LAST_NAME', 'DISPLAY_FIRST_LAST',
       'DISPLAY_LAST_COMMA_FIRST', 'DISPLAY_FI_LAST', 'PLAYER_SLUG',
       'BIRTHDATE', 'SCHOOL', 'COUNTRY', 'LAST_AFFILIATION', 'HEIGHT',
       'WEIGHT', 'SEASON_EXP', 'JERSEY', 'POSITION', 'ROSTERSTATUS',
       'GAMES_PLAYED_CURRENT_SEASON_FLAG', 'TEAM_ID', 'TEAM_NAME',
       'TEAM_ABBREVIATION', 'TEAM_CODE', 'TEAM_CITY', 'PLAYERCODE',
       'FROM_YEAR', 'TO_YEAR', 'DLEAGUE_FLAG', 'NBA_FLAG', 'GAMES_PLAYED_FLAG',
       'DRAFT_YEAR', 'DRAFT_ROUND', 'DRAFT_NUMBER', 'GREATEST_75_FLAG'],
      dtype='object')

In [43]:
file_path = 'D:/Users/marco/Documents/AnyoneAI/players_personal_info_raw.csv'
result_df.to_csv(file_path, encoding='utf-8', index=False)

In [None]:
player_id (int) (INDEX) PERSON_ID
player_name (str) DISPLAY_FIRST_LAST
team_name (str) TEAM_NAME
position (str) POSITION
height (int) (in centimeters) HEIGHT
weight (float) (in kilograms) WEIGHT
country of origin (str) COUNTRY
date_of_birth (datetime) BIRTHDATE
age (str) (years and months) f(BIRTHDATE)
years_of_experience (int) (years since entering the league) FROM_YEAR
Draft position (int) f(DRAFT_ROUND, DRAFT_NUMBER)

In [12]:
from nba_api.stats.endpoints import playercareerstats
# Anthony Davis
career = playercareerstats.PlayerCareerStats(player_id='203076', per_mode36='Totals')
career.get_data_frames()[0].columns

Index(['PLAYER_ID', 'SEASON_ID', 'LEAGUE_ID', 'TEAM_ID', 'TEAM_ABBREVIATION',
       'PLAYER_AGE', 'GP', 'GS', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A',
       'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL',
       'BLK', 'TOV', 'PF', 'PTS'],
      dtype='object')

In [16]:
career.get_data_frames()[0]

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,203076,2012-13,0,1610612740,NOH,20.0,64,60,1846.0,349,...,0.751,165,357,522,63,75,112,89,158,867
1,203076,2013-14,0,1610612740,NOP,21.0,67,66,2358.0,522,...,0.791,207,466,673,105,89,189,109,200,1394
2,203076,2014-15,0,1610612740,NOP,22.0,68,68,2455.0,642,...,0.805,173,523,696,149,100,200,95,141,1656
3,203076,2015-16,0,1610612740,NOP,23.0,61,61,2164.0,560,...,0.758,130,497,627,116,78,125,121,148,1481
4,203076,2016-17,0,1610612740,NOP,24.0,75,75,2708.0,770,...,0.802,172,712,884,157,94,167,181,168,2099
5,203076,2017-18,0,1610612740,NOP,25.0,75,75,2727.0,780,...,0.828,187,644,831,174,115,193,162,159,2110
6,203076,2018-19,0,1610612740,NOP,26.0,56,56,1850.0,530,...,0.794,174,498,672,218,88,135,112,132,1452
7,203076,2019-20,0,1610612747,LAL,27.0,62,62,2131.0,551,...,0.846,142,435,577,200,91,143,154,156,1618
8,203076,2020-21,0,1610612747,LAL,28.0,36,36,1162.0,301,...,0.738,62,224,286,110,45,59,74,60,786
9,203076,2021-22,0,1610612747,LAL,29.0,40,40,1404.0,370,...,0.713,106,288,394,122,49,90,82,97,927
