In [18]:
!pip install nba_api



In [19]:
from nba_api.stats.endpoints import commonallplayers
# Query the CommonAllPlayers endpoint with the current-season-only flag set.
players = commonallplayers.CommonAllPlayers(is_only_current_season=1)

# Keep the first DataFrame the endpoint object returns as the player table.
players_df = players.get_data_frames()[0]

# PERSON_ID is left as a regular column (no index is set; the dataset is small).
# Display every player id as a plain Python list.
players_df.loc[:, 'PERSON_ID'].tolist()

[1630173,
 203500,
 1628389,
 1630583,
 200746,
 1629638,
 1628960,
 1628386,
 1630631,
 1626147,
 203937,
 203507,
 203648,
 2546,
 1630175,
 1628384,
 1627853,
 2772,
 201571,
 1630166,
 1630555,
 1629028,
 1628962,
 1628963,
 1630163,
 1628366,
 1628964,
 1630217,
 1630625,
 1627760,
 203084,
 1630567,
 1629628,
 203115,
 1628238,
 1629646,
 1628966,
 201587,
 203145,
 1629647,
 203078,
 1627736,
 1628395,
 1627761,
 202722,
 201976,
 1630180,
 203920,
 1629048,
 202687,
 202357,
 202339,
 1629833,
 203992,
 202711,
 1629626,
 1630195,
 1629067,
 1626164,
 1630527,
 1628449,
 1630547,
 202340,
 1628396,
 1629649,
 1628969,
 1628970,
 1629052,
 1627763,
 1629717,
 1628415,
 1628971,
 1627759,
 1629650,
 1628425,
 1630535,
 1629718,
 1630602,
 1628972,
 1628973,
 1628418,
 1629783,
 203493,
 203504,
 202692,
 1630215,
 202710,
 1629719,
 203484,
 1630267,
 1628427,
 1629962,
 203991,
 1630176,
 1628975,
 1628976,
 1627936,
 1626161,
 1629958,
 1630551,
 1629597,
 1629185,
 1627737,
 1

In [20]:
# Keep only the rows that satisfy all three conditions at once:
#   * PERSON_ID is not 1630597
#   * TEAM_ID is not 0
#   * GAMES_PLAYED_FLAG is not 'N'
# NOTE(review): the reason for excluding PERSON_ID 1630597 is not recorded in this notebook — confirm.
players_df = players_df.loc[
    players_df['PERSON_ID'].ne(1630597)
    & players_df['TEAM_ID'].ne(0)
    & players_df['GAMES_PLAYED_FLAG'].ne('N')
]

In [21]:
# Number of player rows left after filtering.
players_df.shape[0]

503

In [22]:
!pip install tenacity
#from tenacity import *
import tenacity

"""
Fetch Common Player Info for a single player id.

Handles retry policy with the @tenacity.retry decorator.
The function should be decorated with the @tenacity.retry decorator.

Parameters
----------
player_id_list : list
    List of player ID
fetch_func : func
    Function that fetches the dataframe based on the player's id

Returns
-------
pandas.DataFrame
    Returns all DataFrames from every player merged in a single DataFrame

Examples
--------
These are written in doctest format, and should illustrate how to
use the function.

>>> @tenacity.retry(...)
>>> def fetch_func(id):
>>>   ...
>>>   return single_player_df
>>>   ...
>>> player_id_list = players_df['PERSON_ID'].tolist()
>>> result_df = get_info_from_players(player_id_list, fetch_func)
>>> result_df

"""
#@tenacity.retry(wait=tenacity.wait_fixed(1), stop=tenacity.stop_after_attempt(5))
@tenacity.retry(wait=tenacity.wait_fixed(1))
def fetch_single_player_CommonPlayerInfo_retry(id):
    player_info = commonplayerinfo.CommonPlayerInfo(id)
    return player_info.get_data_frames()[0]



In [23]:
"""
Fetch Player Career Stats for every player id.

Uses the playercareerstats endpoint from nba_api.stats.endpoints.
Handles retry policy from the decorated function passed as argument.
The function should be decorated with the @tenacity.retry decorator.
Delays 1 second between API calls.
Note: The endpoint returns the stats for every season, so there can be more than 1 row for every player

Parameters
----------
id : int
    player ID

Returns
-------
pandas.DataFrame
    Returns a DataFrame with the information the PlayerCareerStats endpoint gives for the player ID

Examples
--------

>>> id = 1495 # Tim Duncan's id
>>> result_df = fetch_single_player_PlayerCareerStats_retry(id)
>>> result_df

"""

from nba_api.stats.endpoints import playercareerstats

@tenacity.retry(wait=tenacity.wait_fixed(1))
def fetch_single_player_PlayerCareerStats_retry(id):
    player_stats = playercareerstats.PlayerCareerStats(player_id=id)
    return player_stats.get_data_frames()[0]

In [27]:
import time
import pandas as pd

class FetchFailureException(Exception):
    """Raised when fetching a player fails; stores the index given by the raiser.

    Creating an instance prints a notice to stdout as a side effect.
    """

    def __init__(self, failureIndex):
        # Announce the failure first, then remember the index for get_index().
        print("There was a problem fetching player with index: ", failureIndex)
        self.failureIndex = failureIndex

    def get_index(self):
        """Return the stored failure index."""
        return self.failureIndex

"""
Fetch specified info for every player id and merge in single DataFrame.

Uses a fetch_func parameter to specify which endpoint to use.
Handles retry policy from the decorated function passed as argument.
The function should be decorated with the @tenacity.retry decorator.
Delays 1 second between API calls.

Parameters
----------
player_id_list : list
    List of player ID
fetch_func : func
    Function that fetches the dataframe based on the player's id

Returns
-------
pandas.DataFrame
    Returns all DataFrames from every player merged in a single DataFrame

Examples
--------
These are written in doctest format, and should illustrate how to
use the function.

>>> @tenacity.retry(...)
>>> def fetch_func(id):
>>>   ...
>>>   return single_player_df
>>>   ...
>>> player_id_list = players_df['PERSON_ID'].tolist()
>>> result_df = get_info_from_players(player_id_list, fetch_func)
>>> result_df

"""
def get_info_from_players(player_id_list, fetch_func):
    try: # Try for the whole loop
        result_df = pd.DataFrame() #creates a new dataframe that's empty
        for player_id in player_id_list:
            time.sleep(1) # Delay 1 second to avoid being blocked by the API because of rapid repeating calls
            try: # Try for a single fetch
                # Returns a DataFrame with a single row containing the players personal info
                player_info = fetch_func(player_id)
            except tenacity.RetryError: # Single fetch failed
            # Tried too many times with timeout. Tenacity decorator launched this exception
            # Now launch an exception containing the information of the last player that was fetched with success
                raise FetchFailureException(index)
            else: # Single fetch succeeded
                # Fetch with success so append new row to result DataFrame
                print("\r", "player_id "+str(player_id)+" successful", end="")
                result_df = result_df.append(player_info, ignore_index = False) # ignoring index is optional
        return result_df

    except FetchFailureException as e: # There was a problem in an iteration of the loop
        # A fetch failed so here we print all the information we need to identify the error
        print('Fetch failed for player in index: ', e.get_index())
        return result_df
    else: # All the rows were fetched correctly
        print('Fetched all rows succesfully!')
        return result_df

In [None]:
# Ids of every player currently in players_df.
player_id_list = players_df['PERSON_ID'].tolist()
# Alternative run: fetch the CommonPlayerInfo data instead of the career stats.
#result_df = get_info_from_players(player_id_list, fetch_single_player_CommonPlayerInfo_retry)
# Active run: one PlayerCareerStats fetch per player, merged into a single DataFrame.
# NOTE(review): later cells inspect personal-info columns and save a
# "players_personal_info_raw.csv" file, which looks like it expects the
# CommonPlayerInfo run above rather than this one — confirm which is intended.
result_df = get_info_from_players(player_id_list, fetch_single_player_PlayerCareerStats_retry)
#result_df

 player_id 203500 successfull

In [46]:
# Show the column labels of the fetched result.
# NOTE(review): the recorded output lists personal-info columns (names, BIRTHDATE,
# HEIGHT, ...), which suggests result_df came from the CommonPlayerInfo fetcher
# when this cell was last run — confirm against the fetch cell.
result_df.columns

Index(['PERSON_ID', 'FIRST_NAME', 'LAST_NAME', 'DISPLAY_FIRST_LAST',
       'DISPLAY_LAST_COMMA_FIRST', 'DISPLAY_FI_LAST', 'PLAYER_SLUG',
       'BIRTHDATE', 'SCHOOL', 'COUNTRY', 'LAST_AFFILIATION', 'HEIGHT',
       'WEIGHT', 'SEASON_EXP', 'JERSEY', 'POSITION', 'ROSTERSTATUS',
       'GAMES_PLAYED_CURRENT_SEASON_FLAG', 'TEAM_ID', 'TEAM_NAME',
       'TEAM_ABBREVIATION', 'TEAM_CODE', 'TEAM_CITY', 'PLAYERCODE',
       'FROM_YEAR', 'TO_YEAR', 'DLEAGUE_FLAG', 'NBA_FLAG', 'GAMES_PLAYED_FLAG',
       'DRAFT_YEAR', 'DRAFT_ROUND', 'DRAFT_NUMBER', 'GREATEST_75_FLAG'],
      dtype='object')

In [43]:
import os

# The output directory defaults to the original machine-specific location, but
# can be redirected with the NBA_DATA_DIR environment variable so the notebook
# also runs on other machines. With the variable unset, file_path is unchanged.
output_dir = os.environ.get('NBA_DATA_DIR', 'D:/Users/marco/Documents/AnyoneAI')
file_path = output_dir.rstrip('/\\') + '/players_personal_info_raw.csv'

# Write the fetched rows as UTF-8 CSV without the DataFrame index column.
result_df.to_csv(file_path, encoding='utf-8', index=False)

In [None]:
player_id (int) (INDEX) PERSON_ID
player_name (str) DISPLAY_FIRST_LAST
team_name (str) TEAM_NAME
position (str) POSITION
height (int) (in centimeters) HEIGHT
weight (float) (in kilograms) WEIGHT
country_of_origin (str) COUNTRY
date_of_birth (datetime) BIRTHDATE
age (str) (years and months) f(BIRTHDATE)
years_of_experience (int) (years since entering the league) FROM_YEAR
draft_position (int) f(DRAFT_ROUND, DRAFT_NUMBER)