The API interface may have changed; refer to https://github.com/swar/nba_api/tree/master/docs/nba_api for the latest documentation. The following code is based on the documentation as of 2023.


In [178]:
from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import cumestatsteamgames, cumestatsteam, gamerotation
import pandas as pd
import numpy as np
import json
import difflib
import time
import requests



In [179]:
# Retry Wrapper 
def retry(func, retries=3, delay=30):
    """Wrap ``func`` so that failed HTTP requests are retried.

    Intended for use as a bare decorator (``@retry``) or called directly
    (``retry(func, retries=5, delay=10)``).

    Args:
        func: Callable to invoke, typically one performing a network request.
        retries: Maximum number of times ``func`` is called.
        delay: Seconds to wait between a failed attempt and the next one.

    Returns:
        A wrapper with the same call signature as ``func``. The wrapper
        returns whatever ``func`` returns, or ``None`` when every attempt
        raised ``requests.exceptions.RequestException``. Any other exception
        propagates immediately.
    """
    import functools  # local import so this cell does not depend on edits elsewhere

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def retry_wrapper(*args, **kwargs):
        for attempt in range(1, retries + 1):
            try:
                return func(*args, **kwargs)
            except requests.exceptions.RequestException as e:
                print(e)
                # Only pause when another attempt will actually follow.
                if attempt < retries:
                    time.sleep(delay)
        # Every attempt failed: callers receive None (best-effort contract).
        return None

    return retry_wrapper

In [180]:
# Get Season Schedule Function 

def getSeasonScheduleFrame(seasons,seasonType):
    """Download every team's schedule for the given seasons as one DataFrame.

    Args:
        seasons: Iterable of season start years as ints (e.g. ``[2022]``);
            each is converted to the ``'2022-23'`` style string.
        seasonType: Value forwarded as ``season_type_all_star`` to the
            ``CumeStatsTeamGames`` endpoint (e.g. ``'Regular Season'``).

    Returns:
        A DataFrame with one row per distinct game, containing the endpoint's
        columns plus ``SEASON``, ``GAME_DATE``, ``HOME_TEAM_NICKNAME``,
        ``HOME_TEAM_ID``, ``AWAY_TEAM_NICKNAME`` and ``AWAY_TEAM_ID``.
        If no schedule data could be retrieved at all, an empty DataFrame
        with those columns is returned instead of raising ``KeyError``.

    Raises:
        IndexError: If a nickname parsed from ``MATCHUP`` has no close match
            in the team lookup table.
    """
    # Columns of the result, used to build a well-formed empty frame.
    # NOTE(review): GAME_ID is assumed to come from the endpoint - confirm.
    expectedColumns = ['MATCHUP', 'GAME_ID', 'SEASON', 'GAME_DATE',
                       'HOME_TEAM_NICKNAME', 'HOME_TEAM_ID',
                       'AWAY_TEAM_NICKNAME', 'AWAY_TEAM_ID']

    # Get team lookup table
    teamLookup = pd.DataFrame(teams.get_teams())
    print(teamLookup.head())

    # Explicit nickname -> id mapping (first occurrence wins), so the lookup
    # does not depend on 'id' happening to be the first column.
    nicknames = teamLookup['nickname'].tolist()
    teamIDByNickname = (teamLookup.drop_duplicates('nickname')
                                  .set_index('nickname')['id']
                                  .to_dict())

    # Get date from string: the first 10 characters of the text before ' at'
    def getGameDate(matchup):
        return matchup.partition(' at')[0][:10]

    # Get Home team from string: everything after ' at'
    # (presumably keeps a leading space, e.g. ' Celtics' - verify against data)
    def getHomeTeam(matchup):
        return matchup.partition(' at')[2]

    # Get Away team from string: what follows the 10-character date prefix
    def getAwayTeam(matchup):
        return matchup.partition(' at')[0][10:]

    # Match nickname from schedule to team table to find ID
    def getTeamIDFromNickname(nickname):
        # Strip surrounding whitespace only for matching; stored values are untouched.
        closest = difflib.get_close_matches(nickname.strip(), nicknames, 1)[0]
        return teamIDByNickname[closest]

    @retry
    def getRegularSeasonSchedule(season,teamID,seasonType):
        season = str(season) + "-" + str(season+1)[-2:] # Convert year to season format ie. 2020 -> 2020-21
        teamGames = cumestatsteamgames.CumeStatsTeamGames(league_id = '00',season = season ,
                                                                      season_type_all_star=seasonType,
                                                                      team_id = teamID).get_normalized_json()

        teamGames = pd.DataFrame(json.loads(teamGames)['CumeStatsTeamGames'])
        teamGames['SEASON'] = season
        return teamGames

    # Get teams schedule for each team for each season.
    # Frames are collected and concatenated once, instead of re-copying the
    # accumulated frame on every iteration.
    teamFrames = []
    for season in seasons:
        for teamID in teamLookup['id']:
            time.sleep(1)  # throttle requests to the stats API
            teamGames = getRegularSeasonSchedule(season,teamID,seasonType)
            # retry() yields None when every attempt failed; skip those teams
            if teamGames is None or teamGames.empty:
                continue
            teamFrames.append(teamGames)

    # Nothing retrieved (no seasons given, or every request failed)
    if not teamFrames:
        return pd.DataFrame(columns=expectedColumns)

    scheduleFrame = pd.concat(teamFrames)

    scheduleFrame['GAME_DATE'] = pd.to_datetime(scheduleFrame['MATCHUP'].map(getGameDate))
    scheduleFrame['HOME_TEAM_NICKNAME'] = scheduleFrame['MATCHUP'].map(getHomeTeam)
    scheduleFrame['HOME_TEAM_ID'] = scheduleFrame['HOME_TEAM_NICKNAME'].map(getTeamIDFromNickname)
    scheduleFrame['AWAY_TEAM_NICKNAME'] = scheduleFrame['MATCHUP'].map(getAwayTeam)
    scheduleFrame['AWAY_TEAM_ID'] = scheduleFrame['AWAY_TEAM_NICKNAME'].map(getTeamIDFromNickname)
    scheduleFrame = scheduleFrame.drop_duplicates() # There's a row for both teams, only need 1
    scheduleFrame = scheduleFrame.reset_index(drop=True)

    return scheduleFrame

In [181]:
# Get ScheduleFrame
# Build the schedule frame for the seasons listed below and report how long
# the download took.

seasons = [2022]  # season start years passed to getSeasonScheduleFrame
seasonType = 'Regular Season'

start = time.perf_counter_ns() # Track cell's runtime
scheduleFrame = getSeasonScheduleFrame(seasons,seasonType)
end = time.perf_counter_ns()


secs = (end-start)/1e9  # nanoseconds -> seconds
mins = secs/60
print(mins)  # elapsed time in minutes

# Summarise the frame; uncomment the last line to write the frame to CSV
scheduleFrame.info()
#scheduleFrame.to_csv('scheduleFrame.csv',index=False)

           id             full_name abbreviation   nickname         city  \
0  1610612737         Atlanta Hawks          ATL      Hawks      Atlanta   
1  1610612738        Boston Celtics          BOS    Celtics       Boston   
2  1610612739   Cleveland Cavaliers          CLE  Cavaliers    Cleveland   
3  1610612740  New Orleans Pelicans          NOP   Pelicans  New Orleans   
4  1610612741         Chicago Bulls          CHI      Bulls      Chicago   

           state  year_founded  
0        Georgia          1949  
1  Massachusetts          1946  
2           Ohio          1970  
3      Louisiana          2002  
4       Illinois          1966  
<class 'pandas.core.frame.DataFrame'>
0.8147934027666667
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1230 entries, 0 to 1229
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   MATCHUP             1230 non-null   object        
 1   GAME_ID  

In [184]:
# Get Single Game aggregation columns

def getSingleGameMetrics(gameID,homeTeamID,awayTeamID,awayTeamNickname,seasonYear,gameDate,seasonType="Regular Season"):
    """Fetch the team totals for one game and add derived per-team metrics.

    Args:
        gameID: Game identifier forwarded as ``game_ids`` to ``CumeStatsTeam``.
        homeTeamID: Team id used for the request (``team_id``).
        awayTeamID: Team id written into row 1's ``TEAM_ID``.
        awayTeamNickname: Nickname written into row 1's ``NICKNAME``.
        seasonYear: Season value forwarded as ``season`` and stored in ``SEASON``.
        gameDate: Value stored in the ``GAME_DATE`` column.
        seasonType: Value forwarded as ``season_type_all_star``; defaults to
            ``"Regular Season"``, the value that was previously hard-coded.

    Returns:
        The ``TotalTeamStats`` DataFrame with rows 0 and 1 extended by
        ``OFFENSIVE_EFFICIENCY`` and ``SCORING_MARGIN``, and with ``SEASON``,
        ``GAME_DATE`` and ``GAME_ID`` columns added.
        NOTE(review): row 0 is presumably the requested (home) team and
        row 1 its opponent - confirm against the endpoint.

    Raises:
        RuntimeError: If the stats request failed on every retry attempt.
    """

    @retry
    def getGameStats(teamID,gameID,seasonYear):
        gameStats = cumestatsteam.CumeStatsTeam(game_ids=gameID,league_id ="00",
                                               season=seasonYear,season_type_all_star=seasonType,
                                               team_id = teamID).get_normalized_json()

        gameStats = pd.DataFrame(json.loads(gameStats)['TotalTeamStats'])
        return gameStats

    data = getGameStats(homeTeamID,gameID,seasonYear)
    # retry() returns None once its attempts are exhausted; fail with a clear
    # message instead of an AttributeError on None further down.
    if data is None:
        raise RuntimeError("Could not retrieve team stats for game %s (team %s)" % (gameID, homeTeamID))

    # Row 1 is relabelled with the away team's identity.
    data.at[1,'NICKNAME'] = awayTeamNickname
    data.at[1,'TEAM_ID'] = awayTeamID

    # Same formulas for both rows; row 1 is processed first, then row 0,
    # matching the original assignment order.
    for teamRow, opponentRow in ((1, 0), (0, 1)):
        made = data.at[teamRow,'FG'] + data.at[teamRow,'AST']
        used = (data.at[teamRow,'FGA'] - data.at[teamRow,'OFF_REB']
                + data.at[teamRow,'AST'] + data.at[teamRow,'TOTAL_TURNOVERS'])
        data.at[teamRow,'OFFENSIVE_EFFICIENCY'] = made/used
        data.at[teamRow,'SCORING_MARGIN'] = data.at[teamRow,'PTS'] - data.at[opponentRow,'PTS']

    data['SEASON'] = seasonYear
    data['GAME_DATE'] = gameDate
    data['GAME_ID'] = gameID
    return data

In [185]:
# Example Output of Single Game Metrics
# Fetch metrics for the first 5 scheduled games and split each game's two
# rows into a winners frame and a losers frame.
print(len(scheduleFrame))
# One getSingleGameMetrics result (a DataFrame) per schedule row
frames = scheduleFrame.head(5).apply(lambda row: getSingleGameMetrics(row['GAME_ID'], row['HOME_TEAM_ID'], row['AWAY_TEAM_ID'], row['AWAY_TEAM_NICKNAME'], row['SEASON'], row['GAME_DATE']), axis=1)


# Column labels of a single team row, reused as the columns of both result frames
index_seed=frames[0].loc[0].index

winners=pd.DataFrame(columns=index_seed) 
losers=pd.DataFrame(columns=index_seed)
#print(frame1)



for frame in frames:

    # Row with the larger 'W' value is the winner; if the values are equal,
    # row 1 is taken as winner and row 0 as loser.
    winner,loser = (frame.loc[0],frame.loc[1]) if frame.at[0,'W'] > frame.at[1,'W'] else (frame.loc[1],frame.loc[0])

    list1=winner.values.tolist()
    list2=loser.values.tolist()

    # Append each row at the next integer label
    winners.loc[len(winners)]=list1
    losers.loc[len(losers)]=list2




# Earlier attempts at appending rows, kept for reference:
#print(loser.values.tolist())    
#winners=winners.append(loser,ignore_index=True)
#winners[len(winners)] = winner.values.tolist()
#winners=winners.append(pd.DataFrame(winner).T)  
print(len(winners))
print(len(losers))


# print(frame[0]['NICKNAME'],frame[0]['W'])
# print(frame[1]['NICKNAME'],frame[1]['W'])

1230
5
5
