In [20]:
import difflib
import functools
import json
import time

import numpy as np
import pandas as pd
import requests
from IPython.display import display
from nba_api.stats.endpoints import (cumestatsteam,
                                     cumestatsteamgames,
                                     draftboard,
                                     gamerotation,
                                     playergamelogs,
                                     teamgamelogs)
from nba_api.stats.static import players, teams


In [14]:
def retry(func, retries=3, delay=30):
    """
    Wrap ``func`` so that request/JSON-decoding failures are retried.

    Usable as a bare decorator (``@retry``) or called directly
    (``retry(func, retries=5, delay=10)``).

    Args:
        func: Callable to wrap.
        retries: Maximum number of attempts before giving up.
        delay: Seconds to sleep between two attempts.

    Returns:
        A wrapper that forwards all arguments to ``func`` and keeps its
        name and docstring.

    Raises:
        Exception: When every attempt failed. The last underlying error is
            chained as ``__cause__``. Errors other than
            ``requests.exceptions.RequestException`` and
            ``json.JSONDecodeError`` are not retried and propagate unchanged.
    """
    @functools.wraps(func)
    def retry_wrapper(*args, **kwargs):
        last_error = None
        for attempt in range(1, retries + 1):
            try:
                return func(*args, **kwargs)
            except (requests.exceptions.RequestException, json.JSONDecodeError) as e:
                last_error = e
                print(f"Attempt {attempt} failed: {e}")
                # Waiting only makes sense when another attempt follows.
                if attempt < retries:
                    time.sleep(delay)
        raise Exception(f"Failed after {retries} attempts") from last_error
    return retry_wrapper

In [23]:
# =============================================================================
# ULTRA-FAST: TeamGameLogs (plural) - ALL teams in ONE API call per season!
# See: https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/teamgamelogs.md
# =============================================================================

def getTeamGameLogs(seasons, season_type='Regular Season'):
    """
    Get team game logs using the TeamGameLogs endpoint.
    One API call is made per season.

    Args:
        seasons: Iterable of season start years (e.g., [2022, 2023] for 2022-23 and 2023-24)
        season_type: 'Regular Season', 'Playoffs', etc.

    Returns:
        DataFrame with the endpoint's columns for every loaded season, with
        GAME_DATE parsed to datetime and two added columns:
        W (1 for a win, 0 otherwise) and IS_HOME (1 when MATCHUP contains 'vs.').

    Raises:
        ValueError: If no season could be loaded (empty `seasons`, or every
            request failed after its retries).
    """

    @retry
    def fetch_all_team_logs(season_str, season_type):
        """Fetch one season's team game logs in a single call."""
        logs = teamgamelogs.TeamGameLogs(
            season_nullable=season_str,
            season_type_nullable=season_type
        )
        return logs.get_data_frames()[0]

    all_logs = []

    for season in seasons:
        # 2025 -> "2025-26"
        season_str = f"{season}-{str(season + 1)[-2:]}"
        print(f"Loading Team Game Logs for {season_str} {season_type}...", end=" ")

        time.sleep(0.6)  # Rate limiting

        # Best effort: a season that cannot be fetched is reported and skipped.
        try:
            df = fetch_all_team_logs(season_str, season_type)
            all_logs.append(df)
            print(f"✓ {len(df)} records")
        except Exception as e:
            print(f"ERROR: {e}")

    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    if not all_logs:
        raise ValueError("No team game logs could be loaded for the requested seasons")

    # Combine all seasons
    game_logs = pd.concat(all_logs, ignore_index=True)

    # Parse game date
    game_logs['GAME_DATE'] = pd.to_datetime(game_logs['GAME_DATE'])

    # Add W column (1 for win, 0 for loss)
    game_logs['W'] = (game_logs['WL'] == 'W').astype(int)

    # Determine home/away from MATCHUP column ("PHI vs. MIL" is a home game,
    # "LAC @ UTA" an away game). regex=False matches the dot literally.
    game_logs['IS_HOME'] = game_logs['MATCHUP'].str.contains('vs.', regex=False).astype(int)

    print(f"\n✓ Loaded {len(game_logs)} total game records")
    print(f"  Unique games: {game_logs['GAME_ID'].nunique()}")
    print(f"  Teams: {game_logs['TEAM_ID'].nunique()}")

    return game_logs


def getPlayerGameLogs(seasons, season_type='Regular Season'):
    """
    Get player game logs using the PlayerGameLogs endpoint.
    One API call is made per season.

    Args:
        seasons: Iterable of season start years (e.g., [2022, 2023] for 2022-23 and 2023-24)
        season_type: 'Regular Season', 'Playoffs', etc.

    Returns:
        DataFrame with the endpoint's columns for every loaded season (one row
        per player per game), with GAME_DATE parsed to datetime and two added
        columns: W (1 for a win, 0 otherwise) and IS_HOME (1 when MATCHUP
        contains 'vs.').

    Raises:
        ValueError: If no season could be loaded (empty `seasons`, or every
            request failed after its retries).
    """

    @retry
    def fetch_all_player_logs(season_str, season_type):
        """Fetch one season's player game logs in a single call."""
        logs = playergamelogs.PlayerGameLogs(
            season_nullable=season_str,
            season_type_nullable=season_type
        )
        return logs.get_data_frames()[0]

    all_logs = []

    for season in seasons:
        # 2025 -> "2025-26"
        season_str = f"{season}-{str(season + 1)[-2:]}"
        print(f"Loading Player Game Logs for {season_str} {season_type}...", end=" ")

        time.sleep(0.6)  # Rate limiting

        # Best effort: a season that cannot be fetched is reported and skipped.
        try:
            df = fetch_all_player_logs(season_str, season_type)
            all_logs.append(df)
            print(f"✓ {len(df)} records")
        except Exception as e:
            print(f"ERROR: {e}")

    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    if not all_logs:
        raise ValueError("No player game logs could be loaded for the requested seasons")

    # Combine all seasons
    game_logs = pd.concat(all_logs, ignore_index=True)

    # Parse game date
    game_logs['GAME_DATE'] = pd.to_datetime(game_logs['GAME_DATE'])

    # Add W column (1 for win, 0 for loss)
    game_logs['W'] = (game_logs['WL'] == 'W').astype(int)

    # Determine home/away from MATCHUP column ("WAS vs. POR" is a home game,
    # "BKN @ PHX" an away game). regex=False matches the dot literally.
    game_logs['IS_HOME'] = game_logs['MATCHUP'].str.contains('vs.', regex=False).astype(int)

    print(f"\n✓ Loaded {len(game_logs)} total game records")
    print(f"  Unique games: {game_logs['GAME_ID'].nunique()}")
    print(f"  Teams: {game_logs['TEAM_ID'].nunique()}")

    return game_logs


def getPlayerDraftData(seasons):
    """
    Load draft-board data through the DraftBoard endpoint, one call per year.

    Args:
        seasons: Iterable of years. Each value is passed unchanged to
            DraftBoard as `season_year`.

    Returns:
        DataFrame concatenating the first result set of every successful request.

    Raises:
        ValueError: If no year could be loaded (empty `seasons`, or every
            request failed after its retries).

    NOTE(review): the summary below assumes the result has PERSON_ID and
    TEAM_ID columns; confirm against the endpoint's actual schema.
    """
    @retry
    def fetch_all_draft_data(season_year):
        """Fetch the draft board for one year in a single call."""
        board = draftboard.DraftBoard(
            season_year=season_year,
        )
        return board.get_data_frames()[0]

    all_boards = []

    for season in seasons:
        # Show the value that is actually sent to the endpoint (the raw year),
        # not a "2025-26" style season label that is never used in the request.
        print(f"Loading Player Draft Data for {season} ...", end=" ")

        time.sleep(0.6)  # Rate limiting

        # Best effort: a year that cannot be fetched is reported and skipped.
        try:
            df = fetch_all_draft_data(season)
            all_boards.append(df)
            print(f"✓ {len(df)} records")
        except Exception as e:
            print(f"ERROR: {e}")

    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    if not all_boards:
        raise ValueError("No draft data could be loaded for the requested seasons")

    # Combine all years
    draft_data = pd.concat(all_boards, ignore_index=True)

    print(f"\n✓ Loaded {len(draft_data)} total draft records")
    print(f"  Unique Players: {draft_data['PERSON_ID'].nunique()}")
    print(f"  Teams: {draft_data['TEAM_ID'].nunique()}")

    return draft_data


def addFeaturesToGameLogs(game_logs):
    """
    Add derived per-team features to game logs (rolling averages, win %, rest days).

    Works with the frames returned by getTeamGameLogs(), whose season column is
    SEASON_YEAR. Frames that carry a SEASON column instead are accepted too.
    All running statistics restart for every (team, season) pair.

    Args:
        game_logs: DataFrame with TEAM_ID, GAME_DATE (datetime), W, IS_HOME,
            FGM, FGA, AST, OREB, TOV, PLUS_MINUS and a season column
            (SEASON or SEASON_YEAR). The input is not modified.

    Returns:
        A copy sorted by team, season and date (index reset) with the added
        columns OFFENSIVE_EFFICIENCY, SCORING_MARGIN, ROLLING_OE,
        ROLLING_SCORING_MARGIN, TOTAL_WIN_PCTG, HOME_WIN_PCTG, AWAY_WIN_PCTG
        and NUM_REST_DAYS. The home/away percentages are NaN until the team
        has played at least one home/away game. NUM_REST_DAYS is NaN for a
        team's first game of a season.

    Raises:
        KeyError: If neither a SEASON nor a SEASON_YEAR column is present.
    """
    df = game_logs.copy()

    # The loaders in this notebook produce SEASON_YEAR; older frames use SEASON.
    season_col = next((col for col in ('SEASON', 'SEASON_YEAR') if col in df.columns), None)
    if season_col is None:
        raise KeyError("game_logs needs a 'SEASON' or 'SEASON_YEAR' column")
    team_season = ['TEAM_ID', season_col]

    # Sort by team and date for proper rolling calculations
    df = df.sort_values(team_season + ['GAME_DATE']).reset_index(drop=True)

    # Calculate Offensive Efficiency: (FGM + AST) / (FGA - OREB + AST + TOV)
    df['OFFENSIVE_EFFICIENCY'] = (df['FGM'] + df['AST']) / (df['FGA'] - df['OREB'] + df['AST'] + df['TOV'])

    # PLUS_MINUS is used as the scoring margin (the opponent score is not merged in here)
    df['SCORING_MARGIN'] = df['PLUS_MINUS']

    # Rolling averages (last 3 games, fewer at the start of a season)
    df['ROLLING_OE'] = df.groupby(team_season)['OFFENSIVE_EFFICIENCY'].transform(
        lambda x: x.rolling(3, min_periods=1).mean()
    )
    df['ROLLING_SCORING_MARGIN'] = df.groupby(team_season)['SCORING_MARGIN'].transform(
        lambda x: x.rolling(3, min_periods=1).mean()
    )

    # Cumulative win percentage
    df['GAMES_PLAYED'] = df.groupby(team_season).cumcount() + 1
    df['TOTAL_WINS'] = df.groupby(team_season)['W'].cumsum()
    df['TOTAL_WIN_PCTG'] = df['TOTAL_WINS'] / df['GAMES_PLAYED']

    # Home/Away win percentages
    df['HOME_GAMES'] = df.groupby(team_season)['IS_HOME'].cumsum()
    # Grouping the product Series directly keeps the original row index, so the
    # result lines up with df without any index juggling.
    home_win = df['W'] * df['IS_HOME']
    df['HOME_WINS'] = home_win.groupby([df['TEAM_ID'], df[season_col]]).cumsum()
    df['HOME_WIN_PCTG'] = df['HOME_WINS'] / df['HOME_GAMES'].replace(0, np.nan)

    df['AWAY_GAMES'] = df['GAMES_PLAYED'] - df['HOME_GAMES']
    df['AWAY_WINS'] = df['TOTAL_WINS'] - df['HOME_WINS']
    df['AWAY_WIN_PCTG'] = df['AWAY_WINS'] / df['AWAY_GAMES'].replace(0, np.nan)

    # Rest days: calendar days since the team's previous game in the same season
    df['LAST_GAME_DATE'] = df.groupby(team_season)['GAME_DATE'].shift(1)
    df['NUM_REST_DAYS'] = (df['GAME_DATE'] - df['LAST_GAME_DATE']).dt.days

    # Clean up intermediate columns
    df = df.drop(['GAMES_PLAYED', 'TOTAL_WINS', 'HOME_GAMES', 'HOME_WINS',
                  'AWAY_GAMES', 'AWAY_WINS', 'LAST_GAME_DATE'], axis=1)

    return df

In [34]:

# Seasons are listed by their starting year; the loaders format 2025 as "2025-26".
seasons_to_load = [2025]

# Each loader issues one request per season and returns a single combined frame.
team_game_logs = getTeamGameLogs(seasons_to_load, season_type='Regular Season')
player_game_logs = getPlayerGameLogs(seasons_to_load, season_type='Regular Season')
# draft_data = getPlayerDraftData(seasons_to_load)


# Preview the first rows of each frame.
display(team_game_logs.head())
display(player_game_logs.head())
# display(draft_data.head())


Loading Team Game Logs for 2025-26 Regular Season... ✓ 1394 records

✓ Loaded 1394 total game records
  Unique games: 697
  Teams: 30
Loading Player Game Logs for 2025-26 Regular Season... ✓ 15137 records

✓ Loaded 15137 total game records
  Unique games: 697
  Teams: 30


Unnamed: 0,SEASON_YEAR,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,AVAILABLE_FLAG,W,IS_HOME
0,2025-26,1610612755,PHI,Philadelphia 76ers,22500666,2026-01-27,PHI vs. MIL,W,48.0,53,...,816,513,416,866,1350,46,166,1,1,1
1,2025-26,1610612752,NYK,New York Knicks,22500665,2026-01-27,NYK vs. SAC,W,48.0,37,...,228,1309,226,186,1129,1152,192,1,1,1
2,2025-26,1610612746,LAC,LA Clippers,22500670,2026-01-27,LAC @ UTA,W,48.0,41,...,324,980,226,481,1022,688,288,1,1,0
3,2025-26,1610612760,OKC,Oklahoma City Thunder,22500667,2026-01-27,OKC vs. NOP,W,48.0,35,...,1267,59,1370,110,530,1126,370,1,1,1
4,2025-26,1610612764,WAS,Washington Wizards,22500664,2026-01-27,WAS vs. POR,W,48.0,40,...,14,5,664,267,1022,688,539,1,1,1


Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,...,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK,AVAILABLE_FLAG,MIN_SEC,TEAM_COUNT,W,IS_HOME
0,2025-26,1642259,Alex Sarr,Alex,1610612764,WAS,Washington Wizards,22500664,2026-01-27,WAS vs. POR,...,1249,42,1,72,72,1,36:07,1,1,1
1,2025-26,1629008,Michael Porter Jr.,Michael,1610612751,BKN,Brooklyn Nets,22500669,2026-01-27,BKN @ PHX,...,7050,449,1104,72,218,1,36:59,1,0,0
2,2025-26,202331,Paul George,Paul,1610612755,PHI,Philadelphia 76ers,22500666,2026-01-27,PHI vs. MIL,...,1845,576,1104,72,255,1,32:58,1,1,1
3,2025-26,1631096,Chet Holmgren,Chet,1610612760,OKC,Oklahoma City Thunder,22500667,2026-01-27,OKC vs. NOP,...,4835,317,1,72,641,1,32:59,1,1,1
4,2025-26,1626167,Myles Turner,Myles,1610612749,MIL,Milwaukee Bucks,22500666,2026-01-27,MIL @ PHI,...,10522,859,1104,72,641,1,35:27,1,0,0


In [37]:
# All game-log rows whose PLAYER_NAME is exactly "Cooper Flagg".
player_game_logs.loc[player_game_logs["PLAYER_NAME"].eq("Cooper Flagg")]

Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,...,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK,AVAILABLE_FLAG,MIN_SEC,TEAM_COUNT,W,IS_HOME
463,2025-26,1642843,Cooper Flagg,Cooper,1610612742,DAL,Dallas Mavericks,22500648,2026-01-24,DAL vs. LAL,...,9455,1908,1104,72,2598,1,34:19,1,0,1
746,2025-26,1642843,Cooper Flagg,Cooper,1610612742,DAL,Dallas Mavericks,22500630,2026-01-22,DAL vs. GSW,...,504,2519,1,72,2015,1,30:12,1,1,1
1232,2025-26,1642843,Cooper Flagg,Cooper,1610612742,DAL,Dallas Mavericks,22500016,2026-01-19,DAL @ NYK,...,604,4053,1104,72,3360,1,27:46,1,1,0
2149,2025-26,1642843,Cooper Flagg,Cooper,1610612742,DAL,Dallas Mavericks,22500575,2026-01-14,DAL vs. DEN,...,12543,10629,1104,72,10946,1,14:49,1,0,1
2351,2025-26,1642843,Cooper Flagg,Cooper,1610612742,DAL,Dallas Mavericks,22500561,2026-01-12,DAL vs. BKN,...,4379,449,1104,72,724,1,34:53,1,1,1
2760,2025-26,1642843,Cooper Flagg,Cooper,1610612742,DAL,Dallas Mavericks,22500546,2026-01-10,DAL @ CHI,...,13394,8986,1104,72,7844,1,28:13,1,0,0
3030,2025-26,1642843,Cooper Flagg,Cooper,1610612742,DAL,Dallas Mavericks,22500531,2026-01-08,DAL @ UTA,...,5366,191,1,72,255,1,37:49,1,0,0
3363,2025-26,1642843,Cooper Flagg,Cooper,1610612742,DAL,Dallas Mavericks,22500515,2026-01-06,DAL @ SAC,...,2336,1689,1104,72,1827,1,35:00,1,1,0
3868,2025-26,1642843,Cooper Flagg,Cooper,1610612742,DAL,Dallas Mavericks,22500491,2026-01-03,DAL vs. HOU,...,4835,2905,1104,72,3904,1,34:37,1,1,1
4218,2025-26,1642843,Cooper Flagg,Cooper,1610612742,DAL,Dallas Mavericks,22500473,2026-01-01,DAL vs. PHI,...,7735,3529,1104,72,4224,1,36:40,1,0,1


In [38]:
import requests
import pandas as pd

def getDraftBoardFromContentAPI(year, timeout=30):
    """
    Fetch draft board from NBA's Content API.
    Has birthday, height, weight, school, position, etc.

    Args:
        year: Draft year, inserted into the request URL and stored in SEASON.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        DataFrame with one row per pick of the first and second round and the
        columns PICK_NUMBER, PLAYER_NAME, FIRST_NAME, LAST_NAME, POSITION,
        HEIGHT, HEIGHT_INCHES, WEIGHT, SCHOOL, BIRTHDAY, COUNTRY, TEAM_ID,
        TEAM_NAME, TEAM_ABBR, SEASON. Fields missing from the response are
        left empty. The columns are present even when there are no picks.

    Raises:
        requests.exceptions.RequestException: On connection problems, timeouts
            or an HTTP error status.
        KeyError: If the response has no ``results`` / ``picks`` section.
    """
    url = f"https://content-api-prod.nba.com/public/1/leagues/nba/draft/{year}/board"

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Accept': 'application/json'
    }

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail on HTTP errors here instead of trying to parse an error page as JSON.
    response.raise_for_status()
    data = response.json()

    columns = ['PICK_NUMBER', 'PLAYER_NAME', 'FIRST_NAME', 'LAST_NAME', 'POSITION',
               'HEIGHT', 'HEIGHT_INCHES', 'WEIGHT', 'SCHOOL', 'BIRTHDAY', 'COUNTRY',
               'TEAM_ID', 'TEAM_NAME', 'TEAM_ABBR', 'SEASON']

    # Extract picks from first and second round
    picks = []
    rounds = data['results']['picks']

    for round_name in ['firstRound', 'secondRound']:
        for pick in rounds.get(round_name) or []:
            # `or {}` also covers keys that are present but null in the JSON.
            prospect = pick.get('prospect') or {}
            team = pick.get('team') or {}
            height = prospect.get('height') or {}

            picks.append({
                'PICK_NUMBER': pick.get('pickNumber'),
                'PLAYER_NAME': prospect.get('displayName'),
                'FIRST_NAME': prospect.get('firstName'),
                'LAST_NAME': prospect.get('lastName'),
                'POSITION': prospect.get('position'),
                'HEIGHT': height.get('feetAndInches'),
                'HEIGHT_INCHES': height.get('inchesOnly'),
                'WEIGHT': prospect.get('weightLbs'),
                'SCHOOL': prospect.get('school'),
                'BIRTHDAY': prospect.get('birthday'),
                'COUNTRY': prospect.get('country'),
                'TEAM_ID': team.get('teamId'),
                'TEAM_NAME': team.get('teamName'),
                'TEAM_ABBR': team.get('teamAbbr'),
                'SEASON': year
            })

    return pd.DataFrame(picks, columns=columns)

# Usage: fetch the 2025 draft board and preview its first rows.
draft_2025 = getDraftBoardFromContentAPI(2025)
draft_2025.head()

{'pickNumber': 1, 'pickDetails': '', 'team': {'id': 1748283, 'type': 'draft_team', 'season': 2025, 'teamId': 1610612742, 'teamRecord': {'season': '2024-25', 'winsAndLosses': '39-43', 'seasonFinish': '3rd place, Southwest Division', 'playoffsFinish': 'Lost in SoFi Play-In Tournament'}, 'permalink': 'https://www.nba.com/draft/2025/team-profiles/2025-dallas-mavericks', 'appUrl': 'gametime://webview?url=https://www.nba.com/webview/draft/2025/team-profiles/2025-dallas-mavericks?hidenav=true', 'tradeDetails': [], 'teamName': 'Dallas Mavericks', 'teamAbbr': 'DAL', 'pickedFirstRound': True}, 'prospect': {'id': 1753201, 'type': 'draft_prospect', 'season': 2025, 'displayName': 'Cooper Flagg', 'firstName': 'Cooper', 'lastName': 'Flagg', 'permalink': 'https://www.nba.com/draft/2025/prospects/cooper-flagg', 'appUrl': 'gametime://webview?url=https://www.nba.com/webview/draft/2025/prospects/cooper-flagg?hidenav=true', 'position': 'F', 'height': {'feetAndInches': '6-8', 'inchesOnly': 80}, 'weightLbs':

Unnamed: 0,PICK_NUMBER,PLAYER_NAME,FIRST_NAME,LAST_NAME,POSITION,HEIGHT,HEIGHT_INCHES,WEIGHT,SCHOOL,BIRTHDAY,COUNTRY,TEAM_ID,TEAM_NAME,TEAM_ABBR,SEASON
0,1,Cooper Flagg,Cooper,Flagg,F,6-8,80,221,Duke,12/21/2006,United States,1610612742,Dallas Mavericks,DAL,2025
1,2,Dylan Harper,Dylan,Harper,G,6-5,77,213,Rutgers,03/02/2006,United States,1610612759,San Antonio Spurs,SAS,2025
2,3,VJ Edgecombe,VJ,Edgecombe,G,6-4,76,193,Baylor,07/30/2005,Bahamas,1610612755,Philadelphia 76ers,PHI,2025
3,4,Kon Knueppel,Kon,Knueppel,G-F,6-7,79,217,Duke,08/03/2005,United States,1610612766,Charlotte Hornets,CHA,2025
4,5,Ace Bailey,Ace,Bailey,F,6-8,80,203,Rutgers,08/13/2006,United States,1610612762,Utah Jazz,UTA,2025


In [12]:
# Column names of the player game-log frame, as a plain list.
list(player_game_logs.columns)


['SEASON_YEAR',
 'PLAYER_ID',
 'PLAYER_NAME',
 'NICKNAME',
 'TEAM_ID',
 'TEAM_ABBREVIATION',
 'TEAM_NAME',
 'GAME_ID',
 'GAME_DATE',
 'MATCHUP',
 'WL',
 'MIN',
 'FGM',
 'FGA',
 'FG_PCT',
 'FG3M',
 'FG3A',
 'FG3_PCT',
 'FTM',
 'FTA',
 'FT_PCT',
 'OREB',
 'DREB',
 'REB',
 'AST',
 'TOV',
 'STL',
 'BLK',
 'BLKA',
 'PF',
 'PFD',
 'PTS',
 'PLUS_MINUS',
 'NBA_FANTASY_PTS',
 'DD2',
 'TD3',
 'WNBA_FANTASY_PTS',
 'GP_RANK',
 'W_RANK',
 'L_RANK',
 'W_PCT_RANK',
 'MIN_RANK',
 'FGM_RANK',
 'FGA_RANK',
 'FG_PCT_RANK',
 'FG3M_RANK',
 'FG3A_RANK',
 'FG3_PCT_RANK',
 'FTM_RANK',
 'FTA_RANK',
 'FT_PCT_RANK',
 'OREB_RANK',
 'DREB_RANK',
 'REB_RANK',
 'AST_RANK',
 'TOV_RANK',
 'STL_RANK',
 'BLK_RANK',
 'BLKA_RANK',
 'PF_RANK',
 'PFD_RANK',
 'PTS_RANK',
 'PLUS_MINUS_RANK',
 'NBA_FANTASY_PTS_RANK',
 'DD2_RANK',
 'TD3_RANK',
 'WNBA_FANTASY_PTS_RANK',
 'AVAILABLE_FLAG',
 'MIN_SEC',
 'TEAM_COUNT',
 'W',
 'IS_HOME']

In [5]:
# Get Season Schedule Function 

def getSeasonScheduleFrame(seasons,seasonType):
    """
    Build a schedule frame with one row per game for the given seasons.

    Every team's schedule is requested separately from CumeStatsTeamGames
    (one call per team per season, with a one-second pause before each call).
    The game date and both team nicknames are then parsed out of MATCHUP, and
    the nicknames are resolved to team IDs by closest match.

    Args:
        seasons: Iterable of season start years (2020 is sent as "2020-21").
        seasonType: Passed to the endpoint as season_type_all_star,
            e.g. 'Regular Season'.

    Returns:
        DataFrame with the endpoint's columns plus SEASON, GAME_DATE,
        HOME_TEAM_NICKNAME, HOME_TEAM_ID, AWAY_TEAM_NICKNAME and AWAY_TEAM_ID.
        Exact duplicate rows are removed and the index is reset.

    Raises:
        ValueError: If no schedule was loaded (e.g. `seasons` is empty).
    """

    # MATCHUP is sliced as "<10-character date><away nickname> at<home nickname>"
    # (presumably "MM/DD/YYYY Away at Home"; confirm against the endpoint).
    def getGameDate(matchup):
        return matchup.partition(' at')[0][:10]

    def getHomeTeam(matchup):
        # Strip the blank that the slicing leaves in front of the nickname.
        return matchup.partition(' at')[2].strip()

    def getAwayTeam(matchup):
        return matchup.partition(' at')[0][10:].strip()

    def getTeamIDFromNickname(nickname):
        # Fuzzy match so that small spelling differences still resolve.
        closest = difflib.get_close_matches(nickname,teamLookup['nickname'],1)[0]
        return teamLookup.loc[teamLookup['nickname'] == closest, 'id'].iloc[0]

    @retry
    def getRegularSeasonSchedule(season,teamID,seasonType):
        season = str(season) + "-" + str(season+1)[-2:]
        teamGames = cumestatsteamgames.CumeStatsTeamGames(league_id = '00',season = season ,
                                                                      season_type_all_star=seasonType,
                                                                      team_id = teamID).get_normalized_json()

        teamGames = pd.DataFrame(json.loads(teamGames)['CumeStatsTeamGames'])
        teamGames['SEASON'] = season
        return teamGames

    teamLookup = pd.DataFrame(teams.get_teams())

    # Collect one frame per (season, team). Appending keeps every season; a
    # list indexed by team only would be overwritten by each later season.
    scheduleFrames = []

    for season in seasons:
        for teamID in teamLookup['id']:
            time.sleep(1)
            scheduleFrames.append(getRegularSeasonSchedule(season,teamID,seasonType))

    if not scheduleFrames:
        raise ValueError("No schedule could be loaded: `seasons` is empty")

    scheduleFrame = pd.concat(scheduleFrames)

    scheduleFrame['GAME_DATE'] = pd.to_datetime(scheduleFrame['MATCHUP'].map(getGameDate))
    scheduleFrame['HOME_TEAM_NICKNAME'] = scheduleFrame['MATCHUP'].map(getHomeTeam)
    scheduleFrame['HOME_TEAM_ID'] = scheduleFrame['HOME_TEAM_NICKNAME'].map(getTeamIDFromNickname)
    scheduleFrame['AWAY_TEAM_NICKNAME'] = scheduleFrame['MATCHUP'].map(getAwayTeam)
    scheduleFrame['AWAY_TEAM_ID'] = scheduleFrame['AWAY_TEAM_NICKNAME'].map(getTeamIDFromNickname)
    scheduleFrame = scheduleFrame.drop_duplicates() # There's a row for both teams, only need 1
    scheduleFrame = scheduleFrame.reset_index(drop=True)

    return scheduleFrame





In [6]:
# Get Single Game aggregation columns

def getSingleGameMetrics(gameID,homeTeamID,awayTeamID,awayTeamNickname,seasonYear,gameDate,seasonType="Regular Season"):
    """
    Fetch both teams' totals for one game and add per-game metrics.

    CumeStatsTeam is queried for the home team. Row 0 of its TotalTeamStats
    holds that team's totals and row 1 the opponents' totals. Row 1 is
    relabelled with the away team's nickname and ID.

    Args:
        gameID: Game ID passed to the endpoint as `game_ids`.
        homeTeamID: Team ID the stats are requested for (row 0).
        awayTeamID: Team ID written into row 1.
        awayTeamNickname: Nickname written into row 1.
        seasonYear: Season string sent to the endpoint and stored in SEASON.
        gameDate: Value stored in GAME_DATE.
        seasonType: Sent as season_type_all_star. Defaults to
            "Regular Season", the value that used to be hard-coded.

    Returns:
        Two-row DataFrame (home, away) with the endpoint's columns plus
        OFFENSIVE_EFFICIENCY, SCORING_MARGIN, SEASON, GAME_DATE and GAME_ID.
    """

    @retry
    def getGameStats(teamID,gameID,seasonYear,seasonType):
        gameStats = cumestatsteam.CumeStatsTeam(game_ids=gameID,league_id ="00",
                                               season=seasonYear,season_type_all_star=seasonType,
                                               team_id = teamID).get_normalized_json()

        return pd.DataFrame(json.loads(gameStats)['TotalTeamStats'])

    def offensiveEfficiency(row):
        # (FG + AST) / (FGA - OFF_REB + AST + TOTAL_TURNOVERS)
        produced = data.at[row,'FG'] + data.at[row,'AST']
        used = data.at[row,'FGA'] - data.at[row,'OFF_REB'] + data.at[row,'AST'] + data.at[row,'TOTAL_TURNOVERS']
        return produced/used

    data = getGameStats(homeTeamID,gameID,seasonYear,seasonType)

    # The opponents' row carries no team identity, so fill it in.
    data.at[1,'NICKNAME'] = awayTeamNickname
    data.at[1,'TEAM_ID'] = awayTeamID

    # Each side's margin is its own points minus the other side's points.
    data.at[1,'OFFENSIVE_EFFICIENCY'] = offensiveEfficiency(1)
    data.at[1,'SCORING_MARGIN'] = data.at[1,'PTS'] - data.at[0,'PTS']

    data.at[0,'OFFENSIVE_EFFICIENCY'] = offensiveEfficiency(0)
    data.at[0,'SCORING_MARGIN'] = data.at[0,'PTS'] - data.at[1,'PTS']

    data['SEASON'] = seasonYear
    data['GAME_DATE'] = gameDate
    data['GAME_ID'] = gameID

    return data

In [7]:
def getGameLogs(gameLogs,scheduleFrame):
    """
    Download per-game team stats for the schedule and add running features.

    One getSingleGameMetrics call is made per schedule row (two result rows per
    game), with a one-second pause before each call. Rows already present in
    `gameLogs` are kept: the download resumes at schedule row
    ``len(gameLogs) // 2`` and the earlier rows are part of the result and of
    every checkpoint. The first failing game is reported and ends the download.
    The features are then computed on whatever was collected.

    Args:
        gameLogs: Previously collected rows for the first schedule rows, or an
            empty DataFrame to start from the beginning.
        scheduleFrame: Frame with GAME_ID, HOME_TEAM_ID, AWAY_TEAM_ID,
            AWAY_TEAM_NICKNAME, SEASON and GAME_DATE on a 0..n-1 index.

    Returns:
        DataFrame of game rows with the added columns HOME_FLAG, AWAY_FLAG,
        HOME_WIN_PCTG, AWAY_WIN_PCTG, TOTAL_WIN_PCTG, ROLLING_SCORING_MARGIN,
        ROLLING_OE and NUM_REST_DAYS (index reset).

    Raises:
        ValueError: If there is nothing to work on (no earlier rows and no
            game could be fetched).

    Side effects:
        Writes ``gameLogs_checkpoint_<i>.pkl`` every 100 games and prints progress.
    """

    # Functions to prepare additional columns after gameLogs table loads
    def getHomeAwayFlag(gameDF):
        # Adds the flags in place: 1 when the row records a home (road) win or loss.
        gameDF['HOME_FLAG'] = np.where((gameDF['W_HOME']==1) | (gameDF['L_HOME']==1),1,0)
        gameDF['AWAY_FLAG'] = np.where((gameDF['W_ROAD']==1) | (gameDF['L_ROAD']==1),1,0)

    def getTotalWinPctg(gameDF):
        # Games played so far is the date rank within (team, season).
        gameDF['TOTAL_GAMES_PLAYED'] = gameDF.groupby(['TEAM_ID','SEASON'])['GAME_DATE'].rank(ascending=True)
        gameDF['TOTAL_WINS'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['W'].cumsum()
        gameDF['TOTAL_WIN_PCTG'] = gameDF['TOTAL_WINS']/gameDF['TOTAL_GAMES_PLAYED']
        return gameDF.drop(['TOTAL_GAMES_PLAYED','TOTAL_WINS'],axis=1)

    def getHomeWinPctg(gameDF):
        gameDF['HOME_GAMES_PLAYED'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['HOME_FLAG'].cumsum()
        gameDF['HOME_WINS'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['W_HOME'].cumsum()
        gameDF['HOME_WIN_PCTG'] = gameDF['HOME_WINS']/gameDF['HOME_GAMES_PLAYED']
        return gameDF.drop(['HOME_GAMES_PLAYED','HOME_WINS'],axis=1)

    def getAwayWinPctg(gameDF):
        gameDF['AWAY_GAMES_PLAYED'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['AWAY_FLAG'].cumsum()
        gameDF['AWAY_WINS'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['W_ROAD'].cumsum()
        gameDF['AWAY_WIN_PCTG'] = gameDF['AWAY_WINS']/gameDF['AWAY_GAMES_PLAYED']
        return gameDF.drop(['AWAY_GAMES_PLAYED','AWAY_WINS'],axis=1)

    def getRollingOE(gameDF):
        # Mean over the last 3 games (fewer at the start), added in place.
        gameDF['ROLLING_OE'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['OFFENSIVE_EFFICIENCY'].transform(lambda x: x.rolling(3, 1).mean())

    def getRollingScoringMargin(gameDF):
        gameDF['ROLLING_SCORING_MARGIN'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['SCORING_MARGIN'].transform(lambda x: x.rolling(3, 1).mean())

    def getRestDays(gameDF):
        # Days since the team's previous game in the same season.
        gameDF['LAST_GAME_DATE'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['GAME_DATE'].shift(1)
        gameDF['NUM_REST_DAYS'] = (gameDF['GAME_DATE'] - gameDF['LAST_GAME_DATE'])/np.timedelta64(1,'D')
        return gameDF.drop('LAST_GAME_DATE',axis=1)

    start = time.perf_counter_ns()

    # Can use a previously completed gameLog dataset: it has two rows per game,
    # so half its length is the first schedule row still to be fetched.
    previousLogs = [gameLogs] if len(gameLogs) > 0 else []
    i = len(gameLogs) // 2

    gameLogs_lst = [None] * len(scheduleFrame)

    def collectedLogs():
        # Earlier rows first, then every game fetched in this run.
        return previousLogs + [df for df in gameLogs_lst if isinstance(df, pd.DataFrame)]

    while i<len(scheduleFrame):
        time.sleep(1)

        try:
            gameLogs_lst[i] = getSingleGameMetrics(
                scheduleFrame.at[i,'GAME_ID'],
                scheduleFrame.at[i,'HOME_TEAM_ID'],
                scheduleFrame.at[i,'AWAY_TEAM_ID'],
                scheduleFrame.at[i,'AWAY_TEAM_NICKNAME'],
                scheduleFrame.at[i,'SEASON'],
                scheduleFrame.at[i,'GAME_DATE']
            )
        except Exception as e:
            # Stop downloading but still process what has been collected.
            print(f"Error at i={i}: {e}")
            break

        if i%5==0:
            print(f"Processing game {i} of {len(scheduleFrame)}")
        if i%100 == 0:
            mins = ((time.perf_counter_ns()-start)/1e9)/60
            print(i,mins)
            if i != 0:
                # The checkpoint holds all rows up to game i, so it can be
                # passed back in as `gameLogs` to resume at game i + 1.
                pd.concat(collectedLogs()).to_pickle(f'gameLogs_checkpoint_{i}.pkl')

        i+=1

    frames = collectedLogs()
    if not frames:
        raise ValueError("No game logs available: no earlier rows were passed in and no game could be fetched")

    gameLogs = pd.concat(frames)
    gameLogs = gameLogs.reset_index(drop=True)

    # Get Table Level Aggregation Columns
    getHomeAwayFlag(gameLogs)
    gameLogs = getHomeWinPctg(gameLogs)
    gameLogs = getAwayWinPctg(gameLogs)
    gameLogs = getTotalWinPctg(gameLogs)
    getRollingScoringMargin(gameLogs)
    getRollingOE(gameLogs)
    gameLogs = getRestDays(gameLogs)

    return gameLogs.reset_index(drop=True)

In [9]:
# Build the schedule frame and report how long the download took, in minutes.

seasons = [2020] # season start years; add 2021, 2022 to load more seasons
seasonType = 'Regular Season'

start = time.perf_counter_ns()
scheduleFrame = getSeasonScheduleFrame(seasons,seasonType)
end = time.perf_counter_ns()

# perf_counter_ns returns nanoseconds: convert to seconds, then to minutes.
secs = (end-start)/1e9
mins = secs/60
print(mins)

1.54762989445


In [13]:
# Probe: can CumeStatsTeam aggregate several games in one request?
# Get a batch of game IDs from the schedule frame.
game_id_batch = scheduleFrame['GAME_ID'].iloc[0:10].tolist()

# The IDs are sent as one pipe-separated string
# (a comma-separated string was the other candidate format).
game_ids_str = '|'.join(game_id_batch)

# Query with the season recorded in the schedule itself, so the season always
# matches the game IDs instead of relying on a hard-coded season string.
result = cumestatsteam.CumeStatsTeam(
    game_ids=game_ids_str,
    team_id=scheduleFrame.at[0, 'HOME_TEAM_ID'],
    season=scheduleFrame.at[0, 'SEASON'],
    season_type_all_star='Regular Season'
)

# Row 0 holds the requested team's totals over the batch, row 1 the opponents'.
print(json.loads(result.get_normalized_json())['TotalTeamStats'])

[{'CITY': 'Atlanta', 'NICKNAME': 'Hawks', 'TEAM_ID': 1610612737, 'W': 7, 'L': 3, 'W_HOME': 7, 'L_HOME': 0, 'W_ROAD': 0, 'L_ROAD': 3, 'TEAM_TURNOVERS': 4, 'TEAM_REBOUNDS': 63, 'GP': 10, 'GS': 50, 'ACTUAL_MINUTES': 2400, 'ACTUAL_SECONDS': 0, 'FG': 425, 'FGA': 865, 'FG_PCT': 0.491, 'FG3': 119, 'FG3A': 307, 'FG3_PCT': 0.388, 'FT': 195, 'FTA': 241, 'FT_PCT': 0.809, 'OFF_REB': 102, 'DEF_REB': 375, 'TOT_REB': 540, 'AST': 245, 'PF': 188, 'STL': 66, 'TOTAL_TURNOVERS': 128, 'BLK': 54, 'PTS': 1164, 'AVG_REB': 54.0, 'AVG_PTS': 116.4, 'DQ': 0}, {'CITY': 'OPPONENTS', 'NICKNAME': ' ', 'TEAM_ID': 0, 'W': 3, 'L': 7, 'W_HOME': 3, 'L_HOME': 0, 'W_ROAD': 0, 'L_ROAD': 7, 'TEAM_TURNOVERS': 4, 'TEAM_REBOUNDS': 64, 'GP': 10, 'GS': 50, 'ACTUAL_MINUTES': 2400, 'ACTUAL_SECONDS': 0, 'FG': 428, 'FGA': 923, 'FG_PCT': 0.464, 'FG3': 121, 'FG3A': 335, 'FG3_PCT': 0.361, 'FT': 151, 'FTA': 192, 'FT_PCT': 0.786, 'OFF_REB': 103, 'DEF_REB': 315, 'TOT_REB': 482, 'AST': 263, 'PF': 190, 'STL': 87, 'TOTAL_TURNOVERS': 109, 'BLK'

In [20]:
# Example output of getSingleGameMetrics for the game in schedule row 104
getSingleGameMetrics(
    gameID=scheduleFrame.at[104, 'GAME_ID'],
    homeTeamID=scheduleFrame.at[104, 'HOME_TEAM_ID'],
    awayTeamID=scheduleFrame.at[104, 'AWAY_TEAM_ID'],
    awayTeamNickname=scheduleFrame.at[104, 'AWAY_TEAM_NICKNAME'],
    seasonYear=scheduleFrame.at[104, 'SEASON'],
    gameDate=scheduleFrame.at[104, 'GAME_DATE'],
)

Unnamed: 0,CITY,NICKNAME,TEAM_ID,W,L,W_HOME,L_HOME,W_ROAD,L_ROAD,TEAM_TURNOVERS,...,BLK,PTS,AVG_REB,AVG_PTS,DQ,OFFENSIVE_EFFICIENCY,SCORING_MARGIN,SEASON,GAME_DATE,GAME_ID
0,Cleveland,Cavaliers,1610612739,1,0,1,0,0,0,2,...,3,117,49.0,117.0,0,0.588785,7.0,2020-21,2021-03-17,22000620
1,OPPONENTS,Celtics,1610612738,0,1,0,0,0,1,0,...,9,110,48.0,110.0,0,0.567308,-7.0,2020-21,2021-03-17,22000620


In [30]:
# Create the gameLogs DataFrame.
# An empty frame is passed in, so getGameLogs starts at schedule row 0
# (its start index is half the row count of the frame it receives).
gameLogs = pd.DataFrame()
gameLogs = getGameLogs(gameLogs,scheduleFrame)
# gameLogs.to_csv('gameLogs.csv')

Processing game 0 of 1080
0 0.01909224375
Processing game 5 of 1080
Processing game 10 of 1080
Processing game 15 of 1080
Processing game 20 of 1080
Processing game 25 of 1080
Processing game 30 of 1080
Processing game 35 of 1080
Processing game 40 of 1080
Processing game 45 of 1080
Processing game 50 of 1080
Processing game 55 of 1080
Processing game 60 of 1080
Processing game 65 of 1080
Processing game 70 of 1080
Processing game 75 of 1080
Processing game 80 of 1080
Processing game 85 of 1080
Attempt 1 failed: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)
Processing game 90 of 1080
Processing game 95 of 1080
Processing game 100 of 1080
100 7.865775867366667
Processing game 105 of 1080
Processing game 110 of 1080
Processing game 115 of 1080
Processing game 120 of 1080
Processing game 125 of 1080
Processing game 130 of 1080
Processing game 135 of 1080
Processing game 140 of 1080
Processing game 145 of 1080
Processing game 150 of 1080
Processing 

In [9]:
# Example output of game logs: TEAM_ID 1610612737 in season 2022-23, oldest game first
gameLogs.loc[
    gameLogs['TEAM_ID'].eq(1610612737) & gameLogs['SEASON'].eq('2022-23')
].sort_values(by='GAME_DATE')

Unnamed: 0,CITY,NICKNAME,TEAM_ID,W,L,W_HOME,L_HOME,W_ROAD,L_ROAD,TEAM_TURNOVERS,...,GAME_DATE,GAME_ID,HOME_FLAG,AWAY_FLAG,HOME_WIN_PCTG,AWAY_WIN_PCTG,TOTAL_WIN_PCTG,ROLLING_SCORING_MARGIN,ROLLING_OE,NUM_REST_DAYS
4696,Atlanta,Hawks,1610612737,1,0,1,0,0,0,0,...,2022-10-19,22200005,1,0,1.0,,1.0,10.0,0.6,
4694,Atlanta,Hawks,1610612737,1,0,1,0,0,0,0,...,2022-10-21,22200020,1,0,1.0,,1.0,10.0,0.568293,2.0
4692,Atlanta,Hawks,1610612737,0,1,0,1,0,0,0,...,2022-10-23,22200038,1,0,0.666667,,0.666667,1.0,0.560148,2.0
4691,OPPONENTS,Hawks,1610612737,1,0,0,0,1,0,0,...,2022-10-26,22200057,0,1,0.666667,1.0,0.75,-0.666667,0.554351,3.0
4689,OPPONENTS,Hawks,1610612737,1,0,0,0,1,0,1,...,2022-10-28,22200070,0,1,0.666667,1.0,0.8,4.0,0.604823,2.0
4687,OPPONENTS,Hawks,1610612737,0,1,0,0,0,1,1,...,2022-10-29,22200085,0,1,0.666667,0.666667,0.666667,7.0,0.610671,1.0
4685,OPPONENTS,Hawks,1610612737,0,1,0,0,0,1,0,...,2022-10-31,22200099,0,1,0.666667,0.5,0.571429,-4.666667,0.59804,2.0
4683,OPPONENTS,Hawks,1610612737,1,0,0,0,1,0,2,...,2022-11-02,22200110,0,1,0.666667,0.6,0.625,-8.333333,0.557683,2.0
4680,Atlanta,Hawks,1610612737,1,0,1,0,0,0,0,...,2022-11-05,22200134,1,0,0.75,0.6,0.666667,-4.666667,0.559942,3.0
4678,Atlanta,Hawks,1610612737,1,0,1,0,0,0,0,...,2022-11-07,22200149,1,0,0.8,0.6,0.7,11.666667,0.566895,2.0


In [10]:
def getGameLogFeatureSet(gameDF):
    """
    Turn per-team game logs into one model row per game.

    Every feature is the team's value as of its previous game in the same
    season (shifted by one game), so a row only contains information that was
    known before the game. Home rows (CITY != 'OPPONENTS') and away rows
    (CITY == 'OPPONENTS') are merged on GAME_ID and SEASON.

    Args:
        gameDF: Game logs as returned by getGameLogs(). Needs CITY, TEAM_ID,
            GAME_ID, SEASON, GAME_DATE, W, NUM_REST_DAYS, OFFENSIVE_EFFICIENCY,
            HOME_WIN_PCTG, AWAY_WIN_PCTG, TOTAL_WIN_PCTG,
            ROLLING_SCORING_MARGIN and ROLLING_OE. The frame is not modified.

    Returns:
        DataFrame with HOME_* features, HOME_W (the target), SEASON and the
        AWAY_* features. GAME_ID and both team IDs are dropped.
    """
    keyCols = ['GAME_ID','SEASON']
    featureCols = ['LAST_GAME_OE','LAST_GAME_HOME_WIN_PCTG','NUM_REST_DAYS','LAST_GAME_AWAY_WIN_PCTG',
                   'LAST_GAME_TOTAL_WIN_PCTG','LAST_GAME_ROLLING_SCORING_MARGIN','LAST_GAME_ROLLING_OE']

    def shiftGameLogRecords(gameDF):
        # New column -> source column whose previous-game value it holds.
        shiftedCols = {
            'LAST_GAME_OE': 'OFFENSIVE_EFFICIENCY',
            'LAST_GAME_HOME_WIN_PCTG': 'HOME_WIN_PCTG',
            'LAST_GAME_AWAY_WIN_PCTG': 'AWAY_WIN_PCTG',
            'LAST_GAME_TOTAL_WIN_PCTG': 'TOTAL_WIN_PCTG',
            'LAST_GAME_ROLLING_SCORING_MARGIN': 'ROLLING_SCORING_MARGIN',
            'LAST_GAME_ROLLING_OE': 'ROLLING_OE',
        }
        # Sort once. The shifted values keep the original row index, so the
        # assignment below aligns them back onto the unsorted frame.
        byTeamSeason = gameDF.sort_values('GAME_DATE').groupby(['TEAM_ID','SEASON'])
        for target, source in shiftedCols.items():
            gameDF[target] = byTeamSeason[source].shift(1)

    def prefixColumns(frame, prefix):
        # The merge keys keep their names; everything else gets the prefix.
        return frame.rename(columns={col: prefix + col for col in frame.columns if col not in keyCols})

    def getHomeTeamFrame(gameDF):
        homeRows = gameDF.loc[gameDF['CITY'] != 'OPPONENTS', featureCols + ['W','TEAM_ID'] + keyCols]
        return prefixColumns(homeRows, 'HOME_')

    def getAwayTeamFrame(gameDF):
        # W is only taken from the home side: HOME_W is the prediction target.
        awayRows = gameDF.loc[gameDF['CITY'] == 'OPPONENTS', featureCols + ['TEAM_ID'] + keyCols]
        return prefixColumns(awayRows, 'AWAY_')

    # Work on a copy of the frame that was passed in, so the caller's frame
    # does not silently gain the LAST_GAME_* columns.
    features = gameDF.copy()
    shiftGameLogRecords(features)
    awayTeamFrame = getAwayTeamFrame(features)
    homeTeamFrame = getHomeTeamFrame(features)

    return pd.merge(homeTeamFrame, awayTeamFrame, how="inner", on=[ "GAME_ID","SEASON"]).drop(['GAME_ID','AWAY_TEAM_ID','HOME_TEAM_ID'],axis=1)

In [11]:
# One row per game: home-team and away-team features merged on GAME_ID and SEASON.
modelData = getGameLogFeatureSet(gameLogs)

In [12]:
# Final data set, before the train/test/validation split
modelData

Unnamed: 0,HOME_LAST_GAME_OE,HOME_LAST_GAME_HOME_WIN_PCTG,HOME_NUM_REST_DAYS,HOME_LAST_GAME_AWAY_WIN_PCTG,HOME_LAST_GAME_TOTAL_WIN_PCTG,HOME_LAST_GAME_ROLLING_SCORING_MARGIN,HOME_LAST_GAME_ROLLING_OE,HOME_W,SEASON,AWAY_LAST_GAME_OE,AWAY_LAST_GAME_HOME_WIN_PCTG,AWAY_NUM_REST_DAYS,AWAY_LAST_GAME_AWAY_WIN_PCTG,AWAY_LAST_GAME_TOTAL_WIN_PCTG,AWAY_LAST_GAME_ROLLING_SCORING_MARGIN,AWAY_LAST_GAME_ROLLING_OE
0,0.555556,0.685714,3.0,0.444444,0.563380,9.333333,0.571405,1,2020-21,0.612903,0.250000,2.0,0.228571,0.239437,-2.000000,0.605315
1,0.573913,0.676471,1.0,0.444444,0.557143,-0.666667,0.589472,1,2020-21,0.500000,0.305556,2.0,0.303030,0.304348,-18.000000,0.512228
2,0.584746,0.666667,2.0,0.444444,0.550725,8.666667,0.640592,1,2020-21,0.612403,0.500000,2.0,0.428571,0.463768,0.666667,0.567718
3,0.609756,0.656250,4.0,0.444444,0.544118,11.333333,0.653327,1,2020-21,0.539568,0.500000,2.0,0.441176,0.470588,0.666667,0.570866
4,0.512397,0.354839,1.0,0.558824,0.461538,11.000000,0.596498,1,2020-21,0.727273,0.656250,1.0,0.457143,0.552239,17.333333,0.633081
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2900,0.578512,0.333333,4.0,0.250000,0.285714,-9.000000,0.565270,1,2022-23,0.537037,0.666667,2.0,0.300000,0.500000,0.000000,0.622617
2901,0.540146,0.600000,2.0,0.500000,0.562500,3.333333,0.556248,1,2022-23,0.511278,0.142857,2.0,0.300000,0.235294,-5.000000,0.555121
2902,0.490909,0.250000,2.0,0.333333,0.300000,-17.666667,0.488297,0,2022-23,0.568966,0.400000,1.0,0.400000,0.400000,-12.666667,0.565603
2903,0.531250,1.000000,2.0,0.500000,0.666667,-0.333333,0.547383,1,2022-23,0.533898,1.000000,3.0,0.000000,0.333333,-9.666667,0.549413


In [13]:
# Save the model data set; with to_csv's defaults the row index is written as the first column.
modelData.to_csv('nbaHomeWinLossModelDataset.csv')