In [25]:
#.............Get all stars..........#

import pandas as pd
from nba_api.stats.endpoints import leaguegamefinder, boxscoretraditionalv2
from nba_api.stats.library.parameters import SeasonTypeAllStar

def get_all_stars(season):
    """
    Return the players listed in the All-Star Game box score for a season.

    All-Star-type games for ``season`` are looked up with LeagueGameFinder,
    the first row whose GAME_ID starts with '003' is taken as the All-Star
    Game, and that game's traditional box score supplies the roster.

    Parameters
    ----------
    season : str
        Season label as passed to the API, e.g. '2022-23'.

    Returns
    -------
    pandas.DataFrame
        Columns PLAYER_ID and PLAYER_NAME, one row per box-score player row.
        When no matching game exists an *empty* frame with the same columns
        is returned, so the result can always be fed to ``pd.concat``.
    """
    roster_columns = ['PLAYER_ID', 'PLAYER_NAME']
    print(f"Finding All-Star Game for {season}...")

    # 1. Find the All-Star Game ID
    gamefinder = leaguegamefinder.LeagueGameFinder(
        season_nullable=season,
        season_type_nullable=SeasonTypeAllStar.all_star
    )
    games = gamefinder.get_data_frames()[0]

    if games.empty:
        print("No All-Star game found (yet).")
        return pd.DataFrame(columns=roster_columns)

    # Keep only rows whose GAME_ID carries the '003' prefix, then take the
    # first one in the order the API returned them.
    # NOTE(review): if a season has several '003' games (the hard-coded IDs in
    # get2024_all_stars suggest 2024-25 does), "first row" may not be the game
    # you want -- confirm before reusing this for such seasons.
    candidates = games[games['GAME_ID'].astype(str).str.startswith('003')]
    if candidates.empty:
        # Previously this case crashed with IndexError on .iloc[0].
        print("No All-Star game found (yet).")
        return pd.DataFrame(columns=roster_columns)

    all_star_game = candidates.iloc[0]
    game_id = all_star_game['GAME_ID']
    print(f"Found Game ID: {game_id} ({all_star_game['MATCHUP']})")

    # 2. Get the Box Score (The Roster)
    boxscore = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
    players = boxscore.player_stats.get_data_frame()

    # 3. Return relevant info
    return players[roster_columns]

def get2024_all_stars():
    """
    Build a PLAYER_ID / PLAYER_NAME roster from two hard-coded box scores.

    Every player row of game "0032400011" is kept. From game "0032400021"
    only rows whose TEAM_ABBREVIATION equals "SHQ" are kept. The two
    selections are stacked and re-indexed from zero.

    Returns
    -------
    pandas.DataFrame
        Columns PLAYER_ID and PLAYER_NAME.
    """
    wanted = ['PLAYER_ID', 'PLAYER_NAME']

    # First game: whole box score.
    first_game = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id="0032400011")
    first_players = first_game.player_stats.get_data_frame()
    first_roster = first_players[wanted]

    # Second game: only the "SHQ" side.
    second_game = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id="0032400021")
    second_players = second_game.player_stats.get_data_frame()
    is_shq = second_players['TEAM_ABBREVIATION'] == "SHQ"
    shq_roster = second_players.loc[is_shq, wanted]

    return pd.concat([first_roster, shq_roster], axis=0, ignore_index=True)

# Usage
df_stars22 = get_all_stars(season='2022-23')
df_stars23 = get_all_stars(season='2023-24')
df_stars24 = get2024_all_stars()

# Stack the three rosters with a fresh index, then keep one row per PLAYER_ID
# (drop_duplicates keeps the first occurrence by default).
allstars = (
    pd.concat([df_stars22, df_stars23, df_stars24], axis=0, ignore_index=True)
    .drop_duplicates(subset=["PLAYER_ID"])
)


Finding All-Star Game for 2022-23...
Found Game ID: 0032200001 (LBN vs. GNS)
Finding All-Star Game for 2023-24...
Found Game ID: 0032300001 (EST vs. WST)


In [27]:
# Save the de-duplicated roster without the index column; the game-log cell
# reads this file back as "allstars.csv".
allstars.to_csv("allstars.csv", index=False)

In [2]:
#GET ALL STARS GAMELOG

import pandas as pd
import time
from nba_api.stats.endpoints import PlayerGameLog


SEASONS = ["2023-24", "2024-25", "2025-26"]
SLEEP_TIME = 0.6  # seconds to pause after every request, successful or not

# Load Player IDs
players_df = pd.read_csv("allstars.csv")
player_ids = players_df["PLAYER_ID"].unique()

all_games = []

# Fetch one regular-season game log per (player, season) pair.
for player_id in player_ids:
    for season in SEASONS:
        try:
            gamelog = PlayerGameLog(
                player_id=player_id,
                season=season,
                season_type_all_star='Regular Season'
            )

            df = gamelog.get_data_frames()[0]
            # .assign builds a new frame, so the extra columns are not written
            # onto a slice of the API frame (avoids SettingWithCopyWarning).
            df = df[["Game_ID", "MATCHUP"]].assign(
                PLAYER_ID=player_id,
                SEASON=season,
            )

            all_games.append(df)

        except Exception as e:
            # Best effort: report the failure and move on to the next pair.
            print(f"Failed for PlayerID {player_id}, Season {season}: {e}")

        finally:
            # Throttle after failures too; otherwise a run of errors would
            # fire requests back-to-back with no pause.
            time.sleep(SLEEP_TIME)


if not all_games:
    # pd.concat([]) would raise an opaque "No objects to concatenate".
    raise ValueError(
        "No game logs were fetched; allstars_gamelogs.csv was not written."
    )

final_df = pd.concat(all_games, ignore_index=True)
final_df.to_csv("allstars_gamelogs.csv", index=False)


In [33]:
#Added some rivalries later
rivalries = pd.read_csv("rivalries.csv")

# The two rival columns come out of the CSV as strings; ast.literal_eval
# parses each cell back into the Python literal it spells (lists, per the
# original note on this cell).
import ast

rivalries = rivalries.assign(
    rivals_id=rivalries["rivals_id"].apply(ast.literal_eval),
    rivals_abbreviation=rivalries["rivals_abbreviation"].apply(ast.literal_eval),
)

In [42]:
#.............Get schedule info..........#

#Game times and national-TV games were originally obtained by scraping websites, but all of this information is also available from the scheduleleaguev2 endpoint.
#For 2025-26, the code below gets the day and time from that endpoint (national-TV games were already scraped).

from nba_api.stats.endpoints import scheduleleaguev2
schedule25 = pd.read_csv("25_schedule_info.csv")

# Fetch the schedule for the 2025-26 season
schedule = scheduleleaguev2.ScheduleLeagueV2(season='2025-26')
df_schedule = schedule.get_data_frames()[0]

# Bring both join keys to string form. The stored IDs lack the leading "00"
# (the original code prepended it); zfill(10) gives the same result for those
# 8-character IDs and is a no-op on IDs that are already 10 characters, so
# running the cell again cannot double-prefix them.
schedule25["GAME_ID"] = schedule25["GAME_ID"].astype(str).str.zfill(10)
df_schedule["gameId"] = df_schedule["gameId"].astype(str)

# --- Ensure datetime parsing (unparseable values become NaT) ---
# NOTE(review): the column is named "...Est" but is parsed with utc=True; the
# hour and weekday below are taken from the timestamp exactly as parsed --
# confirm that this matches the intended time zone.
df_schedule["gameDateTimeEst"] = pd.to_datetime(
    df_schedule["gameDateTimeEst"],
    errors="coerce",
    utc=True
)

# --- Merge the tip-off timestamp into schedule25 ---
# Only the timestamp is pulled in. The broadcaster-scope column used to be
# merged, copied and then dropped again without ever reaching the output, so
# it is no longer selected (national TV was scraped separately).
schedule25 = schedule25.merge(
    df_schedule[["gameId", "gameDateTimeEst"]],
    left_on="GAME_ID",
    right_on="gameId",
    how="left"
)

# Surface games that found no partner instead of silently writing NaN.
unmatched = int(schedule25["gameDateTimeEst"].isna().sum())
if unmatched:
    print(f"Warning: {unmatched} game(s) have no usable date/time from the schedule endpoint.")

# --- Extract TIME: two-digit hour only ("%H"), not HH:MM ---
schedule25["TIME"] = schedule25["gameDateTimeEst"].dt.strftime("%H")

# --- Extract DAY as an upper-case weekday name ---
schedule25["DAY"] = schedule25["gameDateTimeEst"].dt.day_name().str.upper()

# --- Cleanup: drop the helper columns brought in by the merge ---
schedule25 = schedule25.drop(columns=["gameId", "gameDateTimeEst"])

schedule25.to_csv("25_schedule_info.csv", index=False)

In [54]:
#Combining the schedule info to make processing easier
schedule23 = pd.read_csv("23_schedule_info.csv")
schedule24 = pd.read_csv("24_schedule_info.csv")

# Split "TEAMS" once on a whitespace-delimited run of '@', 'v', 'V', 's', 'S'
# characters; the first two pieces become TEAM1 / TEAM2 and the source column
# is removed.
team_parts = schedule23["TEAMS"].str.split(r"\s+[@vVsS]+\s+", regex=True)
schedule23 = schedule23.assign(
    TEAM1=team_parts.str[0],
    TEAM2=team_parts.str[1],
).drop(columns=["TEAMS"])

# Prefix the 2024 game IDs with "00".
schedule24 = schedule24.assign(GAME_ID="00" + schedule24["GAME_ID"].astype(str))

# Stack the seasons (2025 first) under a fresh index and save the result.
schedule = pd.concat(
    [schedule25, schedule24, schedule23],
    axis=0,
    ignore_index=True,
)
schedule.to_csv("schedule_info.csv", index=False)

In [58]:
#.............Get gamelogs..........#

#THIS IS THE MOST IMPORTANT PART BECAUSE IT WILL BE USED TO GET EACH TEAM'S FORM OVER THE LAST 15 GAMES COMING INTO THE GAME BEING PREDICTED

from nba_api.stats.endpoints import teamgamelogs
import time

def _fetch_team_gamelogs(season, measure_type=None, season_segment=None, pause=1):
    """
    Download one regular-season TeamGameLogs table and then pause.

    Parameters
    ----------
    season : str
        Season label, e.g. "2025-26".
    measure_type : str or None
        Passed as ``measure_type_player_game_logs_nullable``; when None the
        argument is omitted and the endpoint's default measure type is used.
    season_segment : str or None
        Passed as ``season_segment_nullable``; omitted when None.
    pause : float
        Seconds to sleep after the request.

    Returns
    -------
    pandas.DataFrame
        The first data frame returned by the endpoint.
    """
    params = {
        "league_id_nullable": "00",
        "season_nullable": season,
        "season_type_nullable": "Regular Season",
    }
    if season_segment is not None:
        params["season_segment_nullable"] = season_segment
    if measure_type is not None:
        params["measure_type_player_game_logs_nullable"] = measure_type

    frame = teamgamelogs.TeamGameLogs(**params).get_data_frames()[0]
    time.sleep(pause)
    return frame


def _fetch_season_gamelogs(season, season_segment=None):
    """Return the (traditional, advanced, scoring, misc) tables for a season, fetched in that order."""
    return tuple(
        _fetch_team_gamelogs(season, measure_type=measure, season_segment=season_segment)
        for measure in (None, "Advanced", "Scoring", "Misc")
    )


# Seasons are requested newest first, matching the original request order.
#2025
Trad25, Adv25, Scoring25, Misc25 = _fetch_season_gamelogs("2025-26")

#2024
Trad24, Adv24, Scoring24, Misc24 = _fetch_season_gamelogs("2024-25")

#2023
Trad23, Adv23, Scoring23, Misc23 = _fetch_season_gamelogs("2023-24")

#2022
# NOTE(review): only the "Post All-Star" segment of 2022-23 is requested --
# presumably just enough history to compute form for early 2023-24 games; confirm.
Trad22, Adv22, Scoring22, Misc22 = _fetch_season_gamelogs(
    "2022-23", season_segment="Post All-Star"
)


In [70]:
#Building boxscores dataframes to be used later for training set building

# Columns identifying one team's row for one game; also the merge key.
ID_COLS = ['SEASON_YEAR', 'TEAM_ID', 'TEAM_ABBREVIATION', 'GAME_ID']

# Stat columns taken from each of the four measure-type tables.
TRAD_COLS = [
    'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M',
    'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST',
    'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS',
]
ADV_COLS = [
    'E_OFF_RATING', 'OFF_RATING',
    'E_DEF_RATING', 'DEF_RATING', 'E_NET_RATING', 'NET_RATING', 'AST_PCT',
    'AST_TO', 'AST_RATIO', 'OREB_PCT', 'DREB_PCT', 'REB_PCT', 'TM_TOV_PCT',
    'EFG_PCT', 'TS_PCT', 'E_PACE', 'PACE', 'PACE_PER40', 'POSS', 'PIE',
]
SCORING_COLS = [
    'PCT_FGA_2PT', 'PCT_FGA_3PT',
    'PCT_PTS_2PT', 'PCT_PTS_2PT_MR', 'PCT_PTS_3PT', 'PCT_PTS_FB',
    'PCT_PTS_FT', 'PCT_PTS_OFF_TOV', 'PCT_PTS_PAINT', 'PCT_AST_2PM',
    'PCT_UAST_2PM', 'PCT_AST_3PM', 'PCT_UAST_3PM', 'PCT_AST_FGM',
    'PCT_UAST_FGM',
]
MISC_COLS = [
    'PTS_OFF_TOV', 'PTS_2ND_CHANCE',
    'PTS_FB', 'PTS_PAINT', 'OPP_PTS_OFF_TOV', 'OPP_PTS_2ND_CHANCE',
    'OPP_PTS_FB', 'OPP_PTS_PAINT',
]


def _build_team_logs(trad, adv, scoring, misc):
    """
    Join the four measure-type tables of one season into a single frame.

    The traditional table is the left side; the advanced, scoring and misc
    columns are left-joined onto it (in that order) on ID_COLS, so every row
    of ``trad`` is kept even if a counterpart is missing.

    Parameters
    ----------
    trad, adv, scoring, misc : pandas.DataFrame
        Frames containing ID_COLS plus, respectively, TRAD_COLS, ADV_COLS,
        SCORING_COLS and MISC_COLS.

    Returns
    -------
    pandas.DataFrame
        ID_COLS followed by the traditional, advanced, scoring and misc columns.
    """
    merged = trad[ID_COLS + TRAD_COLS]
    for table, stat_cols in ((adv, ADV_COLS), (scoring, SCORING_COLS), (misc, MISC_COLS)):
        merged = merged.merge(table[ID_COLS + stat_cols], on=ID_COLS, how="left")
    return merged


#2025
logs25 = _build_team_logs(Trad25, Adv25, Scoring25, Misc25)

#2024
logs24 = _build_team_logs(Trad24, Adv24, Scoring24, Misc24)

#2023
logs23 = _build_team_logs(Trad23, Adv23, Scoring23, Misc23)

#2022
logs22 = _build_team_logs(Trad22, Adv22, Scoring22, Misc22)

In [74]:
# Stack all four seasons (newest first) into one frame with a fresh index so
# later cells can work with a single table.
gamelogs = pd.concat(
    [logs25, logs24, logs23, logs22],
    axis=0,
    ignore_index=True,
)

In [76]:
# Write the combined team game logs to disk without the index column.
gamelogs.to_csv("gamelogs.csv", index=False)