In [25]:
#.............Get all stars..........#

import pandas as pd
from nba_api.stats.endpoints import leaguegamefinder, boxscoretraditionalv2
from nba_api.stats.library.parameters import SeasonTypeAllStar

def get_all_stars(season):
    """
    Look up the All-Star Game for a season and return its box-score roster.

    Parameters
    ----------
    season : str
        Season string forwarded to LeagueGameFinder, e.g. '2022-23'.

    Returns
    -------
    pandas.DataFrame
        Two columns, PLAYER_ID and PLAYER_NAME, one row per player listed in
        the game's traditional box score. When no matching game is found the
        frame is empty but still has those two columns, so the result can
        always be passed straight to pd.concat.
    """
    roster_columns = ['PLAYER_ID', 'PLAYER_NAME']
    print(f"Finding All-Star Game for {season}...")

    # 1. Find the All-Star Game ID
    gamefinder = leaguegamefinder.LeagueGameFinder(
        season_nullable=season,
        season_type_nullable=SeasonTypeAllStar.all_star
    )
    games = gamefinder.get_data_frames()[0]

    # Keep only games whose GAME_ID starts with '003'. The guard avoids a
    # KeyError when the endpoint returns a completely empty frame.
    if not games.empty:
        games = games[games['GAME_ID'].astype(str).str.startswith('003')]

    # Covers both "endpoint returned nothing" and "nothing matched '003'";
    # the latter used to crash with IndexError on .iloc[0].
    if games.empty:
        print("No All-Star game found (yet).")
        return pd.DataFrame(columns=roster_columns)

    # First matching row in the order the endpoint returned the games.
    # NOTE(review): no explicit sort is applied; if a season ever lists several
    # '003' games, confirm the first one is the game you want.
    all_star_game = games.iloc[0]
    game_id = all_star_game['GAME_ID']
    print(f"Found Game ID: {game_id} ({all_star_game['MATCHUP']})")

    # 2. Get the Box Score (The Roster)
    boxscore = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
    players = boxscore.player_stats.get_data_frame()

    # 3. Return relevant info
    return players[roster_columns]

def get2024_all_stars():
    """
    Build the All-Star roster from two hard-coded box scores.

    Every player from game 0032400011 is kept; from game 0032400021 only the
    rows whose TEAM_ABBREVIATION is "SHQ" are kept.

    Returns
    -------
    pandas.DataFrame
        PLAYER_ID and PLAYER_NAME columns, first game's rows followed by the
        filtered second game's rows, with a fresh 0..n-1 index.
    """
    wanted = ['PLAYER_ID', 'PLAYER_NAME']

    # Game 1: take the full roster.
    first_box = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id="0032400011")
    first_roster = first_box.player_stats.get_data_frame()[wanted]

    # Game 2: only the SHQ side is added.
    second_box = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id="0032400021")
    second_stats = second_box.player_stats.get_data_frame()
    shq_roster = second_stats.loc[second_stats['TEAM_ABBREVIATION'] == "SHQ", wanted]

    return pd.concat([first_roster, shq_roster], axis=0, ignore_index=True)

# Usage: one roster per season, then stack them and keep each player once.
df_stars22 = get_all_stars(season='2022-23')
df_stars23 = get_all_stars(season='2023-24')
df_stars24 = get2024_all_stars()

# drop_duplicates keeps the first occurrence of every PLAYER_ID.
allstars = pd.concat(
    [df_stars22, df_stars23, df_stars24],
    axis=0,
    ignore_index=True,
).drop_duplicates(subset=["PLAYER_ID"])


Finding All-Star Game for 2022-23...
Found Game ID: 0032200001 (LBN vs. GNS)
Finding All-Star Game for 2023-24...
Found Game ID: 0032300001 (EST vs. WST)


In [27]:
# Save the combined All-Star roster; index=False leaves the row index out of the file.
allstars.to_csv("allstars.csv", index=False)

In [30]:
#.............Get Rivalries..........#

# Hand-maintained rivalry table, one row per team:
#   [team_id (int), team abbreviation, rival team ids (list of str), rival abbreviations (list of str)]
# The two lists in a row are parallel: position i of the id list and position i of the
# abbreviation list refer to the same rival. Teams with no listed rival carry two empty lists.
# Note that team_id is an int while the rival ids are stored as strings.
rivalries = [
    [1610612737,"ATL",["1610612742"],["DAL"]],
    [1610612751,"BKN",["1610612752"],["NYK"]],
    [1610612762,"UTA",[],[]],
    [1610612763,"MEM",["1610612740","1610612744","1610612750"],["NOP","GSW","MIN"]],
    [1610612760,"OKC",["1610612749","1610612759"],["MIL","SAS"]],
    [1610612758,"SAC",["1610612744"],["GSW"]],
    [1610612749,"MIL",["1610612748","1610612743","1610612760"],["MIA","DEN","OKC"]],
    [1610612738,"BOS",["1610612742","1610612748","1610612747","1610612752"],["DAL","MIA","LAL","NYK"]],
    [1610612757,"POR",["1610612759"],["SAS"]],
    [1610612743,"DEN",["1610612750","1610612749","1610612747","1610612755"],["MIN","MIL","LAL","PHI"]],
    [1610612764,"WAS",[],[]],
    [1610612744,"GSW",["1610612742","1610612763","1610612758","1610612747","1610612750"],["DAL","MEM","SAC","LAL","MIN"]],
    [1610612755,"PHI",["1610612752","1610612743"],["NYK","DEN"]],
    [1610612739,"CLE",["1610612766"],["CHA"]],
    [1610612761,"TOR",[],[]],
    [1610612746,"LAC",["1610612747"],["LAL"]],
    [1610612754,"IND",[],[]],
    [1610612747,"LAL",["1610612746","1610612738","1610612743","1610612742","1610612744"],["LAC","BOS","DEN","DAL","GSW"]],
    [1610612765,"DET",["1610612745"],["HOU"]],
    [1610612741,"CHI",[],[]],
    [1610612756,"PHX",["1610612742"],["DAL"]],
    [1610612752,"NYK",["1610612751","1610612738","1610612755","1610612748"],["BKN","BOS","PHI","MIA"]],
    [1610612740,"NOP",["1610612763"],["MEM"]],
    [1610612750,"MIN",["1610612763","1610612743","1610612742","1610612744"],["MEM","DEN","DAL","GSW"]],
    [1610612766,"CHA",["1610612739"],["CLE"]],
    [1610612748,"MIA",["1610612749","1610612738","1610612752"],["MIL","BOS","NYK"]],
    [1610612742,"DAL",["1610612750","1610612738","1610612756","1610612747","1610612737","1610612744"],["MIN","BOS","PHX","LAL","ATL","GSW"]],
    [1610612753,"ORL",[],[]],
    [1610612759,"SAS",["1610612757","1610612745","1610612760"],["POR","HOU","OKC"]],
    [1610612745,"HOU",["1610612759","1610612765"],["SAS","DET"]],
]

# Rebinds `rivalries` from the list of rows to a DataFrame with named columns.
rivalries = pd.DataFrame(rivalries, columns=["team_id","team_abbreviation","rivals_id","rivals_abbreviation"])
# The list-valued columns are written to CSV as their string form, so they must be
# parsed back into lists whenever the file is re-read.
rivalries.to_csv("rivalries.csv", index=False)


In [33]:
# Rivalries were extended by hand in the CSV afterwards, so reload from disk.
import ast

# The two list columns come back from CSV as strings such as "['DAL']";
# ast.literal_eval turns each one back into a real Python list.
rivalries = pd.read_csv("rivalries.csv").assign(
    rivals_id=lambda frame: frame["rivals_id"].apply(ast.literal_eval),
    rivals_abbreviation=lambda frame: frame["rivals_abbreviation"].apply(ast.literal_eval),
)

In [42]:
#.............Get schedule info..........#

# Game times and national-TV games were originally obtained by scraping websites,
# but all of that information is also available from the scheduleleaguev2 endpoint.
# For 2025-26 this cell only fills in DAY and TIME (national TV was already scraped).

from nba_api.stats.endpoints import scheduleleaguev2
schedule25 = pd.read_csv("25_schedule_info.csv")

# Fetch the schedule for the 2025-26 season
schedule = scheduleleaguev2.ScheduleLeagueV2(season='2025-26')
df_schedule = schedule.get_data_frames()[0]

# Left-pad GAME_ID to 10 characters so it matches the endpoint's gameId strings.
# For the 8-digit ids read back from CSV as integers this equals the old "00" prefix,
# and unlike a blind prefix it leaves ids that are already padded untouched.
schedule25["GAME_ID"] = schedule25["GAME_ID"].astype(str).str.zfill(10)
df_schedule["gameId"] = df_schedule["gameId"].astype(str)

# --- Ensure datetime parsing (unparseable values become NaT) ---
df_schedule["gameDateTimeEst"] = pd.to_datetime(
    df_schedule["gameDateTimeEst"],
    errors="coerce",
    utc=True
)

# --- Merge the tip-off timestamp into schedule25 ---
# Only the timestamp is needed: the broadcaster-scope column used to be merged,
# copied and then dropped again within this same cell, so it is no longer pulled.
# The right side is de-duplicated on gameId so the left join can never multiply rows.
schedule25 = schedule25.merge(
    df_schedule[["gameId", "gameDateTimeEst"]].drop_duplicates(subset="gameId"),
    left_on="GAME_ID",
    right_on="gameId",
    how="left"
)

# --- Extract TIME: "%H" yields the two-digit hour only, not HH:MM ---
# NOTE(review): confirm hour-only matches the TIME format of the other seasons' files.
schedule25["TIME"] = schedule25["gameDateTimeEst"].dt.strftime("%H")

# --- Extract DAY as upper-case weekday name ---
schedule25["DAY"] = schedule25["gameDateTimeEst"].dt.day_name().str.upper()

# --- Cleanup ---
# Remove the merge helper columns. "nationalBroadcasters_broadcasterScope" is listed
# because the previous version of this cell always ended without a column of that
# name; errors="ignore" makes that a no-op when the CSV never had one.
schedule25 = schedule25.drop(
    columns=["gameId", "gameDateTimeEst", "nationalBroadcasters_broadcasterScope"],
    errors="ignore"
)

schedule25.to_csv("25_schedule_info.csv", index=False)

In [54]:
# Merge the per-season schedule files into one table to simplify later processing.
schedule23 = pd.read_csv("23_schedule_info.csv")
schedule24 = pd.read_csv("24_schedule_info.csv")

# Split the 2023 TEAMS text on a whitespace-delimited run of '@', 'v', 'V', 's' or 'S'
# into TEAM1 / TEAM2, then discard the combined column.
# NOTE(review): a separator written with a trailing dot ("vs.") would not match this
# pattern - confirm against the scraped data.
schedule23 = schedule23.assign(
    TEAM1=lambda frame: frame["TEAMS"].str.split(r"\s+[@vVsS]+\s+", regex=True).str[0],
    TEAM2=lambda frame: frame["TEAMS"].str.split(r"\s+[@vVsS]+\s+", regex=True).str[1],
).drop(columns=["TEAMS"])

# Prefix the 2024 game ids with "00" as text.
schedule24 = schedule24.assign(GAME_ID="00" + schedule24["GAME_ID"].astype(str))

# Stack newest season first and renumber the rows.
schedule = pd.concat([schedule25, schedule24, schedule23], axis=0, ignore_index=True)
schedule.to_csv("schedule_info.csv", index=False)

In [56]:
#.............Get gamelogs..........#

#THIS IS THE MOST IMPORTANT PART BECAUSE IT WILL BE USED TO GET EACH TEAM'S FORM OVER THE LAST 15 GAMES COMING INTO THE GAME BEING PREDICTED

