In [15]:
import numpy as np
import pandas as pd
from datetime import datetime

from nba_api.stats.endpoints import leaguedashplayerstats


def current_season_string(today=None):
    """
    Build the NBA season label (e.g. '2025-26') for a given date.

    The season is treated as starting in October: a date in October-December
    belongs to the season that starts in that calendar year, while a date in
    January-September belongs to the season that started the previous year.

    Args:
        today: Optional ``datetime``/``date``-like object exposing ``year``
            and ``month``. Defaults to ``datetime.today()``, so existing
            zero-argument calls behave exactly as before.

    Returns:
        str: ``'<start year>-<last two digits of end year>'``, e.g.
        '2025-26', or '1999-00' for the season that starts in 1999.
    """
    if today is None:
        today = datetime.today()

    start_year = today.year if today.month >= 10 else today.year - 1
    # Two-digit end year; the :02d padding keeps 2000 -> '00' rather than '0'.
    end_year_short = (start_year + 1) % 100
    return f"{start_year}-{end_year_short:02d}"


def _score_rookies(rookies):
    """
    Add the heuristic ROY columns to a frame of renamed rookie per-game stats.

    Expects the lower-case column names produced by ``main`` (pts, reb, ast,
    stl, blk, tov, min) plus the raw ``FGA``/``FTA`` columns, and at least one
    row. Any of those columns that is missing is created as 0.0 so the model
    still runs on a reduced column set.

    Returns:
        A new DataFrame (the input is not modified) with ``usage_proxy``,
        ``impact_score``, ``ROY_score`` and ``ROY_probability`` added.
    """
    rookies = rookies.copy()

    # Guard every column the model reads *before* any of them is used.
    for col in ["FGA", "FTA", "tov", "min", "pts", "reb", "ast", "stl", "blk"]:
        if col not in rookies.columns:
            rookies[col] = 0.0

    # Rough usage proxy: possessions used per minute. Zero minutes become NaN
    # here to avoid dividing by zero; they are turned into 0.0 below.
    rookies["usage_proxy"] = (
        rookies["FGA"] + 0.44 * rookies["FTA"] + rookies["tov"]
    ) / rookies["min"].replace(0, np.nan)

    # Simple weighted box-score impact.
    rookies["impact_score"] = (
        rookies["pts"] * 0.5
        + rookies["reb"] * 0.3
        + rookies["ast"] * 0.4
        + rookies["stl"] * 0.7
        + rookies["blk"] * 0.7
    )

    # Applied to the whole frame (not just the features), so every inf/NaN
    # cell in the saved output becomes 0.0.
    rookies = rookies.replace([np.inf, -np.inf], np.nan).fillna(0.0)

    feature_cols = [
        "pts",
        "reb",
        "ast",
        "stl",
        "blk",
        "min",
        "usage_proxy",
        "impact_score",
    ]
    X = rookies[feature_cols].to_numpy(dtype=float)

    # Manual z-score standardization (population std); constant columns keep
    # a divisor of 1.0 so they contribute 0 instead of NaN.
    means = X.mean(axis=0)
    stds = X.std(axis=0, ddof=0)
    stds_safe = np.where(stds == 0, 1.0, stds)
    X_z = (X - means) / stds_safe

    # Hand-tuned weights, in feature_cols order: scoring & impact emphasized.
    w = np.array([0.25, 0.15, 0.15, 0.10, 0.10, 0.10, 0.05, 0.10])
    w = w / w.sum()

    roy_scores = X_z.dot(w)

    # Min-max to [0, 1] as a probability-like score (it is a relative rank
    # within this rookie class, not a calibrated probability).
    min_s, max_s = roy_scores.min(), roy_scores.max()
    if max_s > min_s:
        roy_prob = (roy_scores - min_s) / (max_s - min_s)
    else:
        roy_prob = np.zeros_like(roy_scores)

    rookies["ROY_score"] = roy_scores
    rookies["ROY_probability"] = roy_prob
    return rookies


def main():
    """
    Pull this season's rookie per-game stats and rank Rookie-of-the-Year candidates.

    Steps: infer the season string, fetch rookie-only per-game regular-season
    stats from nba_api, print the rookies' ages, score each rookie with a
    hand-weighted z-score heuristic, print the top 10, and write the full
    ranking to ``ROY_predictions_<season>_<YYYYMMDD>.csv`` in the current
    working directory.

    If the endpoint returns no rows, a message is printed and no file is
    written.
    """
    # ------------------------------------------------------------
    # 1. Determine season & pull rookie stats from nba_api
    # ------------------------------------------------------------
    season = current_season_string()
    print(f"Pulling ROOKIE per-game stats for season {season}...")

    stats = leaguedashplayerstats.LeagueDashPlayerStats(
        season=season,
        per_mode_detailed="PerGame",
        season_type_all_star="Regular Season",
        player_experience_nullable="Rookie",  # <-- only rookies
        timeout=60,
    )
    rookies = stats.get_data_frames()[0]

    print("\nColumns returned:")
    print(list(rookies.columns))

    # The season string switches on Oct 1, so the endpoint can legitimately
    # return zero rows before any games are played. Min/max over an empty
    # score array would raise, so stop here instead.
    if rookies.empty:
        print(f"\nNo rookie stats returned for season {season}; nothing to rank.")
        return

    # ------------------------------------------------------------
    # 2. Show rookie ages
    # ------------------------------------------------------------
    age_table = rookies[["PLAYER_NAME", "TEAM_ABBREVIATION", "AGE"]].sort_values("AGE")
    print("\nRookie ages this season:\n")
    print(age_table.to_string(index=False))

    print("\nAge distribution summary:")
    print(age_table["AGE"].describe())

    # ------------------------------------------------------------
    # 3-4. Build the ROY_score model (nba_api-only)
    # ------------------------------------------------------------
    rookies = rookies.rename(
        columns={
            "PLAYER_NAME": "player",
            "TEAM_ABBREVIATION": "team",
            "PTS": "pts",
            "REB": "reb",
            "AST": "ast",
            "STL": "stl",
            "BLK": "blk",
            "TOV": "tov",
            "GP": "gp",
            "MIN": "min",
        }
    )
    rookies = _score_rookies(rookies)

    # ------------------------------------------------------------
    # 5. Rank and save with date stamp
    # ------------------------------------------------------------
    rookies_ranked = rookies.sort_values("ROY_probability", ascending=False)

    print("\nTop 10 ROY candidates (nba_api-only heuristic):\n")
    print(
        rookies_ranked[
            ["player", "team", "AGE", "pts", "reb", "ast", "min", "ROY_probability"]
        ]
        .head(10)
        .to_string(index=False)
    )

    today = datetime.today().strftime("%Y%m%d")
    season_tag = season.replace("-", "")
    out_file = f"ROY_predictions_{season_tag}_{today}.csv"
    rookies_ranked.to_csv(out_file, index=False)

    print(f"\nSaved daily ROY prediction file: {out_file}")


# Run main() only when this code is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()


Pulling ROOKIE per-game stats for season 2025-26...

Columns returned:
['PLAYER_ID', 'PLAYER_NAME', 'NICKNAME', 'TEAM_ID', 'TEAM_ABBREVIATION', 'AGE', 'GP', 'W', 'L', 'W_PCT', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'NBA_FANTASY_PTS', 'DD2', 'TD3', 'WNBA_FANTASY_PTS', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK', 'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK', 'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK', 'REB_RANK', 'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK', 'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK', 'NBA_FANTASY_PTS_RANK', 'DD2_RANK', 'TD3_RANK', 'WNBA_FANTASY_PTS_RANK', 'TEAM_COUNT']

Rookie ages this season:

             PLAYER_NAME TEAM_ABBREVIATION  AGE
            Noa Essengue               CHI 18.0
            Cooper Flagg               DAL 18.0
        