In [3]:
# -------------------- Imports --------------------
from pathlib import Path
from datetime import datetime

import pandas as pd
import numpy as np
from nba_api.stats.endpoints import leaguegamelog, LeagueDashTeamStats

# -------------------- Configuration Constants --------------------
# Season identifiers, newest first, in the "YYYY-YY" string form that is
# passed as the `season` argument to the stats endpoints.
SEASON_2026 = "2025-26"
SEASON_2025 = "2024-25"
SEASON_2024 = "2023-24"

# Exclusive lower bound applied to the MIN column when averaging team metrics:
# rows with 0 minutes (forfeits / invalid rows) are dropped.
MIN_GAMES_THRESHOLD = 0

# Project directories (everything hangs off a single absolute root)
PROJECT_ROOT = Path(r"C:\Users\kobyw\OneDrive\Documents\nba-pace-analytics").resolve()
DATA_DIR = PROJECT_ROOT.joinpath("data")
RAW_DIR = DATA_DIR.joinpath("raw")
PROC_DIR = DATA_DIR.joinpath("processed")

# Create both output folders up front so later CSV writes cannot fail on a
# missing directory; already-existing folders are left alone.
for _output_dir in (RAW_DIR, PROC_DIR):
    _output_dir.mkdir(parents=True, exist_ok=True)

# -------------------- Helper Functions --------------------
def validate_game_logs(df: pd.DataFrame) -> bool:
    """Check that a game-logs dataframe has every required column.

    Args:
        df: Game-logs dataframe to inspect. Only its column labels are read.

    Returns:
        True when all required columns are present.

    Raises:
        ValueError: If one or more required columns are absent. The message
            lists the absent names in the order of the required-column list.
    """
    expected_columns = ("MIN", "FGA", "FTA", "OREB", "TOV", "PTS", "FG3A", "TEAM_NAME")
    absent = [name for name in expected_columns if name not in df.columns]
    if not absent:
        return True
    raise ValueError(f"Missing columns in game logs: {absent}")


def calculate_metrics(df: pd.DataFrame) -> pd.DataFrame:
    """Compute possessions, pace, ORTG, and TS% for a game-logs dataframe.

    The input dataframe is not modified; a copy carrying the extra columns is
    returned. The columns MIN, FGA, FTA, OREB, TOV and PTS must be present.

    Adds:
    - POSS: estimated possessions, FGA + 0.44 * FTA - OREB + TOV
    - PACE: 48 * (POSS / (MIN / 5))
    - ORTG: points per 100 possessions
    - TS_PCT: true shooting percentage (0-1)

    Args:
        df: Game-logs dataframe with the box-score columns listed above.

    Returns:
        A new dataframe with POSS, PACE, ORTG and TS_PCT appended.
    """
    df = df.copy()
    # The 0.44 weight applies to free-throw attempts only. Offensive rebounds
    # are subtracted in full; folding OREB into the 0.44 term (as
    # 0.44 * (FTA - OREB)) overstates possessions and skews PACE and ORTG.
    df["POSS"] = df["FGA"] + 0.44 * df["FTA"] - df["OREB"] + df["TOV"]
    # MIN / 5 turns the summed minutes into elapsed game minutes before
    # scaling to a 48-minute game.
    df["PACE"] = 48 * (df["POSS"] / (df["MIN"] / 5))
    df["ORTG"] = (df["PTS"] / df["POSS"]) * 100
    df["TS_PCT"] = df["PTS"] / (2 * (df["FGA"] + 0.44 * df["FTA"]))
    return df


def calculate_team_metric(
    game_logs_df: pd.DataFrame,
    metric_col: str,
    min_threshold: int = MIN_GAMES_THRESHOLD,
) -> pd.Series:
    """Average one metric column per team over rows above a minutes cutoff.

    Args:
        game_logs_df: Game-logs dataframe containing MIN, TEAM_NAME and the
            requested metric column.
        metric_col: Name of the column to average.
        min_threshold: Rows are kept only when their MIN value is strictly
            greater than this number.

    Returns:
        A Series indexed by TEAM_NAME holding the mean of ``metric_col``.
    """
    # Filter first so excluded rows never enter the per-team mean.
    played_mask = game_logs_df["MIN"] > min_threshold
    eligible_rows = game_logs_df[played_mask]
    per_team = eligible_rows.groupby("TEAM_NAME")[metric_col]
    return per_team.mean()

# -------------------- Fetch & Prepare Game Log Data --------------------
# Seasons to download, oldest first. Only the keys are iterated below; every
# value is None.
season_map = dict.fromkeys((SEASON_2024, SEASON_2025, SEASON_2026))

all_logs = []

for season in season_map:
    # Request the regular-season game log and take the first returned frame.
    endpoint = leaguegamelog.LeagueGameLog(
        season=season,
        season_type_all_star="Regular Season",
    )
    logs = endpoint.get_data_frames()[0]

    # Fail fast if the response lacks a column the metrics need.
    validate_game_logs(logs)

    # Derive the metric columns, then tag every row with its season.
    logs = calculate_metrics(logs).assign(SEASON=season)

    # Write the per-season file (includes the derived metric columns).
    logs.to_csv(RAW_DIR / f"team_game_logs_{season}.csv", index=False)

    # Collect for the cross-season file built after the loop.
    all_logs.append(logs)

# One combined frame across all seasons (used in 02_analysis.ipynb)
all_logs_df = pd.concat(all_logs, ignore_index=True)
all_logs_path = PROC_DIR / "team_game_logs_with_metrics_2024_2026.csv"
all_logs_df.to_csv(all_logs_path, index=False)

print(f"Saved combined logs with metrics to: {all_logs_path}")

# -------------------- Fetch & Prepare Team Outcome Data (Win%, PM/G) --------------------
team_outcomes = []

for season in season_map:
    # Request season-level team stats in "Totals" mode and take the first
    # returned frame.
    endpoint = LeagueDashTeamStats(
        season=season,
        per_mode_detailed="Totals",
        season_type_all_star="Regular Season",
    )
    stats = endpoint.get_data_frames()[0]

    # Narrow to the fields used later. GP is kept alongside PLUS_MINUS,
    # presumably so a per-game value can be derived downstream -- TODO confirm.
    outcome_columns = ["TEAM_NAME", "GP", "W_PCT", "PLUS_MINUS"]
    stats = stats.loc[:, outcome_columns].copy()
    stats["SEASON"] = season
    team_outcomes.append(stats)

# Stack the per-season frames and write them out as a single CSV.
team_outcomes_df = pd.concat(team_outcomes, ignore_index=True)
team_outcomes_path = PROC_DIR / "team_season_outcomes_2024_2026.csv"
team_outcomes_df.to_csv(team_outcomes_path, index=False)

print(f"Saved team outcomes to: {team_outcomes_path}")
print("Data collection complete.")


Saved combined logs with metrics to: C:\Users\kobyw\OneDrive\Documents\nba-pace-analytics\data\processed\team_game_logs_with_metrics_2024_2026.csv
Saved team outcomes to: C:\Users\kobyw\OneDrive\Documents\nba-pace-analytics\data\processed\team_season_outcomes_2024_2026.csv
Data collection complete.
