In [11]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import joblib
from nba_api.stats.endpoints import playergamelog

# ---------------------------
# Step 1: Fetch Game Logs
# ---------------------------
def fetch_player_data(player_id, season='2023-24'):
    """Return one player's game log for a season as a DataFrame.

    Args:
        player_id: Player identifier passed to the ``PlayerGameLog`` endpoint.
        season: Season string passed to the endpoint (default ``'2023-24'``).

    Returns:
        The first data frame produced by the endpoint, with a ``PLAYER_ID``
        column set to ``player_id`` on every row.
    """
    endpoint = playergamelog.PlayerGameLog(season=season, player_id=player_id)
    frames = endpoint.get_data_frames()
    game_log = frames[0]
    # Tag every row with its player so logs from several players can be
    # combined into one frame and still be told apart.
    game_log["PLAYER_ID"] = player_id
    return game_log

def main(player_ids=None, season="2023-24", model_path="nba_model.pkl"):
    """Fetch player game logs, fit a points model, and save it to disk.

    Game logs for every requested player are fetched with
    ``fetch_player_data`` and combined into one frame. A ``LinearRegression``
    is then fitted to predict ``PTS`` from ``AST`` and ``REB``, and the fitted
    model is written with ``joblib.dump``.

    Args:
        player_ids: Iterable of player IDs to include. ``None`` (the default)
            uses the built-in list of three players.
        season: Season string forwarded to ``fetch_player_data``.
        model_path: Destination file for the serialized model.

    Raises:
        SystemExit: If no players were requested, no game data came back, or
            no rows remain after dropping games with missing stats.
    """
    print("📡 Fetching NBA player game logs...")

    # 🔑 List of player IDs you want to include
    if player_ids is None:
        player_ids = [201939, 2544, 203507]  # Curry, LeBron, Giannis

    # ---------------------------
    # Step 1: Fetch and combine all players' data
    # ---------------------------
    all_dataframes = [fetch_player_data(pid, season) for pid in player_ids]

    # pd.concat raises ValueError on an empty list, so treat "no players
    # requested" the same way as "no data returned".
    if not all_dataframes:
        raise SystemExit("❌ No game data returned for selected players/season.")

    df = pd.concat(all_dataframes, ignore_index=True)

    if df.empty:
        raise SystemExit("❌ No game data returned for selected players/season.")

    # Keep only the columns we care about
    df = df[["PLAYER_ID", "PTS", "AST", "REB"]].dropna()

    # dropna() can remove every row; stop here with a clear message instead
    # of handing the model an empty training set.
    if df.empty:
        raise SystemExit("❌ No usable rows left after dropping games with missing stats.")

    print("✅ Sample training data (first 5 rows):")
    print(df.head())

    # ---------------------------
    # Step 2: Train Model
    # ---------------------------
    X = df[["AST", "REB"]]
    y = df["PTS"]

    model = LinearRegression()
    model.fit(X, y)

    # ---------------------------
    # Step 3: Save Model
    # ---------------------------
    joblib.dump(model, model_path)
    print(f"✅ Model trained on {len(df)} rows and saved as {model_path}")

# Run the fetch/train/save pipeline only when this code is executed as the
# main program, not when it is imported from another module.
if __name__ == "__main__":
    main()


📡 Fetching NBA player game logs...
✅ Sample training data (first 5 rows):
   PLAYER_ID  PTS  AST  REB
0     201939   33    5    4
1     201939   22    8    7
2     201939   23    8    7
3     201939   28    5    6
4     201939   29    6    6
✅ Model trained on 218 rows and saved as nba_model.pkl
