In [45]:
# ===============================================================
# Imports
# ===============================================================
import pandas as pd
import numpy as np
import os
import pickle

# Configuration
MODEL_DIR = "c/"
TEST_DIR = "test/"

print(f"Model directory: {MODEL_DIR}")
print(f"Test directory: {TEST_DIR}")

Model directory: c/
Test directory: test/


In [46]:
# ===============================================================
# (1) Load trained models and feature sets
# ===============================================================
print("\n>>> Loading trained models...")

with open(f"{MODEL_DIR}model_mvp.pkl", "rb") as f:
    model_mvp = pickle.load(f)
print("MVP model loaded")

with open(f"{MODEL_DIR}model_dpoy.pkl", "rb") as f:
    model_dpoy = pickle.load(f)
print("DPOY model loaded")

with open(f"{MODEL_DIR}model_mip.pkl", "rb") as f:
    model_mip = pickle.load(f)
print("MIP model loaded")

with open(f"{MODEL_DIR}model_fmvp.pkl", "rb") as f:
    model_fmvp = pickle.load(f)
print("FMVP model loaded")

with open(f"{MODEL_DIR}model_roy.pkl", "rb") as f:
    model_roy = pickle.load(f)
print("ROY model loaded")

with open(f"{MODEL_DIR}model_coty.pkl", "rb") as f:
    model_coty = pickle.load(f)
print("COTY model loaded")

with open(f"{MODEL_DIR}model_asg_mvp.pkl", "rb") as f:
    model_asg_mvp = pickle.load(f)
print("ASG_MVP model loaded")

with open(f"{MODEL_DIR}feature_sets.pkl", "rb") as f:
    feature_sets = pickle.load(f)
print("Feature sets loaded")

features_mvp = feature_sets['mvp']
features_dpoy = feature_sets['dpoy']
features_mip = feature_sets['mip']
features_fmvp = feature_sets['fmvp']
features_roy = feature_sets['roy']
features_coty = feature_sets['coty']
features_asg_mvp = feature_sets['asg_mvp']
main_awards = feature_sets['main_awards']
coach_award = feature_sets.get('coach_award', 'Coach of the Year')

print(f"\nMain awards: {main_awards}")
print(f"Coach award: {coach_award}")


>>> Loading trained models...
MVP model loaded
DPOY model loaded
MIP model loaded
FMVP model loaded
ROY model loaded
COTY model loaded
ASG_MVP model loaded
Feature sets loaded

Main awards: ['Most Valuable Player', 'Defensive Player of the Year', 'Most Improved Player', 'WNBA Finals Most Valuable Player', 'Rookie of the Year', 'All-Star Game Most Valuable Player']
Coach award: Coach of the Year


In [47]:
# ===============================================================
# (2) Load test roster and historical data
# ===============================================================
print("\n>>> Loading data...")

# Test data - ONLY used to know WHO is on WHICH team
players_teams_test = pd.read_csv(f"{TEST_DIR}players_teams.csv")
teams_test = pd.read_csv(f"{TEST_DIR}teams.csv")

# Historical data - for computing T-1 features
players = pd.read_csv("data/players.csv")
players_teams_hist = pd.read_csv("data/players_teams.csv")
teams_hist = pd.read_csv("data/teams.csv")
teams_post_hist = pd.read_csv("data/teams_post.csv")
awards_hist = pd.read_csv("data/awards_players.csv")
coaches_hist = pd.read_csv("data/coaches.csv")

# Also load test coaches if available
coaches_test_path = f"{TEST_DIR}coaches.csv"
if os.path.exists(coaches_test_path):
    coaches_test = pd.read_csv(coaches_test_path)
    print(f"Test coaches loaded: {len(coaches_test)} records")
else:
    coaches_test = None
    print("No test coaches file found - will use historical coaches")

# League averages from training
league_avg = pd.read_csv(f"{MODEL_DIR}league_averages.csv")
elite_cutoffs = pd.read_csv(f"{MODEL_DIR}elite_cutoffs.csv")

test_year = players_teams_test['year'].iloc[0]
prev_year = test_year - 1

print(f"\nData loaded:")
print(f"   Test year (to predict): {test_year}")
print(f"   Features from year: {prev_year} (T-1)")
print(f"   Players in test roster: {players_teams_test['playerID'].nunique()}")
print(f"   Teams in test: {teams_test['tmID'].nunique()}")


>>> Loading data...
Test coaches loaded: 12 records

Data loaded:
   Test year (to predict): 11
   Features from year: 10 (T-1)
   Players in test roster: 141
   Teams in test: 12


In [48]:
# ===============================================================
# (3) Calculate player stats from historical data (T-1 = prev_year)
# ===============================================================
print("\n>>> Computing T-1 player stats from historical data...")

# Get prev_year stats
prev_year_stats = players_teams_hist[players_teams_hist['year'] == prev_year].copy()
prev_year_stats = prev_year_stats[prev_year_stats['GP'] >= 10]

# Per-game statistics
prev_year_stats['ppg'] = prev_year_stats['points'] / prev_year_stats['GP']
prev_year_stats['rpg'] = prev_year_stats['rebounds'] / prev_year_stats['GP']
prev_year_stats['apg'] = prev_year_stats['assists'] / prev_year_stats['GP']
prev_year_stats['spg'] = prev_year_stats['steals'] / prev_year_stats['GP']
prev_year_stats['bpg'] = prev_year_stats['blocks'] / prev_year_stats['GP']
prev_year_stats['mpg'] = prev_year_stats['minutes'] / prev_year_stats['GP']
prev_year_stats['fg_pct'] = np.where(prev_year_stats['fgAttempted'] > 0, 
                                      prev_year_stats['fgMade'] / prev_year_stats['fgAttempted'], 0)

# Efficiency
prev_year_stats['efficiency'] = (prev_year_stats['points'] + prev_year_stats['rebounds'] + 
                                  prev_year_stats['assists'] + prev_year_stats['steals'] + 
                                  prev_year_stats['blocks'] - prev_year_stats['turnovers']) / prev_year_stats['GP']

# Player rating
prev_year_stats['player_rating'] = (
    prev_year_stats['points'] +
    prev_year_stats['rebounds'] * 1.2 +
    prev_year_stats['assists'] * 1.5 +
    prev_year_stats['steals'] * 3 +
    prev_year_stats['blocks'] * 3 -
    prev_year_stats['turnovers'] * 2 -
    prev_year_stats['PF'] * 0.5
) / (prev_year_stats['minutes'] / 36 + 1)

prev_year_stats['gs_pct'] = prev_year_stats['GS'] / prev_year_stats['GP']

# Defensive-specific stats
prev_year_stats['drpg'] = prev_year_stats['dRebounds'] / prev_year_stats['GP']
prev_year_stats['def_rating'] = (prev_year_stats['steals'] * 2 + prev_year_stats['blocks'] * 3 + prev_year_stats['dRebounds']) / prev_year_stats['GP']

print(f"Players with stats in year {prev_year}: {len(prev_year_stats)}")


>>> Computing T-1 player stats from historical data...
Players with stats in year 10: 145


In [49]:
# ===============================================================
# (4) Get league averages and elite cutoff for prev_year
# ===============================================================
prev_league_avg = league_avg[league_avg['year'] == prev_year]
prev_elite_cutoff = elite_cutoffs[elite_cutoffs['year'] == prev_year]

if len(prev_league_avg) > 0:
    prev_league_avg = prev_league_avg.iloc[0]
    prev_year_stats['league_ppg'] = prev_league_avg['league_ppg']
    prev_year_stats['league_rpg'] = prev_league_avg['league_rpg']
    prev_year_stats['league_apg'] = prev_league_avg['league_apg'] if 'league_apg' in prev_league_avg else prev_year_stats['apg'].mean()
    prev_year_stats['league_efficiency'] = prev_league_avg['league_efficiency']
    prev_year_stats['league_player_rating'] = prev_league_avg['league_player_rating']
else:
    # Fallback: compute from current data
    prev_year_stats['league_ppg'] = prev_year_stats['ppg'].mean()
    prev_year_stats['league_rpg'] = prev_year_stats['rpg'].mean()
    prev_year_stats['league_apg'] = prev_year_stats['apg'].mean()
    prev_year_stats['league_efficiency'] = prev_year_stats['efficiency'].mean()
    prev_year_stats['league_player_rating'] = prev_year_stats['player_rating'].mean()

if len(prev_elite_cutoff) > 0:
    elite_thresh = prev_elite_cutoff.iloc[0]['elite_cutoff']
else:
    elite_thresh = prev_year_stats['player_rating'].quantile(0.90)

prev_year_stats['is_elite'] = (prev_year_stats['player_rating'] >= elite_thresh).astype(int)

print(f"Elite threshold: {elite_thresh:.2f}")
print(f"Elite players: {prev_year_stats['is_elite'].sum()}")

Elite threshold: 31.34
Elite players: 15


In [50]:
# ===============================================================
# (5) Get team context from prev_year
# ===============================================================
teams_prev = teams_hist[teams_hist['year'] == prev_year].copy()
teams_prev['win_pct'] = teams_prev['won'] / (teams_prev['won'] + teams_prev['lost'])
teams_prev['conf_rank'] = teams_prev.groupby('confID')['win_pct'].rank(ascending=False)
teams_prev['made_playoffs'] = teams_prev['playoff'].notna().astype(int)
teams_prev['margin_per_game'] = (teams_prev['o_pts'] - teams_prev['d_pts']) / teams_prev['GP']

# Playoff data
playoff_wins = teams_post_hist[teams_post_hist['year'] == prev_year].groupby('tmID')['W'].sum().reset_index()
playoff_wins.columns = ['tmID', 'playoff_wins']

if len(playoff_wins) > 0:
    champion_tm = playoff_wins.loc[playoff_wins['playoff_wins'].idxmax(), 'tmID']
    teams_prev['won_championship'] = (teams_prev['tmID'] == champion_tm).astype(int)
else:
    teams_prev['won_championship'] = 0

teams_prev = teams_prev.merge(playoff_wins, on='tmID', how='left')
teams_prev['playoff_wins'] = teams_prev['playoff_wins'].fillna(0).astype(int)

print(f"Teams in year {prev_year}: {len(teams_prev)}")

Teams in year 10: 13


In [51]:
# ===============================================================
# (6) Build test dataset: roster from test + stats from T-1
# ===============================================================
print("\n>>> Building test dataset...")

# Get unique players from test roster
test_roster = players_teams_test[['playerID', 'tmID']].drop_duplicates()
test_roster['target_year'] = test_year

print(f"Players in test roster: {len(test_roster)}")

# Merge with prev_year stats
# Note: We match on playerID only, as player might have changed teams
prev_cols = ['playerID', 'ppg', 'rpg', 'apg', 'spg', 'bpg', 'mpg', 'fg_pct',
             'efficiency', 'player_rating', 'is_elite', 'GP', 'GS', 'gs_pct', 'minutes',
             'drpg', 'def_rating',
             'league_ppg', 'league_rpg', 'league_apg', 'league_efficiency', 'league_player_rating']

test_df = test_roster.merge(
    prev_year_stats[prev_cols],
    on='playerID',
    how='left'
)

# Rename to _prev suffix
rename_dict = {col: f"{col}_prev" for col in prev_cols if col != 'playerID'}
# But keep league_ columns without _prev suffix
for col in ['league_ppg', 'league_rpg', 'league_apg', 'league_efficiency', 'league_player_rating']:
    if col in rename_dict:
        del rename_dict[col]
test_df = test_df.rename(columns=rename_dict)

print(f"Players with T-1 stats: {test_df['ppg_prev'].notna().sum()}")
print(f"Rookies (no T-1 stats): {test_df['ppg_prev'].isna().sum()}")


>>> Building test dataset...
Players in test roster: 148
Players with T-1 stats: 97
Rookies (no T-1 stats): 53


In [52]:
# ===============================================================
# (7) Add team context from prev_year
# ===============================================================
# Use the team they're on NOW (test roster), but team stats from prev_year
# This is a simplification - in reality the team composition changed

team_cols = ['tmID', 'win_pct', 'conf_rank', 'made_playoffs', 'margin_per_game', 
             'won_championship', 'playoff_wins']
test_df = test_df.merge(
    teams_prev[team_cols],
    on='tmID',
    how='left'
)

# Rename team columns
test_df = test_df.rename(columns={
    'win_pct': 'win_pct_prev',
    'conf_rank': 'conf_rank_prev',
    'made_playoffs': 'made_playoffs_prev',
    'margin_per_game': 'margin_prev',
    'won_championship': 'won_championship_prev',
    'playoff_wins': 'playoff_wins_prev'
})

# Fill missing team data with neutral values
test_df['win_pct_prev'] = test_df['win_pct_prev'].fillna(0.5)
test_df['conf_rank_prev'] = test_df['conf_rank_prev'].fillna(6)
test_df['made_playoffs_prev'] = test_df['made_playoffs_prev'].fillna(0)
test_df['margin_prev'] = test_df['margin_prev'].fillna(0)
test_df['won_championship_prev'] = test_df['won_championship_prev'].fillna(0)
test_df['playoff_wins_prev'] = test_df['playoff_wins_prev'].fillna(0)

print(f"Team context added")

Team context added


In [53]:
# ===============================================================
# (8) Add career features
# ===============================================================
# Experience: how many years before test_year
player_first_year = players_teams_hist.groupby('playerID')['year'].min().reset_index()
player_first_year.columns = ['playerID', 'first_year']
test_df = test_df.merge(player_first_year, on='playerID', how='left')
test_df['years_experience'] = prev_year - test_df['first_year'].fillna(prev_year)

# Career awards - using vectorized approach
awards_main = awards_hist[awards_hist['award'].isin(main_awards)]
awards_before_test = awards_main[awards_main['year'] < test_year].copy()

# Initialize award columns
for award in main_awards:
    col = f"career_{award.replace(' ', '_').lower()}"
    test_df[col] = 0
test_df['career_total_awards'] = 0

# Count awards for each player and award type
for award in main_awards:
    col = f"career_{award.replace(' ', '_').lower()}"
    award_counts = awards_before_test[awards_before_test['award'] == award].groupby('playerID').size().reset_index(name='count')
    
    # Merge counts into test_df
    test_df = test_df.merge(award_counts.rename(columns={'count': col + '_temp'}), on='playerID', how='left')
    test_df[col] = test_df[col + '_temp'].fillna(0).astype(int)
    test_df = test_df.drop(columns=[col + '_temp'])

# Calculate total awards
total_awards = awards_before_test.groupby('playerID').size().reset_index(name='total')
test_df = test_df.merge(total_awards.rename(columns={'total': 'career_total_awards_temp'}), on='playerID', how='left')
test_df['career_total_awards'] = test_df['career_total_awards_temp'].fillna(0).astype(int)
test_df = test_df.drop(columns=['career_total_awards_temp'])

print(f"Career features added")

Career features added


In [54]:
# ===============================================================
# (9) Add comparison to league features
# ===============================================================
test_df['ppg_vs_league'] = test_df['ppg_prev'] - test_df['league_ppg']
test_df['rpg_vs_league'] = test_df['rpg_prev'] - test_df['league_rpg']
test_df['apg_vs_league'] = test_df['apg_prev'] - test_df['league_apg']
test_df['efficiency_vs_league'] = test_df['efficiency_prev'] - test_df['league_efficiency']
test_df['rating_vs_league'] = test_df['player_rating_prev'] - test_df['league_player_rating']

# Rename some columns to match training
test_df = test_df.rename(columns={
    'GP_prev': 'GP_prev',
    'GS_prev': 'GS_prev',
    'is_elite_prev': 'is_elite_prev'
})

print(f"League comparison features added")

League comparison features added


In [55]:
# ===============================================================
# (10) Add improvement features (T-1 vs T-2)
# ===============================================================
# Get T-2 stats
prev2_year = prev_year - 1
prev2_stats = players_teams_hist[players_teams_hist['year'] == prev2_year].copy()
if len(prev2_stats) > 0:
    prev2_stats = prev2_stats[prev2_stats['GP'] >= 10]
    prev2_stats['ppg_prev2'] = prev2_stats['points'] / prev2_stats['GP']
    prev2_stats['efficiency_prev2'] = (prev2_stats['points'] + prev2_stats['rebounds'] + 
                                        prev2_stats['assists'] + prev2_stats['steals'] + 
                                        prev2_stats['blocks'] - prev2_stats['turnovers']) / prev2_stats['GP']
    prev2_stats['player_rating_prev2'] = (
        prev2_stats['points'] + prev2_stats['rebounds'] * 1.2 + prev2_stats['assists'] * 1.5 +
        prev2_stats['steals'] * 3 + prev2_stats['blocks'] * 3 -
        prev2_stats['turnovers'] * 2 - prev2_stats['PF'] * 0.5
    ) / (prev2_stats['minutes'] / 36 + 1)
    prev2_stats['mpg_prev2'] = prev2_stats['minutes'] / prev2_stats['GP']
    
    test_df = test_df.merge(
        prev2_stats[['playerID', 'ppg_prev2', 'efficiency_prev2', 'player_rating_prev2', 'mpg_prev2']],
        on='playerID',
        how='left'
    )
else:
    test_df['ppg_prev2'] = np.nan
    test_df['efficiency_prev2'] = np.nan
    test_df['player_rating_prev2'] = np.nan
    test_df['mpg_prev2'] = np.nan

# Calculate improvement
test_df['ppg_improvement'] = test_df['ppg_prev'] - test_df['ppg_prev2'].fillna(test_df['ppg_prev'])
test_df['efficiency_improvement'] = test_df['efficiency_prev'] - test_df['efficiency_prev2'].fillna(test_df['efficiency_prev'])
test_df['rating_improvement'] = test_df['player_rating_prev'] - test_df['player_rating_prev2'].fillna(test_df['player_rating_prev'])
test_df['mpg_improvement'] = test_df['mpg_prev'] - test_df['mpg_prev2'].fillna(test_df['mpg_prev'])

print(f"Improvement features added")

Improvement features added


In [56]:
# ===============================================================
# (11) Add playoff performance features
# ===============================================================
playoff_cols = ['playerID', 'year', 'PostGP', 'PostGS', 'PostMinutes', 
                'PostPoints', 'PostRebounds', 'PostAssists', 'PostSteals', 'PostBlocks']
existing_cols = [c for c in playoff_cols if c in players_teams_hist.columns]

prev_playoffs = players_teams_hist[players_teams_hist['year'] == prev_year][existing_cols].copy()
rename_playoff = {c: f"{c}_prev" for c in existing_cols if c not in ['playerID', 'year']}
prev_playoffs = prev_playoffs.rename(columns=rename_playoff)

test_df = test_df.merge(prev_playoffs.drop(columns=['year']), on='playerID', how='left')

for col in test_df.columns:
    if 'Post' in col:
        test_df[col] = test_df[col].fillna(0)

# Calculate per-game playoff stats
if 'PostGP_prev' in test_df.columns and 'PostPoints_prev' in test_df.columns:
    test_df['playoff_ppg_prev'] = np.where(
        test_df['PostGP_prev'] > 0,
        test_df['PostPoints_prev'] / test_df['PostGP_prev'],
        0
    )

print(f"\nTest dataset shape: {test_df.shape}")
print(f"Playoff features added")


Test dataset shape: (156, 61)
Playoff features added


In [57]:
# ===============================================================
# (12) Filter to players with T-1 data (non-rookies)
# ===============================================================
# For MVP, DPOY, MIP, FMVP - need previous year stats
test_eligible = test_df[test_df['ppg_prev'].notna()].copy()
test_rookies = test_df[test_df['ppg_prev'].isna()].copy()

print(f"\nEligible for prediction (have T-1 stats): {len(test_eligible)}")
print(f"Rookies (no T-1 stats - ROY candidates): {len(test_rookies)}")


Eligible for prediction (have T-1 stats): 102
Rookies (no T-1 stats - ROY candidates): 54


In [58]:
# ===============================================================
# (13) MVP Predictions
# ===============================================================
print("\n" + "="*70)
print("MVP PREDICTIONS")
print("="*70)

# Ensure all features exist
for feat in features_mvp:
    if feat not in test_eligible.columns:
        test_eligible[feat] = 0

X_mvp = test_eligible[features_mvp].fillna(0)
test_eligible['mvp_proba'] = model_mvp.predict_proba(X_mvp)[:, 1]
test_eligible['mvp_rank'] = test_eligible['mvp_proba'].rank(ascending=False, method='first')

print("\nüìä Top 10 MVP Candidates:")
top_mvp = test_eligible.nsmallest(10, 'mvp_rank')[[
    'playerID', 'tmID', 'ppg_prev', 'efficiency_prev', 'player_rating_prev', 
    'win_pct_prev', 'mvp_proba', 'mvp_rank'
]]
print(top_mvp.to_string(index=False))

mvp_winner = test_eligible.loc[test_eligible['mvp_rank'] == 1]
print(f"\nüèÜ Predicted MVP: {mvp_winner['playerID'].values[0]} ({mvp_winner['tmID'].values[0]})")
print(f"   Based on Y-1 stats: {mvp_winner['ppg_prev'].values[0]:.1f} PPG, {mvp_winner['player_rating_prev'].values[0]:.1f} rating")


MVP PREDICTIONS

üìä Top 10 MVP Candidates:
  playerID tmID  ppg_prev  efficiency_prev  player_rating_prev  win_pct_prev  mvp_proba  mvp_rank
tauradi01w  PHO 20.354839        29.516129           36.768379      0.676471   0.274470       1.0
jacksla01w  SEA 19.192308        28.576923           35.422526      0.588235   0.134837       2.0
powelni01w  NYL 16.676471        24.029412           30.067227      0.382353   0.084661       3.0
catchta01w  IND 15.058824        26.264706           35.212500      0.647059   0.059764       4.0
jonesas01w  CON 16.695652        24.347826           27.964875      0.470588   0.054545       5.0
parkeca01w  LAS 13.120000        25.960000           33.178378      0.529412   0.044935       6.0
youngso01w  SAS 18.181818        26.272727           30.125326      0.441176   0.029992       7.0
anosini01w  WAS 13.200000        24.900000           36.498283      0.470588   0.029890       8.0
dupreca01w  PHO 15.705882        25.647059           29.669067      0.67

In [59]:
# ===============================================================
# (14) DPOY Predictions
# ===============================================================
print("\n" + "="*70)
print("DPOY PREDICTIONS")
print("="*70)

for feat in features_dpoy:
    if feat not in test_eligible.columns:
        test_eligible[feat] = 0

X_dpoy = test_eligible[features_dpoy].fillna(0)
test_eligible['dpoy_proba'] = model_dpoy.predict_proba(X_dpoy)[:, 1]
test_eligible['dpoy_rank'] = test_eligible['dpoy_proba'].rank(ascending=False, method='first')

print("\nüìä Top 10 DPOY Candidates:")
top_dpoy = test_eligible.nsmallest(10, 'dpoy_rank')[[
    'playerID', 'tmID', 'spg_prev', 'bpg_prev', 'rpg_prev', 'dpoy_proba', 'dpoy_rank'
]]
print(top_dpoy.to_string(index=False))

dpoy_winner = test_eligible.loc[test_eligible['dpoy_rank'] == 1]
print(f"\nüèÜ Predicted DPOY: {dpoy_winner['playerID'].values[0]} ({dpoy_winner['tmID'].values[0]})")


DPOY PREDICTIONS



üìä Top 10 DPOY Candidates:
  playerID tmID  spg_prev  bpg_prev  rpg_prev  dpoy_proba  dpoy_rank
catchta01w  IND  2.911765  0.529412  7.205882    0.314490        1.0
tauradi01w  PHO  1.161290  1.387097  5.741935    0.274341        2.0
jacksla01w  SEA  1.461538  1.730769  7.000000    0.259342        3.0
parkeca01w  LAS  0.600000  2.120000  9.760000    0.104869        4.0
lyttlsa01w  ATL  2.000000  0.647059  7.470588    0.099967        5.0
anosini01w  WAS  2.700000  0.933333  7.400000    0.069931        6.0
mccouan01w  ATL  2.176471  0.352941  3.088235    0.030000        7.0
beviltu01w  SAS  1.823529  0.235294  2.323529    0.025000        8.0
perkiji01w  SAS  2.147059  0.176471  3.411765    0.020000        9.0
 cashsw01w  SEA  0.875000  0.500000  6.687500    0.005000       10.0

üèÜ Predicted DPOY: catchta01w (IND)


In [60]:
# ===============================================================
# (15) MIP Predictions
# ===============================================================
print("\n" + "="*70)
print("MIP PREDICTIONS")
print("="*70)

test_mip = test_eligible[
    (test_eligible['years_experience'] >= 0)
].copy()

if len(test_mip) > 0:
    for feat in features_mip:
        if feat not in test_mip.columns:
            test_mip[feat] = 0
    
    X_mip = test_mip[features_mip].fillna(0)
    test_mip['mip_proba'] = model_mip.predict_proba(X_mip)[:, 1]
    test_mip['mip_rank'] = test_mip['mip_proba'].rank(ascending=False, method='first')
    
    print("\nüìä Top 10 MIP Candidates:")
    top_mip = test_mip.nsmallest(10, 'mip_rank')[[
        'playerID', 'tmID', 'ppg_prev', 'ppg_improvement', 'rating_improvement', 'mip_proba', 'mip_rank'
    ]]
    print(top_mip.to_string(index=False))
    
    mip_winner = test_mip.loc[test_mip['mip_rank'] == 1]
    print(f"\nüèÜ Predicted MIP: {mip_winner['playerID'].values[0]} ({mip_winner['tmID'].values[0]})")
    print(f"   PPG improvement: {mip_winner['ppg_improvement'].values[0]:.1f}")
else:
    print("‚ö†Ô∏è No MIP-eligible players found")


MIP PREDICTIONS

üìä Top 10 MIP Candidates:
  playerID tmID  ppg_prev  ppg_improvement  rating_improvement  mip_proba  mip_rank
parisco01w  ATL  4.848485         0.000000            0.000000   0.280895       1.0
jacksti02w  TUL  5.294118        -3.025882           -3.468230   0.180696       2.0
januabr01w  IND  6.878788         0.000000            0.000000   0.160330       3.0
 snowmi01w  CHI  5.441176        -4.441176           -1.384327   0.105520       4.0
philler01w  IND  8.062500         0.000000            0.000000   0.103842       5.0
montgre01w  CON  9.000000         0.000000            0.000000   0.100237       6.0
holliqu01w  NYL  4.823529         0.000000            0.000000   0.079321       7.0
ajavoma01w  WAS  8.000000        -0.029412            0.214526   0.069380       8.0
colemma01w  WAS  6.107143         0.000000            0.000000   0.049414       9.0
walkede01w  CON  8.558824         0.000000            0.000000   0.045409      10.0

üèÜ Predicted MIP: parisco01

In [61]:
# ===============================================================
# (16) FMVP Predictions
# ===============================================================
print("\n" + "="*70)
print("FMVP PREDICTIONS")
print("="*70)
print("Note: FMVP is predicted based on previous playoff performance.")
print("Actual FMVP depends on upcoming season's playoffs.")

# Filter to players with playoff experience
test_fmvp = test_eligible[
    (test_eligible['made_playoffs_prev'] == 1) &
    (test_eligible['PostGP_prev'] > 0)
].copy()

if len(test_fmvp) > 0:
    for feat in features_fmvp:
        if feat not in test_fmvp.columns:
            test_fmvp[feat] = 0
    
    X_fmvp = test_fmvp[features_fmvp].fillna(0)
    test_fmvp['fmvp_proba'] = model_fmvp.predict_proba(X_fmvp)[:, 1]
    test_fmvp['fmvp_rank'] = test_fmvp['fmvp_proba'].rank(ascending=False, method='first')
    
    print("\nüìä Top 10 FMVP Candidates:")
    top_fmvp = test_fmvp.nsmallest(10, 'fmvp_rank')[[
        'playerID', 'tmID', 'PostPoints_prev', 'PostRebounds_prev', 'playoff_wins_prev', 'fmvp_proba', 'fmvp_rank'
    ]]
    print(top_fmvp.to_string(index=False))
    
    fmvp_winner = test_fmvp.loc[test_fmvp['fmvp_rank'] == 1]
    print(f"\nüèÜ Predicted FMVP: {fmvp_winner['playerID'].values[0]} ({fmvp_winner['tmID'].values[0]})")
else:
    print("‚ö†Ô∏è No FMVP-eligible players found")


FMVP PREDICTIONS
Note: FMVP is predicted based on previous playoff performance.
Actual FMVP depends on upcoming season's playoffs.

üìä Top 10 FMVP Candidates:
  playerID tmID  PostPoints_prev  PostRebounds_prev  playoff_wins_prev  fmvp_proba  fmvp_rank
parkeca01w  LAS            108.0               64.0                3.0    0.187312        1.0
youngso01w  SAS             58.0               16.0                1.0    0.176013        2.0
douglka01w  IND            155.0               35.0                6.0    0.171345        3.0
tauradi01w  PHO            245.0               65.0                7.0    0.163124        4.0
catchta01w  IND            172.0              104.0                6.0    0.097899        5.0
willile01w  SEA             90.0               48.0                1.0    0.058230        6.0
pricear01w  ATL             10.0                6.0                0.0    0.055000        7.0
johnste01w  PHO             83.0               21.0                7.0    0.045000    

In [62]:
# ===============================================================
# (16b) ASG_MVP Predictions (All-Star Game MVP)
# ===============================================================
print("\n" + "="*70)
print("ASG_MVP PREDICTIONS (All-Star Game MVP)")
print("="*70)
print("Note: Filtering to elite players only (top 10% by rating) as proxy for All-Star selection")

# Filter to elite players
test_asg = test_eligible[test_eligible['is_elite_prev'] == 1].copy()

if len(test_asg) > 0:
    for feat in features_asg_mvp:
        if feat not in test_asg.columns:
            test_asg[feat] = 0
    
    X_asg = test_asg[features_asg_mvp].fillna(0)
    test_asg['asg_mvp_proba'] = model_asg_mvp.predict_proba(X_asg)[:, 1]
    test_asg['asg_mvp_rank'] = test_asg['asg_mvp_proba'].rank(ascending=False, method='first')
    
    print(f"\nElite players (All-Star candidates): {len(test_asg)}")
    
    print("\nüìä Top 10 ASG_MVP Candidates:")
    top_asg = test_asg.nsmallest(10, 'asg_mvp_rank')[[
        'playerID', 'tmID', 'ppg_prev', 'player_rating_prev', 'asg_mvp_proba', 'asg_mvp_rank'
    ]]
    print(top_asg.to_string(index=False))
    
    asg_winner = test_asg.loc[test_asg['asg_mvp_rank'] == 1]
    print(f"\nüèÜ Predicted ASG_MVP: {asg_winner['playerID'].values[0]} ({asg_winner['tmID'].values[0]})")
    print(f"   Based on Y-1 stats: {asg_winner['ppg_prev'].values[0]:.1f} PPG, {asg_winner['player_rating_prev'].values[0]:.1f} rating")
else:
    print("‚ö†Ô∏è No elite players found for ASG_MVP prediction")
    test_asg = pd.DataFrame()


ASG_MVP PREDICTIONS (All-Star Game MVP)
Note: Filtering to elite players only (top 10% by rating) as proxy for All-Star selection

Elite players (All-Star candidates): 14

üìä Top 10 ASG_MVP Candidates:
  playerID tmID  ppg_prev  player_rating_prev  asg_mvp_proba  asg_mvp_rank
tauradi01w  PHO 20.354839           36.768379       0.143101           1.0
jacksla01w  SEA 19.192308           35.422526       0.078475           2.0
parkeca01w  LAS 13.120000           33.178378       0.064136           3.0
catchta01w  IND 15.058824           35.212500       0.049370           4.0
pondeca01w  NYL 19.058824           31.910711       0.028279           5.0
anosini01w  WAS 13.200000           36.498283       0.014775           6.0
braxtka01w  NYL  9.000000           32.226667       0.009604           7.0
bonnede01w  PHO 11.205882           32.054211       0.004910           8.0
lyttlsa01w  ATL 13.029412           34.019442       0.004866           9.0
desouer01w  ATL 11.823529           32.090041

In [63]:
# ===============================================================
# (17) ROY Predictions
# ===============================================================
print("\n" + "="*70)
print("ROY PREDICTIONS")
print("="*70)
print("Note: ROY uses biometrics, college reputation, and coach data.")
print("No T-1 stats available for rookies - predictions based on pre-season info only.")

# get test roster and identify rookies
test_roster = players_teams_test[['playerID', 'tmID']].drop_duplicates()

# get first year for each player from historical data to identify rookies
player_first_year_hist = players_teams_hist.groupby('playerID')['year'].min().reset_index()
player_first_year_hist.columns = ['playerID', 'first_year']

# a rookie is someone whose first year is the test_year (not in historical data)
# OR someone in test roster who has never appeared in historical data
test_roster = test_roster.merge(player_first_year_hist, on='playerID', how='left')

# players not in historical data are rookies
rookies_new = test_roster[test_roster['first_year'].isna()].copy()
# also check if any have first_year == test_year (edge case)
# if test_year is in historical data range, this won't apply
rookies_test = rookies_new[['playerID', 'tmID']].copy()

print(f"\nRookies identified (no prior history): {len(rookies_test)}")

if len(rookies_test) > 0:
    # build ROY features for rookies
    
    # calculate college scores
    past_roy_winners = awards_hist[(awards_hist['award'] == 'Rookie of the Year') & 
                                    (awards_hist['year'] < test_year)]['playerID'].tolist()
    past_roy_colleges = players[players['bioID'].isin(past_roy_winners)]['college'].value_counts()
    
    all_colleges = players['college'].unique()
    college_scores = {}
    for college in all_colleges:
        if college in past_roy_colleges.index:
            college_scores[college] = past_roy_colleges[college] + 1
        elif college and college != 'none' and college != '':
            college_scores[college] = 1
        else:
            college_scores[college] = 0
    
    # get coach history
    coaches_before_test = coaches_hist[coaches_hist['year'] < test_year]
    coach_history = coaches_before_test.groupby('coachID').agg({
        'won': 'sum',
        'lost': 'sum',
        'post_wins': 'sum'
    }).reset_index()
    coach_history['coach_career_win_pct'] = coach_history['won'] / (coach_history['won'] + coach_history['lost'] + 0.001)
    coach_history['coach_career_playoff_wins'] = coach_history['post_wins']
    
    # get team context from previous year
    teams_prev = teams_hist[teams_hist['year'] == prev_year].copy()
    teams_prev['win_pct'] = teams_prev['won'] / (teams_prev['won'] + teams_prev['lost'])
    teams_prev['conf_rank'] = teams_prev.groupby('confID')['win_pct'].rank(ascending=False)
    teams_prev['made_playoffs'] = teams_prev['playoff'].notna().astype(int)
    
    # get coach for each team in test year
    test_coaches = coaches_hist[coaches_hist['year'] == test_year][['tmID', 'coachID']].drop_duplicates()
    # fallback: if no test_year coaches, use prev_year coaches
    if len(test_coaches) == 0:
        test_coaches = coaches_hist[coaches_hist['year'] == prev_year][['tmID', 'coachID']].drop_duplicates()
    
    # build ROY test records
    roy_records = []
    for _, rookie_row in rookies_test.iterrows():
        player_id = rookie_row['playerID']
        tm_id = rookie_row['tmID']
        
        # get player biometric data
        player_bio = players[players['bioID'] == player_id]
        
        if len(player_bio) == 0:
            continue
        
        player_bio = player_bio.iloc[0]
        
        # get position encoding
        pos = str(player_bio['pos']) if pd.notna(player_bio['pos']) else ''
        is_guard = 1 if 'G' in pos else 0
        is_forward = 1 if 'F' in pos else 0
        is_center = 1 if 'C' in pos else 0
        
        # get college info
        college = player_bio['college'] if pd.notna(player_bio['college']) else ''
        college_score = college_scores.get(college, 0)
        has_college = 1 if college and college != 'none' and college != '' else 0
        
        # get coach info
        team_coach = test_coaches[test_coaches['tmID'] == tm_id]
        if len(team_coach) > 0:
            coach_id = team_coach.iloc[0]['coachID']
            coach_stats = coach_history[coach_history['coachID'] == coach_id]
            if len(coach_stats) > 0:
                coach_win_pct = coach_stats.iloc[0]['coach_career_win_pct']
                coach_playoff_wins = coach_stats.iloc[0]['coach_career_playoff_wins']
            else:
                coach_win_pct = 0.5
                coach_playoff_wins = 0
        else:
            coach_win_pct = 0.5
            coach_playoff_wins = 0
        
        # get team context from previous year
        team_row = teams_prev[teams_prev['tmID'] == tm_id]
        if len(team_row) > 0:
            team_row = team_row.iloc[0]
            team_win_pct = team_row['win_pct']
            team_conf_rank = team_row['conf_rank']
            team_made_playoffs = team_row['made_playoffs']
        else:
            team_win_pct = 0.5
            team_conf_rank = 6
            team_made_playoffs = 0
        
        roy_records.append({
            'playerID': player_id,
            'tmID': tm_id,
            'target_year': test_year,
            'height': player_bio['height'] if pd.notna(player_bio['height']) else 70,
            'weight': player_bio['weight'] if pd.notna(player_bio['weight']) else 160,
            'is_guard': is_guard,
            'is_forward': is_forward,
            'is_center': is_center,
            'college_score': college_score,
            'has_college': has_college,
            'coach_win_pct': coach_win_pct,
            'coach_playoff_wins': coach_playoff_wins,
            'team_win_pct': team_win_pct,
            'team_conf_rank': team_conf_rank,
            'team_made_playoffs': team_made_playoffs
        })
    
    test_roy = pd.DataFrame(roy_records)
    
    if len(test_roy) > 0:
        for feat in features_roy:
            if feat not in test_roy.columns:
                test_roy[feat] = 0
        
        X_roy = test_roy[features_roy].fillna(0)
        test_roy['roy_proba'] = model_roy.predict_proba(X_roy)[:, 1]
        test_roy['roy_rank'] = test_roy['roy_proba'].rank(ascending=False, method='first')
        
        print("\nüìä Top 10 ROY Candidates:")
        top_roy = test_roy.nsmallest(10, 'roy_rank')[[
            'playerID', 'tmID', 'height', 'weight', 'college_score', 
            'coach_win_pct', 'roy_proba', 'roy_rank'
        ]]
        print(top_roy.to_string(index=False))
        
        roy_winner = test_roy.loc[test_roy['roy_rank'] == 1]
        print(f"\nüèÜ Predicted ROY: {roy_winner['playerID'].values[0]} ({roy_winner['tmID'].values[0]})")
        print(f"   Height: {roy_winner['height'].values[0]}, Weight: {roy_winner['weight'].values[0]}")
        print(f"   College score: {roy_winner['college_score'].values[0]}, Coach win%: {roy_winner['coach_win_pct'].values[0]:.3f}")
    else:
        print("No rookie biometric data found")
        test_roy = pd.DataFrame()
else:
    print("No rookies identified in test roster")
    test_roy = pd.DataFrame()


ROY PREDICTIONS
Note: ROY uses biometrics, college reputation, and coach data.
No T-1 stats available for rookies - predictions based on pre-season info only.

Rookies identified (no prior history): 46

üìä Top 10 ROY Candidates:
  playerID tmID  height  weight  college_score  coach_win_pct  roy_proba  roy_rank
moorema01w  MIN    72.0     175              2       0.411753   0.254748       1.0
colsosy01w  NYL    68.0     140              1       0.473681   0.149090       2.0
robinda01w  SAS    69.0     125              1       0.483332   0.135737       3.0
brelaje01w  NYL    75.0     170              1       0.473681   0.129790       4.0
rileyan01w  TUL    65.0     136              1       0.500000   0.128393       5.0
bjorkan01w  CHI    72.0     166              3       0.411759   0.087388       6.0
hightal01w  CON    70.0     139              3       0.600838   0.081106       7.0
montgal01w  NYL    73.0     185              1       0.473681   0.078906       8.0
chestfe01w  NYL    75

In [64]:
# ===============================================================
# (18) COTY Predictions
# ===============================================================
print("\n" + "="*70)
print("COTY PREDICTIONS")
print("="*70)
print("Note: COTY uses T-1 team performance + coach career history ONLY.")

# Determine which coaches data to use
if coaches_test is not None:
    year_coaches = coaches_test.copy()
    print(f"Using test coaches data: {len(year_coaches)} coaches")
else:
    # Fall back to historical coaches for test_year if available
    year_coaches = coaches_hist[coaches_hist['year'] == test_year].copy()
    if len(year_coaches) == 0:
        year_coaches = coaches_hist[coaches_hist['year'] == prev_year].copy()
        print(f"Using prev_year coaches as fallback: {len(year_coaches)} coaches")
    else:
        print(f"Using historical coaches for test_year: {len(year_coaches)} coaches")

if len(year_coaches) > 0:
    # Build teams_success for T-1 and T-2 years
    teams_success_all = teams_hist[['year', 'tmID', 'won', 'lost', 'confID', 'playoff', 'o_pts', 'd_pts']].copy()
    teams_success_all['win_pct'] = teams_success_all['won'] / (teams_success_all['won'] + teams_success_all['lost'])
    teams_success_all['conf_rank'] = teams_success_all.groupby(['year', 'confID'])['win_pct'].rank(ascending=False)
    teams_success_all['made_playoffs'] = teams_success_all['playoff'].notna().astype(int)
    teams_success_all['GP'] = teams_success_all['won'] + teams_success_all['lost']
    teams_success_all['margin_per_game'] = (teams_success_all['o_pts'] - teams_success_all['d_pts']) / teams_success_all['GP']
    
    # T-1 teams (previous year - what we can see before the season)
    prev_year_teams = teams_success_all[teams_success_all['year'] == prev_year].copy()
    
    # T-2 teams (for improvement calculation)
    prev2_year_teams = teams_success_all[teams_success_all['year'] == prev_year - 1].copy()
    
    # Calculate coach career history using ONLY years BEFORE test_year
    coaches_before_test = coaches_hist[coaches_hist['year'] < test_year]
    coach_career = coaches_before_test.groupby('coachID').agg({
        'won': 'sum',
        'lost': 'sum',
        'post_wins': 'sum',
        'year': 'nunique'
    }).reset_index()
    coach_career.columns = ['coachID', 'career_wins', 'career_losses', 'career_playoff_wins', 'years_coaching']
    coach_career['career_win_pct'] = coach_career['career_wins'] / (coach_career['career_wins'] + coach_career['career_losses'] + 0.001)
    
    # Check if coach won COTY before
    awards_coach = awards_hist[awards_hist['award'] == coach_award]
    past_coty = awards_coach[awards_coach['year'] < test_year]['playerID'].tolist()
    
    # Build COTY test records
    coty_records = []
    for _, coach_row in year_coaches.iterrows():
        coach_id = coach_row['coachID']
        tm_id = coach_row['tmID']
        
        # T-1 team performance (previous season results - what we know before season starts)
        prev_team = prev_year_teams[prev_year_teams['tmID'] == tm_id]
        if len(prev_team) > 0:
            prev_team = prev_team.iloc[0]
            wins_prev = prev_team['won']
            win_pct_prev = prev_team['win_pct']
            conf_rank_prev = prev_team['conf_rank']
            made_playoffs_prev = prev_team['made_playoffs']
            margin_prev = prev_team['margin_per_game']
            # Get previous year coach playoff wins from coaches_hist
            prev_coach = coaches_before_test[(coaches_before_test['coachID'] == coach_id) & 
                                              (coaches_before_test['year'] == prev_year)]
            if len(prev_coach) > 0:
                playoff_wins_prev = prev_coach.iloc[0]['post_wins']
            else:
                playoff_wins_prev = 0
        else:
            wins_prev = 0
            win_pct_prev = 0.5
            conf_rank_prev = 6
            made_playoffs_prev = 0
            playoff_wins_prev = 0
            margin_prev = 0
        
        # T-1 vs T-2 improvement (team trajectory before season starts)
        prev2_team = prev2_year_teams[prev2_year_teams['tmID'] == tm_id]
        if len(prev2_team) > 0 and len(prev_team) > 0:
            prev2_win_pct = prev2_team.iloc[0]['win_pct']
            prev2_made_playoffs = prev2_team.iloc[0]['made_playoffs']
            win_improvement_prev = win_pct_prev - prev2_win_pct
            playoff_improvement_prev = made_playoffs_prev - prev2_made_playoffs
        else:
            win_improvement_prev = 0
            playoff_improvement_prev = 0
        
        # Coach career history
        coach_hist_row = coach_career[coach_career['coachID'] == coach_id]
        if len(coach_hist_row) > 0:
            career_win_pct = coach_hist_row.iloc[0]['career_win_pct']
            career_playoff_wins = coach_hist_row.iloc[0]['career_playoff_wins']
            years_coaching = coach_hist_row.iloc[0]['years_coaching']
        else:
            career_win_pct = 0.5
            career_playoff_wins = 0
            years_coaching = 0
        
        won_coty_before = 1 if coach_id in past_coty else 0
        
        coty_records.append({
            'coachID': coach_id,
            'tmID': tm_id,
            'target_year': test_year,
            # T-1 team performance
            'wins_prev': wins_prev,
            'win_pct_prev': win_pct_prev,
            'conf_rank_prev': conf_rank_prev,
            'made_playoffs_prev': made_playoffs_prev,
            'playoff_wins_prev': playoff_wins_prev,
            'margin_prev': margin_prev,
            # Team trajectory (T-1 vs T-2)
            'win_improvement_prev': win_improvement_prev,
            'playoff_improvement_prev': playoff_improvement_prev,
            # Coach career history
            'career_win_pct': career_win_pct,
            'career_playoff_wins': career_playoff_wins,
            'years_coaching': years_coaching,
            'won_coty_before': won_coty_before
        })
    
    test_coty = pd.DataFrame(coty_records)
    
    if len(test_coty) > 0:
        for feat in features_coty:
            if feat not in test_coty.columns:
                test_coty[feat] = 0
        
        X_coty = test_coty[features_coty].fillna(0)
        test_coty['coty_proba'] = model_coty.predict_proba(X_coty)[:, 1]
        test_coty['coty_rank'] = test_coty['coty_proba'].rank(ascending=False, method='first')
        
        print("\nüìä Top 10 COTY Candidates:")
        top_coty = test_coty.nsmallest(10, 'coty_rank')[[
            'coachID', 'tmID', 'wins_prev', 'win_pct_prev', 'win_improvement_prev', 
            'career_win_pct', 'coty_proba', 'coty_rank'
        ]]
        print(top_coty.to_string(index=False))
        
        coty_winner = test_coty.loc[test_coty['coty_rank'] == 1]
        print(f"\nüèÜ Predicted COTY: {coty_winner['coachID'].values[0]} ({coty_winner['tmID'].values[0]})")
        print(f"   Team T-1: Wins={coty_winner['wins_prev'].values[0]}, Win%={coty_winner['win_pct_prev'].values[0]:.3f}")
        print(f"   Team improvement (T-1 vs T-2): {coty_winner['win_improvement_prev'].values[0]:.3f}")
    else:
        print("No COTY candidates found")
        test_coty = pd.DataFrame()
else:
    print("No coaches data available")
    test_coty = pd.DataFrame()


COTY PREDICTIONS
Note: COTY uses T-1 team performance + coach career history ONLY.
Using test coaches data: 12 coaches

üìä Top 10 COTY Candidates:
   coachID tmID  wins_prev  win_pct_prev  win_improvement_prev  career_win_pct  coty_proba  coty_rank
hugheda99w  SAS         15      0.441176             -0.264706        0.483332    0.189231        1.0
gaineco01w  PHO         23      0.676471              0.205882        0.573521    0.178177        2.0
meadoma99w  ATL         18      0.529412              0.411765        0.323525    0.126311        3.0
whisejo99w  NYL         13      0.382353             -0.176471        0.611506    0.095632        4.0
thibami99w  CON         16      0.470588             -0.147059        0.600838    0.087861        5.0
dunnli99wc  IND         22      0.647059              0.147059        0.439022    0.080009        6.0
laceytr99w  WAS         16      0.470588              0.176471        0.402170    0.069632        7.0
gilloje01w  LAS         18      0.

In [None]:
# ===============================================================
# (18) Save predictions
# ===============================================================
print("\n" + "="*70)
print("SAVING PREDICTIONS")
print("="*70)

# Save MVP predictions
mvp_cols = ['playerID', 'tmID', 'target_year', 'ppg_prev', 'efficiency_prev', 
            'player_rating_prev', 'win_pct_prev', 'mvp_proba', 'mvp_rank']
test_eligible[mvp_cols].to_csv("c/predictions_mvp_test.csv", index=False)
print(f"MVP predictions saved: c/predictions_mvp_test.csv")

# Save DPOY predictions
dpoy_cols = ['playerID', 'tmID', 'target_year', 'spg_prev', 'bpg_prev', 
             'rpg_prev', 'drpg_prev', 'def_rating_prev', 'dpoy_proba', 'dpoy_rank']
test_eligible[dpoy_cols].to_csv("c/predictions_dpoy_test.csv", index=False)
print(f"DPOY predictions saved: c/predictions_dpoy_test.csv")

# Save MIP predictions
if len(test_mip) > 0 and 'mip_rank' in test_mip.columns:
    mip_cols = ['playerID', 'tmID', 'target_year', 'ppg_prev', 'ppg_improvement', 
                'rating_improvement', 'years_experience', 'mip_proba', 'mip_rank']
    test_mip[mip_cols].to_csv("c/predictions_mip_test.csv", index=False)
    print(f"MIP predictions saved: c/predictions_mip_test.csv")

# Save FMVP predictions
if len(test_fmvp) > 0 and 'fmvp_rank' in test_fmvp.columns:
    fmvp_cols = ['playerID', 'tmID', 'target_year', 'PostPoints_prev', 'PostRebounds_prev', 
                 'playoff_wins_prev', 'won_championship_prev', 'fmvp_proba', 'fmvp_rank']
    test_fmvp[fmvp_cols].to_csv("c/predictions_fmvp_test.csv", index=False)
    print(f"FMVP predictions saved: c/predictions_fmvp_test.csv")

# Save ASG_MVP predictions
if len(test_asg) > 0 and 'asg_mvp_rank' in test_asg.columns:
    asg_cols = ['playerID', 'tmID', 'target_year', 'ppg_prev', 'player_rating_prev', 
                'is_elite_prev', 'asg_mvp_proba', 'asg_mvp_rank']
    test_asg[asg_cols].to_csv("c/predictions_asg_mvp_test.csv", index=False)
    print(f"ASG_MVP predictions saved: c/predictions_asg_mvp_test.csv")

# Save ROY predictions
if len(test_roy) > 0 and 'roy_rank' in test_roy.columns:
    roy_cols = ['playerID', 'tmID', 'target_year', 'height', 'weight', 'college_score', 
                'coach_win_pct', 'team_win_pct', 'roy_proba', 'roy_rank']
    test_roy[roy_cols].to_csv("c/predictions_roy_test.csv", index=False)
    print(f"ROY predictions saved: c/predictions_roy_test.csv")

# Save COTY predictions
if len(test_coty) > 0 and 'coty_rank' in test_coty.columns:
    coty_cols = ['coachID', 'tmID', 'target_year', 'wins_prev', 'win_pct_prev', 
                 'win_improvement_prev', 'career_win_pct', 'coty_proba', 'coty_rank']
    test_coty[coty_cols].to_csv("c/predictions_coty_test.csv", index=False)
    print(f"COTY predictions saved: c/predictions_coty_test.csv")

# Summary
summary = {
    'Award': ['MVP', 'DPOY'],
    'Predicted_Winner': [
        test_eligible.loc[test_eligible['mvp_rank'] == 1, 'playerID'].values[0],
        test_eligible.loc[test_eligible['dpoy_rank'] == 1, 'playerID'].values[0]
    ],
    'Team': [
        test_eligible.loc[test_eligible['mvp_rank'] == 1, 'tmID'].values[0],
        test_eligible.loc[test_eligible['dpoy_rank'] == 1, 'tmID'].values[0]
    ]
}

if len(test_mip) > 0 and 'mip_rank' in test_mip.columns:
    summary['Award'].append('MIP')
    summary['Predicted_Winner'].append(test_mip.loc[test_mip['mip_rank'] == 1, 'playerID'].values[0])
    summary['Team'].append(test_mip.loc[test_mip['mip_rank'] == 1, 'tmID'].values[0])

if len(test_fmvp) > 0 and 'fmvp_rank' in test_fmvp.columns:
    summary['Award'].append('FMVP')
    summary['Predicted_Winner'].append(test_fmvp.loc[test_fmvp['fmvp_rank'] == 1, 'playerID'].values[0])
    summary['Team'].append(test_fmvp.loc[test_fmvp['fmvp_rank'] == 1, 'tmID'].values[0])

if len(test_asg) > 0 and 'asg_mvp_rank' in test_asg.columns:
    summary['Award'].append('ASG_MVP')
    summary['Predicted_Winner'].append(test_asg.loc[test_asg['asg_mvp_rank'] == 1, 'playerID'].values[0])
    summary['Team'].append(test_asg.loc[test_asg['asg_mvp_rank'] == 1, 'tmID'].values[0])

if len(test_roy) > 0 and 'roy_rank' in test_roy.columns:
    summary['Award'].append('ROY')
    summary['Predicted_Winner'].append(test_roy.loc[test_roy['roy_rank'] == 1, 'playerID'].values[0])
    summary['Team'].append(test_roy.loc[test_roy['roy_rank'] == 1, 'tmID'].values[0])

if len(test_coty) > 0 and 'coty_rank' in test_coty.columns:
    summary['Award'].append('COTY')
    summary['Predicted_Winner'].append(test_coty.loc[test_coty['coty_rank'] == 1, 'coachID'].values[0])
    summary['Team'].append(test_coty.loc[test_coty['coty_rank'] == 1, 'tmID'].values[0])

summary_df = pd.DataFrame(summary)
summary_df.to_csv("c/predictions_summary.csv", index=False)
print(f"‚úÖ Summary saved: c/predictions_summary.csv")

print("\n" + "="*70)
print(f"AWARD PREDICTIONS FOR YEAR {test_year}")
print("="*70)
print(summary_df.to_string(index=False))


SAVING PREDICTIONS
Full predictions saved: c/predictions_test.csv


ROY predictions saved: c/predictions_roy_test.csv
ASG_MVP predictions saved: c/predictions_asg_mvp_test.csv
COTY predictions saved: c/predictions_coty_test.csv
‚úÖ Summary saved: c/predictions_summary.csv

AWARD PREDICTIONS FOR YEAR 11
  Award Predicted_Winner Team
    MVP       tauradi01w  PHO
   DPOY       catchta01w  IND
    MIP       parisco01w  ATL
   FMVP       parkeca01w  LAS
ASG_MVP       tauradi01w  PHO
    ROY       moorema01w  MIN
   COTY       hugheda99w  SAS


In [66]:
# ===============================================================
# (19) Summary
# ===============================================================
print("\n" + "="*70)
print("TESTING COMPLETE")
print("="*70)

print(f"\nüìä Prediction Approach:")
print(f"   - Predicting awards for year {test_year}")
print(f"   - Using stats from year {prev_year} (T-1)")
print(f"   - Test roster used ONLY to identify players/teams")
print(f"   - No future data used - predictions can be made before season starts")
print(f"   - ROY uses biometrics, college reputation, and coach data")
print(f"   - COTY uses current year team stats + coach career history")
print(f"   - ASG_MVP filters to elite players only (top 10% by rating)")

print(f"\nüìÅ Output files in c/:")
for f in sorted(os.listdir('c')):
    if 'prediction' in f:
        print(f"  - {f}")

print("\n" + "="*70)


TESTING COMPLETE

üìä Prediction Approach:
   - Predicting awards for year 11
   - Using stats from year 10 (T-1)
   - Test roster used ONLY to identify players/teams
   - No future data used - predictions can be made before season starts
   - ROY uses biometrics, college reputation, and coach data
   - COTY uses current year team stats + coach career history
   - ASG_MVP filters to elite players only (top 10% by rating)

üìÅ Output files in c/:
  - predictions_asg_mvp_test.csv
  - predictions_coty_test.csv
  - predictions_roy_test.csv
  - predictions_summary.csv
  - predictions_test.csv

