# MLB Win Prediction - Model Debugging

This notebook inspects the trained model, feature importances, calibration, and case studies.


In [None]:
import sys
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# calibration_curve is exported by sklearn.calibration, not sklearn.metrics;
# importing it from sklearn.metrics raises ImportError.
from sklearn.calibration import calibration_curve
from sklearn.metrics import log_loss, roc_auc_score

# Add src to path. The parent of the current working directory is taken as
# the project root, so the notebook must be started from a direct
# subdirectory of the project.
project_root = Path().resolve().parent
sys.path.insert(0, str(project_root / "src"))

# Project-local imports must come after the sys.path tweak above.
from mlb_win_pred.config import get_config
from mlb_win_pred.dataset_builder import train_val_test_split
from mlb_win_pred.utils import get_data_path, get_model_path

# Shared configuration object and plotting defaults used by every cell below.
config = get_config()
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)


In [None]:
# Load model
# Prefer the XGBoost artifact and fall back to the logistic-regression one.
model_path = get_model_path(config, "win_model_xgb.pkl")
if not model_path.exists():
    xgb_model_path = model_path
    model_path = get_model_path(config, "win_model_lr.pkl")
    if not model_path.exists():
        # Fail with a message that names both candidates instead of letting
        # joblib complain about the fallback path only.
        raise FileNotFoundError(
            f"No trained model found. Tried: {xgb_model_path}, {model_path}"
        )

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code. Only load artifacts produced by this project's own training run.
model_data = joblib.load(model_path)

# The artifact is read as a mapping: 'model' and 'feature_cols' are required,
# while 'scaler' and 'model_type' are optional.
model = model_data['model']
scaler = model_data.get('scaler')
feature_cols = model_data['feature_cols']
model_type = model_data.get('model_type', 'unknown')

print(f"Model type: {model_type}")
print(f"Number of features: {len(feature_cols)}")


## Feature Importances


In [None]:
# Feature importances (if available)
if hasattr(model, 'feature_importances_'):
    # np.asarray guarantees fancy indexing works below.
    importances = np.asarray(model.feature_importances_)
    # Feature positions sorted from most to least important.
    indices = np.argsort(importances)[::-1]

    # Clamp to the number of available features. A fixed 20 makes barh/yticks
    # fail with a length mismatch when the model has fewer features.
    top_n = min(20, len(indices))
    top_indices = indices[:top_n]

    print("Top 20 Most Important Features:")
    for rank, idx in enumerate(top_indices, start=1):
        print(f"{rank:2d}. {feature_cols[idx]:30s} {importances[idx]:.4f}")

    # Plot: horizontal bars with the most important feature at the top.
    plt.figure(figsize=(10, 12))
    plt.barh(range(top_n), importances[top_indices])
    plt.yticks(range(top_n), [feature_cols[i] for i in top_indices])
    plt.xlabel('Importance')
    plt.title(f'Top {top_n} Feature Importances')
    plt.gca().invert_yaxis()
    plt.tight_layout()
    plt.show()
else:
    print("Model does not have feature_importances_ attribute")


## Calibration Analysis


In [None]:
# Load test data
processed_file = get_data_path(config, "games_processed.csv", subdir="processed")
df = pd.read_csv(processed_file)
df['game_date'] = pd.to_datetime(df['game_date'])

# Get test set. Split once and keep all three parts, so the fallback to the
# validation set does not need a second call.
train_df, val_df, test_df = train_val_test_split(df, config)
if test_df.empty:
    test_df = val_df
if test_df.empty:
    # Without any rows the metrics below are undefined; stop with a clear message.
    raise ValueError("Both the test and validation splits are empty; nothing to evaluate.")

# Prepare features: select the columns stored with the model, in that order,
# and replace missing values with 0.
# NOTE(review): assumes training used the same fill value - confirm.
X_test = test_df[feature_cols].fillna(0).values
y_test = test_df['win'].values

# Make predictions. When the artifact carries a scaler, apply it first.
# Column 1 of predict_proba is the positive-class probability.
if scaler is not None:
    X_test_scaled = scaler.transform(X_test)
    y_pred_proba = model.predict_proba(X_test_scaled)[:, 1]
else:
    y_pred_proba = model.predict_proba(X_test)[:, 1]

print(f"Test set size: {len(y_test)}")
print(f"Test ROC-AUC: {roc_auc_score(y_test, y_pred_proba):.4f}")
print(f"Test Log Loss: {log_loss(y_test, y_pred_proba):.4f}")


In [None]:
# Calibration by deciles
# Bin edges are the 0th, 10th, ..., 100th percentiles of the predictions.
deciles = np.percentile(y_pred_proba, np.arange(0, 101, 10))
decile_labels, decile_actual, decile_pred = [], [], []

final_bin = len(deciles) - 2
for bin_no, (lower, upper) in enumerate(zip(deciles[:-1], deciles[1:])):
    # Bins are half-open [lower, upper); only the final bin is closed on the
    # right so the largest prediction is counted.
    if bin_no == final_bin:
        in_bin = (y_pred_proba >= lower) & (y_pred_proba <= upper)
    else:
        in_bin = (y_pred_proba >= lower) & (y_pred_proba < upper)

    if not in_bin.any():
        continue  # identical edges leave a half-open bin empty

    decile_labels.append(f"{bin_no*10}-{(bin_no+1)*10}%")
    decile_actual.append(y_test[in_bin].mean())
    decile_pred.append(y_pred_proba[in_bin].mean())

calibration_df = pd.DataFrame({
    'decile': decile_labels,
    'actual_rate': decile_actual,
    'predicted_rate': decile_pred
})

print("Calibration by Deciles:")
print(calibration_df)

# Plot: actual and predicted rates side by side for each populated bin.
fig, ax = plt.subplots()
positions = np.arange(len(decile_labels))
bar_width = 0.35
ax.bar(positions - bar_width/2, decile_actual, bar_width, label='Actual Win Rate', alpha=0.7)
ax.bar(positions + bar_width/2, decile_pred, bar_width, label='Predicted Rate', alpha=0.7)
ax.set_xlabel('Predicted Probability Decile')
ax.set_ylabel('Rate')
ax.set_title('Calibration: Actual vs Predicted by Decile')
ax.set_xticks(positions)
ax.set_xticklabels(decile_labels, rotation=45)
ax.legend()
ax.grid(True, alpha=0.3, axis='y')
fig.tight_layout()
plt.show()


## Case Studies


In [None]:
# Find some interesting games to analyze
# Attach the predicted probability, the 0.5-threshold class, and a 0/1 flag
# telling whether that class matches the recorded outcome.
test_df_with_pred = test_df.copy().assign(
    pred_proba=y_pred_proba,
    pred=(y_pred_proba >= 0.5).astype(int),
)
test_df_with_pred['correct'] = (
    test_df_with_pred['pred'] == test_df_with_pred['win']
).astype(int)

# High confidence correct predictions: probability above 0.8 or below 0.2
# and the predicted class was right. Only the first five rows are kept.
is_confident = (test_df_with_pred['pred_proba'] > 0.8) | (test_df_with_pred['pred_proba'] < 0.2)
is_right = test_df_with_pred['correct'] == 1
high_conf_correct = test_df_with_pred[is_confident & is_right].head(5)

print("High Confidence Correct Predictions:")
for _, game in high_conf_correct.iterrows():
    outcome = 'Win' if game['win'] == 1 else 'Loss'
    print(f"\n{game['team']} vs {game['opponent']} on {game['game_date'].date()}")
    print(f"  Predicted prob: {game['pred_proba']:.3f}")
    print(f"  Actual result: {outcome}")
    # The score line is printed only when the data carries a runs_scored column.
    if 'runs_scored' in game:
        print(f"  Score: {game['runs_scored']} - {game['runs_allowed']}")


In [None]:
# High confidence incorrect predictions: probability above 0.8 or below 0.2
# but the predicted class was wrong. Only the first five rows are kept.
proba = test_df_with_pred['pred_proba']
is_confident = (proba > 0.8) | (proba < 0.2)
is_wrong = test_df_with_pred['correct'] == 0
high_conf_incorrect = test_df_with_pred[is_confident & is_wrong].head(5)

print("High Confidence Incorrect Predictions:")
for _, game in high_conf_incorrect.iterrows():
    outcome = 'Win' if game['win'] == 1 else 'Loss'
    print(f"\n{game['team']} vs {game['opponent']} on {game['game_date'].date()}")
    print(f"  Predicted prob: {game['pred_proba']:.3f}")
    print(f"  Actual result: {outcome}")
    # The score line is printed only when the data carries a runs_scored column.
    if 'runs_scored' in game:
        print(f"  Score: {game['runs_scored']} - {game['runs_allowed']}")


In [None]:
# Prediction distribution by actual outcome
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Use the same 30 bin edges over [0, 1] for both classes. With bins=30 each
# histogram derives edges from its own data range, so the overlaid bars would
# not line up and could not be compared bin by bin.
bin_edges = np.linspace(0.0, 1.0, 31)
axes[0].hist(y_pred_proba[y_test == 0], bins=bin_edges, alpha=0.7, label='Losses', density=True)
axes[0].hist(y_pred_proba[y_test == 1], bins=bin_edges, alpha=0.7, label='Wins', density=True)
axes[0].set_xlabel('Predicted Probability')
axes[0].set_ylabel('Density')
axes[0].set_title('Prediction Distribution by Outcome')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Calibration curve: observed positive fraction against mean predicted
# probability per bin, with the diagonal as the perfect-calibration reference.
prob_true, prob_pred = calibration_curve(y_test, y_pred_proba, n_bins=10)
axes[1].plot(prob_pred, prob_true, marker='o', label='Model')
axes[1].plot([0, 1], [0, 1], 'k--', label='Perfect Calibration')
axes[1].set_xlabel('Mean Predicted Probability')
axes[1].set_ylabel('Fraction of Positives')
axes[1].set_title('Calibration Curve')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()
