In [8]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report
from joblib import Parallel, delayed
import pickle
import time

# Load data
print("Loading data...")
df = pd.read_csv('men_2026_matchups_training.csv')

# Filter for Sweet 16 and Elite Eight rounds only
print(f"\nTotal samples in dataset: {len(df)}")
df_filtered = df[df['round'].isin(['Sweet 16', 'Elite Eight'])].copy()
print(f"Sweet 16 + Elite Eight samples: {len(df_filtered)}")

# 30 candidate features
candidate_features = [
    '3man_bpm',
    'wab',
    'kenpom_rtg',
    'rotation_balance',
    'assist_to_usage_ratio',
    '5man_bpm',
    '5man_dbpm',
    'def_experience_impact',
    '3man_dbpm',
    '3man_obpm',
    '5man_obpm',
    'torvik_def',
    'kenpom_def',
    'experience_weighted_production',
    'def_four_factors_composite',
    'efgd_pct',
    'torvik_rtg',
    'def_3pt_fg_pct',
    'def_effective_possession_rate',
    'def_lineup_depth_quality',
    '3pd_pct',
    'torvik_off',
    'kenpom_off',
    'four_factors_composite',
    '2pd_pct',
    'lineup_depth_quality',
    'astd_pct',
    'elite_outcome_probability',
    'def_assist_suppression',
    '3man_prpg'
]

# Check if features exist; drop (with a warning) any that the CSV lacks
missing = [f for f in candidate_features if f not in df_filtered.columns]
if missing:
    print(f"\n Warning: Missing features: {missing}")
    candidate_features = [f for f in candidate_features if f in df_filtered.columns]

# Fail early with a clear message instead of an obscure scaler error later on
if not candidate_features:
    raise ValueError("None of the candidate features are present in the dataset")

print(f"\nUsing {len(candidate_features)} candidate features")

# Prepare data
X = df_filtered[candidate_features].copy()
y = df_filtered['win'].copy()

# Report missing values now; they are imputed AFTER the split (see below) so
# that statistics from held-out rows never influence the training data.
print(f"Missing values: {X.isnull().sum().sum()}")

# Train/test split (70% train / 30% test, stratified on the outcome)
print("\n" + "="*70)
print("TRAIN/TEST SPLIT")
print("="*70)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.30, random_state=42, stratify=y
)
print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")

# Handle missing values using medians computed on the training rows only.
# Filling from the full-frame median would leak test-set information.
train_medians = X_train.median()
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)
# Keep the full frame imputed as well, for any later cell that reads `X`
X = X.fillna(train_medians)

# Standardize (scaler is fit on the training rows and reused for the test rows)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("\n" + "="*70)
print("GREEDY FORWARD SELECTION WITH EXHAUSTIVE SEARCH")
print("="*70)
print("At each stage:")
print("  1. Test ALL remaining features added to current set")
print("  2. Pick the ONE that gives best CV score")
print("  3. Lock it in and move to next stage")
print("  4. Save results after each stage")
print("\n" + "="*70)

# Function to evaluate adding one feature
def evaluate_feature_addition(new_feature_idx, selected_indices, X_train_scaled,
                               X_test_scaled, y_train, y_test):
    """Score the already-selected feature set extended by one candidate column.

    Parameters
    ----------
    new_feature_idx : int
        Column index of the candidate feature being tried.
    selected_indices : list of int
        Column indices already locked in; not modified by this function.
    X_train_scaled, X_test_scaled :
        2-D arrays indexed as ``X[:, columns]`` to pick the trial columns.
    y_train, y_test :
        Targets matching the rows of the corresponding feature arrays.

    Returns
    -------
    dict
        ``feature_idx`` (the candidate index), ``cv_score`` (mean 3-fold
        cross-validated accuracy on the training rows), ``train_score`` and
        ``test_score`` (accuracy of a model fit on all training rows).
    """
    # Columns for this trial: everything chosen so far plus the candidate
    trial_columns = selected_indices + [new_feature_idx]
    train_view = X_train_scaled[:, trial_columns]
    test_view = X_test_scaled[:, trial_columns]

    model = LogisticRegression(max_iter=1000, random_state=42, solver='liblinear')

    # Cross-validated accuracy, computed on the training rows only
    fold_accuracies = cross_val_score(model, train_view, y_train, cv=3, scoring='accuracy')

    # Refit on every training row to report train / held-out accuracy
    model.fit(train_view, y_train)

    return dict(
        feature_idx=new_feature_idx,
        cv_score=fold_accuracies.mean(),
        train_score=model.score(train_view, y_train),
        test_score=model.score(test_view, y_test),
    )

def _write_selection_summary(stage_results, total_stages,
                             path='forward_selection_summary.txt'):
    """Rewrite the human-readable checkpoint file from the stages finished so far.

    Parameters
    ----------
    stage_results : list of dict
        One record per finished stage with the keys ``stage``, ``n_features``,
        ``added_feature``, ``features``, ``cv_score``, ``train_score`` and
        ``test_score``. Must not be empty.
    total_stages : int
        Number of stages the full run will perform (used in the header).
    path : str
        Destination file; it is overwritten on every call.
    """
    # The stage with the highest CV score so far. Later stages can score lower
    # than earlier ones, so this is not necessarily the latest stage.
    best_so_far = max(stage_results, key=lambda r: r['cv_score'])

    with open(path, 'w') as f:
        f.write("FORWARD SELECTION RESULTS\n")
        f.write("="*70 + "\n\n")
        f.write(f"Completed {stage_results[-1]['stage']}/{total_stages} stages\n\n")

        for result in stage_results:
            f.write(f"\nStage {result['stage']}: {result['n_features']} features\n")
            f.write(f"  Added: {result['added_feature']}\n")
            f.write(f"  CV Score: {result['cv_score']:.4f}\n")
            f.write(f"  Train Score: {result['train_score']:.4f}\n")
            f.write(f"  Test Score: {result['test_score']:.4f}\n")
            f.write(f"  Features: {result['features']}\n")

        f.write(f"\n{'='*70}\n")
        f.write(f"CURRENT BEST (Stage {best_so_far['stage']}):\n")
        f.write(f"CV Score: {best_so_far['cv_score']:.4f}\n")
        f.write(f"Features ({best_so_far['n_features']}):\n")
        for feat in best_so_far['features']:
            f.write(f"  - {feat}\n")


# Initialize
selected_features = []  # Indices of selected features
remaining_features = list(range(len(candidate_features)))  # Indices of remaining features
stage_results = []

start_time = time.time()

# Forward selection loop: exactly one feature is moved from the remaining pool
# to the selected set per stage, so the pool is never empty at stage entry.
for stage in range(1, len(candidate_features) + 1):
    print(f"\n{'='*70}")
    print(f"STAGE {stage}: Adding feature #{stage}")
    print(f"{'='*70}")
    print(f"Currently selected: {len(selected_features)} features")
    print(f"Remaining to test: {len(remaining_features)} features")

    stage_start = time.time()

    # Test all remaining features in parallel
    print(f"Testing all {len(remaining_features)} remaining features...")

    results = Parallel(n_jobs=-1, verbose=1)(
        delayed(evaluate_feature_addition)(
            feature_idx, selected_features, X_train_scaled,
            X_test_scaled, y_train, y_test
        )
        for feature_idx in remaining_features
    )

    # Find best feature to add (ties resolve to the first candidate evaluated)
    best_result = max(results, key=lambda x: x['cv_score'])
    best_feature_idx = best_result['feature_idx']
    best_feature_name = candidate_features[best_feature_idx]

    # Add to selected set
    selected_features.append(best_feature_idx)
    remaining_features.remove(best_feature_idx)

    stage_time = time.time() - stage_start

    # Display results
    print(f"\n✓ Stage {stage} completed in {stage_time:.1f} seconds")
    print(f"\n BEST FEATURE TO ADD: {best_feature_name}")
    print(f"   CV Score: {best_result['cv_score']:.4f}")
    print(f"   Train Score: {best_result['train_score']:.4f}")
    print(f"   Test Score: {best_result['test_score']:.4f}")

    print(f"\n Current feature set ({len(selected_features)} features):")
    for i, idx in enumerate(selected_features, 1):
        marker = "★" if i == len(selected_features) else " "
        print(f"   {marker} {i:2d}. {candidate_features[idx]}")

    # Store stage result
    stage_results.append({
        'stage': stage,
        'n_features': len(selected_features),
        'added_feature': best_feature_name,
        'features': [candidate_features[idx] for idx in selected_features],
        'cv_score': best_result['cv_score'],
        'train_score': best_result['train_score'],
        'test_score': best_result['test_score']
    })

    # SAVE AFTER EACH STAGE so an interrupted run still leaves usable results
    stage_df = pd.DataFrame(stage_results)
    stage_df.to_csv('forward_selection_results.csv', index=False)

    # Save readable summary
    _write_selection_summary(stage_results, len(candidate_features))

    print(f"\n Results saved to:")
    print(f"   - forward_selection_results.csv")
    print(f"   - forward_selection_summary.txt")

total_time = time.time() - start_time

# ============================================================================
# FINAL SUMMARY
# ============================================================================
print("\n" + "="*70)
print("FORWARD SELECTION COMPLETE!")
print("="*70)

print(f"\nTotal time: {total_time/60:.1f} minutes")
print(f"Stages completed: {len(stage_results)}")

# Nothing to rank or save if no stage ran; say so instead of the bare
# "max() arg is an empty sequence" error.
if not stage_results:
    raise ValueError("No forward-selection stages were completed; nothing to summarize or save")

# Find best stage by CV score (ties resolve to the earliest, smallest stage)
best_stage = max(stage_results, key=lambda x: x['cv_score'])

print(f"\n BEST MODEL:")
print(f"   Stage: {best_stage['stage']}")
print(f"   Features: {best_stage['n_features']}")
print(f"   CV Score: {best_stage['cv_score']:.4f}")
print(f"   Train Score: {best_stage['train_score']:.4f}")
print(f"   Test Score: {best_stage['test_score']:.4f}")
print(f"\n   Selected features:")
for i, feat in enumerate(best_stage['features'], 1):
    print(f"      {i:2d}. {feat}")

# Show progression
print(f"\n SCORE PROGRESSION:")
print(f"{'Stage':>6} {'Features':>9} {'CV Score':>10} {'Train':>8} {'Test':>8}")
print("-" * 50)
for result in stage_results:
    print(f"{result['stage']:6d} {result['n_features']:9d} "
          f"{result['cv_score']:10.4f} {result['train_score']:8.4f} "
          f"{result['test_score']:8.4f}")

# Train final model with best feature set
best_feature_indices = [candidate_features.index(f) for f in best_stage['features']]
X_train_best = X_train_scaled[:, best_feature_indices]
X_test_best = X_test_scaled[:, best_feature_indices]

final_model = LogisticRegression(max_iter=1000, random_state=42, solver='liblinear')
final_model.fit(X_train_best, y_train)

# Save final model
with open('best_forward_selection_model.pkl', 'wb') as f:
    pickle.dump(final_model, f)

# The scaler was fit on ALL candidate columns, while the model above was fit on
# the selected subset of the scaled columns.
with open('best_forward_selection_scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

# Persist the column bookkeeping needed to use the two artifacts together:
# scale with `scaler_features` (in this order), then keep only the columns at
# `model_feature_indices` before calling the model.
feature_manifest = {
    'scaler_features': list(candidate_features),
    'model_features': list(best_stage['features']),
    'model_feature_indices': list(best_feature_indices),
}
with open('best_forward_selection_features.pkl', 'wb') as f:
    pickle.dump(feature_manifest, f)

print(f"\n Final model saved:")
print(f"   - best_forward_selection_model.pkl")
print(f"   - best_forward_selection_scaler.pkl")
print(f"   - best_forward_selection_features.pkl")

print("\n" + "="*70)

Loading data...

Total samples in dataset: 1254
Sweet 16 + Elite Eight samples: 238

Using 30 candidate features
Missing values: 0

TRAIN/TEST SPLIT
Training samples: 166
Test samples: 72

GREEDY FORWARD SELECTION WITH EXHAUSTIVE SEARCH
At each stage:
  1. Test ALL remaining features added to current set
  2. Pick the ONE that gives best CV score
  3. Lock it in and move to next stage
  4. Save results after each stage


STAGE 1: Adding feature #1
Currently selected: 0 features
Remaining to test: 30 features
Testing all 30 remaining features...

✓ Stage 1 completed in 0.1 seconds

 BEST FEATURE TO ADD: 5man_bpm
   CV Score: 0.6393
   Train Score: 0.6325
   Test Score: 0.7083

 Current feature set (1 features):
   ★  1. 5man_bpm

 Results saved to:
   - forward_selection_results.csv
   - forward_selection_summary.txt

STAGE 2: Adding feature #2
Currently selected: 1 features
Remaining to test: 29 features
Testing all 29 remaining features...


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of  29 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



✓ Stage 2 completed in 0.1 seconds

 BEST FEATURE TO ADD: wab
   CV Score: 0.6455
   Train Score: 0.6506
   Test Score: 0.6944

 Current feature set (2 features):
      1. 5man_bpm
   ★  2. wab

 Results saved to:
   - forward_selection_results.csv
   - forward_selection_summary.txt

STAGE 3: Adding feature #3
Currently selected: 2 features
Remaining to test: 28 features
Testing all 28 remaining features...

✓ Stage 3 completed in 0.1 seconds

 BEST FEATURE TO ADD: torvik_rtg
   CV Score: 0.6755
   Train Score: 0.6867
   Test Score: 0.7083

 Current feature set (3 features):
      1. 5man_bpm
      2. wab
   ★  3. torvik_rtg

 Results saved to:
   - forward_selection_results.csv
   - forward_selection_summary.txt

STAGE 4: Adding feature #4
Currently selected: 3 features
Remaining to test: 27 features
Testing all 27 remaining features...


[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  26 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 out of  25 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



✓ Stage 4 completed in 0.1 seconds

 BEST FEATURE TO ADD: elite_outcome_probability
   CV Score: 0.6877
   Train Score: 0.6807
   Test Score: 0.7083

 Current feature set (4 features):
      1. 5man_bpm
      2. wab
      3. torvik_rtg
   ★  4. elite_outcome_probability

 Results saved to:
   - forward_selection_results.csv
   - forward_selection_summary.txt

STAGE 5: Adding feature #5
Currently selected: 4 features
Remaining to test: 26 features
Testing all 26 remaining features...

✓ Stage 5 completed in 0.1 seconds

 BEST FEATURE TO ADD: 5man_dbpm
   CV Score: 0.6996
   Train Score: 0.6928
   Test Score: 0.7222

 Current feature set (5 features):
      1. 5man_bpm
      2. wab
      3. torvik_rtg
      4. elite_outcome_probability
   ★  5. 5man_dbpm

 Results saved to:
   - forward_selection_results.csv
   - forward_selection_summary.txt

STAGE 6: Adding feature #6
Currently selected: 5 features
Remaining to test: 25 features
Testing all 25 remaining features...

✓ Stage 6 complete

[Parallel(n_jobs=-1)]: Done  24 out of  24 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  23 out of  23 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  22 out of  22 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



✓ Stage 7 completed in 0.1 seconds

 BEST FEATURE TO ADD: four_factors_composite
   CV Score: 0.7235
   Train Score: 0.7048
   Test Score: 0.7083

 Current feature set (7 features):
      1. 5man_bpm
      2. wab
      3. torvik_rtg
      4. elite_outcome_probability
      5. 5man_dbpm
      6. 3man_prpg
   ★  7. four_factors_composite

 Results saved to:
   - forward_selection_results.csv
   - forward_selection_summary.txt

STAGE 8: Adding feature #8
Currently selected: 7 features
Remaining to test: 23 features
Testing all 23 remaining features...

✓ Stage 8 completed in 0.1 seconds

 BEST FEATURE TO ADD: lineup_depth_quality
   CV Score: 0.7175
   Train Score: 0.7108
   Test Score: 0.7083

 Current feature set (8 features):
      1. 5man_bpm
      2. wab
      3. torvik_rtg
      4. elite_outcome_probability
      5. 5man_dbpm
      6. 3man_prpg
      7. four_factors_composite
   ★  8. lineup_depth_quality

 Results saved to:
   - forward_selection_results.csv
   - forward_selection

[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  19 out of  19 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



✓ Stage 10 completed in 0.1 seconds

 BEST FEATURE TO ADD: 3man_dbpm
   CV Score: 0.7115
   Train Score: 0.7108
   Test Score: 0.7361

 Current feature set (10 features):
      1. 5man_bpm
      2. wab
      3. torvik_rtg
      4. elite_outcome_probability
      5. 5man_dbpm
      6. 3man_prpg
      7. four_factors_composite
      8. lineup_depth_quality
      9. 3man_bpm
   ★ 10. 3man_dbpm

 Results saved to:
   - forward_selection_results.csv
   - forward_selection_summary.txt

STAGE 11: Adding feature #11
Currently selected: 10 features
Remaining to test: 20 features
Testing all 20 remaining features...

✓ Stage 11 completed in 0.1 seconds

 BEST FEATURE TO ADD: assist_to_usage_ratio
   CV Score: 0.7115
   Train Score: 0.6988
   Test Score: 0.7361

 Current feature set (11 features):
      1. 5man_bpm
      2. wab
      3. torvik_rtg
      4. elite_outcome_probability
      5. 5man_dbpm
      6. 3man_prpg
      7. four_factors_composite
      8. lineup_depth_quality
      9. 3man_b

[Parallel(n_jobs=-1)]: Done  18 out of  18 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 out of  17 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 out of  16 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



✓ Stage 13 completed in 0.1 seconds

 BEST FEATURE TO ADD: 3man_obpm
   CV Score: 0.7175
   Train Score: 0.6988
   Test Score: 0.7361

 Current feature set (13 features):
      1. 5man_bpm
      2. wab
      3. torvik_rtg
      4. elite_outcome_probability
      5. 5man_dbpm
      6. 3man_prpg
      7. four_factors_composite
      8. lineup_depth_quality
      9. 3man_bpm
     10. 3man_dbpm
     11. assist_to_usage_ratio
     12. 5man_obpm
   ★ 13. 3man_obpm

 Results saved to:
   - forward_selection_results.csv
   - forward_selection_summary.txt

STAGE 14: Adding feature #14
Currently selected: 13 features
Remaining to test: 17 features
Testing all 17 remaining features...

✓ Stage 14 completed in 0.1 seconds

 BEST FEATURE TO ADD: def_lineup_depth_quality
   CV Score: 0.7175
   Train Score: 0.6988
   Test Score: 0.7361

 Current feature set (14 features):
      1. 5man_bpm
      2. wab
      3. torvik_rtg
      4. elite_outcome_probability
      5. 5man_dbpm
      6. 3man_prpg
     

[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  13 out of  13 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  12 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  12 out of  12 | elapsed:    0.1s finished



✓ Stage 16 completed in 0.1 seconds

 BEST FEATURE TO ADD: kenpom_off
   CV Score: 0.7174
   Train Score: 0.7108
   Test Score: 0.7361

 Current feature set (16 features):
      1. 5man_bpm
      2. wab
      3. torvik_rtg
      4. elite_outcome_probability
      5. 5man_dbpm
      6. 3man_prpg
      7. four_factors_composite
      8. lineup_depth_quality
      9. 3man_bpm
     10. 3man_dbpm
     11. assist_to_usage_ratio
     12. 5man_obpm
     13. 3man_obpm
     14. def_lineup_depth_quality
     15. torvik_off
   ★ 16. kenpom_off

 Results saved to:
   - forward_selection_results.csv
   - forward_selection_summary.txt

STAGE 17: Adding feature #17
Currently selected: 16 features
Remaining to test: 14 features
Testing all 14 remaining features...

✓ Stage 17 completed in 0.1 seconds

 BEST FEATURE TO ADD: astd_pct
   CV Score: 0.7054
   Train Score: 0.7289
   Test Score: 0.7222

 Current feature set (17 features):
      1. 5man_bpm
      2. wab
      3. torvik_rtg
      4. elite_outc

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  11 out of  11 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   4 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of   9 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done   8 out of   8 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



✓ Stage 20 completed in 0.1 seconds

 BEST FEATURE TO ADD: 3pd_pct
   CV Score: 0.6751
   Train Score: 0.7108
   Test Score: 0.7222

 Current feature set (20 features):
      1. 5man_bpm
      2. wab
      3. torvik_rtg
      4. elite_outcome_probability
      5. 5man_dbpm
      6. 3man_prpg
      7. four_factors_composite
      8. lineup_depth_quality
      9. 3man_bpm
     10. 3man_dbpm
     11. assist_to_usage_ratio
     12. 5man_obpm
     13. 3man_obpm
     14. def_lineup_depth_quality
     15. torvik_off
     16. kenpom_off
     17. astd_pct
     18. 2pd_pct
     19. def_3pt_fg_pct
   ★ 20. 3pd_pct

 Results saved to:
   - forward_selection_results.csv
   - forward_selection_summary.txt

STAGE 21: Adding feature #21
Currently selected: 20 features
Remaining to test: 10 features
Testing all 10 remaining features...

✓ Stage 21 completed in 0.1 seconds

 BEST FEATURE TO ADD: kenpom_def
   CV Score: 0.6630
   Train Score: 0.7108
   Test Score: 0.7222

 Current feature set (21 featur

[Parallel(n_jobs=-1)]: Done   7 out of   7 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



✓ Stage 25 completed in 0.0 seconds

 BEST FEATURE TO ADD: efgd_pct
   CV Score: 0.6510
   Train Score: 0.7289
   Test Score: 0.7222

 Current feature set (25 features):
      1. 5man_bpm
      2. wab
      3. torvik_rtg
      4. elite_outcome_probability
      5. 5man_dbpm
      6. 3man_prpg
      7. four_factors_composite
      8. lineup_depth_quality
      9. 3man_bpm
     10. 3man_dbpm
     11. assist_to_usage_ratio
     12. 5man_obpm
     13. 3man_obpm
     14. def_lineup_depth_quality
     15. torvik_off
     16. kenpom_off
     17. astd_pct
     18. 2pd_pct
     19. def_3pt_fg_pct
     20. 3pd_pct
     21. kenpom_def
     22. kenpom_rtg
     23. torvik_def
     24. def_assist_suppression
   ★ 25. efgd_pct

 Results saved to:
   - forward_selection_results.csv
   - forward_selection_summary.txt

STAGE 26: Adding feature #26
Currently selected: 25 features
Remaining to test: 5 features
Testing all 5 remaining features...

✓ Stage 26 completed in 0.0 seconds

 BEST FEATURE TO ADD:

In [9]:
# Show the plain-text checkpoint written during forward selection
with open('forward_selection_summary.txt', 'r') as summary_file:
    summary_text = summary_file.read()
print(summary_text)

# Reload the per-stage table saved next to it
import pandas as pd
results = pd.read_csv('forward_selection_results.csv')
print(results)

FORWARD SELECTION RESULTS

Completed 30/30 stages


Stage 1: 1 features
  Added: 5man_bpm
  CV Score: 0.6393
  Train Score: 0.6325
  Test Score: 0.7083
  Features: ['5man_bpm']

Stage 2: 2 features
  Added: wab
  CV Score: 0.6455
  Train Score: 0.6506
  Test Score: 0.6944
  Features: ['5man_bpm', 'wab']

Stage 3: 3 features
  Added: torvik_rtg
  CV Score: 0.6755
  Train Score: 0.6867
  Test Score: 0.7083
  Features: ['5man_bpm', 'wab', 'torvik_rtg']

Stage 4: 4 features
  Added: elite_outcome_probability
  CV Score: 0.6877
  Train Score: 0.6807
  Test Score: 0.7083
  Features: ['5man_bpm', 'wab', 'torvik_rtg', 'elite_outcome_probability']

Stage 5: 5 features
  Added: 5man_dbpm
  CV Score: 0.6996
  Train Score: 0.6928
  Test Score: 0.7222
  Features: ['5man_bpm', 'wab', 'torvik_rtg', 'elite_outcome_probability', '5man_dbpm']

Stage 6: 6 features
  Added: 3man_prpg
  CV Score: 0.6994
  Train Score: 0.7169
  Test Score: 0.7222
  Features: ['5man_bpm', 'wab', 'torvik_rtg', 'elite_outcome_

In [None]:
# Reference note (appears to be copied from the forward-selection summary):
#
# Stage 9: 9 features
#   Added: 3man_bpm
#   CV Score: 0.7175
#   Train Score: 0.7108
#   Test Score: 0.7361
#
#   Features: [
#     '5man_bpm',
#     'wab',
#     'torvik_rtg',
#     'elite_outcome_probability',
#     '5man_dbpm',
#     '3man_prpg',
#     'four_factors_composite',
#     'lineup_depth_quality',
#     '3man_bpm',
#   ]
