In [2]:
#!/usr/bin/env python
"""
Enhanced Binary Tumor Grade Classifier with Robust Validation
Classifies gliomas as High Grade (4) vs Low Grade (2&3)
"""

# ============= COMPLETE IMPORTS =============
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import joblib
from datetime import datetime

from sklearn.model_selection import (
    train_test_split, cross_val_score, StratifiedKFold, 
    GridSearchCV, cross_validate
)
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.utils.class_weight import compute_class_weight
from sklearn.feature_selection import SelectKBest, f_classif, RFECV
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.utils import resample
from sklearn.metrics import (
    classification_report, confusion_matrix, roc_auc_score,
    roc_curve, accuracy_score, balanced_accuracy_score,
    precision_recall_curve, average_precision_score,
    f1_score, precision_score, recall_score, make_scorer
)

warnings.filterwarnings('ignore')
print("✓ All libraries imported successfully!")

# ============= CLINICAL FEATURE ENHANCEMENT FUNCTION =============
def enhance_clinical_features(df):
    """
    Create more informative clinical features based on domain knowledge
    This function should be called BEFORE one-hot encoding
    """
    print("\nEnhancing clinical features...")
    
    # 1. Create molecular subtype based on WHO 2021 classification
    def get_molecular_class(row):
        idh = str(row.get('idh_status', row.get('IDH', 'unknown'))).lower()
        p19q = str(row.get('1p19q', row.get('1p/19q', 'unknown'))).lower()
        
        if 'wildtype' in idh:
            return 'glioblastoma_IDHwt'
        elif 'co-deletion' in p19q and 'wildtype' not in idh:
            return 'oligodendroglioma_IDHmut_1p19q'
        elif idh not in ['unknown', 'missing', 'nan', ''] and 'wildtype' not in idh:
            return 'astrocytoma_IDHmut'
        else:
            return 'molecular_unknown'
    
    df['molecular_class'] = df.apply(get_molecular_class, axis=1)
    
    # 2. Age categories (clinically meaningful cutoffs)
    age_col = 'age_at_mri' if 'age_at_mri' in df.columns else 'Age at MRI'
    if age_col in df.columns:
        df['age_pediatric'] = (df[age_col] < 18).astype(int)
        df['age_young_adult'] = ((df[age_col] >= 18) & (df[age_col] < 40)).astype(int)
        df['age_middle'] = ((df[age_col] >= 40) & (df[age_col] < 55)).astype(int)
        df['age_older'] = ((df[age_col] >= 55) & (df[age_col] < 65)).astype(int)
        df['age_elderly'] = (df[age_col] >= 65).astype(int)
        
        # Age as risk factor (continuous)
        df['age_risk_score'] = df[age_col] / 100  # Normalize to 0-1 range
    
    # 3. Process MGMT index as ordinal (if available)
    mgmt_idx_col = 'mgmt_index' if 'mgmt_index' in df.columns else 'MGMT index'
    if mgmt_idx_col in df.columns:
        df['mgmt_index_numeric'] = pd.to_numeric(df[mgmt_idx_col], errors='coerce')
        df['mgmt_methylated_high'] = (df['mgmt_index_numeric'] > 10).astype(int)
        df['mgmt_methylated_low'] = ((df['mgmt_index_numeric'] > 0) & (df['mgmt_index_numeric'] <= 10)).astype(int)
        df['mgmt_unmethylated'] = (df['mgmt_index_numeric'] == 0).astype(int)
    
    # 4. IDH mutation binary (simplified)
    idh_col = 'idh_status' if 'idh_status' in df.columns else 'IDH'
    if idh_col in df.columns:
        df['idh_wildtype'] = df[idh_col].str.contains('wildtype', case=False, na=False).astype(int)
        df['idh_mutant'] = (~df[idh_col].str.contains('wildtype', case=False, na=False) & 
                           df[idh_col].notna() & 
                           (df[idh_col] != 'unknown')).astype(int)
    
    # 5. 1p19q co-deletion binary
    p19q_col = '1p19q' if '1p19q' in df.columns else '1p/19q'
    if p19q_col in df.columns:
        df['has_1p19q_codeletion'] = df[p19q_col].str.contains('co-deletion', case=False, na=False).astype(int)
        df['p19q_intact'] = df[p19q_col].str.contains('intact', case=False, na=False).astype(int)
    
    # 6. Combined molecular risk score
    df['molecular_risk_score'] = 0
    if 'idh_wildtype' in df.columns:
        df['molecular_risk_score'] += df['idh_wildtype'] * 3  # Strongest predictor of Grade 4
    if 'has_1p19q_codeletion' in df.columns:
        df['molecular_risk_score'] -= df['has_1p19q_codeletion'] * 2  # Predicts lower grade
    
    mgmt_col = 'mgmt_status' if 'mgmt_status' in df.columns else 'MGMT status'
    if mgmt_col in df.columns:
        df['molecular_risk_score'] += (df[mgmt_col] == 'negative').astype(int)
        df['molecular_risk_score'] -= (df[mgmt_col] == 'positive').astype(int)
    
    # 7. Extent of resection score
    eor_col = 'eor' if 'eor' in df.columns else 'EOR'
    if eor_col in df.columns:
        eor_map = {'GTR': 3, 'STR': 2, 'biopsy': 1}
        df['eor_score'] = df[eor_col].map(eor_map).fillna(0)
    
    # 8. Create interaction features
    if 'age_risk_score' in df.columns and 'idh_wildtype' in df.columns:
        df['age_idh_interaction'] = df['age_risk_score'] * df['idh_wildtype']
    
    # 9. Favorable vs unfavorable profile
    df['favorable_profile'] = 0
    if 'idh_mutant' in df.columns:
        df['favorable_profile'] += df['idh_mutant']
    if 'has_1p19q_codeletion' in df.columns:
        df['favorable_profile'] += df['has_1p19q_codeletion']
    if mgmt_col in df.columns:
        df['favorable_profile'] += (df[mgmt_col] == 'positive').astype(int)
    if age_col in df.columns:
        df['favorable_profile'] += (df[age_col] < 50).astype(int)
    
    print(f"  Added {sum([1 for col in df.columns if col not in ['molecular_class']])} new clinical features")
    print(f"  Molecular classes: {df['molecular_class'].value_counts().to_dict()}")
    
    return df

# ============= BOOTSTRAP FEATURE IMPORTANCE =============
def bootstrap_feature_importance(X, y, model_class, model_params, n_bootstrap=50, random_state=42):
    """
    Calculate bootstrap confidence intervals for feature importance
    """
    print(f"\nCalculating bootstrap feature importance ({n_bootstrap} iterations)...")
    
    np.random.seed(random_state)
    importances_list = []
    
    for i in range(n_bootstrap):
        # Bootstrap sample
        X_boot, y_boot = resample(X, y, random_state=i)
        
        # Fit model
        model = model_class(**model_params)
        model.fit(X_boot, y_boot)
        
        # Store importances
        importances_list.append(model.feature_importances_)
        
        if (i + 1) % 10 == 0:
            print(f"  Completed {i + 1}/{n_bootstrap} bootstrap samples")
    
    # Calculate statistics
    importances_array = np.array(importances_list)
    importance_mean = np.mean(importances_array, axis=0)
    importance_std = np.std(importances_array, axis=0)
    importance_ci_lower = np.percentile(importances_array, 2.5, axis=0)
    importance_ci_upper = np.percentile(importances_array, 97.5, axis=0)
    
    return {
        'mean': importance_mean,
        'std': importance_std,
        'ci_lower': importance_ci_lower,
        'ci_upper': importance_ci_upper
    }

# ============= ENHANCED EVALUATION FUNCTION =============
def evaluate_model_comprehensive(model, X_test, y_test, model_name="Model"):
    """
    Comprehensive model evaluation with multiple metrics
    """
    print(f"\n{'='*60}")
    print(f"{model_name.upper()} EVALUATION")
    print(f"{'='*60}")
    
    # Predictions
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    
    # Classification report
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, 
                              target_names=['Low Grade (2&3)', 'High Grade (4)'],
                              digits=3))
    
    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    print(f"\nConfusion Matrix:")
    print(f"                Predicted")
    print(f"Actual    Low   High")
    print(f"Low      {cm[0,0]:4d}  {cm[0,1]:4d}")
    print(f"High     {cm[1,0]:4d}  {cm[1,1]:4d}")
    
    # Metrics
    metrics = {
        'accuracy': accuracy_score(y_test, y_pred),
        'balanced_accuracy': balanced_accuracy_score(y_test, y_pred),
        'roc_auc': roc_auc_score(y_test, y_pred_proba),
        'f1_score': f1_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'avg_precision': average_precision_score(y_test, y_pred_proba)
    }
    
    print(f"\nDetailed Metrics:")
    for metric, value in metrics.items():
        print(f"  {metric:20s}: {value:.3f}")
    
    return metrics, y_pred, y_pred_proba

# ============= MAIN EXECUTION =============
print("\n" + "="*60)
print("ENHANCED BINARY TUMOR GRADE CLASSIFIER")
print("High Grade (4) vs Low Grade (2&3)")
print("="*60)

# Load your original data
df = pd.read_csv('patient_level_comprehensive_features_fixed.csv')
print(f"\nInitial data shape: {df.shape}")

# ============= DROP SHELL FEATURES =============
shell_features_to_drop = [
    'patient_id', 'shell_0_fa_mean', 'shell_0_fa_std', 'shell_0_fa_median', 'shell_0_voxel_count',
    'shell_1_fa_mean', 'shell_1_fa_std', 'shell_1_fa_median', 'shell_1_voxel_count',
    'shell_2_fa_mean', 'shell_2_fa_std', 'shell_2_fa_median', 'shell_2_voxel_count',
    'shell_3_fa_mean', 'shell_3_fa_std', 'shell_3_fa_median', 'shell_3_voxel_count',
    'shell_0_gradient_mean', 'shell_0_gradient_std', 'shell_1_gradient_mean',
    'shell_1_gradient_std', 'shell_2_gradient_mean', 'shell_2_gradient_std',
    'shell_3_gradient_mean', 'shell_3_gradient_std'
]

cols_to_drop = [col for col in shell_features_to_drop if col in df.columns]
df = df.drop(columns=cols_to_drop)
print(f"Shape after dropping shell features: {df.shape}")

# ============= CREATE BINARY TARGET =============
df['grade_binary'] = (df['who_grade'] == 4).astype(int)

print(f"\nOriginal grade distribution:")
print(df['who_grade'].value_counts().sort_index())
print(f"\nBinary grade distribution:")
print(f"High Grade (1): {(df['grade_binary']==1).sum()} samples ({(df['grade_binary']==1).mean():.1%})")
print(f"Low Grade (0): {(df['grade_binary']==0).sum()} samples ({(df['grade_binary']==0).mean():.1%})")

# ============= ENHANCE CLINICAL FEATURES =============
df = enhance_clinical_features(df)

# ============= HANDLE REMAINING CATEGORICAL FEATURES =============
categorical_cols = ['sex', 'mgmt_status', 'mgmt_index', '1p19q', 'idh_status', 'eor', 'molecular_class']
existing_cat_cols = [col for col in categorical_cols if col in df.columns]
print(f"\nCategorical columns to encode: {existing_cat_cols}")

# Handle missing values and encode
df_processed = df.copy()
for col in existing_cat_cols:
    df_processed[col] = df_processed[col].fillna('missing')
    df_processed[col] = df_processed[col].replace('unknown', 'missing')

if existing_cat_cols:
    df_processed = pd.get_dummies(df_processed, columns=existing_cat_cols, drop_first=False)

# Handle missing values in numerical features
numerical_cols = df_processed.select_dtypes(include=[np.number]).columns
numerical_cols = numerical_cols.drop(['who_grade', 'grade_binary'])

imputer = SimpleImputer(strategy='median')
df_processed[numerical_cols] = imputer.fit_transform(df_processed[numerical_cols])

print(f"\nFinal processed data shape: {df_processed.shape}")

# ============= IDENTIFY CLINICAL VS IMAGING FEATURES =============
clinical_keywords = ['age', 'sex', 'mgmt', 'idh', '1p19q', 'eor', 'molecular', 'favorable', 
                    'risk_score', 'methylated', 'wildtype', 'mutant', 'codeletion', 'intact']
imaging_keywords = ['fa_', 'boundary', 'core', 'enhancing', 'edema', 'gradient', 'voxel']

all_features = [col for col in df_processed.columns if col not in ['who_grade', 'grade_binary']]
clinical_features = [f for f in all_features if any(k in f.lower() for k in clinical_keywords)]
imaging_features = [f for f in all_features if any(k in f.lower() for k in imaging_keywords)]
other_features = [f for f in all_features if f not in clinical_features and f not in imaging_features]

print(f"\nFeature breakdown:")
print(f"  Clinical features: {len(clinical_features)}")
print(f"  Imaging features: {len(imaging_features)}")
print(f"  Other features: {len(other_features)}")

# ============= PREPARE FEATURES AND TARGET =============
X = df_processed.drop(columns=['who_grade', 'grade_binary'])
y = df_processed['grade_binary']

print(f"\nFeatures shape: {X.shape}")
print(f"Target shape: {y.shape}")

# ============= TRAIN-TEST SPLIT =============
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

print(f"\nTrain/Test split:")
print(f"  Train: {X_train.shape}, High Grade: {y_train.mean():.1%}")
print(f"  Test: {X_test.shape}, High Grade: {y_test.mean():.1%}")

# ============= FEATURE SCALING =============
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ============= FEATURE SELECTION (NEW!) =============
print("\n" + "="*60)
print("FEATURE SELECTION")
print("="*60)

# Calculate class weights
classes = np.unique(y_train)
weights = compute_class_weight('balanced', classes=classes, y=y_train)
class_weight_dict = dict(zip(classes, weights))
print(f"\nClass weights: {class_weight_dict}")

# Feature selection using SelectKBest
print("\nPerforming feature selection...")
selector = SelectKBest(f_classif, k=min(50, X_train_scaled.shape[1]))
X_train_selected = selector.fit_transform(X_train_scaled, y_train)
X_test_selected = selector.transform(X_test_scaled)

selected_features = X.columns[selector.get_support()]
print(f"Selected {len(selected_features)} features from {X_train_scaled.shape[1]} original features")

# ============= HYPERPARAMETER OPTIMIZATION (NEW!) =============
print("\n" + "="*60)
print("HYPERPARAMETER OPTIMIZATION")
print("="*60)

# Define parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [15, 20, 25],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2']
}

print(f"Parameter grid size: {np.prod([len(v) for v in param_grid.values()])} combinations")

# Grid search with cross-validation
rf_base = RandomForestClassifier(
    class_weight=class_weight_dict,
    random_state=42,
    n_jobs=-1
)

grid_search = GridSearchCV(
    rf_base,
    param_grid,
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
    scoring='balanced_accuracy',
    n_jobs=-1,
    verbose=1
)

print("\nPerforming grid search...")
grid_search.fit(X_train_selected, y_train)

print(f"\nBest parameters: {grid_search.best_params_}")
print(f"Best cross-validation score: {grid_search.best_score_:.3f}")

# Get best model
best_rf = grid_search.best_estimator_

# ============= CROSS-VALIDATION ASSESSMENT (NEW!) =============
print("\n" + "="*60)
print("CROSS-VALIDATION ASSESSMENT")
print("="*60)

# Define scoring metrics
scoring = {
    'accuracy': 'accuracy',
    'balanced_accuracy': 'balanced_accuracy',
    'roc_auc': 'roc_auc',
    'f1': 'f1',
    'precision': 'precision',
    'recall': 'recall'
}

print("Performing comprehensive cross-validation...")
cv_results = cross_validate(
    best_rf,
    X_train_selected,
    y_train,
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
    scoring=scoring,
    return_train_score=True,
    n_jobs=-1
)

print("\nCross-Validation Results (5-fold):")
print("-" * 50)
for metric in scoring.keys():
    train_scores = cv_results[f'train_{metric}']
    test_scores = cv_results[f'test_{metric}']
    print(f"{metric:20s}: {test_scores.mean():.3f} ± {test_scores.std():.3f} "
          f"(train: {train_scores.mean():.3f})")

# ============= MODEL TRAINING ON FULL TRAINING SET =============
print("\n" + "="*60)
print("FINAL MODEL TRAINING")
print("="*60)

print("Training final model on full training set...")
best_rf.fit(X_train_selected, y_train)

# ============= COMPREHENSIVE MODEL EVALUATION =============
final_metrics, y_pred, y_pred_proba = evaluate_model_comprehensive(
    best_rf, X_test_selected, y_test, "Final Optimized Model"
)

# ============= BOOTSTRAP FEATURE IMPORTANCE (NEW!) =============
print("\n" + "="*60)
print("BOOTSTRAP FEATURE IMPORTANCE ANALYSIS")
print("="*60)

# Get bootstrap feature importance
bootstrap_importance = bootstrap_feature_importance(
    X_train_selected, y_train,
    RandomForestClassifier,
    best_rf.get_params(),
    n_bootstrap=50,
    random_state=42
)

# Create comprehensive feature importance DataFrame
feature_importance_df = pd.DataFrame({
    'feature': selected_features,
    'importance_mean': bootstrap_importance['mean'],
    'importance_std': bootstrap_importance['std'],
    'importance_ci_lower': bootstrap_importance['ci_lower'],
    'importance_ci_upper': bootstrap_importance['ci_upper'],
    'category': ['clinical' if any(k in f.lower() for k in clinical_keywords) 
                 else 'imaging' if any(k in f.lower() for k in imaging_keywords)
                 else 'other' for f in selected_features]
}).sort_values('importance_mean', ascending=False)

# Display top features with confidence intervals
print("\nTop 15 Features (with 95% confidence intervals):")
print("-" * 80)
for idx, row in feature_importance_df.head(15).iterrows():
    ci_width = row['importance_ci_upper'] - row['importance_ci_lower']
    print(f"{row['feature']:35s}: {row['importance_mean']:.4f} "
          f"[{row['importance_ci_lower']:.4f}, {row['importance_ci_upper']:.4f}] "
          f"({row['category']})")

# Category importance with confidence
clinical_importance = feature_importance_df[feature_importance_df['category'] == 'clinical']['importance_mean'].sum()
imaging_importance = feature_importance_df[feature_importance_df['category'] == 'imaging']['importance_mean'].sum()
total_importance = clinical_importance + imaging_importance

print(f"\nOverall Feature Category Importance:")
print(f"  Clinical: {clinical_importance:.3f} ({clinical_importance/total_importance:.1%})")
print(f"  Imaging: {imaging_importance:.3f} ({imaging_importance/total_importance:.1%})")

# ============= MODEL CALIBRATION ASSESSMENT (NEW!) =============
print("\n" + "="*60)
print("MODEL CALIBRATION ASSESSMENT")
print("="*60)

# Calibrate probabilities
calibrated_model = CalibratedClassifierCV(best_rf, method='isotonic', cv=3)
calibrated_model.fit(X_train_selected, y_train)

# Get calibrated predictions
y_pred_cal_proba = calibrated_model.predict_proba(X_test_selected)[:, 1]

# Calibration assessment
fraction_pos, mean_pred_value = calibration_curve(y_test, y_pred_proba, n_bins=10)
fraction_pos_cal, mean_pred_value_cal = calibration_curve(y_test, y_pred_cal_proba, n_bins=10)

print("Calibration Assessment:")
print(f"  Original model reliability: Mean absolute difference = "
      f"{np.mean(np.abs(fraction_pos - mean_pred_value)):.3f}")
print(f"  Calibrated model reliability: Mean absolute difference = "
      f"{np.mean(np.abs(fraction_pos_cal - mean_pred_value_cal)):.3f}")

# ============= CLINICAL-ONLY MODEL COMPARISON (ENHANCED) =============
print("\n" + "="*60)
print("CLINICAL-ONLY MODEL COMPARISON")
print("="*60)

# Select only clinical features from selected features
clinical_selected = [f for f in selected_features if f in clinical_features]
clinical_indices = [i for i, f in enumerate(selected_features) if f in clinical_selected]

if len(clinical_selected) > 0:
    X_train_clinical = X_train_selected[:, clinical_indices]
    X_test_clinical = X_test_selected[:, clinical_indices]
    
    print(f"Using {len(clinical_selected)} clinical features")
    
    # Train clinical-only model with same hyperparameters
    rf_clinical = RandomForestClassifier(**best_rf.get_params())
    rf_clinical.fit(X_train_clinical, y_train)
    
    # Evaluate clinical-only model
    clinical_metrics, _, _ = evaluate_model_comprehensive(
        rf_clinical, X_test_clinical, y_test, "Clinical-Only Model"
    )
    
    print(f"\nModel Comparison:")
    print("-" * 40)
    for metric in ['accuracy', 'balanced_accuracy', 'roc_auc', 'f1_score']:
        improvement = final_metrics[metric] - clinical_metrics[metric]
        print(f"{metric:20s}: Full {final_metrics[metric]:.3f} vs Clinical {clinical_metrics[metric]:.3f} "
              f"(+{improvement:+.3f})")

# ============= SAVE ENHANCED RESULTS =============
print("\n" + "="*60)
print("SAVING ENHANCED RESULTS")
print("="*60)

# Create results dictionary
results = {
    'timestamp': datetime.now().isoformat(),
    'model_type': 'RandomForestClassifier',
    'best_params': best_rf.get_params(),
    'cv_results': {metric: {'mean': cv_results[f'test_{metric}'].mean(), 
                           'std': cv_results[f'test_{metric}'].std()} 
                  for metric in scoring.keys()},
    'test_metrics': final_metrics,
    'selected_features': selected_features.tolist(),
    'feature_importance': feature_importance_df.to_dict('records')
}

# Save models and results
joblib.dump(best_rf, 'enhanced_binary_classifier_optimized.pkl')
joblib.dump(calibrated_model, 'enhanced_binary_classifier_calibrated.pkl')
joblib.dump(scaler, 'enhanced_binary_scaler_optimized.pkl')
joblib.dump(selector, 'enhanced_feature_selector.pkl')
feature_importance_df.to_csv('enhanced_feature_importance_with_ci.csv', index=False)

# Save results summary
import json

def convert_numpy_types(obj):
    """Convert numpy types to native Python types for JSON serialization"""
    if isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, dict):
        return {str(key): convert_numpy_types(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        return [convert_numpy_types(item) for item in obj]
    else:
        return obj

# Convert results to JSON-serializable format
results_serializable = convert_numpy_types(results)

with open('model_results_summary.json', 'w') as f:
    json.dump(results_serializable, f, indent=2)

print("✓ Optimized model saved as 'enhanced_binary_classifier_optimized.pkl'")
print("✓ Calibrated model saved as 'enhanced_binary_classifier_calibrated.pkl'")
print("✓ Feature selector saved as 'enhanced_feature_selector.pkl'")
print("✓ Feature importance with CI saved as 'enhanced_feature_importance_with_ci.csv'")
print("✓ Results summary saved as 'model_results_summary.json'")

print("\n" + "="*60)
print("ENHANCED ANALYSIS COMPLETE!")
print("="*60)
print(f"Final model performance:")
print(f"  Cross-validation balanced accuracy: {cv_results['test_balanced_accuracy'].mean():.3f} ± {cv_results['test_balanced_accuracy'].std():.3f}")
print(f"  Test set balanced accuracy: {final_metrics['balanced_accuracy']:.3f}")
print(f"  Test set ROC-AUC: {final_metrics['roc_auc']:.3f}")
print(f"  Features used: {len(selected_features)} of {X.shape[1]} original features")
print("="*60)

✓ All libraries imported successfully!

ENHANCED BINARY TUMOR GRADE CLASSIFIER
High Grade (4) vs Low Grade (2&3)

Initial data shape: (500, 91)
Shape after dropping shell features: (500, 66)

Original grade distribution:
who_grade
2     56
3     43
4    401
Name: count, dtype: int64

Binary grade distribution:
High Grade (1): 401 samples (80.2%)
Low Grade (0): 99 samples (19.8%)

Enhancing clinical features...
  Added 85 new clinical features
  Molecular classes: {'glioblastoma_IDHwt': 397, 'astrocytoma_IDHmut': 88, 'oligodendroglioma_IDHmut_1p19q': 15}

Categorical columns to encode: ['sex', 'mgmt_status', 'mgmt_index', '1p19q', 'idh_status', 'eor', 'molecular_class']

Final processed data shape: (500, 124)

Feature breakdown:
  Clinical features: 64
  Imaging features: 50
  Other features: 11

Features shape: (500, 122)
Target shape: (500,)

Train/Test split:
  Train: (400, 122), High Grade: 80.2%
  Test: (100, 122), High Grade: 80.0%

FEATURE SELECTION

Class weights: {0: 2.53164556