# In [None]:
# ============================================================================
# DEPENDENCIES & SETUP
# ============================================================================
import os
import gc
import random
import warnings
from pathlib import Path
from typing import Dict, List, Tuple, Optional

import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder, RobustScaler
import matplotlib.pyplot as plt
import seaborn as sns
from pytabkit import TabM_D_Regressor

# Silence every warning category for the whole session.
warnings.simplefilter('ignore')
# Show all DataFrame columns when printing (no column truncation).
pd.options.display.max_columns = None

# ============================================================================
# CONFIGURATION
# ============================================================================
class Config:
    """Centralized configuration management.

    Every setting is a class attribute, so values can be read either from the
    class itself or from an instance.  ``TABM_CONFIG`` is built while the class
    body executes and therefore picks up ``DEVICE`` and ``SEED`` defined above
    it; the seed is declared once and reused there so the model and the rest of
    the pipeline cannot drift apart.
    """

    # Experiment Settings
    EXPERIMENT_NAME: str = "TabM_Production_v2"
    SEED: int = 42
    N_FOLDS: int = 10
    TARGET: str = 'exam_score'

    # Hardware
    DEVICE: str = 'cuda' if torch.cuda.is_available() else 'cpu'
    N_WORKERS: int = 4  # NOTE(review): not referenced in the visible code

    # TabM Hyperparameters (Optimized)
    # Passed as keyword arguments to TabM_D_Regressor by the trainer.
    TABM_CONFIG: Dict = {
        'device': DEVICE,
        'random_state': SEED,  # reuse SEED instead of repeating the literal
        'verbosity': 2,
        'arch_type': 'tabm-mini-normal',
        'tabm_k': 24,  # Increased ensemble diversity
        'num_emb_type': 'pwl',
        'd_embedding': 16,  # Richer embeddings
        'batch_size': 256,
        'lr': 1e-3,  # Slightly lower for stability
        'n_epochs': 100,  # More training budget
        'dropout': 0.11,  # Increased regularization
        'd_block': 256,  # Wider architecture
        'n_blocks': 5,  # Deeper network
        'patience': 4,  # More patience for convergence
        'weight_decay': 1e-2,
    }

    # Feature Engineering
    # NOTE(review): POLY_DEGREE and INTERACTION_PAIRS are not referenced in
    # the visible code; FeatureEngineer hard-codes its own feature list.
    POLY_DEGREE: int = 2
    INTERACTION_PAIRS: List[Tuple[str, str]] = [
        ('study_hours', 'class_attendance'),
        ('study_hours', 'sleep_hours'),
        ('sleep_hours', 'sleep_quality'),
    ]

    # Blending Strategy
    ENSEMBLE_WEIGHT: float = 0.5  # Weight for CV ensemble (0.5 = equal blend)
    REFIT_WEIGHT: float = 0.5     # Weight for full refit (0.5 = equal blend)

    # Output Settings
    OUTPUT_DIR: Path = Path('./outputs')
    SAVE_PLOTS: bool = True
    SAVE_MODEL: bool = True  # Save final model
    CLIP_MIN: float = 19.6   # lower bound applied to submitted predictions
    CLIP_MAX: float = 100.0  # upper bound applied to submitted predictions


# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================
def seed_everything(seed: int = 42) -> None:
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Value applied to every random number generator and exported as
            ``PYTHONHASHSEED``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Each of these takes the seed as its only argument.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Trade cuDNN autotuning for repeatable kernels.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True


def setup_directories(config: Config) -> None:
    """Create ``config.OUTPUT_DIR`` and its ``plots`` / ``models`` subfolders.

    Existing directories are left untouched.
    """
    root = config.OUTPUT_DIR
    root.mkdir(parents=True, exist_ok=True)
    for subfolder in ('plots', 'models'):
        (root / subfolder).mkdir(exist_ok=True)


def print_section(title: str, char: str = '=') -> None:
    """Print ``title`` centred in 80 columns between two rules of ``char``.

    A blank line is emitted before the top rule and after the bottom rule.
    """
    rule = char * 80
    centred_title = title.center(80)
    print("\n" + rule)
    print(centred_title)
    print(rule + "\n")

# ============================================================================
# FEATURE ENGINEERING — KAGGLE / TABM EMBEDDING HACK
# ============================================================================
class FeatureEngineer:
    """
    Kaggle-optimized Feature Engineering for TabM
    (Intentionally exploits categorical embeddings on numeric features)

    Two derived features depend on statistics of the frame they are computed
    from: the equal-width bins behind ``study_intensity`` /
    ``attendance_level`` and the offset subtracted in ``formula_log``.  These
    statistics are learned from the FIRST frame passed to
    :meth:`engineer_features` and reused for every later frame, so the same
    raw value maps to the same feature value in train, test and augmentation
    data.  Pass the training frame first, or pass ``refit=True`` to relearn.
    """

    # Labels for the four equal-width bins of each binned column.
    _STUDY_LABELS = ['low', 'medium', 'high', 'very_high']
    _ATTENDANCE_LABELS = ['poor', 'fair', 'good', 'excellent']

    def __init__(self, config: 'Config'):
        """Store the config and declare which columns are treated as categorical."""
        self.config = config
        self.eps = 1e-6  # keeps the `efficiency` ratio finite when sleep_hours is 0

        # ============================
        # TRUE CATEGORICAL FEATURES
        # ============================
        self.base_categorical = [
            'gender',
            'course',
            'study_method'
        ]

        # ============================
        # NUMERICS TO DUPLICATE AS CATS
        # ============================
        self.numeric_to_cat = [
            'age',
            'study_hours',
            'class_attendance',
            'sleep_hours',
            'sleep_quality',
            'facility_rating',
            'exam_difficulty',
            'internet_access'
        ]

        # Statistics learned on the first engineered frame (see class docstring).
        self._bin_edges: Dict[str, np.ndarray] = {}
        self._formula_min: Optional[float] = None

    # =========================================================================
    def _cut_shared(self, values: pd.Series, key: str, labels: List[str]) -> pd.Series:
        """Bin ``values`` into ``len(labels)`` equal-width bins with shared edges.

        The edges are computed from the first series seen for ``key`` and
        cached.  The outermost edges are widened to +/- infinity so values
        outside the learned range fall into the first or last bin instead of
        becoming missing.
        """
        edges = self._bin_edges.get(key)
        if edges is None:
            _, learned = pd.cut(values, bins=len(labels), retbins=True)
            edges = np.asarray(learned, dtype=float).copy()
            edges[0], edges[-1] = -np.inf, np.inf
            self._bin_edges[key] = edges
        return pd.cut(values, bins=edges, labels=labels)

    # =========================================================================
    def engineer_features(self, df: pd.DataFrame, refit: bool = False) -> pd.DataFrame:
        """Return a copy of ``df`` with all engineered columns appended.

        Args:
            df: Raw frame containing the base columns used below.
            refit: When True, discard the cached bin edges and formula offset
                and relearn them from ``df``.

        Returns:
            A new DataFrame; the input is not modified.
        """
        df = df.copy()

        if refit:
            self._bin_edges.clear()
            self._formula_min = None

        # =====================================================
        # 1. FORCE BASE NUMERICS
        # =====================================================
        for col in ('age', 'study_hours', 'class_attendance', 'sleep_hours'):
            df[col] = df[col].astype(float)

        # =====================================================
        # 2. MAP SEMI-CATEGORICALS TO NUMERIC (FOR FORMULAS)
        # =====================================================
        # Unmapped / missing values fall back to 0 ("no") and 2 ("average").
        df['internet_access_num'] = df['internet_access'].map({'no': 0, 'yes': 1}).fillna(0)
        df['sleep_quality_num'] = df['sleep_quality'].map(
            {'poor': 1, 'average': 2, 'good': 3, 'excellent': 4}
        ).fillna(2)

        # =====================================================
        # 3. NUMERIC FEATURE ENGINEERING
        # =====================================================
        # Cyclic (both use a fixed period of 12)
        df['study_hours_sin'] = np.sin(2 * np.pi * df['study_hours'] / 12)
        df['class_attendance_sin'] = np.sin(2 * np.pi * df['class_attendance'] / 12)

        # Polynomials (negatives are clipped to 0 before the log)
        for col in ['study_hours', 'class_attendance', 'sleep_hours']:
            df[f'{col}_sq'] = df[col] ** 2
            df[f'log_{col}'] = np.log1p(df[col].clip(lower=0))

        # Interactions
        df['study_x_attendance'] = df['study_hours'] * df['class_attendance']
        df['study_x_sleep'] = df['study_hours'] * df['sleep_hours']
        df['efficiency'] = (
            df['study_hours'] * df['class_attendance']
        ) / (df['sleep_hours'] + self.eps)

        # =====================================================
        # 4. MAGIC LINEAR FORMULA (CORE SIGNAL)
        # =====================================================
        df['feature_formula'] = (
            5.9051154511950499 * df['study_hours'] +
            0.34540967058057986 * df['class_attendance'] +
            1.423461171860262 * df['sleep_hours'] +
            4.7819
        )

        df['formula_sq'] = df['feature_formula'] ** 2

        # Use one shared offset for every frame; learn it from the first one.
        formula_min = self._formula_min
        if formula_min is None:
            formula_min = float(df['feature_formula'].min())
            if not np.isnan(formula_min):
                self._formula_min = formula_min
        # Clip so frames with smaller values than the reference stay defined.
        shifted = (df['feature_formula'] - formula_min).clip(lower=0)
        df['formula_log'] = np.log1p(shifted + 1)

        # =====================================================
        # 5. BINNING -> EXTRA TOKENS
        # =====================================================
        df['study_intensity'] = self._cut_shared(
            df['study_hours'], 'study_hours', self._STUDY_LABELS
        )
        df['attendance_level'] = self._cut_shared(
            df['class_attendance'], 'class_attendance', self._ATTENDANCE_LABELS
        )

        # =====================================================
        # 6. DUPLICATE NUMERICS AS CATEGORICAL TOKENS (THE HACK)
        # =====================================================
        for col in self.numeric_to_cat:
            df[f'{col}_cat'] = df[col].astype(str)

        # =====================================================
        # 7. CAST TRUE CATEGORICALS
        # =====================================================
        for col in self.base_categorical + ['study_intensity', 'attendance_level']:
            df[col] = df[col].astype(str)

        return df

    # =========================================================================
    def get_feature_groups(self, df: pd.DataFrame) -> Tuple[List[str], List[str]]:
        """Split the engineered columns into categorical and numerical lists.

        Args:
            df: A frame produced by :meth:`engineer_features`.

        Returns:
            ``(categorical, numerical)``.  ``categorical`` is the fixed list of
            token columns; ``numerical`` holds every remaining numeric-dtype
            column of ``df`` in column order, excluding the target and the
            ``id`` / ``student_id`` identifiers.
        """
        # ============================
        # CATEGORICAL FEATURES
        # ============================
        categorical = (
            self.base_categorical +
            ['study_intensity', 'attendance_level'] +
            [f'{c}_cat' for c in self.numeric_to_cat]
        )

        # ============================
        # NUMERICAL FEATURES
        # ============================
        excluded = set(categorical) | {self.config.TARGET, 'id', 'student_id'}

        # SAFETY CHECK: only real numerics (raw string columns are dropped here)
        numerical = [
            col for col in df.columns
            if col not in excluded and pd.api.types.is_numeric_dtype(df[col])
        ]

        return categorical, numerical

        
# ============================================================================
# DATA PREPROCESSING
# ============================================================================
class DataPreprocessor:
    """Robust data preprocessing pipeline.

    Ordinal-encodes the categorical columns and robust-scales the numerical
    columns.  Call :meth:`fit` once on the reference frame, then
    :meth:`transform` on any frame containing the same columns.
    """

    def __init__(self):
        # Unseen categories are encoded as -1 and missing values as -2.
        self.encoder = OrdinalEncoder(
            handle_unknown='use_encoded_value',
            unknown_value=-1,
            encoded_missing_value=-2
        )
        self.scaler = RobustScaler()  # More robust to outliers than StandardScaler
        # None means "not fitted yet"; fit() replaces both with lists.
        self.categorical_cols = None
        self.numerical_cols = None

    def fit(self, df: pd.DataFrame, categorical: List[str], numerical: List[str]) -> 'DataPreprocessor':
        """Fit the encoder and scaler on ``df``.

        Args:
            df: Frame containing every listed column.
            categorical: Columns to ordinal-encode (may be empty).
            numerical: Columns to scale (may be empty).

        Returns:
            ``self``, to allow chaining.
        """
        # Copy so later mutation of the caller's lists cannot desynchronise
        # the stored column names from the fitted transformers.
        self.categorical_cols = list(categorical) if categorical else []
        self.numerical_cols = list(numerical) if numerical else []

        if self.categorical_cols:
            self.encoder.fit(df[self.categorical_cols])
        if self.numerical_cols:
            self.scaler.fit(df[self.numerical_cols])

        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Apply the fitted transformations.

        Args:
            df: Frame containing the columns given to :meth:`fit`.

        Returns:
            A frame indexed like ``df`` with the encoded categorical columns
            followed by the scaled numerical columns.  If no columns were
            configured, an empty frame with ``df``'s index is returned.

        Raises:
            ValueError: If called before :meth:`fit`.
        """
        if self.categorical_cols is None or self.numerical_cols is None:
            raise ValueError("DataPreprocessor.transform called before fit")

        parts = []

        if self.categorical_cols:
            parts.append(pd.DataFrame(
                self.encoder.transform(df[self.categorical_cols]),
                columns=self.categorical_cols,
                index=df.index
            ))

        if self.numerical_cols:
            parts.append(pd.DataFrame(
                self.scaler.transform(df[self.numerical_cols]),
                columns=self.numerical_cols,
                index=df.index
            ))

        # pd.concat rejects an empty list, so handle "no features" explicitly.
        if not parts:
            return pd.DataFrame(index=df.index)

        return pd.concat(parts, axis=1)


# ============================================================================
# MODEL TRAINING & VALIDATION
# ============================================================================
class TabMTrainer:
    """Orchestrates TabM model training with cross-validation.

    Attributes:
        oof_predictions: Out-of-fold predictions for the training rows, filled
            by :meth:`train_cv`.
        test_predictions: One test-set prediction array per fold.
        fold_scores: Validation RMSE per fold.
        final_model: Model produced by :meth:`train_full_refit`, or ``None``.
    """

    def __init__(self, config: 'Config'):
        self.config = config
        self.models = []  # NOTE(review): never populated in the visible code
        self.oof_predictions = None
        self.test_predictions = []
        self.fold_scores = []
        self.final_model = None  # For full refit

    def train_cv(
        self,
        X: pd.DataFrame,
        y: np.ndarray,
        X_test: pd.DataFrame,
        categorical_cols: List[str],
        X_augment: Optional[pd.DataFrame] = None,
        y_augment: Optional[np.ndarray] = None
    ) -> None:
        """Execute cross-validated training.

        For every fold a fresh model is trained on the training split (plus
        the optional augmentation rows), validated on the held-out split and
        used to predict ``X_test``.  Results are stored on the instance.

        Args:
            X: Preprocessed training features.
            y: Training targets aligned positionally with ``X``.
            X_test: Preprocessed test features.
            categorical_cols: Names of the categorical columns in ``X``.
            X_augment: Optional extra rows appended to every training split
                (never to the validation split).
            y_augment: Targets for ``X_augment``.
        """

        print_section("CROSS-VALIDATION TRAINING")

        # Reset all per-run state so a repeated call does not mix folds from
        # an earlier run into the ensemble average or the fold statistics.
        self.oof_predictions = np.zeros(len(X))
        self.test_predictions = []
        self.fold_scores = []

        kfold = KFold(n_splits=self.config.N_FOLDS, shuffle=True, random_state=self.config.SEED)
        use_augment = X_augment is not None and y_augment is not None

        for fold, (train_idx, val_idx) in enumerate(kfold.split(X)):
            print(f"\n{'‚îÄ' * 80}")
            print(f"Fold {fold + 1}/{self.config.N_FOLDS}")
            print(f"{'‚îÄ' * 80}")

            # Split data
            X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
            y_train, y_val = y[train_idx], y[val_idx]

            # Augment with original data if available
            if use_augment:
                X_train = pd.concat([X_train, X_augment], axis=0)
                y_train = np.concatenate([y_train, y_augment])
                print(f"Training samples (augmented): {len(X_train):,}")
            else:
                print(f"Training samples: {len(X_train):,}")

            print(f"Validation samples: {len(X_val):,}")

            # Initialize and train model; the validation split is handed to
            # fit() alongside the training split.
            model = TabM_D_Regressor(**self.config.TABM_CONFIG)

            model.fit(
                X_train, y_train,
                X_val, y_val,
                cat_col_names=categorical_cols
            )

            # Generate predictions
            val_preds = model.predict(X_val)
            test_preds = model.predict(X_test)

            # Store results
            self.oof_predictions[val_idx] = val_preds
            self.test_predictions.append(test_preds)

            # Calculate metrics
            fold_rmse = np.sqrt(mean_squared_error(y_val, val_preds))
            self.fold_scores.append(fold_rmse)

            print(f"\n‚úì Fold {fold + 1} RMSE: {fold_rmse:.5f}")

            # Memory cleanup
            del model
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        # Final OOF score
        overall_rmse = np.sqrt(mean_squared_error(y, self.oof_predictions))
        print(f"\n{'=' * 80}")
        print(f"OVERALL OOF RMSE: {overall_rmse:.5f}")
        print(f"Mean Fold RMSE: {np.mean(self.fold_scores):.5f} ¬± {np.std(self.fold_scores):.5f}")
        print(f"{'=' * 80}\n")

    def train_full_refit(
        self,
        X: pd.DataFrame,
        y: np.ndarray,
        X_test: pd.DataFrame,
        categorical_cols: List[str],
        X_augment: Optional[pd.DataFrame] = None,
        y_augment: Optional[np.ndarray] = None
    ) -> np.ndarray:
        """Train the final model on (almost) the full dataset.

        Training and augmentation rows are pooled, then a random 10% is held
        out and passed to ``fit`` as the validation set; the model therefore
        trains on 90% of the pooled rows.  The hold-out split is drawn from a
        generator seeded with ``config.SEED`` so it does not depend on how
        much of the global NumPy RNG stream was consumed earlier.

        Args:
            X: Preprocessed training features.
            y: Training targets aligned positionally with ``X``.
            X_test: Preprocessed test features.
            categorical_cols: Names of the categorical columns in ``X``.
            X_augment: Optional extra rows pooled with ``X``.
            y_augment: Targets for ``X_augment``.

        Returns:
            Predictions of the refitted model for ``X_test``.
        """

        print_section("FULL DATASET REFIT")

        # Combine all training data
        X_full, y_full = X, y
        if X_augment is not None and y_augment is not None:
            X_full = pd.concat([X_full, X_augment], axis=0)
            y_full = np.concatenate([y_full, y_augment])

        print(f"Total training samples: {len(X_full):,}")

        # Train on full dataset (use 10% validation split for early stopping)
        val_size = int(len(X_full) * 0.1)
        rng = np.random.default_rng(self.config.SEED)
        indices = rng.permutation(len(X_full))

        train_idx = indices[val_size:]
        val_idx = indices[:val_size]

        X_train_full = X_full.iloc[train_idx]
        y_train_full = y_full[train_idx]
        X_val_full = X_full.iloc[val_idx]
        y_val_full = y_full[val_idx]

        print(f"Training: {len(X_train_full):,} | Validation (for early stopping): {len(X_val_full):,}")

        # Initialize and train final model
        self.final_model = TabM_D_Regressor(**self.config.TABM_CONFIG)

        self.final_model.fit(
            X_train_full, y_train_full,
            X_val_full, y_val_full,
            cat_col_names=categorical_cols
        )

        # Generate predictions
        refit_preds = self.final_model.predict(X_test)

        print(f"\n‚úì Full refit model trained successfully")
        print(f"Prediction range: [{refit_preds.min():.2f}, {refit_preds.max():.2f}]")

        return refit_preds

    def get_ensemble_predictions(self) -> np.ndarray:
        """Average test predictions across folds.

        Raises:
            RuntimeError: If no fold predictions exist yet; averaging an empty
                list would otherwise yield NaN and poison the blend silently.
        """
        if not self.test_predictions:
            raise RuntimeError("No fold predictions available. Run train_cv first.")
        return np.mean(self.test_predictions, axis=0)

    def save_final_model(self, path: Path) -> None:
        """Save the final refitted model as ``final_model.pkl`` inside ``path``.

        Prints a warning and returns without writing when no model has been
        refitted.  The target directory is created if it does not exist.
        """
        if self.final_model is None:
            print("Warning: No final model to save. Run train_full_refit first.")
            return

        import joblib
        path.mkdir(parents=True, exist_ok=True)
        model_path = path / 'final_model.pkl'
        joblib.dump(self.final_model, model_path)
        print(f"‚úì Final model saved to: {model_path}")


# ============================================================================
# VISUALIZATION
# ============================================================================
class Visualizer:
    """Generate diagnostic plots.

    Every plot is optionally saved under ``config.OUTPUT_DIR / 'plots'``
    (when ``config.SAVE_PLOTS`` is true), shown, and then closed so figures do
    not accumulate in memory across calls.
    """

    def __init__(self, config: 'Config'):
        """Store the config and apply global seaborn / matplotlib defaults."""
        self.config = config
        sns.set_style("whitegrid")
        plt.rcParams['figure.figsize'] = (12, 6)
        plt.rcParams['font.size'] = 10

    def _finish(self, fig, filename: str) -> None:
        """Tighten the layout, optionally save ``fig``, show it and close it."""
        fig.tight_layout()

        if self.config.SAVE_PLOTS:
            plot_dir = self.config.OUTPUT_DIR / 'plots'
            plot_dir.mkdir(parents=True, exist_ok=True)
            fig.savefig(plot_dir / filename, dpi=150)
        plt.show()
        plt.close(fig)  # release the figure; pyplot keeps it alive otherwise

    def plot_target_distribution(self, train_df: pd.DataFrame, original_df: pd.DataFrame) -> None:
        """Compare train vs original target distribution.

        The original curve is skipped when ``original_df`` is empty.
        """
        fig, ax = plt.subplots(1, 1, figsize=(10, 5))

        sns.kdeplot(data=train_df[self.config.TARGET], label='Train', fill=True, alpha=0.5, ax=ax)
        if not original_df.empty:
            sns.kdeplot(data=original_df[self.config.TARGET], label='Original', fill=True, alpha=0.5, ax=ax)

        ax.set_xlabel('Exam Score')
        ax.set_ylabel('Density')
        ax.set_title('Target Distribution: Train vs Original Dataset')
        ax.legend()

        self._finish(fig, 'target_distribution.png')

    def plot_oof_predictions(self, y_true: np.ndarray, y_pred: np.ndarray) -> None:
        """Scatter plot of OOF predictions vs actual.

        At most 10,000 points are drawn.  The sample comes from a generator
        seeded with ``config.SEED`` so plotting does not advance the global
        NumPy RNG.
        """
        fig, ax = plt.subplots(1, 1, figsize=(8, 8))

        # Sample for performance
        n_sample = min(10000, len(y_true))
        rng = np.random.default_rng(self.config.SEED)
        idx = rng.choice(len(y_true), size=n_sample, replace=False)

        ax.scatter(y_true[idx], y_pred[idx], alpha=0.3, s=10)

        # Perfect prediction line (spans the full range of both arrays)
        min_val, max_val = min(y_true.min(), y_pred.min()), max(y_true.max(), y_pred.max())
        ax.plot([min_val, max_val], [min_val, max_val], 'r--', lw=2, label='Perfect Prediction')

        ax.set_xlabel('Actual Exam Score')
        ax.set_ylabel('Predicted Exam Score')
        ax.set_title('Out-of-Fold Predictions vs Actual')
        ax.legend()
        ax.grid(True, alpha=0.3)

        self._finish(fig, 'oof_predictions.png')

    def plot_fold_scores(self, fold_scores: List[float]) -> None:
        """Bar plot of per-fold RMSE with a dashed line at the mean."""
        fig, ax = plt.subplots(1, 1, figsize=(10, 5))

        mean_score = np.mean(fold_scores)
        folds = list(range(1, len(fold_scores) + 1))
        ax.bar(folds, fold_scores, alpha=0.7, color='steelblue')
        ax.axhline(mean_score, color='red', linestyle='--', label=f'Mean: {mean_score:.5f}')

        ax.set_xlabel('Fold')
        ax.set_ylabel('RMSE')
        ax.set_title('Per-Fold RMSE Performance')
        ax.legend()
        ax.grid(True, alpha=0.3, axis='y')

        self._finish(fig, 'fold_scores.png')

    def plot_residuals(self, y_true: np.ndarray, y_pred: np.ndarray) -> None:
        """Residual plot for error analysis.

        Left panel: residuals (``y_true - y_pred``) against predictions.
        Right panel: histogram of the residuals.
        """
        residuals = y_true - y_pred

        fig, axes = plt.subplots(1, 2, figsize=(14, 5))

        # Residual scatter
        axes[0].scatter(y_pred, residuals, alpha=0.3, s=10)
        axes[0].axhline(0, color='red', linestyle='--', lw=2)
        axes[0].set_xlabel('Predicted Values')
        axes[0].set_ylabel('Residuals')
        axes[0].set_title('Residual Plot')
        axes[0].grid(True, alpha=0.3)

        # Residual distribution
        axes[1].hist(residuals, bins=50, alpha=0.7, color='steelblue', edgecolor='black')
        axes[1].axvline(0, color='red', linestyle='--', lw=2)
        axes[1].set_xlabel('Residuals')
        axes[1].set_ylabel('Frequency')
        axes[1].set_title('Residual Distribution')
        axes[1].grid(True, alpha=0.3, axis='y')

        self._finish(fig, 'residuals.png')

# In [None]:
# ============================================================================
# MAIN EXECUTION
# ============================================================================
"""Main execution pipeline"""

# Initialize: build the config, seed every RNG, create the output folders.
config = Config()
seed_everything(config.SEED)
setup_directories(config)

# Banner with the run's key settings.
print_section(f"EXAM SCORE PREDICTION - {config.EXPERIMENT_NAME}", char='#')
print(f"Device: {config.DEVICE}")
print(f"Cross-Validation: {config.N_FOLDS}-Fold")
print(f"Random Seed: {config.SEED}")

# ========================================================================
# STEP 1: DATA LOADING
# ========================================================================
print_section("STEP 1: DATA LOADING")

# Hard-coded Kaggle input paths: competition train/test/sample submission
# plus a separate "original" dataset that is later used to augment training.
train_df = pd.read_csv("/kaggle/input/playground-series-s6e1/train.csv")
test_df = pd.read_csv("/kaggle/input/playground-series-s6e1/test.csv")
original_df = pd.read_csv("/kaggle/input/exam-score-prediction-dataset/Exam_Score_Prediction.csv")
submission_df = pd.read_csv("/kaggle/input/playground-series-s6e1/sample_submission.csv")

print(f"Train shape: {train_df.shape}")
print(f"Test shape: {test_df.shape}")
print(f"Original shape: {original_df.shape}")

# ========================================================================
# STEP 2: FEATURE ENGINEERING
# ========================================================================
print_section("STEP 2: FEATURE ENGINEERING")

fe = FeatureEngineer(config)

# The same engineer is applied to each frame; the training frame goes first.
# An empty original frame is replaced by an empty DataFrame placeholder.
train_eng = fe.engineer_features(train_df)
test_eng = fe.engineer_features(test_df)
original_eng = fe.engineer_features(original_df) if not original_df.empty else pd.DataFrame()

# Column groups are derived from the engineered training frame only.
categorical_cols, numerical_cols = fe.get_feature_groups(train_eng)

print(f"Categorical features: {len(categorical_cols)}")
print(f"Numerical features: {len(numerical_cols)}")
print(f"Total features: {len(categorical_cols) + len(numerical_cols)}")

# ========================================================================
# STEP 3: DATA PREPROCESSING
# ========================================================================
print_section("STEP 3: DATA PREPROCESSING")

# Fit on the training frame only, then apply the fitted transforms to the
# test and original frames.
preprocessor = DataPreprocessor()
preprocessor.fit(train_eng, categorical_cols, numerical_cols)

X = preprocessor.transform(train_eng)
X_test = preprocessor.transform(test_eng)
# None (rather than an empty frame) signals "no augmentation data" downstream.
X_original = preprocessor.transform(original_eng) if not original_eng.empty else None

# Targets are read from the raw frames, positionally aligned with X / X_original.
y = train_df[config.TARGET].values
y_original = original_df[config.TARGET].values if not original_df.empty else None

print(f"Preprocessed train shape: {X.shape}")
print(f"Preprocessed test shape: {X_test.shape}")
if X_original is not None:
    print(f"Preprocessed original shape: {X_original.shape}")

# ========================================================================
# STEP 4: VISUALIZATION (PRE-TRAINING)
# ========================================================================
# Compare the target distribution of the competition and original data.
viz = Visualizer(config)
viz.plot_target_distribution(train_df, original_df)

# ========================================================================
# STEP 5: MODEL TRAINING
# ========================================================================
# K-fold training; the original dataset (if any) is appended to every
# training split. Results are stored on `trainer`.
trainer = TabMTrainer(config)
trainer.train_cv(
    X=X,
    y=y,
    X_test=X_test,
    categorical_cols=categorical_cols,
    X_augment=X_original,
    y_augment=y_original
)

# ========================================================================
# STEP 5B: FULL DATASET REFIT
# ========================================================================
# A single extra model trained on the pooled data; returns its test predictions.
refit_preds = trainer.train_full_refit(
    X=X,
    y=y,
    X_test=X_test,
    categorical_cols=categorical_cols,
    X_augment=X_original,
    y_augment=y_original
)

# ========================================================================
# STEP 6: VISUALIZATION (POST-TRAINING)
# ========================================================================
print_section("STEP 6: GENERATING DIAGNOSTIC PLOTS")

# All three plots are based on the out-of-fold predictions from train_cv.
viz.plot_oof_predictions(y, trainer.oof_predictions)
viz.plot_fold_scores(trainer.fold_scores)
viz.plot_residuals(y, trainer.oof_predictions)

# ========================================================================
# STEP 7: BLENDING PREDICTIONS
# ========================================================================
print_section("STEP 7: BLENDING ENSEMBLE + REFIT PREDICTIONS")

# Get CV ensemble predictions (mean of the per-fold test predictions)
ensemble_preds = trainer.get_ensemble_predictions()

# Blend predictions as a weighted average. Dividing by the weight total keeps
# the blend on the target scale even if the two configured weights do not sum
# to 1; with the default 0.5 / 0.5 the result is unchanged.
weight_total = config.ENSEMBLE_WEIGHT + config.REFIT_WEIGHT
if weight_total <= 0:
    raise ValueError("ENSEMBLE_WEIGHT + REFIT_WEIGHT must be positive")

blended_preds = (
    config.ENSEMBLE_WEIGHT * ensemble_preds +
    config.REFIT_WEIGHT * refit_preds
) / weight_total

print(f"Ensemble weight: {config.ENSEMBLE_WEIGHT}")
print(f"Refit weight: {config.REFIT_WEIGHT}")
print(f"\nPrediction Statistics:")
print(f"  Ensemble range: [{ensemble_preds.min():.2f}, {ensemble_preds.max():.2f}]")
print(f"  Refit range:    [{refit_preds.min():.2f}, {refit_preds.max():.2f}]")
print(f"  Blended range:  [{blended_preds.min():.2f}, {blended_preds.max():.2f}]")

# ========================================================================
# STEP 8: GENERATE SUBMISSIONS
# ========================================================================
print_section("STEP 8: GENERATING SUBMISSIONS")

# Save OOF predictions (keyed by the training ids)
oof_df = pd.DataFrame({
    'id': train_df['id'],
    config.TARGET: trainer.oof_predictions
})
oof_df.to_csv(config.OUTPUT_DIR / 'oof_predictions.csv', index=False)
print(f"‚úì Saved: {config.OUTPUT_DIR / 'oof_predictions.csv'}")

# Generate final test predictions (clipped to [CLIP_MIN, CLIP_MAX])
final_preds_clipped = np.clip(blended_preds, config.CLIP_MIN, config.CLIP_MAX)

# Save main submission (blended)
# NOTE(review): predictions are assigned positionally, which assumes the
# sample submission rows are in the same order as test.csv -- confirm.
submission_df[config.TARGET] = final_preds_clipped
submission_df.to_csv(config.OUTPUT_DIR / 'submission_blended.csv', index=False)
print(f"‚úì Saved: {config.OUTPUT_DIR / 'submission_blended.csv'}")

# Save alternative submissions for comparison
# Each is a copy of the submission frame with only the target column replaced.
submission_ensemble = submission_df.copy()
submission_ensemble[config.TARGET] = np.clip(ensemble_preds, config.CLIP_MIN, config.CLIP_MAX)
submission_ensemble.to_csv(config.OUTPUT_DIR / 'submission_ensemble.csv', index=False)
print(f"‚úì Saved: {config.OUTPUT_DIR / 'submission_ensemble.csv'}")

submission_refit = submission_df.copy()
submission_refit[config.TARGET] = np.clip(refit_preds, config.CLIP_MIN, config.CLIP_MAX)
submission_refit.to_csv(config.OUTPUT_DIR / 'submission_refit.csv', index=False)
print(f"‚úì Saved: {config.OUTPUT_DIR / 'submission_refit.csv'}")

# Save final model (the refit model, written under OUTPUT_DIR / 'models')
if config.SAVE_MODEL:
    trainer.save_final_model(config.OUTPUT_DIR / 'models')

# ========================================================================
# STEP 9: FINAL SUMMARY
# ========================================================================
print_section("FINAL SUMMARY", char='#')

# The overall RMSE is recomputed here from the stored OOF predictions.
print(f"Cross-Validation Results:")
print(f"  Overall OOF RMSE: {np.sqrt(mean_squared_error(y, trainer.oof_predictions)):.5f}")
print(f"  Mean Fold RMSE: {np.mean(trainer.fold_scores):.5f}")
print(f"  Std Fold RMSE: {np.std(trainer.fold_scores):.5f}")
print(f"  Min Fold RMSE: {np.min(trainer.fold_scores):.5f}")
print(f"  Max Fold RMSE: {np.max(trainer.fold_scores):.5f}")

# "Predictions clipped" counts the entries that np.clip actually changed.
print(f"\nBlended Submission:")
print(f"  Prediction Range: [{final_preds_clipped.min():.2f}, {final_preds_clipped.max():.2f}]")
print(f"  Predictions clipped: {(blended_preds != final_preds_clipped).sum()}")

print(f"\nSubmissions Generated:")
print(f"  1. submission_blended.csv  ‚Üê RECOMMENDED (ensemble + refit blend)")
print(f"  2. submission_ensemble.csv (CV ensemble only)")
print(f"  3. submission_refit.csv    (full refit only)")

print(f"\n{'#' * 80}")
print("PIPELINE COMPLETED SUCCESSFULLY")
print(f"{'#' * 80}\n")