In [1]:
# --- Cell A: Global Configurations for Robust Evaluation ---
import pandas as pd
import numpy as np
import os
import joblib
import sys
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import GridSearchCV, GroupKFold, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingRegressor
from sklearn.linear_model import LinearRegression
import xgboost as xgb

# --- Experiment configuration: input file, target, feature exclusions, models ---
INPUT_FILE = 'New_Features_Added_ALL.csv'
TARGET_COL = 'SOH_cycle_capacity_%'

# Columns kept out of the feature matrix: the two raw measurements plus every
# '<base>_<stat>_<window>' column for the bases, windows and stats listed below.
EXTRA_EXCLUDE_COLS_FROM_FEATURES = ['capacity_Ah', 'energy_Wh'] + [
    f'{base}_{stat}_{window}'
    for base in ('capacity_Ah', 'energy_Wh', 'SOH_cycle_capacity_%')
    for window in (3, 5, 10)
    for stat in ('roll_mean', 'roll_std', 'diff')
]

MODELS_TO_TRAIN = ['rf', 'gb', 'xgb', 'lr']

# Hyperparameter candidates per model code (the "Run 3" grids).
PARAM_GRIDS = {
    'rf': {
        'n_estimators': [50, 100, 150],
        'max_depth': [10, 20, 25],
        'min_samples_split': [5, 10],
    },
    'gb': {
        'learning_rate': [0.05, 0.1],
        'max_iter': [150, 200, 250],
        'max_depth': [4, 5, 7],
    },
    'xgb': {
        'n_estimators': [100, 150, 200],
        'learning_rate': [0.05, 0.1],
        'max_depth': [4, 5, 7],
    },
    'lr': {'fit_intercept': [True]},
}
CV_FOLDS_GRIDSEARCH = 3
GRIDSEARCH_SCORING = 'neg_mean_squared_error'

# battery_id -> regime label, written regime by regime and flattened into one dict.
BATTERY_REGIME_MAP = {
    battery: regime
    for regime, batteries in {
        'regular_constant': [
            'battery00', 'battery01', 'battery10', 'battery11', 'battery20',
            'battery21', 'battery30', 'battery31', 'battery40', 'battery50',
        ],
        'regular_variable': [
            'battery22', 'battery23', 'battery41', 'battery51', 'battery52',
        ],
        'recommissioned_two_stage': [
            'battery02', 'battery12', 'battery24', 'battery32', 'battery53',
        ],
        'recommissioned_three_stage': [
            'battery03', 'battery25', 'battery33',
        ],
    }.items()
    for battery in batteries
}

# Seed shared by the dev/hold-out split and the seeded estimators.
SPLIT_RANDOM_SEED = 42

# --- Robust-evaluation parameters ---
N_FINAL_HOLD_OUT_BATTERIES = 4  # batteries reserved for the final hold-out set
N_SPLITS_GROUPKFOLD = 5         # folds of the battery-grouped cross-validation

# --- Output locations ---
BASE_OUTPUT_DIR_ROBUST = "soh_robust_evaluation_outputs"
CV_EXPERIMENT_NAME = "SOH_GroupKFold_CV"
CV_EXPERIMENT_OUTPUT_DIR = os.path.join(BASE_OUTPUT_DIR_ROBUST, CV_EXPERIMENT_NAME)
FINAL_MODEL_DIR = os.path.join(BASE_OUTPUT_DIR_ROBUST, "Final_Champion_Model")

# Create both output folders up front so later cells can write into them.
os.makedirs(CV_EXPERIMENT_OUTPUT_DIR, exist_ok=True)
os.makedirs(FINAL_MODEL_DIR, exist_ok=True)

print(f"Robust evaluation base output directory: {BASE_OUTPUT_DIR_ROBUST}")
print(f"Cross-validation outputs will be saved to: {CV_EXPERIMENT_OUTPUT_DIR}")
print(f"Final champion model artifacts will be saved to: {FINAL_MODEL_DIR}")

Robust evaluation base output directory: soh_robust_evaluation_outputs
Cross-validation outputs will be saved to: soh_robust_evaluation_outputs/SOH_GroupKFold_CV
Final champion model artifacts will be saved to: soh_robust_evaluation_outputs/Final_Champion_Model


In [2]:
# --- Cell B: Preprocessing and Plotting Helper Functions ---

# This cell defines the helpers used by the later cells: load_data,
# preprocess_data, scale_features, plot_actual_vs_predicted_soh and
# plot_feature_importances. Run it before the data-splitting, cross-validation
# and final-model cells.

# Data loading and preprocessing helpers
def load_data(filepath):
    """Read a CSV file into a DataFrame.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        The loaded DataFrame, or None when the file does not exist or
        reading it fails (an error line is printed in both cases).
    """
    if os.path.exists(filepath):
        try:
            loaded = pd.read_csv(filepath)
            print(f"Data loaded successfully from {filepath}. Shape: {loaded.shape}")
            return loaded
        except Exception as e:
            print(f"Error loading data: {e}")
            return None

    print(f"Error: File not found at {filepath}")
    return None

def preprocess_data(df, target_col_name, current_extra_exclude_cols=None):
    """Split a raw frame into a feature matrix and a target series.

    Rows whose target is NaN are dropped. The bookkeeping columns
    ('start_time', 'cycle_number', 'battery_id', 'regime'), the target column,
    'SOH_cycle_capacity_%' and any `current_extra_exclude_cols` are removed
    from the features. Text-typed columns are passed through `pd.to_numeric`;
    columns that cannot be converted are skipped with a warning.

    Args:
        df: Input DataFrame (may be None).
        target_col_name: Name of the target column.
        current_extra_exclude_cols: Optional extra column names to exclude
            from the features.

    Returns:
        (X, y, feature_names) where feature_names is sorted, or
        (None, None, None) after printing an error when the input is
        None/empty, the target column is absent, no rows survive the NaN
        filter, or no features remain.
    """
    failure = (None, None, None)

    if df is None or df.empty:
        print("Error: Input DataFrame empty.")
        return failure
    # Report a missing target through the same None-tuple convention as the
    # other error paths instead of letting dropna raise a KeyError.
    if target_col_name not in df.columns:
        print(f"Error: Target column '{target_col_name}' not found in DataFrame.")
        return failure

    df_cleaned = df.dropna(subset=[target_col_name]).copy()
    if df_cleaned.empty:
        print(f"Error: No data after dropping NaNs in {target_col_name}.")
        return failure

    y = df_cleaned[target_col_name]

    excluded_from_features = {
        'start_time', 'cycle_number', 'battery_id', 'regime',
        target_col_name, 'SOH_cycle_capacity_%',
    }
    excluded_from_features.update(current_extra_exclude_cols or [])

    potential_features = [col for col in df_cleaned.columns if col not in excluded_from_features]
    temp_X_df = df_cleaned[potential_features].copy()

    feature_cols_for_X = []
    for col in temp_X_df.columns:
        column = temp_X_df[col]
        # Cover both the classic object dtype and pandas' dedicated string
        # dtypes, so text columns never reach the numeric pipeline unconverted.
        is_text_like = pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column)
        if not is_text_like:
            feature_cols_for_X.append(col)
            continue
        try:
            temp_X_df[col] = pd.to_numeric(column)
        except (ValueError, TypeError):
            # to_numeric raises ValueError for unparseable strings and
            # TypeError for objects it cannot interpret at all.
            print(f"Warning: Skipping non-convertible object column '{col}'.")
        else:
            feature_cols_for_X.append(col)

    feature_cols_for_X = sorted(set(feature_cols_for_X))
    X = temp_X_df[feature_cols_for_X].copy()
    if X.empty or y.empty:
        print("Error: X or y empty after preprocessing.")
        return failure
    return X, y, feature_cols_for_X

def scale_features(X_train, X_test, feature_names):
    """Standardize features with a StandardScaler fitted on X_train only.

    Args:
        X_train: Training feature frame; the scaler is fitted on it.
        X_test: Test feature frame, or None / an empty frame to skip it.
        feature_names: Column labels applied to the scaled frames.

    Returns:
        (X_train_scaled, X_test_scaled, scaler). X_test_scaled is None when
        X_test is None or empty. Both frames keep their original index.
    """
    scaler = StandardScaler()

    def _to_frame(values, source):
        # Re-wrap the scaled array with the source frame's row index.
        return pd.DataFrame(values, columns=feature_names, index=source.index)

    X_train_scaled = _to_frame(scaler.fit_transform(X_train), X_train)

    has_test_rows = X_test is not None and not X_test.empty
    X_test_scaled = _to_frame(scaler.transform(X_test), X_test) if has_test_rows else None

    return X_train_scaled, X_test_scaled, scaler

# --- Plotting helpers: plot_actual_vs_predicted_soh and plot_feature_importances ---
def plot_actual_vs_predicted_soh(
    df_test_eval, battery_id_to_plot, model_name_str, output_dir,
    actual_soh_col='y_actual', predicted_soh_col='y_pred'
):
    """Save an actual-vs-predicted SOH line plot for one battery.

    Args:
        df_test_eval: Frame with 'battery_id', 'cycle_number' and the two
            SOH columns named by `actual_soh_col` / `predicted_soh_col`.
        battery_id_to_plot: Battery whose rows are plotted.
        model_name_str: Label used in the legend, title and file name.
        output_dir: Existing directory receiving
            'soh_comp_<battery>_<model>.png'.
        actual_soh_col: Column holding the observed SOH.
        predicted_soh_col: Column holding the predicted SOH.

    Returns:
        None. Prints a message and skips the plot when the battery has no
        rows; a failure to save is printed rather than raised.
    """
    battery_data = df_test_eval[df_test_eval['battery_id'] == battery_id_to_plot].sort_values('cycle_number')
    if battery_data.empty:
        print(f"No data for battery {battery_id_to_plot}. Skipping plot.")
        return

    # Explicit Figure/Axes so the figure can be closed reliably below.
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.plot(battery_data['cycle_number'], battery_data[actual_soh_col],
                label='Actual SOH', marker='o', linestyle='-')
        ax.plot(battery_data['cycle_number'], battery_data[predicted_soh_col],
                label=f'Predicted SOH ({model_name_str})', marker='x', linestyle='--')
        ax.set_title(f'Actual vs. Predicted SOH: {battery_id_to_plot} ({model_name_str})')
        ax.set_xlabel('Cycle Number')
        ax.set_ylabel('SOH (%)')
        ax.legend()
        ax.grid(True)

        plot_filename = os.path.join(output_dir, f"soh_comp_{battery_id_to_plot}_{model_name_str}.png")
        try:
            fig.savefig(plot_filename)
        except Exception as e:
            print(f"  Error saving SOH plot {plot_filename}: {e}")
    finally:
        # Close even when plotting raises (e.g. a missing column), so figures
        # do not pile up when this is called in a loop over batteries.
        plt.close(fig)

def plot_feature_importances(
    importances_df, model_name_str, output_dir, top_n=15
):
    """Save a horizontal bar chart of the largest feature importances.

    Args:
        importances_df: Frame with 'feature' and 'importance' columns; None
            or an empty frame makes the function return without plotting.
        model_name_str: Label used in the title and file name.
        output_dir: Existing directory receiving 'fi_<model>.png'.
        top_n: Maximum number of features shown, ranked by importance.

    Returns:
        None. A failure to save is printed rather than raised.
    """
    if importances_df is None or importances_df.empty:
        return

    top_importances = importances_df.sort_values(by='importance', ascending=False).head(top_n)

    # Explicit Figure/Axes so the figure can be closed reliably below.
    fig, ax = plt.subplots(figsize=(8, max(5, len(top_importances) * 0.3)))
    try:
        sns.barplot(
            x='importance', y='feature', data=top_importances,
            hue='feature', palette='viridis', legend=False, dodge=False, ax=ax,
        )
        ax.set_title(f'Top {top_n} Feature Importances: {model_name_str}')
        ax.set_xlabel('Importance')
        ax.set_ylabel('Feature')
        fig.tight_layout()

        plot_filename = os.path.join(output_dir, f"fi_{model_name_str}.png")
        try:
            fig.savefig(plot_filename)
        except Exception as e:
            print(f"  Error saving FI plot {plot_filename}: {e}")
    finally:
        # Close even when drawing raises, so no figure is left open.
        plt.close(fig)

In [3]:
# --- Cell C: Load Data and Create Final Hold-Out & Development Sets ---
df_master_full = load_data(INPUT_FILE)

if df_master_full is None:
    raise ValueError("Failed to load master data.")

# Annotate with 'regime' when the file does not already carry that column.
if 'regime' not in df_master_full.columns:
    df_master_full['regime'] = df_master_full['battery_id'].map(BATTERY_REGIME_MAP)

# Rows without a regime are dropped just below. Name the affected batteries
# first so a battery missing from BATTERY_REGIME_MAP does not vanish silently.
batteries_without_regime = sorted(
    df_master_full.loc[df_master_full['regime'].isna(), 'battery_id'].dropna().astype(str).unique()
)
if batteries_without_regime:
    print(f"Warning: rows without a regime will be dropped for batteries: {batteries_without_regime}")

# Coerce cycle_number to numeric, drop rows missing any vital field, then cast
# to int. Done as one chain so no frame is mutated in place.
df_master_full = (
    df_master_full
    .assign(cycle_number=pd.to_numeric(df_master_full['cycle_number'], errors='coerce'))
    .dropna(subset=['cycle_number', 'regime', TARGET_COL])
    .astype({'cycle_number': int})
)

# One row per (battery, regime) pair; used for the stratified battery split.
unique_batteries_df = df_master_full[['battery_id', 'regime']].drop_duplicates().reset_index(drop=True)

# A battery listed under several regimes would appear more than once here and
# could land on both sides of the split; fail early with the offending ids.
batteries_with_multiple_regimes = sorted(
    unique_batteries_df.loc[unique_batteries_df['battery_id'].duplicated(keep=False), 'battery_id']
    .astype(str).unique()
)
if batteries_with_multiple_regimes:
    raise ValueError(f"Batteries mapped to more than one regime: {batteries_with_multiple_regimes}")

if len(unique_batteries_df) < N_FINAL_HOLD_OUT_BATTERIES + N_SPLITS_GROUPKFOLD : # Basic check for enough batteries
    raise ValueError(f"Not enough unique batteries ({len(unique_batteries_df)}) for desired hold-out ({N_FINAL_HOLD_OUT_BATTERIES}) and {N_SPLITS_GROUPKFOLD} CV folds.")

# Stratified split of BATTERY IDs (not rows) into development and hold-out sets.
development_battery_ids_df, final_hold_out_battery_ids_df = train_test_split(
    unique_batteries_df,
    test_size=N_FINAL_HOLD_OUT_BATTERIES,
    stratify=unique_batteries_df['regime'], # Stratify by regime
    random_state=SPLIT_RANDOM_SEED
)

development_battery_ids = list(development_battery_ids_df['battery_id'])
final_hold_out_battery_ids = list(final_hold_out_battery_ids_df['battery_id'])

# Materialize the row-level frames for each side of the split.
df_development = df_master_full[df_master_full['battery_id'].isin(development_battery_ids)].copy()
df_final_hold_out_test = df_master_full[df_master_full['battery_id'].isin(final_hold_out_battery_ids)].copy()

print(f"--- Data Splitting for Robust Evaluation ---")
print(f"Total unique batteries: {df_master_full['battery_id'].nunique()}")
print(f"Development Set: {len(development_battery_ids)} batteries. Shape: {df_development.shape}")
print(f"  Regimes: {development_battery_ids_df['regime'].value_counts().to_dict()}")
print(f"Final Hold-Out Test Set: {len(final_hold_out_battery_ids)} batteries. Shape: {df_final_hold_out_test.shape}")
print(f"  Regimes: {final_hold_out_battery_ids_df['regime'].value_counts().to_dict()}")

# Sanity check
assert set(development_battery_ids).isdisjoint(set(final_hold_out_battery_ids)), "Overlap between dev and hold-out!"

Data loaded successfully from New_Features_Added_ALL.csv. Shape: (8220, 200)
--- Data Splitting for Robust Evaluation ---
Total unique batteries: 23
Development Set: 19 batteries. Shape: (7488, 201)
  Regimes: {'regular_constant': 8, 'recommissioned_two_stage': 4, 'regular_variable': 4, 'recommissioned_three_stage': 3}
Final Hold-Out Test Set: 4 batteries. Shape: (732, 201)
  Regimes: {'regular_constant': 2, 'recommissioned_two_stage': 1, 'regular_variable': 1}


In [4]:
# --- Cell D: Define Experiment Runner for a Single CV Fold ---

def _make_base_estimator(model_code):
    """Return an untuned estimator for `model_code`, or None for an unknown code."""
    if model_code == 'rf':
        return RandomForestRegressor(random_state=SPLIT_RANDOM_SEED, n_jobs=-1)
    if model_code == 'gb':
        return HistGradientBoostingRegressor(random_state=SPLIT_RANDOM_SEED)
    if model_code == 'xgb':
        return xgb.XGBRegressor(random_state=SPLIT_RANDOM_SEED, n_jobs=-1, verbosity=0)
    if model_code == 'lr':
        return LinearRegression()
    return None


def run_experiment_for_cv_fold(
    df_train_data_fold,       # Training data for this fold
    df_test_data_fold,        # Test data for this fold (acting as validation for this fold)
    target_variable_name,
    extra_feature_exclusions,
    models_to_run,
    model_param_grids,
    gridsearch_cv_folds,      # Inner CV for GridSearchCV
    gridsearch_metric_name,
    fold_output_dir,          # Directory to save artifacts for this specific fold
    save_fold_artifacts=False, # Control saving for each fold to save space if needed
    group_col='battery_id'    # Column whose values must not be split across inner CV folds
):
    """Tune and evaluate every requested model on one outer CV fold.

    Hyperparameters are tuned with GridSearchCV on `df_train_data_fold`; the
    best estimator per model is then scored on `df_test_data_fold`.

    Args:
        df_train_data_fold: Training rows of this fold.
        df_test_data_fold: Evaluation rows of this fold.
        target_variable_name: Target column name.
        extra_feature_exclusions: Extra columns passed to `preprocess_data`
            for removal from the features.
        models_to_run: Model codes ('rf', 'gb', 'xgb', 'lr'); unknown codes
            are skipped with a warning.
        model_param_grids: Mapping of model code to its GridSearchCV grid.
        gridsearch_cv_folds: Number of inner folds (or any `cv` value that
            GridSearchCV accepts).
        gridsearch_metric_name: GridSearchCV `scoring` value.
        fold_output_dir: Directory for this fold's artifacts (always created).
        save_fold_artifacts: When True, dump the best models, the scaler, the
            imputer and the feature names with joblib.
        group_col: Grouping column for the inner CV. When it is present, free
            of NaNs and has at least `gridsearch_cv_folds` distinct values,
            the inner CV is a GroupKFold over it, so rows of one group never
            sit on both sides of a tuning split. Otherwise the plain
            `gridsearch_cv_folds` setting is used.

    Returns:
        Dict keyed by model code with 'test_rmse', 'test_mae', 'test_r2' and
        'best_params'. If preprocessing fails or no usable feature remains,
        every requested model maps to NaN metrics and empty params.
    """
    os.makedirs(fold_output_dir, exist_ok=True)

    def _nan_results():
        return {
            code: {'test_rmse': np.nan, 'test_mae': np.nan, 'test_r2': np.nan, 'best_params': {}}
            for code in models_to_run
        }

    # 1. Preprocess Data
    X_train_raw, y_train, actual_feature_names = preprocess_data(
        df_train_data_fold, target_variable_name, extra_feature_exclusions
    )
    X_test_raw, y_test, _ = preprocess_data(
        df_test_data_fold, target_variable_name, extra_feature_exclusions
    )

    if X_train_raw is None or X_test_raw is None:
        print("  Error: Preprocessing failed for this fold. Skipping.")
        return _nan_results()

    # The median imputer discards features with no observed training value,
    # which would leave fewer columns than names. Drop them explicitly.
    empty_train_features = [col for col in actual_feature_names if not X_train_raw[col].notna().any()]
    if empty_train_features:
        print(f"  Note: dropping {len(empty_train_features)} feature(s) that are all-NaN in the training fold.")
        actual_feature_names = [col for col in actual_feature_names if col not in empty_train_features]
        X_train_raw = X_train_raw[actual_feature_names]
    if not actual_feature_names:
        print("  Error: No usable features for this fold. Skipping.")
        return _nan_results()

    # Train and test are preprocessed independently, so force the test fold
    # onto the training feature set; absent columns become NaN and are imputed.
    missing_in_test = [col for col in actual_feature_names if col not in X_test_raw.columns]
    if missing_in_test:
        print(f"  Warning: {len(missing_in_test)} training feature(s) missing from the test fold; they will be imputed.")
    X_test_raw = X_test_raw.reindex(columns=actual_feature_names)

    # Keep the original row labels so results can be joined back to the source frames.
    train_indices = X_train_raw.index
    test_indices = X_test_raw.index

    # 2. Impute Data (statistics learned on the training fold only)
    imputer = SimpleImputer(strategy='median')
    X_train_imputed = pd.DataFrame(imputer.fit_transform(X_train_raw), columns=actual_feature_names, index=train_indices)
    X_test_imputed = pd.DataFrame(imputer.transform(X_test_raw), columns=actual_feature_names, index=test_indices)

    # 3. Scale Data
    X_train_scaled, X_test_scaled, scaler = scale_features(
        X_train_imputed, X_test_imputed, actual_feature_names
    )

    # 4. Inner CV: group-aware when possible, so tuning respects the same
    #    grouping as the outer GroupKFold.
    inner_cv = gridsearch_cv_folds
    inner_groups = None
    wants_k_folds = isinstance(gridsearch_cv_folds, (int, np.integer)) and not isinstance(gridsearch_cv_folds, bool)
    if wants_k_folds and group_col in df_train_data_fold.columns:
        # Same row filter as preprocess_data (target not NaN), applied by mask
        # so the groups line up with X_train row for row.
        kept_rows = df_train_data_fold[target_variable_name].notna()
        candidate_groups = df_train_data_fold.loc[kept_rows, group_col]
        groups_usable = (
            len(candidate_groups) == len(X_train_imputed)
            and candidate_groups.notna().all()
            and candidate_groups.nunique() >= gridsearch_cv_folds
        )
        if groups_usable:
            inner_cv = GroupKFold(n_splits=int(gridsearch_cv_folds))
            inner_groups = candidate_groups.to_numpy()
        else:
            print(f"  Warning: cannot group inner CV by '{group_col}'; using ungrouped {gridsearch_cv_folds}-fold CV.")

    fold_model_results = {}

    for model_code in models_to_run:
        base_model = _make_base_estimator(model_code)
        if base_model is None:
            print(f"  Warning: unknown model code '{model_code}'. Skipping.")
            continue

        # 'lr' and 'gb' (HistGradientBoostingRegressor) are fed the scaled
        # features; the other models use the imputed, unscaled ones.
        if model_code in ['lr', 'gb']:
            current_X_train_for_tuning = X_train_scaled
            current_X_test_for_eval = X_test_scaled
        else:
            current_X_train_for_tuning = X_train_imputed
            current_X_test_for_eval = X_test_imputed

        grid_search = GridSearchCV(
            estimator=base_model, param_grid=model_param_grids[model_code],
            scoring=gridsearch_metric_name, cv=inner_cv, verbose=0, n_jobs=-1 # verbose=0 for less output
        )
        grid_search.fit(current_X_train_for_tuning, y_train, groups=inner_groups)
        best_model = grid_search.best_estimator_

        # Evaluate on the fold's test set
        y_pred_test = best_model.predict(current_X_test_for_eval)
        test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
        test_mae = mean_absolute_error(y_test, y_pred_test)
        test_r2 = r2_score(y_test, y_pred_test)

        fold_model_results[model_code] = {
            'test_rmse': test_rmse, 'test_mae': test_mae, 'test_r2': test_r2,
            'best_params': grid_search.best_params_
        }

        if save_fold_artifacts:
            joblib.dump(best_model, os.path.join(fold_output_dir, f"model_{model_code}.joblib"))

    if save_fold_artifacts: # Transformers and feature names are shared by all models of the fold
        joblib.dump(scaler, os.path.join(fold_output_dir, f"scaler_fold.joblib"))
        joblib.dump(imputer, os.path.join(fold_output_dir, f"imputer_fold.joblib"))
        joblib.dump(actual_feature_names, os.path.join(fold_output_dir, f"feature_names_fold.joblib"))

    return fold_model_results

In [None]:
# --- Cell E: Perform GroupKFold Cross-Validation ---
groups_for_cv = df_development['battery_id'] # Group by battery ID
group_kfold = GroupKFold(n_splits=N_SPLITS_GROUPKFOLD)

# The number of folds must not exceed the number of distinct batteries.
if N_SPLITS_GROUPKFOLD > df_development['battery_id'].nunique():
    raise ValueError(f"N_SPLITS_GROUPKFOLD ({N_SPLITS_GROUPKFOLD}) cannot be greater than the number of unique batteries in the development set ({df_development['battery_id'].nunique()}).")

# One record per (fold, model) pair; turned into a DataFrame after the loop.
all_fold_results_list = []

print(f"\n--- Starting {N_SPLITS_GROUPKFOLD}-Fold Group Cross-Validation ---")
fold_splits = group_kfold.split(df_development, y=df_development[TARGET_COL], groups=groups_for_cv)
for fold_num, (train_dev_indices, test_dev_indices) in enumerate(fold_splits):
    fold_experiment_tag = f"Fold_{fold_num + 1}"
    current_fold_output_dir = os.path.join(CV_EXPERIMENT_OUTPUT_DIR, fold_experiment_tag)
    print(f"\n--- {fold_experiment_tag}/{N_SPLITS_GROUPKFOLD} ---")

    # The splitter yields positional indices, hence iloc.
    df_train_fold = df_development.iloc[train_dev_indices]
    df_test_fold = df_development.iloc[test_dev_indices] # Evaluation rows for THIS fold only

    train_batteries = df_train_fold['battery_id'].unique()
    test_batteries = df_test_fold['battery_id'].unique()
    print(f"  Train batteries ({len(train_batteries)}): {sorted(train_batteries)[:5]}...")
    print(f"  Test batteries ({len(test_batteries)}): {sorted(test_batteries)}")

    # Tune and score every model on this fold.
    # Switch save_fold_artifacts to True to keep each fold's models/scalers.
    results_this_fold = run_experiment_for_cv_fold(
        df_train_data_fold=df_train_fold,
        df_test_data_fold=df_test_fold,
        target_variable_name=TARGET_COL,
        extra_feature_exclusions=EXTRA_EXCLUDE_COLS_FROM_FEATURES,
        models_to_run=MODELS_TO_TRAIN,
        model_param_grids=PARAM_GRIDS,
        gridsearch_cv_folds=CV_FOLDS_GRIDSEARCH,
        gridsearch_metric_name=GRIDSEARCH_SCORING,
        fold_output_dir=current_fold_output_dir,
        save_fold_artifacts=False
    )

    # Flatten the per-model metrics into rows tagged with the 1-based fold number.
    for model_name, metrics in results_this_fold.items():
        fold_row = {'fold': fold_num + 1, 'model': model_name}
        fold_row.update({key: metrics[key] for key in ('test_rmse', 'test_mae', 'test_r2')})
        fold_row['best_params'] = str(metrics['best_params']) # Stored as a string for the DataFrame/CSV
        all_fold_results_list.append(fold_row)
    print(f"  Fold {fold_num+1} completed.")

# Assemble every fold/model record into one table.
df_cv_results_detailed = pd.DataFrame(all_fold_results_list)

print("\n--- Cross-Validation Detailed Results (All Folds, All Models) ---")
print(df_cv_results_detailed)

# Persist the detailed table next to the other CV outputs.
cv_detailed_csv_path = os.path.join(CV_EXPERIMENT_OUTPUT_DIR, "cv_detailed_results.csv")
df_cv_results_detailed.to_csv(cv_detailed_csv_path, index=False)
print(f"\nDetailed CV results saved to {cv_detailed_csv_path}")

TypeError: object of type 'int' has no len()

In [None]:
# --- Cell F: Analyze Cross-Validation Results and Select Champion Model ---
import ast
from collections import Counter

if df_cv_results_detailed.empty:
    raise ValueError("CV results are empty. Cannot proceed with analysis.")

# Mean and standard deviation of each metric per model across the CV folds.
aggregated_cv_metrics = df_cv_results_detailed.groupby('model').agg(
    mean_test_rmse=('test_rmse', 'mean'),
    std_test_rmse=('test_rmse', 'std'),
    mean_test_mae=('test_mae', 'mean'),
    std_test_mae=('test_mae', 'std'),
    mean_test_r2=('test_r2', 'mean'),
    std_test_r2=('test_r2', 'std')
).reset_index()

print("\n--- Aggregated Cross-Validation Metrics (Mean & Std Dev) ---")
# Best mean R2 first; sort_values places NaN rows last.
aggregated_cv_metrics = aggregated_cv_metrics.sort_values(by='mean_test_r2', ascending=False)
print(aggregated_cv_metrics)

# Save aggregated CV results
cv_aggregated_csv_path = os.path.join(CV_EXPERIMENT_OUTPUT_DIR, "cv_aggregated_metrics.csv")
aggregated_cv_metrics.to_csv(cv_aggregated_csv_path, index=False)
print(f"\nAggregated CV metrics saved to {cv_aggregated_csv_path}")

# --- Champion Model Selection Logic ---
# The champion is the model with the highest mean_test_r2 (first row after the
# sort). Stability (std_test_r2) or other metrics are not used here.
champion_model_series = aggregated_cv_metrics.iloc[0]
# NaNs sort last, so a NaN in the first row means no model has a valid mean R2.
if pd.isna(champion_model_series['mean_test_r2']):
    raise ValueError("No model has a valid mean R2 across the CV folds. Cannot select a champion.")
champion_model_name = champion_model_series['model']
print(f"\n--- Champion Model Selection ---")
print(f"Based on CV results (highest mean R2), the champion model architecture is: {champion_model_name.upper()}")
print(f"  Mean Test R2: {champion_model_series['mean_test_r2']:.4f} (Std: {champion_model_series['std_test_r2']:.4f})")
print(f"  Mean Test RMSE: {champion_model_series['mean_test_rmse']:.4f} (Std: {champion_model_series['std_test_rmse']:.4f})")

# Hyperparameters for the champion: the parameter set chosen most often across
# the CV folds. Re-running GridSearchCV on the full development set would be an
# alternative; it is not done here.
champion_model_params_list = df_cv_results_detailed[
    df_cv_results_detailed['model'] == champion_model_name
]['best_params'].tolist()

most_common_params_str = Counter(champion_model_params_list).most_common(1)[0][0]
# best_params was stored as str(dict); literal_eval only handles plain Python
# literals, so anything else falls through to the fallback below.
try:
    final_champion_hyperparameters = ast.literal_eval(most_common_params_str)
    if not isinstance(final_champion_hyperparameters, dict):
        raise ValueError(f"expected a dict, got {type(final_champion_hyperparameters).__name__}")
    print(f"Most common hyperparameters for {champion_model_name.upper()} from CV: {final_champion_hyperparameters}")
except (ValueError, SyntaxError, TypeError) as e:
    # PARAM_GRIDS holds LISTS of candidates per parameter and cannot be passed
    # to an estimator constructor, so fall back to the estimator's own defaults.
    print(f"Could not parse hyperparameters for {champion_model_name}, falling back to estimator defaults. Error: {e}")
    final_champion_hyperparameters = {}

In [None]:
# --- Cell G: Train Final Champion Model and Evaluate on Hold-Out Set ---

print(f"\n--- Training Final Champion Model: {champion_model_name.upper()} ---")
print(f"Using hyperparameters: {final_champion_hyperparameters}")

# 1. Preprocess full development set and final hold-out test set
X_dev_full_raw, y_dev_full, final_feature_names = preprocess_data(
    df_development, TARGET_COL, EXTRA_EXCLUDE_COLS_FROM_FEATURES
)
X_hold_out_raw, y_hold_out, _ = preprocess_data(
    df_final_hold_out_test, TARGET_COL, EXTRA_EXCLUDE_COLS_FROM_FEATURES
)

if X_dev_full_raw is None or X_hold_out_raw is None:
    raise ValueError("Preprocessing failed for full development or hold-out set.")

# The median imputer discards features with no observed value at fit time,
# which would leave fewer columns than names. Drop such features explicitly.
all_nan_dev_features = [col for col in final_feature_names if not X_dev_full_raw[col].notna().any()]
if all_nan_dev_features:
    print(f"Dropping {len(all_nan_dev_features)} feature(s) with no observed values in the development set: {all_nan_dev_features}")
    final_feature_names = [col for col in final_feature_names if col not in all_nan_dev_features]
    X_dev_full_raw = X_dev_full_raw[final_feature_names]
if not final_feature_names:
    raise ValueError("No usable features remain for the final model.")

# The two sets are preprocessed independently, so force the hold-out frame onto
# the development feature set; absent columns become NaN and are imputed below.
missing_hold_out_features = [col for col in final_feature_names if col not in X_hold_out_raw.columns]
if missing_hold_out_features:
    print(f"Warning: {len(missing_hold_out_features)} development feature(s) missing from the hold-out set; they will be imputed: {missing_hold_out_features}")
X_hold_out_raw = X_hold_out_raw.reindex(columns=final_feature_names)

# Impute (medians learned on the development set only)
final_imputer = SimpleImputer(strategy='median')
X_dev_full_imputed = pd.DataFrame(final_imputer.fit_transform(X_dev_full_raw), columns=final_feature_names, index=X_dev_full_raw.index)
X_hold_out_imputed = pd.DataFrame(final_imputer.transform(X_hold_out_raw), columns=final_feature_names, index=X_hold_out_raw.index)

# Scale (statistics learned on the development set only)
final_scaler = StandardScaler()
X_dev_full_scaled = pd.DataFrame(final_scaler.fit_transform(X_dev_full_imputed), columns=final_feature_names, index=X_dev_full_imputed.index)
X_hold_out_scaled = pd.DataFrame(final_scaler.transform(X_hold_out_imputed), columns=final_feature_names, index=X_hold_out_imputed.index)


# 'lr' and 'gb' get the scaled features; the other models use the imputed,
# unscaled ones.
X_train_final = X_dev_full_imputed
X_test_final_eval = X_hold_out_imputed
if champion_model_name in ['lr', 'gb']:
    X_train_final = X_dev_full_scaled
    X_test_final_eval = X_hold_out_scaled

# Initialize and train the final model
final_model = None
if champion_model_name == 'rf':
    final_model = RandomForestRegressor(random_state=SPLIT_RANDOM_SEED, n_jobs=-1, **final_champion_hyperparameters)
elif champion_model_name == 'gb':
    final_model = HistGradientBoostingRegressor(random_state=SPLIT_RANDOM_SEED, **final_champion_hyperparameters)
elif champion_model_name == 'xgb':
    final_model = xgb.XGBRegressor(random_state=SPLIT_RANDOM_SEED, n_jobs=-1, verbosity=0, **final_champion_hyperparameters)
elif champion_model_name == 'lr':
    final_model = LinearRegression(**final_champion_hyperparameters)

if final_model is None:
    raise ValueError(f"Champion model {champion_model_name} could not be initialized.")

print(f"Training final {champion_model_name.upper()} model on the full development set...")
final_model.fit(X_train_final, y_dev_full)
print("Training complete.")

# Save the final model together with the transformers and the feature order.
joblib.dump(final_model, os.path.join(FINAL_MODEL_DIR, f"champion_model_{champion_model_name}.joblib"))
joblib.dump(final_scaler, os.path.join(FINAL_MODEL_DIR, "final_scaler.joblib"))
joblib.dump(final_imputer, os.path.join(FINAL_MODEL_DIR, "final_imputer.joblib"))
joblib.dump(final_feature_names, os.path.join(FINAL_MODEL_DIR, "final_feature_names.joblib"))
print(f"Final champion model and transformers saved to {FINAL_MODEL_DIR}")


# Evaluate on the Final Hold-Out Test Set
print(f"\n--- Evaluating Champion Model on Final Hold-Out Test Set ---")
y_pred_hold_out = final_model.predict(X_test_final_eval)

hold_out_rmse = np.sqrt(mean_squared_error(y_hold_out, y_pred_hold_out))
hold_out_mae = mean_absolute_error(y_hold_out, y_pred_hold_out)
hold_out_r2 = r2_score(y_hold_out, y_pred_hold_out)

print(f"Hold-Out Test Set Performance for {champion_model_name.upper()}:")
print(f"  RMSE: {hold_out_rmse:.4f}")
print(f"  MAE:  {hold_out_mae:.4f}")
print(f"  R2:   {hold_out_r2:.4f}")

final_results_summary = {
    'champion_model': champion_model_name,
    'hyperparameters': str(final_champion_hyperparameters),
    'hold_out_rmse': hold_out_rmse,
    'hold_out_mae': hold_out_mae,
    'hold_out_r2': hold_out_r2
}
df_final_summary = pd.DataFrame([final_results_summary])
final_summary_csv_path = os.path.join(FINAL_MODEL_DIR, "final_hold_out_results_summary.csv")
df_final_summary.to_csv(final_summary_csv_path, index=False)
print(f"Final hold-out results summary saved to {final_summary_csv_path}")


# Generate and save plots for the hold-out set
# Feature importance plot: uses feature_importances_ when the model exposes it,
# and absolute coefficients for the linear model; otherwise no plot is made.
fi_df_final = None
if hasattr(final_model, 'feature_importances_'):
    fi_df_final = pd.DataFrame({'feature': final_feature_names, 'importance': final_model.feature_importances_})
elif champion_model_name == 'lr' and hasattr(final_model, 'coef_'):
    fi_df_final = pd.DataFrame({'feature': final_feature_names, 'importance': np.abs(final_model.coef_)})

if fi_df_final is not None:
    plot_feature_importances(fi_df_final, f"champion_{champion_model_name}_hold_out", FINAL_MODEL_DIR)

# SOH Actual vs. Predicted plots, one per hold-out battery. Identifiers are
# looked up by the row labels that survived preprocessing.
df_hold_out_eval_plot = pd.DataFrame({
    'battery_id': df_final_hold_out_test.loc[X_hold_out_raw.index, 'battery_id'].values,
    'cycle_number': df_final_hold_out_test.loc[X_hold_out_raw.index, 'cycle_number'].values,
    'y_actual': y_hold_out.values,
    'y_pred': y_pred_hold_out
})

plots_hold_out_dir = os.path.join(FINAL_MODEL_DIR, "hold_out_SOH_plots")
os.makedirs(plots_hold_out_dir, exist_ok=True)
for batt_id in df_hold_out_eval_plot['battery_id'].unique():
    plot_actual_vs_predicted_soh(
        df_hold_out_eval_plot, batt_id, f"champion_{champion_model_name}", plots_hold_out_dir
    )
print(f"SOH comparison plots for hold-out set saved in {plots_hold_out_dir}")

print("\n--- Robust Evaluation Process Completed ---")