In [1]:
# Cell 1: Imports and Configuration
import pandas as pd
import numpy as np
import os
import joblib
import sys
from sklearn.model_selection import GridSearchCV # <<< ADDED
from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingRegressor
from sklearn.linear_model import LinearRegression
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.inspection import permutation_importance # For HistGBM importance

# Assuming train_models.py is in the same directory or in python path
# project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) # if notebook is in a subfolder
# if project_root not in sys.path:
#    sys.path.append(project_root)
import train_models # Your refactored train_models.py

# --- Notebook Specific Configurations ---
# Input data, prediction target and columns that must never be used as features.
INPUT_FILE = 'notebook_processed_battery_data_simplified_ALL.csv'
TARGET_COL = 'SOH_cycle_capacity_%'
EXTRA_EXCLUDE_COLS_FROM_FEATURES = ['capacity_Ah', 'energy_Wh', 'avg_power_W']

# Model keys that go through GridSearchCV.
MODELS_TO_TUNE_AND_TRAIN = ['rf', 'gb', 'xgb', 'lr']

# Artifact persistence: flag plus the root directory for tuned results
# (created eagerly so later cells can write into it).
SAVE_MODELS_FLAG = True
BASE_OUTPUT_DIR = "regime_aware_tuned_experiments"
os.makedirs(BASE_OUTPUT_DIR, exist_ok=True)

# Battery regime mapping, declared regime-first and then inverted into the
# battery_id -> regime lookup the rest of the notebook uses.
_BATTERIES_BY_REGIME = {
    'regular_constant': [
        'battery00', 'battery01', 'battery10', 'battery11', 'battery20',
        'battery21', 'battery30', 'battery31', 'battery40', 'battery50',
    ],
    'regular_variable': [
        'battery22', 'battery23', 'battery41', 'battery51', 'battery52',
    ],
    'recommissioned_two_stage': [
        'battery02', 'battery12', 'battery24', 'battery32', 'battery53',
    ],
    'recommissioned_three_stage': [
        'battery03', 'battery25', 'battery33',
    ],
}
BATTERY_REGIME_MAP = {
    battery_id: regime_label
    for regime_label, battery_ids in _BATTERIES_BY_REGIME.items()
    for battery_id in battery_ids
}
ALL_REGIMES = sorted(set(BATTERY_REGIME_MAP.values()))

# --- GridSearchCV Configurations ---
CV_FOLDS = 3  # cross-validation folds per grid search (kept small, e.g. 2 or 3)
GRIDSEARCH_SCORING = 'neg_mean_squared_error'  # metric optimised by the search (lower MSE is better)

# Parameter grids, one per model key (deliberately small to start with).
_RF_GRID = {
    'n_estimators': [50, 100],  # default 100
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
}
_GB_GRID = {  # HistGradientBoostingRegressor
    'learning_rate': [0.01, 0.1],  # default 0.1
    'max_iter': [100, 200],        # default 100 (number of trees)
    'max_depth': [None, 5, 10],
    'l2_regularization': [0, 0.1],  # default 0
}
_XGB_GRID = {
    'n_estimators': [50, 100, 200],     # default 100
    'learning_rate': [0.05, 0.1, 0.3],  # default 0.3
    'max_depth': [3, 5, 7],             # default 6
    # Candidates left out of the grid for now:
    # 'subsample': [0.8, 1.0],
    # 'colsample_bytree': [0.8, 1.0]
}
_LR_GRID = {
    # Plain linear regression has little to tune; Ridge/Lasso would add more.
    'fit_intercept': [True, False],
}
PARAM_GRIDS = {
    'rf': _RF_GRID,
    'gb': _GB_GRID,
    'xgb': _XGB_GRID,
    'lr': _LR_GRID,
}

In [2]:
# Cell 2: Load Data and Annotate Regimes
# Read the processed dataset through the project loader; None is treated as a failed load.
df_master_full = train_models.load_data(INPUT_FILE)
if df_master_full is None:
    raise ValueError(f"Failed to load data from {INPUT_FILE}.")

# Tag every row with the regime of its battery (null when the battery is not in the map).
df_master_full['regime'] = df_master_full['battery_id'].map(BATTERY_REGIME_MAP)

# Rows whose battery has no regime are reported once and then removed.
rows_without_regime = df_master_full['regime'].isnull()
if rows_without_regime.any():
    unmapped_batteries = df_master_full.loc[rows_without_regime, 'battery_id'].unique()
    print(f"Warning: Unmapped batteries: {unmapped_batteries}. Dropping them.")
    df_master_full.dropna(subset=['regime'], inplace=True)

# Summary: overall shape plus the number of distinct batteries in each regime.
print("--- Data Loaded and Regimes Annotated ---")
print(f"Master DataFrame shape: {df_master_full.shape}")
print("Regime counts (batteries per regime):")
batteries_per_regime = df_master_full.groupby('regime')['battery_id'].nunique()
print(batteries_per_regime)

Data loaded successfully from notebook_processed_battery_data_simplified_ALL.csv. Shape: (8220, 47)
--- Data Loaded and Regimes Annotated ---
Master DataFrame shape: (8220, 48)
Regime counts (batteries per regime):
regime
recommissioned_three_stage     3
recommissioned_two_stage       5
regular_constant              10
regular_variable               5
Name: battery_id, dtype: int64


In [3]:
# Cell 3: Helper Function to Evaluate a Fitted Model
def _has_rows(data):
    """Return True when *data* is neither None nor empty.

    Pandas objects are checked through their ``.empty`` attribute (same test
    the notebook used before); anything else, e.g. a NumPy array or a list,
    falls back to ``len()`` so callers are not forced to pass pandas types.
    """
    if data is None:
        return False
    is_empty = getattr(data, 'empty', None)
    if is_empty is not None:
        return not is_empty
    return len(data) > 0


def _score_split(model, model_name_str, split_label, y_label, X_data, y_data):
    """Score *model* on one split and return ``(rmse, mae, r2)``.

    Metrics that could not be computed (split missing/empty, or an exception
    while predicting/scoring) are returned as NaN; the reason is printed.
    ``split_label`` is the capitalised split name used in the messages
    ("Validation"/"Test") and ``y_label`` the target name shown when skipping.
    """
    rmse, mae, r2 = np.nan, np.nan, np.nan
    split_lower = split_label.lower()
    if not (_has_rows(X_data) and _has_rows(y_data)):
        print(f"  {split_label} set empty or {y_label} empty. Skipping {split_lower} metrics.")
        return rmse, mae, r2
    try:
        y_pred = model.predict(X_data)
        rmse = np.sqrt(mean_squared_error(y_data, y_pred))
        mae = mean_absolute_error(y_data, y_pred)
        r2 = r2_score(y_data, y_pred)
        print(f"  {split_label} RMSE: {rmse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}")
    except Exception as e:
        # Deliberate best-effort: one failing split must not abort the whole experiment.
        print(f"  Error during {split_lower} evaluation for {model_name_str}: {e}")
    return rmse, mae, r2


def evaluate_fitted_model(model, model_name_str, X_val_data, y_val_data, X_test_data, y_test_data, feature_names_list):
    """Evaluate a pre-fitted model on validation and test sets and print feature importances.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    model_name_str : str
        Label used in the printed output only. It may carry a suffix
        (e.g. ``"lr_reg_test"``); the importance strategy is chosen from the
        attributes of ``model``, not from this label.
    X_val_data, y_val_data : validation features/target, or None/empty to skip.
    X_test_data, y_test_data : test features/target, or None/empty to skip.
    feature_names_list : sequence of feature names (list, array or Index);
        None or empty disables the importance report.

    Returns
    -------
    dict
        Keys ``val_rmse``, ``val_mae``, ``val_r2``, ``test_rmse``,
        ``test_mae``, ``test_r2``; a value is NaN when its split was skipped
        or its computation failed.
    """
    print(f"Evaluating {model_name_str.upper()}...")

    val_rmse, val_mae, val_r2 = _score_split(
        model, model_name_str, 'Validation', 'y_val', X_val_data, y_val_data)
    test_rmse, test_mae, test_r2 = _score_split(
        model, model_name_str, 'Test', 'y_test', X_test_data, y_test_data)

    results_metrics = {
        'val_rmse': val_rmse, 'val_mae': val_mae, 'val_r2': val_r2,
        'test_rmse': test_rmse, 'test_mae': test_mae, 'test_r2': test_r2
    }

    # Explicit length test: truthiness of an array/Index would raise.
    if feature_names_list is None or len(feature_names_list) == 0:
        return results_metrics

    importances_values = None
    if hasattr(model, 'feature_importances_'):
        # Tree ensembles that expose impurity/gain based importances.
        importances_values = model.feature_importances_
    elif hasattr(model, 'coef_'):
        # Linear models: use coefficient magnitude; average over outputs if 2-D.
        importances_values = np.abs(np.asarray(model.coef_))
        if importances_values.ndim > 1:
            importances_values = np.mean(importances_values, axis=0)
    else:
        # Neither attribute (e.g. HistGradientBoostingRegressor): permutation importance.
        print(f"  Calculating permutation importance for {model_name_str.upper()} on validation data...")
        if _has_rows(X_val_data) and _has_rows(y_val_data) and len(X_val_data) > 1:
            try:
                perm_result = permutation_importance(model, X_val_data, y_val_data, n_repeats=10, random_state=42, n_jobs=-1)
                importances_values = perm_result.importances_mean
            except Exception as e:
                print(f"    Error calculating permutation importance for {model_name_str}: {e}")
        else:
            print(f"    Skipping permutation importance for {model_name_str}: validation data unsuitable.")

    if importances_values is not None:
        if len(feature_names_list) == len(importances_values):
            feature_importance_df = pd.DataFrame({'feature': list(feature_names_list), 'importance': importances_values})
            feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False).head(15)
            print(f"\n  Top 15 Feature Importances for {model_name_str.upper()}:")
            print(feature_importance_df)
        else:
            print(f"Warning: Mismatch feature names ({len(feature_names_list)}) & importances ({len(importances_values)}) for {model_name_str}.")

    return results_metrics

In [4]:
# Cell 4: Define Experiment Runner Function (Modified for GridSearchCV)

def _align_to_train_columns(frame, train_columns, split_label):
    """Make *frame* carry exactly the training feature columns, in training order.

    A frame that is None, empty, or already matching is returned untouched.
    Otherwise a warning is printed and the frame is reindexed: columns missing
    from the split become NaN (to be filled by the imputer) and columns unknown
    to the training set are discarded.
    """
    if frame is None or frame.empty:
        return frame
    if list(frame.columns) == list(train_columns):
        return frame
    missing = [col for col in train_columns if col not in frame.columns]
    extra = [col for col in frame.columns if col not in train_columns]
    print(f"  Warning: {split_label} features differ from training features "
          f"(missing={missing}, extra={extra}). Aligning to training columns.")
    return frame.reindex(columns=train_columns)


def _impute_to_frame(imputer, frame, fit=False):
    """Impute *frame* and wrap the result back into a DataFrame with the same labels."""
    values = imputer.fit_transform(frame) if fit else imputer.transform(frame)
    # NOTE(review): SimpleImputer discards columns that were entirely missing at
    # fit time, in which case this constructor raises on a shape mismatch - confirm
    # the training features can never be all-NaN.
    return pd.DataFrame(values, columns=frame.columns, index=frame.index)


def _build_base_model(model_name_key):
    """Return an untuned estimator for a known model key, or None for an unknown key."""
    factories = {
        'rf': lambda: RandomForestRegressor(random_state=42, n_jobs=-1),
        'gb': lambda: HistGradientBoostingRegressor(random_state=42),  # Early stopping auto
        'xgb': lambda: xgb.XGBRegressor(random_state=42, n_jobs=-1),
        'lr': lambda: LinearRegression(),
    }
    factory = factories.get(model_name_key)
    return factory() if factory is not None else None


def run_experiment_with_gridsearch(
    exp_name, df_full_data, train_battery_ids, val_battery_ids, test_battery_ids,
    target_col_name, extra_exclude_cols, models_to_tune, param_grids_dict,
    cv_folds_gs, scoring_gs, save_artifacts=True
):
    """
    Run one experiment with GridSearchCV hyperparameter tuning.

    Steps:
    1. Split ``df_full_data`` into train/validation/test by ``battery_id``.
    2. Run ``train_models.preprocess_data`` on each non-empty split.
    3. Median-impute (fitted on train) and scale via ``train_models.scale_features``.
    4. For each key in ``models_to_tune``, grid-search on the training data
       (or fit defaults when the key has no grid).
    5. Evaluate the best estimator on validation and test data.
    6. Optionally persist scaler, imputer and models under
       ``BASE_OUTPUT_DIR/exp_name``.

    Parameters
    ----------
    exp_name : str
        Experiment label; also the output sub-directory and file-name suffix.
    df_full_data : DataFrame with a ``battery_id`` column (a ``regime`` column,
        if present, is dropped before preprocessing).
    train_battery_ids, val_battery_ids, test_battery_ids : iterables of battery IDs.
    target_col_name : str, target column handed to ``preprocess_data``.
    extra_exclude_cols : columns removed from every split before preprocessing.
    models_to_tune : iterable of model keys ('rf', 'gb', 'xgb', 'lr'); unknown
        keys are skipped with a warning.
    param_grids_dict : dict mapping model key -> GridSearchCV ``param_grid``.
    cv_folds_gs, scoring_gs : forwarded to GridSearchCV as ``cv`` and ``scoring``.
    save_artifacts : bool
        When True the scaler and imputer are saved; models are additionally
        saved only if the notebook-level ``SAVE_MODELS_FLAG`` is True.

    Returns
    -------
    (dict, dict) or (None, None)
        ``(metrics per model key, best estimator per model key)``, or
        ``(None, None)`` when no usable training data exists.
    """
    print(f"\n--- Running Experiment with GridSearchCV: {exp_name} ---")
    exp_output_dir = os.path.join(BASE_OUTPUT_DIR, exp_name)
    os.makedirs(exp_output_dir, exist_ok=True)

    # --- 1. Filter Data ---
    def _rows_for(battery_ids):
        # 'regime' is dropped up front to avoid warnings in preprocess_data;
        # drop() returns a new frame, so the master frame is never mutated.
        selected = df_full_data[df_full_data['battery_id'].isin(battery_ids)]
        return selected.drop(columns=['regime'], errors='ignore')

    df_train_exp = _rows_for(train_battery_ids)
    df_val_exp = _rows_for(val_battery_ids)
    df_test_exp = _rows_for(test_battery_ids)

    if df_train_exp.empty:  # Val/Test can be empty if not enough batteries, but train must exist
        print(f"ERROR: Training data is empty for experiment {exp_name}. Skipping.")
        return None, None

    # Drop extra feature columns BEFORE preprocessing
    cols_to_drop = [col for col in extra_exclude_cols if col in df_train_exp.columns]
    if cols_to_drop:
        df_train_exp = df_train_exp.drop(columns=cols_to_drop)
        df_val_exp = df_val_exp.drop(columns=cols_to_drop, errors='ignore')
        df_test_exp = df_test_exp.drop(columns=cols_to_drop, errors='ignore')
        print(f"Dropped extra feature columns: {cols_to_drop}")

    # --- 2. Preprocess Data ---
    # preprocess_data returns X (features), y (target), battery_ids (not used here), feature_names
    X_train, y_train, _, actual_feature_names = train_models.preprocess_data(df_train_exp, target_col_name)

    X_val, y_val = (None, None)
    if not df_val_exp.empty:
        X_val, y_val, _, _ = train_models.preprocess_data(df_val_exp, target_col_name)

    X_test, y_test = (None, None)
    if not df_test_exp.empty:
        X_test, y_test, _, _ = train_models.preprocess_data(df_test_exp, target_col_name)

    if X_train is None or X_train.empty:
        print(f"Error: X_train is None or empty after preprocessing for exp: {exp_name}. Skipping.")
        return None, None

    # Each split is preprocessed independently, so enforce the training schema
    # before anything fitted on the training features touches val/test.
    X_val = _align_to_train_columns(X_val, X_train.columns, 'validation')
    X_test = _align_to_train_columns(X_test, X_train.columns, 'test')

    # --- 3. Impute & Scale ---
    imputer = SimpleImputer(strategy='median')
    X_train_imputed = _impute_to_frame(imputer, X_train, fit=True)

    X_val_imputed = None
    if X_val is not None and not X_val.empty:
        X_val_imputed = _impute_to_frame(imputer, X_val)

    X_test_imputed = None
    if X_test is not None and not X_test.empty:
        X_test_imputed = _impute_to_frame(imputer, X_test)

    X_train_scaled, X_val_scaled, X_test_scaled, scaler = train_models.scale_features(
        X_train_imputed, X_val_imputed, X_test_imputed  # Pass potentially None X_val/X_test
    )

    # The evaluation helper expects Series targets even when a split is absent;
    # these do not depend on the model, so they are built once.
    y_val_safe = y_val if (y_val is not None and not y_val.empty) else pd.Series(dtype=y_train.dtype)
    y_test_safe = y_test if (y_test is not None and not y_test.empty) else pd.Series(dtype=y_train.dtype)

    experiment_results = {}
    trained_best_models = {}

    # --- 4. GridSearchCV for each model ---
    # NOTE(review): cv is passed straight through (an integer in this notebook), so
    # folds are formed over rows, not over batteries, and the imputer/scaler were
    # fitted on the full training set beforehand - confirm this is acceptable.
    for model_name_key in models_to_tune:
        print(f"\n-- Tuning {model_name_key.upper()} for experiment {exp_name} --")

        # Select appropriate dataset (scaled or unscaled) for tuning and final evaluation
        if model_name_key in ['lr', 'gb']:
            current_X_train_data = X_train_scaled
            current_X_val_data = X_val_scaled
            current_X_test_data = X_test_scaled
            print(f"  Using SCALED data for {model_name_key.upper()}")
        else:
            current_X_train_data = X_train_imputed
            current_X_val_data = X_val_imputed
            current_X_test_data = X_test_imputed
            print(f"  Using IMPUTED (unscaled) data for {model_name_key.upper()}")

        base_model = _build_base_model(model_name_key)
        if base_model is None:
            print(f"Warning: Unknown model '{model_name_key}'. Skipping.")
            continue

        if not param_grids_dict.get(model_name_key):
            print(f"  No parameter grid for {model_name_key}. Fitting with defaults.")
            base_model.fit(current_X_train_data, y_train)
            best_model_found = base_model
        else:
            grid_search = GridSearchCV(
                estimator=base_model,
                param_grid=param_grids_dict[model_name_key],
                scoring=scoring_gs,
                cv=cv_folds_gs,
                verbose=1,
                n_jobs=-1
            )
            print(f"  Starting GridSearchCV for {model_name_key.upper()}...")
            grid_search.fit(current_X_train_data, y_train)
            print(f"  Best parameters for {model_name_key.upper()}: {grid_search.best_params_}")
            best_model_found = grid_search.best_estimator_  # Already refitted on full train data

        trained_best_models[model_name_key] = best_model_found

        # --- 5. Evaluate the BEST model ---
        experiment_results[model_name_key] = evaluate_fitted_model(
            best_model_found, model_name_key,
            current_X_val_data, y_val_safe,
            current_X_test_data, y_test_safe,
            actual_feature_names  # Pass the correct feature names
        )

    # --- 6. Save Artifacts ---
    if save_artifacts:
        joblib.dump(scaler, os.path.join(exp_output_dir, f"scaler_{exp_name}.joblib"))
        joblib.dump(imputer, os.path.join(exp_output_dir, f"imputer_{exp_name}.joblib"))
        print(f"\nScaler and Imputer saved for {exp_name} in {exp_output_dir}")
        if SAVE_MODELS_FLAG:
            for model_name_key, model_instance in trained_best_models.items():
                model_filename = os.path.join(exp_output_dir, f"{model_name_key}_tuned_{exp_name}.joblib")
                joblib.dump(model_instance, model_filename)
            print(f"Tuned models saved for {exp_name} in {exp_output_dir}")

    return experiment_results, trained_best_models

In [5]:
# Cell 5: Define Experiment Scenarios
# regime -> list of battery IDs (plain lists so they can be sliced and concatenated).
all_battery_ids_by_regime = {
    regime_name: list(regime_ids)
    for regime_name, regime_ids in df_master_full.groupby('regime')['battery_id'].unique().items()
}

experiment_configs = []
# Seeds the legacy global NumPy RNG. Every shuffle below draws from it, so the
# ORDER of the shuffle calls is part of what makes the splits reproducible.
np.random.seed(42)


def _shuffled_regime_ids(regime_name):
    """Return a shuffled copy of the battery IDs of one regime (empty list if unknown)."""
    ids = list(all_battery_ids_by_regime.get(regime_name, []))
    np.random.shuffle(ids)
    return ids


def _sorted_union_of_regimes(*regime_names):
    """Return the battery IDs of several regimes as one sorted, duplicate-free list.

    Sorting matters: iteration order of a set of strings changes between Python
    processes (hash randomisation), so shuffling an unsorted set would give a
    different split after every kernel restart despite the fixed seed.
    """
    merged = set()
    for regime_name in regime_names:
        merged.update(all_battery_ids_by_regime.get(regime_name, []))
    return sorted(merged)


def _val_holdout_count(n_available, fraction=0.2):
    """Number of batteries to hold out for validation: ~fraction, at least 1, never all."""
    return min(max(1, int(fraction * n_available)), n_available - 1)


def _test_reserve_count(n_available):
    """Scenario 7: reserve one test battery per regime as soon as two or more exist."""
    return 1 if n_available >= 2 else 0


def _intra_regime_config(name, shuffled_ids):
    """Build an intra-regime config: last battery -> test, second to last -> val, rest -> train.

    Returns None when fewer than three batteries are available.
    """
    if len(shuffled_ids) < 3:
        return None
    n_train = len(shuffled_ids) - 2
    return {
        'name': name,
        'train_battery_ids': shuffled_ids[:n_train],
        'val_battery_ids': shuffled_ids[n_train:n_train + 1],
        'test_battery_ids': shuffled_ids[n_train + 1:],
    }


def _cross_regime_config(name, train_regimes, test_regimes):
    """Build a cross-regime config: train/val from ``train_regimes``, test on all of ``test_regimes``.

    The training pool is always shuffled (even when the scenario ends up being
    skipped) so that the RNG stream consumed by later scenarios does not depend
    on whether this one was usable. Returns None when the pool has fewer than
    two batteries or there is nothing to test on.
    """
    train_pool = _sorted_union_of_regimes(*train_regimes)
    test_ids = _sorted_union_of_regimes(*test_regimes)
    np.random.shuffle(train_pool)
    if len(train_pool) < 2 or not test_ids:
        return None
    n_val = _val_holdout_count(len(train_pool))
    return {
        'name': name,
        'train_battery_ids': train_pool[:-n_val],
        'val_battery_ids': train_pool[-n_val:],
        'test_battery_ids': test_ids,
    }


REGULAR_REGIMES = ('regular_constant', 'regular_variable')
RECOMM_REGIMES = ('recommissioned_two_stage', 'recommissioned_three_stage')

# --- Scenarios 1-3: Intra-Regime (Regular Constant, Regular Variable, Recommissioned Two-Stage) ---
for scenario_name, regime_name in [
    ('IntraRegime_RC_Tuned', 'regular_constant'),
    ('IntraRegime_RV_Tuned', 'regular_variable'),
    ('IntraRegime_R2S_Tuned', 'recommissioned_two_stage'),
]:
    intra_config = _intra_regime_config(scenario_name, _shuffled_regime_ids(regime_name))
    if intra_config is not None:
        experiment_configs.append(intra_config)

# --- Scenario 4: Intra-Regime - Recommissioned Three-Stage (R3S) ---
# Always exactly one battery per split; with only two batteries train doubles as val.
r3s_ids_orig = _shuffled_regime_ids('recommissioned_three_stage')
if len(r3s_ids_orig) >= 3:
    experiment_configs.append({
        'name': 'IntraRegime_R3S_Tuned',
        'train_battery_ids': [r3s_ids_orig[0]],
        'val_battery_ids': [r3s_ids_orig[1]],
        'test_battery_ids': [r3s_ids_orig[2]],
    })
elif len(r3s_ids_orig) == 2:
    experiment_configs.append({
        'name': 'IntraRegime_R3S_Tuned_TrainValEq_Test',
        'train_battery_ids': [r3s_ids_orig[0]],
        'val_battery_ids': [r3s_ids_orig[0]],  # Use train also as val
        'test_battery_ids': [r3s_ids_orig[1]],
    })

# --- Scenario 5: Cross-Regime - Train ALL Regular, Test ALL Recomm ---
# --- Scenario 6: Cross-Regime - Train ALL Recomm, Test ALL Regular ---
for scenario_name, train_regimes, test_regimes in [
    ('CrossRegime_TrainReg_TestRecomm_Tuned', REGULAR_REGIMES, RECOMM_REGIMES),
    ('CrossRegime_TrainRecomm_TestReg_Tuned', RECOMM_REGIMES, REGULAR_REGIMES),
]:
    cross_config = _cross_regime_config(scenario_name, train_regimes, test_regimes)
    if cross_config is not None:
        experiment_configs.append(cross_config)

# --- Scenario 7: Train on ALL (Regular + Recommissioned), Test on unseen Regular, Test on unseen Recommissioned ---
all_reg_constant_s7 = _shuffled_regime_ids('regular_constant')
all_reg_variable_s7 = _shuffled_regime_ids('regular_variable')
all_recomm_2stage_s7 = _shuffled_regime_ids('recommissioned_two_stage')
all_recomm_3stage_s7 = _shuffled_regime_ids('recommissioned_three_stage')

# Reserve the leading battery of each shuffled regime list for the dedicated test sets.
n_test_reg_c = _test_reserve_count(len(all_reg_constant_s7))
n_test_reg_v = _test_reserve_count(len(all_reg_variable_s7))
n_test_recomm_2s = _test_reserve_count(len(all_recomm_2stage_s7))
n_test_recomm_3s = _test_reserve_count(len(all_recomm_3stage_s7))

final_test_regular_batteries_s7 = all_reg_constant_s7[:n_test_reg_c] + all_reg_variable_s7[:n_test_reg_v]
final_test_recomm_batteries_s7 = all_recomm_2stage_s7[:n_test_recomm_2s] + all_recomm_3stage_s7[:n_test_recomm_3s]

# Everything not reserved for testing is pooled and shuffled for the train/val split.
training_validation_pool_s7 = (
    all_reg_constant_s7[n_test_reg_c:]
    + all_reg_variable_s7[n_test_reg_v:]
    + all_recomm_2stage_s7[n_test_recomm_2s:]
    + all_recomm_3stage_s7[n_test_recomm_3s:]
)
np.random.shuffle(training_validation_pool_s7)

if len(training_validation_pool_s7) >= 2:
    val_pool_size_s7 = _val_holdout_count(len(training_validation_pool_s7))
    train_pool_size_s7 = len(training_validation_pool_s7) - val_pool_size_s7
    train_combined_batteries_s7 = training_validation_pool_s7[:train_pool_size_s7]
    val_combined_batteries_s7 = training_validation_pool_s7[train_pool_size_s7:]

    if train_combined_batteries_s7 and val_combined_batteries_s7 and (final_test_regular_batteries_s7 or final_test_recomm_batteries_s7):
        experiment_configs.append({
            'name': 'TrainCombined_MultiTest_Tuned',
            'train_battery_ids': train_combined_batteries_s7,
            'val_battery_ids': val_combined_batteries_s7,
            'test_battery_ids_regular': final_test_regular_batteries_s7,
            'test_battery_ids_recomm': final_test_recomm_batteries_s7,
            'test_battery_ids': final_test_regular_batteries_s7 + final_test_recomm_batteries_s7,  # Combined for initial pass
            'is_multitest': True
        })
    else:
        print("Warning: Not enough batteries for 'TrainCombined_MultiTest_Tuned' scenario after reserving test sets.")
else:
    print("Warning: Training/validation pool too small for 'TrainCombined_MultiTest_Tuned' scenario.")

# --- Summary and sanity checks ---
print(f"\nDefined {len(experiment_configs)} experiment scenarios for tuning.")
for i, config in enumerate(experiment_configs):
    cfg_train = config['train_battery_ids']
    cfg_val = config['val_battery_ids']
    cfg_test = config.get('test_battery_ids')

    print(f"  {i+1}. {config['name']}")
    if config.get('is_multitest'):
        print(f"    Train Batts={len(cfg_train)}, Val Batts={len(cfg_val)}")
        print(f"    Test Batts (Regular)={len(config['test_battery_ids_regular'])}")
        print(f"    Test Batts (Recomm)={len(config['test_battery_ids_recomm'])}")
        print(f"    Test Batts (Overall Combined for main run)={len(config['test_battery_ids'])}")
    else:
        # Show 'N/A' itself when there is no test list (not the length of the string).
        test_count = len(cfg_test) if cfg_test is not None else 'N/A'
        print(f"    Train Batts={len(cfg_train)}, Val Batts={len(cfg_val)}, Test Batts={test_count}")

    # Sanity checks for overlaps
    if cfg_test is not None and not set(cfg_train).isdisjoint(cfg_test):
        # Tolerated only when a single battery serves as both train and test.
        single_shared_battery = (
            len(cfg_test) == 1 and len(cfg_train) == 1 and cfg_test[0] in cfg_train
        )
        if not single_shared_battery:
            print(f"    WARNING: Overlap between train and general test battery IDs for {config['name']}")
    if not set(cfg_train).isdisjoint(cfg_val):
        if cfg_val != cfg_train:  # Allow val==train
            print(f"    WARNING: Overlap between train and val battery IDs for {config['name']} (and val is not identical to train)")


Defined 7 experiment scenarios for tuning.
  1. IntraRegime_RC_Tuned
    Train Batts=8, Val Batts=1, Test Batts=1
  2. IntraRegime_RV_Tuned
    Train Batts=3, Val Batts=1, Test Batts=1
  3. IntraRegime_R2S_Tuned
    Train Batts=3, Val Batts=1, Test Batts=1
  4. IntraRegime_R3S_Tuned
    Train Batts=1, Val Batts=1, Test Batts=1
  5. CrossRegime_TrainReg_TestRecomm_Tuned
    Train Batts=12, Val Batts=3, Test Batts=8
  6. CrossRegime_TrainRecomm_TestReg_Tuned
    Train Batts=7, Val Batts=1, Test Batts=15
  7. TrainCombined_MultiTest_Tuned
    Train Batts=16, Val Batts=3
    Test Batts (Regular)=2
    Test Batts (Recomm)=2
    Test Batts (Overall Combined for main run)=4


In [6]:
# Cell 6: Run Experiments and Collect Results
all_tuned_experiment_results = {}

for config_idx, config in enumerate(experiment_configs):
    print(f"\nStarting Experiment {config_idx + 1}/{len(experiment_configs)}: {config['name']}")
    train_ids = list(config['train_battery_ids'])
    val_ids = list(config['val_battery_ids'])
    # Use the general 'test_battery_ids' for the main run (could be combined for multitest)
    test_ids_main_run = list(config.get('test_battery_ids', []))

    if not train_ids:
        print(f"Skipping experiment {config['name']} due to empty training battery ID list.")
        all_tuned_experiment_results[config['name']] = None # Mark as skipped
        continue
    if not val_ids: # Ensure val_ids has something, even if it's same as train for tiny sets
        print(f"Warning: Val IDs are empty for {config['name']}, using train IDs for validation.")
        val_ids = train_ids


    # Call the experiment runner
    # It returns: results (dict of metrics for models), trained_models_dict (dict of model_name: best_estimator)
    results_main_run, trained_best_models_dict = run_experiment_with_gridsearch(
        exp_name=config['name'],
        df_full_data=df_master_full,
        train_battery_ids=train_ids,
        val_battery_ids=val_ids,
        test_battery_ids=test_ids_main_run,
        target_col_name=TARGET_COL,
        extra_exclude_cols=EXTRA_EXCLUDE_COLS_FROM_FEATURES,
        models_to_tune=MODELS_TO_TUNE_AND_TRAIN,
        param_grids_dict=PARAM_GRIDS,
        cv_folds_gs=CV_FOLDS,
        scoring_gs=GRIDSEARCH_SCORING,
        save_artifacts=SAVE_MODELS_FLAG
    )

    if results_main_run:
        all_tuned_experiment_results[config['name']] = results_main_run

        # --- Special handling for MultiTest scenario ---
        if config.get('is_multitest'):
            print(f"\n-- Additional Evaluation for MultiTest Experiment: {config['name']} --")
            test_ids_regular_s7 = list(config.get('test_battery_ids_regular', []))
            test_ids_recomm_s7 = list(config.get('test_battery_ids_recomm', []))
            
            exp_output_dir_multi = os.path.join(BASE_OUTPUT_DIR, config['name'])
            imputer_multi, scaler_multi = None, None
            try:
                imputer_multi = joblib.load(os.path.join(exp_output_dir_multi, f"imputer_{config['name']}.joblib"))
                scaler_multi = joblib.load(os.path.join(exp_output_dir_multi, f"scaler_{config['name']}.joblib"))
            except FileNotFoundError:
                print(f"ERROR: Could not load imputer/scaler for {config['name']} for multi-test. Skipping additional evals.")
                continue # Skip additional evals for this config

            # Get original feature names from one of the trained models (assuming they are consistent)
            # This assumes run_experiment_with_gridsearch used train_models.preprocess_data which returned actual_feature_names
            # And that `evaluate_fitted_model` was called with it.
            # For simplicity, we re-derive feature names if needed, or ensure run_experiment_with_gridsearch returns them.
            # Let's re-run preprocess_data on a dummy df from train_ids to get feature names if not easily available
            # This is a bit inefficient but robust for getting names.
            # A better way: ensure run_experiment_with_gridsearch returns the 'actual_feature_names'
            # For now:
            temp_df_for_names = df_master_full[df_master_full['battery_id'].isin(train_ids[:1])].copy() # Use one train battery
            if 'regime' in temp_df_for_names.columns: temp_df_for_names.drop(columns=['regime'], inplace=True)
            cols_to_drop_fn = [col for col in EXTRA_EXCLUDE_COLS_FROM_FEATURES if col in temp_df_for_names.columns]
            if cols_to_drop_fn: temp_df_for_names.drop(columns=cols_to_drop_fn, inplace=True, errors='ignore')
            _, _, _, common_feature_names = train_models.preprocess_data(temp_df_for_names, TARGET_COL)
            if not common_feature_names :
                print("ERROR: Could not derive common feature names for multi-test evaluation. Skipping additional evals.")
                continue


            # Evaluate on REGULAR Test Set for MultiTest
            if test_ids_regular_s7:
                df_test_reg_s7 = df_master_full[df_master_full['battery_id'].isin(test_ids_regular_s7)].copy()
                if 'regime' in df_test_reg_s7.columns: df_test_reg_s7.drop(columns=['regime'], inplace=True)
                if cols_to_drop_fn: df_test_reg_s7.drop(columns=cols_to_drop_fn, inplace=True, errors='ignore')
                
                X_test_reg_s7, y_test_reg_s7, _, _ = train_models.preprocess_data(df_test_reg_s7, TARGET_COL)
                
                if X_test_reg_s7 is not None and not X_test_reg_s7.empty:
                    X_test_reg_imputed_s7 = pd.DataFrame(imputer_multi.transform(X_test_reg_s7), columns=X_test_reg_s7.columns, index=X_test_reg_s7.index)
                    X_test_reg_scaled_s7 = pd.DataFrame(scaler_multi.transform(X_test_reg_imputed_s7), columns=X_test_reg_imputed_s7.columns, index=X_test_reg_imputed_s7.index)

                    for model_name_key, best_model_instance in trained_best_models_dict.items():
                        current_X_test_data = X_test_reg_imputed_s7 if model_name_key not in ['lr', 'gb'] else X_test_reg_scaled_s7
                        print(f"  Model: {model_name_key.upper()} (Evaluating on specific REGULAR Test Set)")
                        metrics_specific_reg = evaluate_fitted_model(best_model_instance, f"{model_name_key}_reg_test", None, None, current_X_test_data, y_test_reg_s7, common_feature_names)
                        if model_name_key not in all_tuned_experiment_results[config['name']]: all_tuned_experiment_results[config['name']][model_name_key] = {}
                        all_tuned_experiment_results[config['name']][model_name_key]['test_rmse_regular'] = metrics_specific_reg.get('test_rmse')
                        all_tuned_experiment_results[config['name']][model_name_key]['test_mae_regular'] = metrics_specific_reg.get('test_mae')
                        all_tuned_experiment_results[config['name']][model_name_key]['test_r2_regular'] = metrics_specific_reg.get('test_r2')
                else:
                    print("  Skipping REGULAR specific test set for MultiTest: No data after preprocessing.")
            
            # Evaluate on RECOMMISSIONED Test Set for MultiTest
            if test_ids_recomm_s7:
                df_test_recomm_s7 = df_master_full[df_master_full['battery_id'].isin(test_ids_recomm_s7)].copy()
                if 'regime' in df_test_recomm_s7.columns: df_test_recomm_s7.drop(columns=['regime'], inplace=True)
                if cols_to_drop_fn: df_test_recomm_s7.drop(columns=cols_to_drop_fn, inplace=True, errors='ignore')

                X_test_recomm_s7, y_test_recomm_s7, _, _ = train_models.preprocess_data(df_test_recomm_s7, TARGET_COL)

                if X_test_recomm_s7 is not None and not X_test_recomm_s7.empty:
                    X_test_recomm_imputed_s7 = pd.DataFrame(imputer_multi.transform(X_test_recomm_s7), columns=X_test_recomm_s7.columns, index=X_test_recomm_s7.index)
                    X_test_recomm_scaled_s7 = pd.DataFrame(scaler_multi.transform(X_test_recomm_imputed_s7), columns=X_test_recomm_imputed_s7.columns, index=X_test_recomm_imputed_s7.index)

                    for model_name_key, best_model_instance in trained_best_models_dict.items():
                        current_X_test_data = X_test_recomm_imputed_s7 if model_name_key not in ['lr', 'gb'] else X_test_recomm_scaled_s7
                        print(f"  Model: {model_name_key.upper()} (Evaluating on specific RECOMMISSIONED Test Set)")
                        metrics_specific_recomm = evaluate_fitted_model(best_model_instance, f"{model_name_key}_recomm_test", None, None, current_X_test_data, y_test_recomm_s7, common_feature_names)
                        if model_name_key not in all_tuned_experiment_results[config['name']]: all_tuned_experiment_results[config['name']][model_name_key] = {}
                        all_tuned_experiment_results[config['name']][model_name_key]['test_rmse_recomm'] = metrics_specific_recomm.get('test_rmse')
                        all_tuned_experiment_results[config['name']][model_name_key]['test_mae_recomm'] = metrics_specific_recomm.get('test_mae')
                        all_tuned_experiment_results[config['name']][model_name_key]['test_r2_recomm'] = metrics_specific_recomm.get('test_r2')
                else:
                    print("  Skipping RECOMMISSIONED specific test set for MultiTest: No data after preprocessing.")
    else:
         all_tuned_experiment_results[config['name']] = None # Mark as having no results if main run failed

    print("-" * 70)

print("\n\n--- All Tuned Experiment Runs Completed (including MultiTest if any) ---")


Starting Experiment 1/7: IntraRegime_RC_Tuned

--- Running Experiment with GridSearchCV: IntraRegime_RC_Tuned ---
Dropped extra feature columns: ['capacity_Ah', 'energy_Wh', 'avg_power_W']
Target variable: SOH_cycle_capacity_%
Number of features selected: 41
Shape of X (features for model): (2082, 41)
Shape of y: (2082,)
Target variable: SOH_cycle_capacity_%
Number of features selected: 41
Shape of X (features for model): (865, 41)
Shape of y: (865,)
Target variable: SOH_cycle_capacity_%
Number of features selected: 41
Shape of X (features for model): (81, 41)
Shape of y: (81,)

-- Tuning RF for experiment IntraRegime_RC_Tuned --
  Using IMPUTED (unscaled) data for RF
  Starting GridSearchCV for RF...
Fitting 3 folds for each of 24 candidates, totalling 72 fits
  Best parameters for RF: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Evaluating RF...
  Validation RMSE: 1.9741, MAE: 0.9968, R2: 0.9195
  Test RMSE: 5.6243, MAE: 1.8263, R2: 0.4183

  

In [7]:
# Cell 7: Display Consolidated Results
#
# Text summary of every tuned experiment held in `all_tuned_experiment_results`
# (experiment name -> {model name -> metrics dict}, or a falsy value when the
# experiment produced nothing).

def _metric_or_nan(metrics, key):
    """Return ``metrics[key]``, or NaN when the key is absent OR stored as None.

    ``dict.get(key, default)`` only falls back to the default for a *missing*
    key; a key that is present with a ``None`` value comes back as ``None`` and
    then breaks the ``:.4f`` format spec with a TypeError. The experiment loop
    stores ``.get(...)`` results directly into these dicts, so None is possible.
    """
    value = metrics.get(key)
    return float('nan') if value is None else value


def _print_metric_line(label, metrics, prefix, suffix=''):
    """Print one ``RMSE / MAE / R2`` line for the keys ``{prefix}_<metric>{suffix}``.

    Args:
        label: Text shown before the colon (e.g. ``"Validation"``).
        metrics: Per-model metrics dict.
        prefix: ``'val'`` or ``'test'``.
        suffix: Optional key suffix such as ``'_regular'`` or ``'_recomm'``.
    """
    rmse = _metric_or_nan(metrics, f'{prefix}_rmse{suffix}')
    mae = _metric_or_nan(metrics, f'{prefix}_mae{suffix}')
    r2 = _metric_or_nan(metrics, f'{prefix}_r2{suffix}')
    print(f"    {label}: RMSE={rmse:.4f}, MAE={mae:.4f}, R2={r2:.4f}")


print("\n--- Consolidated Tuned Results Summary ---")
# First, a text summary
for exp_name_outer, exp_data_outer in all_tuned_experiment_results.items():
    print(f"\nExperiment: {exp_name_outer}")
    if not exp_data_outer:
        print("  No results for this experiment (possibly skipped or failed).")
        continue
    for model_name_outer, metrics_outer in exp_data_outer.items():
        print(f"  Model: {model_name_outer.upper()}")
        # A model entry may itself be None/empty; treat it as "no metrics"
        # so the summary prints NaNs instead of aborting the whole cell.
        metrics_outer = metrics_outer or {}

        _print_metric_line("Validation", metrics_outer, 'val')
        # For MultiTest experiments the plain test_* keys refer to the combined test set.
        _print_metric_line("Test (Overall)", metrics_outer, 'test')

        # Display specific multi-test results if present
        if metrics_outer.get('test_r2_regular') is not None:
            _print_metric_line("Test (REGULAR only)", metrics_outer, 'test', '_regular')

        if metrics_outer.get('test_r2_recomm') is not None:
            _print_metric_line("Test (RECOMM only)", metrics_outer, 'test', '_recomm')


# Tabular summary: one row per (experiment, model) pair, written to CSV for later comparison.
# Maps each output column to the metrics-dict key it is read from; keys that a model
# does not have come back as None and end up as missing values in the table.
summary_column_keys = {
    'Val RMSE': 'val_rmse',
    'Val MAE': 'val_mae',
    'Val R2': 'val_r2',
    'Test RMSE (Overall)': 'test_rmse',
    'Test MAE (Overall)': 'test_mae',
    'Test R2 (Overall)': 'test_r2',
    # Per-regime metrics written by the MultiTest evaluation, when present
    'Test RMSE (Regular)': 'test_rmse_regular',
    'Test MAE (Regular)': 'test_mae_regular',
    'Test R2 (Regular)': 'test_r2_regular',
    'Test RMSE (Recomm)': 'test_rmse_recomm',
    'Test MAE (Recomm)': 'test_mae_recomm',
    'Test R2 (Recomm)': 'test_r2_recomm',
}

results_list_tuned_df = []
for exp_name, exp_data in all_tuned_experiment_results.items():
    if exp_data:  # experiments without results contribute no rows
        for model_name, metrics in exp_data.items():
            entry = {'Experiment': exp_name, 'Model': model_name.upper()}
            entry.update(
                (column_label, metrics.get(metric_key))
                for column_label, metric_key in summary_column_keys.items()
            )
            results_list_tuned_df.append(entry)

if not results_list_tuned_df:
    print("\nNo results to create a summary DataFrame.")
else:
    # Columns that are empty for every row (e.g. no MultiTest run) are removed for a cleaner table.
    df_results_summary_tuned_final = pd.DataFrame(results_list_tuned_df).dropna(axis=1, how='all')

    print("\n\n--- Tuned Results Summary DataFrame ---")
    print(df_results_summary_tuned_final)

    # Persist the summary next to the other tuned-experiment artifacts
    csv_path = os.path.join(BASE_OUTPUT_DIR, "all_tuned_experiments_summary_incl_multitest.csv")
    df_results_summary_tuned_final.to_csv(csv_path, index=False)
    print(f"\nTuned results summary saved to {csv_path}")


--- Consolidated Tuned Results Summary ---

Experiment: IntraRegime_RC_Tuned
  Model: RF
    Validation: RMSE=1.9741, MAE=0.9968, R2=0.9195
    Test (Overall): RMSE=5.6243, MAE=1.8263, R2=0.4183
  Model: GB
    Validation: RMSE=2.2845, MAE=1.2195, R2=0.8922
    Test (Overall): RMSE=4.0013, MAE=1.5292, R2=0.7056
  Model: XGB
    Validation: RMSE=2.2409, MAE=1.4775, R2=0.8963
    Test (Overall): RMSE=4.4920, MAE=1.0706, R2=0.6290
  Model: LR
    Validation: RMSE=1.6283, MAE=0.8780, R2=0.9452
    Test (Overall): RMSE=2.6848, MAE=2.0939, R2=0.8675

Experiment: IntraRegime_RV_Tuned
  Model: RF
    Validation: RMSE=3.9115, MAE=3.1552, R2=0.6876
    Test (Overall): RMSE=2.3880, MAE=1.5275, R2=0.8713
  Model: GB
    Validation: RMSE=4.7544, MAE=2.9563, R2=0.5384
    Test (Overall): RMSE=3.3162, MAE=2.1012, R2=0.7519
  Model: XGB
    Validation: RMSE=2.3535, MAE=2.0228, R2=0.8869
    Test (Overall): RMSE=3.0362, MAE=1.9476, R2=0.7920
  Model: LR
    Validation: RMSE=4.6750, MAE=4.4871, R2=0.55