# Example: Hyperparameter optimization with Optuna

This notebook introduces 3 examples for hyperparameter optimization based on different optimization objectives:
1. Optimizing the Autoencoder reconstruction using the MSE
2. Optimizing the FaultDetector classification performance using the Fbeta score
3. Optimizing the FaultDetector classification performance using the CARE-score

The optimization is done using the [CARE to Compare dataset](https://doi.org/10.5281/zenodo.14958989)

In [None]:
from copy import deepcopy

import optuna as op
import numpy as np
from sklearn.metrics import fbeta_score

from energy_fault_detector import FaultDetector, Config
from energy_fault_detector.evaluation import CAREScore, Care2CompareDataset

In [None]:
# Location of the CARE to Compare dataset; passed to Care2CompareDataset in the cells below
data_path = './Care_To_Compare'

## Optimize autoencoder reconstruction

In [None]:
# Load a single event of the CARE2Compare dataset; only the training part is used in this section
c2c = Care2CompareDataset(data_path)
event_id = 47
train_data, normal_index, _, _ = c2c.load_and_format_event_dataset(
    event_id=event_id,
    index_column='time_stamp',
)

# Baseline configuration from which every trial starts
model_config = Config('c2c_configs/windfarm_C.yaml')
c2c.update_c2c_config(model_config, 'C')

# Keep only the first N rows so that the example runs quickly
N = 10000
train_data, normal_index = train_data.iloc[:N], normal_index.iloc[:N]

# Create an objective - what should be optimized? --> MSE of the reconstruction error
# NOTE:
# you can increase the speed of this part slightly if you do not need to fit the datapreprocessor.
# in that case, you would fit and apply the data preprocessor outside of this function and only fit the autoencoder inside the objective.
def reconstruction_mse(trial: op.Trial) -> float:
    """Sample new hyperparameters, fit a new model and return the validation reconstruction MSE.

    Only the autoencoder is fitted (no threshold), and the trained model is not saved.

    Args:
        trial: optuna Trial object used to sample the hyperparameters.

    Returns:
        Mean of the squared reconstruction errors of the validation data (to be minimized).
    """
    # Work on a copy so that sampled values never leak into the shared base configuration
    cfg = deepcopy(model_config.config_dict)

    autoencoder_params = cfg['train']['autoencoder']['params']

    # training hyperparameters
    autoencoder_params['batch_size'] = int(trial.suggest_categorical(name='batch_size', choices=[32, 64, 128]))
    autoencoder_params['learning_rate'] = trial.suggest_float(name='learning_rate', low=1e-5, high=0.01, log=True)
    autoencoder_params['decay_rate'] = trial.suggest_float(name='decay_rate', low=0.8, high=0.99)

    # architecture
    autoencoder_params['layers'][0] = trial.suggest_int(name='layers_0', low=100, high=400)
    autoencoder_params['layers'][1] = trial.suggest_int(name='layers_1', low=50, high=100)
    autoencoder_params['code_size'] = trial.suggest_int(name='code_size', low=10, high=30)

    # Create a new model from the sampled configuration and train it.
    # For autoencoder optimization we do not need to fit a threshold.
    # NOTE: the keyword is `save_models` (as in the other objectives of this notebook);
    # the previous spelling `save_model` did not match it.
    model = FaultDetector(Config(config_dict=cfg))
    training_result = model.fit(
        train_data,
        normal_index=normal_index,
        fit_autoencoder_only=True,
        save_models=False,
    )

    # MSE of the reconstruction errors of the validation data - this is minimized
    deviations = training_result.val_recon_error
    score = np.mean(np.square(deviations))

    return score

In [None]:
study = op.create_study(sampler=op.samplers.TPESampler(), study_name='autoencoder_optimization', direction='minimize')

# To ensure that the first trial is done with the hyperparameters of the configuration, enqueue a trial.
# Parameters that are not enqueued are sampled, so every parameter suggested by the objective is listed.
autoencoder_params = model_config.config_dict['train']['autoencoder']['params']
baseline_params = {
    'batch_size': autoencoder_params['batch_size'],
    'learning_rate': autoencoder_params['learning_rate'],
    'layers_0': autoencoder_params['layers'][0],
    'layers_1': autoencoder_params['layers'][1],
    'code_size': autoencoder_params['code_size'],
}
# decay_rate is optimized as well; pin it only when the configuration actually defines a value
if autoencoder_params.get('decay_rate') is not None:
    baseline_params['decay_rate'] = autoencoder_params['decay_rate']
study.enqueue_trial(params=baseline_params)

# Run optimization for 5 trials
study.optimize(reconstruction_mse, n_trials=5)

In [None]:
# Hyperparameters of the best trial, i.e. the one with the lowest validation MSE
study.best_params

In [None]:
# Analyze results: show all trials of the autoencoder study as a DataFrame
study.trials_dataframe()

## Optimize fault detection model - F-beta score

In [None]:
# Load the same CARE2Compare event, this time keeping the test split for the evaluation
c2c = Care2CompareDataset(data_path)
event_id = 47
train_data, normal_index, test_data, test_normal_index = c2c.load_and_format_event_dataset(
    event_id=event_id,
    index_column='time_stamp',
)

# Look up the metadata of the event and build the ground truth for the test period from it
event_info = c2c.event_info_all.loc[c2c.event_info_all['event_id'] == event_id].iloc[0]
ground_truth = CAREScore.create_ground_truth(
    normal_index=test_normal_index,
    event_label=event_info['event_label'],
    event_start=event_info['event_start'],
    event_end=event_info['event_end'],
)

In [None]:
# Baseline configuration from which every trial starts
model_config = Config('c2c_configs/windfarm_C.yaml')
c2c.update_c2c_config(model_config, 'C')

# Keep only the first N training rows so that the example runs quickly
N = 10000
train_data, normal_index = train_data.iloc[:N], normal_index.iloc[:N]

# helper function to (re)set the scaling step of the DataPreprocessor
def set_scaler_step(cfg: dict, choice: str) -> dict:
    """Replace the scaling step of the data preprocessor configuration.

    Every existing ``standard_scaler`` / ``minmax_scaler`` step is removed and the scaler selected
    by ``choice`` is appended as the last step. ``cfg`` is updated in place; a missing
    ``data_preprocessor`` section or a missing ``steps`` list is created.

    Args:
        cfg: configuration dictionary; must contain a ``'train'`` section.
        choice: ``'minmax'`` selects the min-max scaler, any other value the standard scaler.

    Returns:
        The same ``cfg`` object, with the updated step list.
    """
    dp = cfg['train'].setdefault('data_preprocessor', {})
    # `steps` may be absent (or None) when the section was just created above:
    # treat that as an empty pipeline instead of failing on iteration.
    steps = dp.get('steps') or []

    scaler_names = {'standard_scaler', 'minmax_scaler'}
    chosen_scaler = 'minmax_scaler' if choice == 'minmax' else 'standard_scaler'

    # Build a new list (the original step list is left untouched): all non-scaler steps
    # in their original order, followed by the chosen scaler.
    dp['steps'] = [s for s in steps if s.get('name') not in scaler_names] + [{'name': chosen_scaler}]
    return cfg


def f_score(trial: op.Trial) -> float:
    """Train a FaultDetector with sampled hyperparameters and score its predictions on the test data.

    Only meaningful for datasets that contain anomalies.

    Args:
        trial: optuna Trial object used to sample the hyperparameters.

    Returns:
        F-beta score (beta=0.5) of the predicted anomalies against the ground truth.
    """
    # Every trial gets its own copy of the base configuration
    trial_cfg = deepcopy(model_config.config_dict)

    # Scaler used by the data preprocessor
    scaler_choice = trial.suggest_categorical('scale', ['minmax', 'standardize'])
    trial_cfg = set_scaler_step(trial_cfg, scaler_choice)

    # Training hyperparameters of the autoencoder
    ae = trial_cfg['train']['autoencoder']['params']
    ae['batch_size'] = int(trial.suggest_categorical(name='batch_size', choices=[32, 64, 128]))
    ae['learning_rate'] = trial.suggest_float(name='learning_rate', low=1e-5, high=0.01, log=True)
    ae['decay_rate'] = trial.suggest_float(name='decay_rate', low=0.8, high=0.99)

    # Architecture of the autoencoder
    ae['layers'][0] = trial.suggest_int(name='layers_0', low=100, high=400)
    ae['layers'][1] = trial.suggest_int(name='layers_1', low=50, high=100)
    ae['code_size'] = trial.suggest_int(name='code_size', low=10, high=30)

    # Fit a fresh model (without saving it) and predict the test period
    detector = FaultDetector(Config(config_dict=trial_cfg))
    detector.fit(train_data, normal_index=normal_index, save_models=False)
    predictions = detector.predict(test_data)

    # Sort both series by index before they are compared
    y_true = ground_truth.sort_index()
    y_pred = predictions.predicted_anomalies.sort_index()
    return fbeta_score(y_true=y_true, y_pred=y_pred, beta=0.5)

In [None]:
study = op.create_study(sampler=op.samplers.TPESampler(), study_name='ad_optimization', direction='maximize')

# To ensure that the first trial is done with the hyperparameters of the configuration, enqueue a trial.
# Parameters that are not enqueued are sampled; 'scale' is not enqueued here, so it is sampled
# for the first trial as well.
autoencoder_params = model_config.config_dict['train']['autoencoder']['params']
baseline_params = {
    'batch_size': autoencoder_params['batch_size'],
    'learning_rate': autoencoder_params['learning_rate'],
    'layers_0': autoencoder_params['layers'][0],
    'layers_1': autoencoder_params['layers'][1],
    'code_size': autoencoder_params['code_size'],
}
# decay_rate is optimized as well; pin it only when the configuration actually defines a value
if autoencoder_params.get('decay_rate') is not None:
    baseline_params['decay_rate'] = autoencoder_params['decay_rate']
study.enqueue_trial(params=baseline_params)

# Run optimization for 5 trials
study.optimize(f_score, n_trials=5)

In [None]:
# Show all trials of the F-beta study as a DataFrame
study.trials_dataframe()

## Optimize fault detection model - CARE score
Optimize the CARE Score. Note that this takes a while, as we train a model for each subdataset.

In [None]:
# Evaluation target: all events of Wind Farm B from the CARE2Compare dataset
c2c = Care2CompareDataset(data_path)
wind_farm = 'B'

# Baseline configuration from which every trial starts
model_config = Config('c2c_configs/windfarm_B.yaml')
c2c.update_c2c_config(model_config, wind_farm)

# Speed up for testing: use at most N rows per dataset and at most max_datasets datasets per trial
N, max_datasets = 10000, 15

def care_objective(trial: op.Trial) -> float:
    """Train and evaluate one FaultDetector per event dataset and return the resulting CARE score.

    The hyperparameters sampled for the trial are shared by all models of the trial. The
    module-level settings ``N`` (maximum number of rows per dataset) and ``max_datasets``
    (maximum number of datasets per trial) limit the amount of work; ``None`` disables the
    respective limit.

    Args:
        trial: optuna Trial object used to sample the hyperparameters.

    Returns:
        Final CARE score over all evaluated events (to be maximized).
    """
    # Use a fresh config dict per trial
    cfg = deepcopy(model_config.config_dict)

    autoencoder_params = cfg['train']['autoencoder']['params']
    threshold_params = cfg['train']['threshold_selector']['params']

    autoencoder_params['batch_size'] = int(trial.suggest_categorical(name='batch_size', choices=[32, 64, 128]))
    autoencoder_params['learning_rate'] = trial.suggest_float(name='learning_rate', low=1e-5, high=0.01, log=True)

    # architecture
    autoencoder_params['layers'][0] = trial.suggest_int(name='layers_0', low=20, high=100)
    autoencoder_params['code_size'] = trial.suggest_int(name='code_size', low=5, high=20)

    # threshold
    threshold_params['gamma'] = trial.suggest_float(name='gamma', low=0.05, high=0.3)
    threshold_params['nn_size'] = trial.suggest_int(name='nn_size', low=20, high=50)

    # The number of events of the wind farm does not change while iterating: compute it once
    n_events = len(c2c.event_info_all[c2c.event_info_all['wind_farm'] == wind_farm])

    # Create a CAREScore object and train+evaluate each dataset for this wind farm
    care_score = CAREScore(coverage_beta=0.5, eventwise_f_score_beta=0.5, anomaly_detection_method='criticality')
    datasets = c2c.iter_formatted_datasets(wind_farm=wind_farm, index_column='time_stamp')
    for i, (x_train, y_train, x_test, y_test, event_id) in enumerate(datasets, start=1):
        print(f"event {i}/{n_events}")
        if N is not None:
            x_train = x_train.iloc[:N]
            x_test = x_test.iloc[:N]
            y_train = y_train.iloc[:N]
            y_test = y_test.iloc[:N]

        # create a new model using our new configuration and train the model
        model = FaultDetector(Config(config_dict=cfg))
        _ = model.fit(x_train, normal_index=y_train, save_models=False)
        prediction = model.predict(x_test)
        event_info = c2c.event_info_all[c2c.event_info_all['event_id'] == event_id].iloc[0]
        care_score.evaluate_event(
            event_id=event_id,
            event_start=event_info['event_start'],
            event_end=event_info['event_end'],
            event_label=event_info['event_label'],
            normal_index=y_test,
            predicted_anomalies=prediction.predicted_anomalies,
            ignore_normal_index=False
        )

        # Stop at the end of the body so that the next dataset is not requested from the iterator
        if max_datasets is not None and i >= max_datasets:
            break

    score = care_score.get_final_score()

    return score

In [None]:
study = op.create_study(
    sampler=op.samplers.TPESampler(),
    study_name='care_optimization',
    direction='maximize',
)

# Enqueue the hyperparameters of the provided configuration so that they are evaluated in the first trial
autoencoder_params = model_config.config_dict['train']['autoencoder']['params']
threshold_params = model_config.config_dict['train']['threshold_selector']['params']
study.enqueue_trial(
    params=dict(
        batch_size=autoencoder_params['batch_size'],
        learning_rate=autoencoder_params['learning_rate'],
        layers_0=autoencoder_params['layers'][0],
        code_size=autoencoder_params['code_size'],
        gamma=threshold_params['gamma'],
        nn_size=threshold_params['nn_size'],
    )
)

# Each trial loops through many datasets and trains many models, so collect garbage after every trial
study.optimize(care_objective, n_trials=5, gc_after_trial=True)

In [None]:
# Show all trials of the CARE-score study as a DataFrame
study.trials_dataframe()