In [None]:
import sys

# Make the parent folder importable so that the `_library` package can be found.
# `sys.path` is referenced explicitly (instead of `from sys import path`) because the
# imports cell later rebinds the bare name `path` to `os.path`: with the bare name,
# re-running this cell after the imports would fail on `'..' not in path`.
if '..' not in sys.path:
    sys.path.insert(0, '..')

In [None]:
import _library.som_utils as utils
import _library.som_pre_utils as pre_utils
import _library.fault_utils as fault_utils
import numpy as np
import pandas as pd
import pickle
import csv 
from _library.utils import SYSTEM_NAMES, SUBFOLDERS, load_datasets
from os import path, makedirs
from sklearn.preprocessing import StandardScaler
from IPython.display import Javascript
from scipy.io import loadmat

In [None]:
# Move to the project root so that the relative "data/..." paths built below resolve.
# NOTE(review): absolute, machine-specific path -- adjust it when running the notebook elsewhere.
%cd /mnt/data/vieri/projects/SAMPLE/

# Read the pump dataset

In [None]:
# Name of the analysed system: it selects the data folder and the pre-trained SOM versions
system_name = "Pump sensor"
# Relative folder ("data/<system name>") holding the input CSV and, later, the "SOMs" output folder
system_path = path.join("data", system_name)

In [None]:
# Location of the raw sensor readings
file_path = path.join(system_path, "sensor.csv")

# Load the CSV file (its first column is used as the index)
pump_sensor_df = pd.read_csv(file_path, index_col = 0)

# Replace the index with a fake 1-hour sampled timeseries (original: 1-minute sampled)
fake_hourly_index = pd.date_range(start = '2022-01-01', periods = len(pump_sensor_df), freq = "1H")
pump_sensor_df.index = fake_hourly_index

# Discard the original timestamps together with the artefact sensors
pump_sensor_df = pump_sensor_df.drop(columns = ['timestamp', 'sensor_15', 'sensor_50'])

# Number of observations for each class (i.e., machine status), from the most to the least frequent
observations_per_class = pump_sensor_df.groupby('machine_status')['sensor_00'].count()
grouped_classes = observations_per_class.rename('Observations').to_frame()
grouped_classes = grouped_classes.sort_values(by = 'Observations', ascending = False)
display(grouped_classes)

# Overview of the columns and of the failure events
pump_sensor_df.info()
is_broken = pump_sensor_df['machine_status'] == 'BROKEN'
display(pump_sensor_df[is_broken])

# Train and test split

In [None]:
df = pump_sensor_df

# Split the observations according to their label (machine status)
nominal_behaviour = df[df['machine_status'] == 'NORMAL']
failure_events = df[df['machine_status'] == 'BROKEN']
recovering_status = df[df['machine_status'] == 'RECOVERING']

# TEST subset: all the observations lying within +/- `temporal_tolerance` of a failure event.
# A set is used so that overlapping windows (two failures closer than twice the tolerance)
# do not select the same timestamp twice, which would duplicate rows in the test subset.
temporal_tolerance = pd.Timedelta(10, unit="days")
failure_window_obs = set()
for timestamp in failure_events.index:
    df_period = df.loc[timestamp - temporal_tolerance:timestamp + temporal_tolerance, :]
    failure_window_obs.update(df_period.index)
failure_idk_obs = sorted(failure_window_obs)

# Compute the indexes (TRAIN: everything outside the failure windows)
idk_test = list(failure_idk_obs)
idk_train = sorted(set(df.index) - failure_window_obs)

# Create the two subsets (the train subset is capped to its first `max_train_obs` observations)
max_train_obs = 50000
train_data = df.loc[idk_train, :].iloc[:max_train_obs]
test_data = df.loc[idk_test, :]

# Compute the classes within them
train_classes_counter = train_data.groupby(by = 'machine_status').count()['sensor_00'].to_dict()
test_classes_counter = test_data.groupby(by = 'machine_status').count()['sensor_00'].to_dict()

# Separate the classes from the features.
# A non in-place drop is used: dropping in place on a slice may raise a SettingWithCopyWarning.
train_classes = train_data['machine_status']
train_data = train_data.drop(columns = ['machine_status'])

test_classes = test_data['machine_status']
test_data = test_data.drop(columns = ['machine_status'])

# Visualize their dimensions
print("-" * 28 + f"\nTOTAL: {len(df)} obs. (~ {int(len(df)/1000)} K)\n" + "-" * 28 + "\n")
print( "-" * 40 + f"\na) TRAIN SUBSET: {len(train_data)} obs. ({(round((len(train_data)/len(df))*100, 1))} %)\n"  + "-" * 40)
print("\t-->", '\n\t--> '.join([f'CLASS "{class_name}": {counter} obs. ({(round((counter/len(train_data))*100, 1))} %)'
                           for class_name, counter in train_classes_counter.items()]))
print("\n" + "-" * 40 + f"\nb) TEST SUBSTET:  {len(test_data)} obs. ({(round((len(test_data)/len(df))*100, 1))} %)\n"  + "-" * 40)
print("\t-->", '\n\t--> '.join([f'CLASS {class_name}: {counter} obs. ({(round((counter/len(test_data))*100, 1))} %)'
                           for class_name, counter in test_classes_counter.items()]))

# METRICS: Test
# precision --> 
# 

# Data pre-processing
**SOURCE**: *'Fault Prediction and Early-Detection in Large PV Power Plants
Based on Self-Organizing Maps'* by Alessandro Betti et al. (2021).
- Compute a missing feature (i.e., DC Power)
- *Data Pre-Processing* (Section 2.3)
- *Data Detrending* (Section 2.5)
- *Data Scaling* (Section 2.5)

In [None]:
# Switches of the pre-processing pipeline: a step is carried out only if its flag is True
pre_processing_steps = {
    "Compute DC Power": False,
    "Three-phase average": False,
    "Above Solar Irradiance Minimum": False,
    "Linear regression for AC power outliers": False,
    "Linear regression for AC power outliers (Test set)": False,
    "Data detrending": False,
    "Data standardization": True,
}

# Report the selected steps (one per line)
selected_step_messages = [f"PRE-PROCESSING STEP SELECTED: {step}"
                          for step, flag in pre_processing_steps.items() if flag]
print(*selected_step_messages, sep = "\n")

## F) Data Standardization

In [None]:
if not pre_processing_steps["Data standardization"]:
    print("This pre-processing step (Data standardization) has not been selected.")
else:
    # The scaler is fitted on the TRAIN data only and then applied to both subsets
    scaler = StandardScaler()
    scaler.fit(train_data)

    # TRAIN data: the scaled values are wrapped back into a dataframe (same index and columns)
    scaled_train_values = scaler.transform(train_data)
    train_data = pd.DataFrame(scaled_train_values, index = train_data.index, columns = train_data.columns)
    print("(TRAIN) has been standardized.")

    # TEST data: scaled with the statistics learnt from the TRAIN data
    scaled_test_values = scaler.transform(test_data)
    test_data = pd.DataFrame(scaled_test_values, index = test_data.index, columns = test_data.columns)
    print(" (TEST) has been standardized (using a fitted StandardScaler).\n")

## End of the pre-processing steps 
### Example of the train dataset

In [None]:
# Summary (columns, dtypes, non-null counts) of the two pre-processed subsets
banner = "-" * 20
for leading_text, subset_label, subset_df in (("", "TRAIN", train_data), ("\n", "TEST", test_data)):
    print(f"{leading_text}{banner} {subset_label} {banner}")
    subset_df.info()

# Self-organizing map (SOM)

In [None]:
# The following cells access the data by inverter name: the single pump dataset is stored under 'INV1'.
# The wrapping is guarded so that re-running this cell does not nest the dictionaries
# (i.e., {'INV1': {'INV1': dataframe}}), which would break the training cell.
if not isinstance(train_data, dict):
    train_data = {'INV1': train_data}
if not isinstance(test_data, dict):
    test_data = {'INV1': test_data}

In [None]:
# True: the training cell explores a grid of hyperparameters (utils.grid_search)
# False: the training cell trains a single SOM per inverter using the values in `som_config`
grid_search = False

## Pre trained 

In [None]:
# True: the training cell loads the pickled SOM listed in `trained_version` (when the file exists)
# False: a new SOM is trained
pre_trained = False

In [None]:
# Suffix describing the pre-processing steps used for the pre-trained SOMs
pre_steps = "_1hour_averaged_fullReg_detrended"

# Pre-trained SOM version (i.e., the name of its saving subfolder) for each inverter of the system
if system_name == "Soleto 1":
    trained_version = {
        "INV1": "20grid_3Kepoch_0.001lr_10sigma_gaussianFunc" + pre_steps,
        "INV2": "14grid_10Kepoch_0.001lr_9sigma_gaussianFunc" + pre_steps,
        "INV3": "12grid_3Kepoch_0.001lr_7sigma_gaussianFunc" + pre_steps,
        "INV4": "22grid_2Kepoch_0.01lr_8sigma_gaussianFunc" + pre_steps
    }
elif system_name == "Pump sensor":
    trained_version = {
        "INV1": "20grid_3Kepoch_0.001lr_10sigma_gaussianFunc" + pre_steps,
    }
else:
    # Always (re)define the variable: otherwise it would be undefined, or silently
    # keep the value of a previous run carried out for another system
    trained_version = {}
    print(f"No pre-trained SOM version is registered for the system '{system_name}'.")

In [None]:
# Show the pre-trained versions only when they are going to be used
if pre_trained:
    display(trained_version)
else:
    print(f"PRE TRAINED SOM: {pre_trained}")

### BEST AVERAGED SOMs
- **Soleto 1**: *18grid_4Kepoch_0.001lr_9sigma_gaussianFunc*
    - (INV1:TOP2) --> *18grid_4Kepoch_0.001lr_8sigma_gaussianFunc*
- **Soleto 2**: *16grid_4Kepoch_0.001lr_10sigma_gaussianFunc* 
- **Galatina**: *26grid_10Kepoch_0.01lr_10sigma_gaussianFunc*
- **All PV Systems**: *16grid_5Kepoch_0.001lr_9sigma_gaussianFunc*

## Create a saving folder

In [None]:
# -------- MAIN FOLDER ----------------
# Created inside the system folder: it collects every output of this notebook
saving_folder_name = "SOMs"
# -------- SUB FOLDERS ----------------
# One subfolder (inside the main folder) for each kind of output
saving_graph_folder_name = "Graphs"
saving_som_folder_name = "Trained SOM"
saving_kpi_folder_name = "KPI scores"
saving_warnings_folder_name = "Warnings"
saving_metrics_folder_name = "Metrics"
saving_params_folder_name = "Params"

In [None]:
# Main saving folder: created (and reported) only when it is missing
saving_folder_path = path.join(system_path, saving_folder_name)
main_folder_is_missing = not path.exists(saving_folder_path)
if main_folder_is_missing:
    makedirs(saving_folder_path)
    print(f"PV System --> {system_name.upper()}\nA new saving folder has been created: {saving_folder_path}\n")

# Subfolders hosting a dedicated folder for each SOM version
# (this list is also handed to `utils.create_somVersion_folders` by the training cell)
subfolders = [
    saving_graph_folder_name,
    saving_som_folder_name,
    saving_kpi_folder_name,
    saving_warnings_folder_name,
    saving_metrics_folder_name,
]

# Create the missing subfolders (the 'Params' folder is created too, but it is kept out of `subfolders`)
for subfolder in [*subfolders, saving_params_folder_name]:
    subfolder_path = path.join(saving_folder_path, subfolder)
    if path.exists(subfolder_path):
        continue
    makedirs(subfolder_path)
    print(f"{system_name} --> Folder '{subfolder}' has been created!")

## Hyperparameters

In [None]:
# Settings shared by all the SOMs: both are forwarded to utils.train_som / utils.grid_search
map_topology = 'hexagonal'
activation_distance = 'euclidean'

In [None]:
# "Static" hyperparameters of each inverter, used when the grid search is disabled
som_config = {
    "INV1": dict(
        dim_grid = 20,
        epoch = 10_000,
        learning_rate = 0.001,
        sigma = 2,
        neighborhood_function = "gaussian",
    ),
}

## Training phase

In [None]:
# Forwarded as `shuffling_flag` to utils.train_som / utils.grid_search
train_data_shuffling = True
# True: the train data of all the inverters is concatenated and used as a single training set
merge_inv_data = False

In [None]:
inv_names = ['INV1']
dataset_name = 'all data'

# Outcomes of this cell: the SOM and the name of its version (i.e., saving subfolder) for each inverter
trained_som = dict()
som_version = dict()

for inv_name in inv_names:

    # ----------------------------------------
    # LOADED SPECIFIC PRETRAINED VERSION
    tmp_inv_number = 1
    tmp_inv_name = 'INV'+ str(tmp_inv_number)
    # ----------------------------------------

    # Path of the pickled SOM. It is resolved only when a pre-trained SOM has been requested,
    # so that training from scratch does not require an entry in `trained_version`.
    file_name = f"trained_som_{tmp_inv_name}.p"
    loading_path = None
    if pre_trained:
        loading_path = path.join(saving_folder_path, saving_som_folder_name, trained_version[tmp_inv_name], file_name)

    # CASE 1: Load the trained SOM
    if loading_path is not None and path.exists(loading_path):
        # NOTE(review): unpickling executes arbitrary code, load only files produced by this notebook
        with open(loading_path, 'rb') as infile:
            som = pickle.load(infile)
            print(40 * "-", f"\nThe trained SOM has been LOADED!\nVersion [{tmp_inv_name}]: "\
                  f"{trained_version[tmp_inv_name]}\n" + 40 * "-")

        # CREATE THE POTENTIAL MISSING SUBFOLDERS for this SOM configuration
        pretrained_config_params = trained_version[tmp_inv_name]
        pretrained_config_params += "_trainedInv" + str(tmp_inv_number) + "Data"
        trained_som_version = utils.create_somVersion_folders(saving_folder_path, subfolders, dataset_name,
                                                              pretrained_config_params, merge_inv_data,
                                                              pre_processing_steps)
    else:
        # CASE 2: Train a SOM
        if pre_trained:
            # Make the fallback explicit instead of silently training a new SOM
            print(f"No pre-trained SOM has been found in '{loading_path}': a new SOM will be trained.")

        # 0) Merging the inverter data in case the flag has been set to true
        if merge_inv_data:
            all_data = [train_data[name] for name in inv_names]
            input_data = pd.concat(all_data).sort_index()
            print(f"The inverter data ({len(inv_names)}) has been merged ({len(input_data)} obs.)")
        else:
            input_data = train_data[inv_name]

        # 0) Create a matrix for the training data
        print("\n" + 50 * "-" + "\nCreating a data matrix for the training data...\n" + 50 * "-")
        train_matrix, train_cols, train_timestamps = utils.to_num_matrix(input_data, stat_nan_values = "last_valid_obs")

        # CASE 2.1: Carry out a GRID SEARCH function to find out the optimal hyperparameters
        if grid_search:
            neighborhood_functions = ["gaussian"]

            # Grid of values: 15 epoch values * 14 grid dims * 2 learning rates * 10 sigma values
            # (i.e., 4200 combinations, assuming that the grid search tries all of them)
            epoch_values = list(range(500, 10000, 1000)) + list(range(10000, 60000, 10000))
            dim_grid_values = np.arange(4, 32, step = 2)
            sigma_values = np.arange(1, 11, step = 1, dtype = np.int32)
            learning_rate_values = [0.01, 0.001]

            # 2.1.0) Create a base saving folder
            path_folder = path.join(saving_folder_path, saving_som_folder_name)

            # 2.1.1) TEST DATA: Create a numerical matrix for the test data
            print("\n" + 50 * "-" + "\nCreating a data matrix for the test data...\n" + 50 * "-")
            test_matrix, test_cols, test_timestamps = utils.to_num_matrix(test_data[inv_name], stat_nan_values = "last_valid_obs")

            # Create a compact version of the parameters
            params = [epoch_values, dim_grid_values, learning_rate_values, sigma_values, neighborhood_functions]
            inv_test_obs_to_ignore = None

            # Create the string to save the configuration of the grid search (VAR: dataset type, regression on the test set)
            # NOTE(review): with dataset_name = 'all data' the slice [:-9] leaves an empty string
            # ("all_data" has 8 characters) -- presumably a leftover from longer dataset names, to be confirmed
            config_type = dataset_name.replace("-", "").replace(" ", "_")[:-9]
            if pre_processing_steps["Linear regression for AC power outliers"]:
                if pre_processing_steps["Linear regression for AC power outliers (Test set)"]:
                    config_type += "_" + "fullReg"
                else:
                    config_type += "_" + "reg"
            if pre_processing_steps["Data detrending"]:
                config_type += "_" + "detrended"

            # Failure events (timestamp --> machine status) used to evaluate each configuration
            fault_df = failure_events['machine_status']#.to_dict()

            # 2.1.2) Start the Grid search to find the optimal parameters
            best_som, best_config = utils.grid_search(inv_name, train_matrix, train_timestamps, test_matrix,
                                                      test_timestamps, inv_test_obs_to_ignore, params,
                                                      map_topology, activation_distance, path_folder, fault_df, config_type,
                                                      shuffling_flag = train_data_shuffling,
                                                      verbose = True)
            trained_som_version = best_config
            som = best_som
        else:
            # CASE 2.2: Train the SOM with the "static" hyperparameters
            params = som_config[inv_name]
            som,  quantization_error, weights = utils.train_som(train_matrix, params["dim_grid"], params["epoch"],
                                                                params["learning_rate"], params["sigma"],
                                                                map_topology, params["neighborhood_function"],
                                                                activation_distance,
                                                                shuffling_flag = train_data_shuffling,
                                                                verbose = False)

            # CREATE THE SUBFOLDERS for this SOM configuration
            #pre_processing_steps["extra_param"] = "run5_shuffling_noSeed"
            trained_som_version = utils.create_somVersion_folders(saving_folder_path, subfolders, dataset_name, params,
                                                                  merge_inv_data, pre_processing_steps)

        # CASE 2.2.A) Save the trained SOM as a file
        if merge_inv_data:
            file_name = "trained_som_mergedInvData.p"
        else:
            file_name = f"trained_som_{inv_name}.p"
        saving_path = path.join(saving_folder_path, saving_som_folder_name, trained_som_version, file_name)
        with open(saving_path, 'wb') as outfile:
            pickle.dump(som, outfile)
            print("\n"+ 120*"-" + f"\n\tThe trained SOM has been SAVED as '{trained_som_version}'.\n" + 120*"-")

        # CASE 2.2.B) Save also the second best SOM [ONLY FOR GRID SEARCH]
        #if grid_search and (second_best_config is not None):
            #saving_path = path.join(saving_folder_path, saving_som_folder_name, second_best_config, file_name)
            #with open(saving_path, 'wb') as outfile:
                # pickle.dump(second_best_som, outfile)
                # print("\n"+ 100*"-" + f"\n\t\t\tThe second best trained SOM has been SAVED as "\
                      #f"'{second_best_config}'.\n" + 100*"-")

    # Save the trained SOM for each inverter
    trained_som[inv_name] = som
    som_version[inv_name] = trained_som_version

## Computing the KPI scores

## Select period of the sliding window [hours]
Set it to `None` to compute a *daily KPI*.

In [None]:
# Period of the sliding window, forwarded to utils.compute_kpi_scores
sliding_window = 24

## Computing the KPI scores

In [None]:
# True: the KPI cell loads the previously saved KPI scores/thresholds (when their CSV file exists)
# False: the KPI scores are always computed again (and saved)
load_kpi_scores = False #pre_trained

In [None]:
# Outcomes of this cell: the KPI scores and the two thresholds for each inverter
kpi_scores_inv = dict()
thresholds_inv = dict()
for inv_name in inv_names:
    print(40 * "-", inv_name, 40 * "-")

    # Load the trained SOM for this inverter
    som = trained_som[inv_name]

    train_df = train_data[inv_name]
    test_df = test_data[inv_name]

    # File names
    kpi_file_name = f"KPI_scores_{inv_name}.csv"
    threshold_file_name = f"thresholds_{inv_name}.csv"

    # Loading paths: resolved only when the loading has been requested, so that computing the
    # scores from scratch does not require an entry in `trained_version`.
    # NOTE(review): the scores are loaded from the `trained_version` folder but saved in the
    # `som_version` one -- confirm that this asymmetry is intended.
    loading_kpi_path = None
    loading_threshold_path = None
    if load_kpi_scores:
        loading_kpi_path = path.join(saving_folder_path, saving_kpi_folder_name, trained_version[inv_name], kpi_file_name)
        loading_threshold_path = path.join(saving_folder_path, saving_kpi_folder_name, trained_version[inv_name],
                                           threshold_file_name)

    # Both files are read in the loading branch, hence both must exist
    precomputed_scores_available = (load_kpi_scores and path.exists(loading_kpi_path)
                                    and path.exists(loading_threshold_path))

    # Load the KPI scores in the case they have already been computed previously
    if precomputed_scores_available:
        print(50 * "-" + f"\nLoading the pre-computed KPI scores/thresholds..."\
              f"\nVersion: {trained_version[inv_name]}\n" + 50 * "-")

        # Load thresholds (rows: the two formulas; columns: formula, threshold value)
        thresholds_df = pd.read_csv(loading_threshold_path)
        threshold1 = thresholds_df.iloc[0, 1]
        threshold2 = thresholds_df.iloc[1, 1]
        thresholds_inv[inv_name] = (threshold1, threshold2)
        print(f"FORMULA: {thresholds_df.iloc[0, 0]} --> THRESHOLD 1: {round(threshold1, 4)}")
        print(f"FORMULA: {thresholds_df.iloc[1, 0]} --> THRESHOLD 2: {round(threshold2, 4)}")

        # Load the kpi score
        # NOTE(review): the index is read back as plain text (no date parsing), whereas the metrics
        # cell compares it with the failure timestamps -- confirm whether `parse_dates` is needed.
        kpi_scores_inv[inv_name] = pd.read_csv(loading_kpi_path, index_col=[0])
    else:
        # TRAIN DATA: Create a numerical matrix
        print("\n" + 50 * "-" + "\nCreating a data matrix for the training data...\n" + 50 * "-")
        train_matrix, train_cols, train_timestamps = utils.to_num_matrix(train_df, stat_nan_values = "last_valid_obs")

        # TEST DATA: Create a numerical matrix
        print("\n" + 50 * "-" + "\nCreating a data matrix for the test data...\n" + 50 * "-")
        test_matrix, test_cols, test_timestamps = utils.to_num_matrix(test_df, stat_nan_values = "last_valid_obs")

        # Computing the KPI scores
        std_multipliers = [0.5, 1]
        graphs_saving_folder = path.join(saving_folder_path, saving_graph_folder_name, som_version[inv_name])
        kpi_scores, thresholds = utils.compute_kpi_scores(som, inv_name, train_matrix, train_timestamps,
                                                          test_matrix, test_timestamps,
                                                          std_multipliers, sliding_window,
                                                          graphs_saving_folder, visualize_graphs = True)
        thresholds_inv[inv_name] = thresholds
        kpi_scores_inv[inv_name] = kpi_scores

        # A) Save the thresholds as a CSV file
        # (newline='' is required by the csv module, otherwise extra blank rows may be written on some platforms)
        saving_threhsold_path = path.join(saving_folder_path, saving_kpi_folder_name, som_version[inv_name], threshold_file_name)
        with open(saving_threhsold_path, 'w', newline='', encoding="utf-8") as csv_file:
            write = csv.writer(csv_file)
            write.writerow(["Formula", "Threshold value"])
            write.writerows([
                    [f"(μ - {std_multipliers[0]}σ)", thresholds[0]],
                    [f"(μ - {std_multipliers[1]}σ)", thresholds[1]]
                ])

        # B) Save the KPI scores as a CSV file
        saving_kpi_path = path.join(saving_folder_path, saving_kpi_folder_name, som_version[inv_name], kpi_file_name)
        kpi_scores_inv[inv_name].to_csv(saving_kpi_path)

    # Visualize the KPI scores for this inverter
    display(kpi_scores_inv[inv_name])

# Visualize all the thresholds
print(50 * "-" + "\n\t\t    THERSHOLDS\n" + 50 * "-")
display(thresholds_inv)

# Warnings 

## Definition
1. **Numerical thresholds**;
2. **Derivative**: Consider only the KPI scores showing a degradation behaviour (i.e., a negative derivative);
3. **Persistence**: Increase (+1) the *warning level* when the KPI score persists for more than one timestamp.

## Create the warnings with their relative warning levels [1:4]

In [None]:
# Warnings (with their warning levels) of each inverter
inv_warnings = dict()
for inv_name in inv_names:
    print(50 * "-", inv_name, 50 * "-")

    # No test observation has to be ignored
    inv_obs_to_ignore = None

    # Build the warnings from the KPI scores and the thresholds of this inverter
    inverter_kpi_scores = kpi_scores_inv[inv_name]
    inverter_thresholds = thresholds_inv[inv_name]
    inverter_warnings = utils.create_warning(inverter_kpi_scores, inverter_thresholds, inv_obs_to_ignore)
    inv_warnings[inv_name] = inverter_warnings
    display(inverter_warnings)

    # Pick the subfolder of the SOM version in use and save the warnings there as a CSV file
    warnings_file_name = f"{inv_name}_warnings.csv"
    warning_subfolder = trained_version[inv_name] if pre_trained else som_version[inv_name]
    warnings_saving_path = path.join(saving_folder_path, saving_warnings_folder_name, warning_subfolder,
                                     warnings_file_name)
    inverter_warnings.to_csv(warnings_saving_path)
    print(f"The ({len(inverter_warnings)}) warnings have been saved in a CSV file.\n")

## Compute the correct and wrong predictions (TP, TN, FP, FN)

### Create different fault profiles to compute the metrics with different granularity

In [None]:
# Prediction windows (in days) evaluated for each warning level: from 1 to 7
prediction_windows = list(range(1, 8))
# Minimum warning levels, from the strictest (4) to the loosest (1)
warning_levels = list(range(4, 0, -1))

### Compute the metrics

In [None]:
# Outcomes of this cell: for each inverter, the list of the evaluated configurations
# (minimum warning level x prediction window) with their TP, TN, FP, FN
warning_metrics = dict()

# Failure events (timestamp --> machine status). They do not depend on the inverter or on the
# configuration, hence they are computed (and displayed) only once instead of at every iteration.
failure_events_list = failure_events['machine_status'].to_dict()
display(failure_events_list)

for inv_name in inv_names:

    timestamps = kpi_scores_inv[inv_name].index.tolist()
    warnings = inv_warnings[inv_name]

    # Initialize the list of configurations
    warning_metrics[inv_name] = []

    # Compute the metrics for each fault profile
    for warning_level in warning_levels:

        # Select the warnings having at least the selected warning level
        selected_warnings = warnings[warnings["Warning level"] >= warning_level]

        # There might be no warning reaching this level: `index[0]` would raise an IndexError
        first_warning = selected_warnings.index[0] if len(selected_warnings) > 0 else None

        for prediction_window in prediction_windows:
            print(f"\nPREDICTION WINDOW: - {prediction_window} days || WARNING LEVELS: >= {warning_level}")

            # Compute the metrics
            config_metrics = utils.compute_correct_wrong_predictions(timestamps, failure_events_list, selected_warnings.index,
                                                                     prediction_window, verbose=True)

            # Save the outcomes (the faults are stored as a list rather than as a dictionary view)
            warning_metrics[inv_name].append({
                    "fault_profile" : "All",
                    "faults": list(failure_events_list.keys()),
                    "warning_levels": warning_level,
                    "prediction_window": prediction_window,
                    "fault_warnings": selected_warnings.index,
                    "first_warning": first_warning,
                    "metrics": list(zip(['TP', 'TN', 'FP', 'FN'], config_metrics)),
                })

    all_metrics = pd.DataFrame(warning_metrics[inv_name])
    print(50 * "-" +  f" {inv_name}: METRICS " + 50 * "-")
    display(all_metrics)
    print(80 * "-")

## Compute the overall metrics
1. **F1-Score**
2. **Recall/Hit rate** (share of the true positive cases that are correctly predicted: TP / (TP + FN))
3. **Miss Rate** (a.k.a., FNR: False Negative Rate = FN / (TP + FN))
4. **Fall-out** (a.k.a., FPR: False Positive Rate = FP / (FP + TN))

In [None]:
# Overrides the flag set before the training phase: from here on it only selects the subfolder
# in which the performance metrics are saved (see the next cell)
pre_trained = False # ONLY FOR TESTING MIX PRETRAINED SOM

In [None]:
# Columns of the performance table (one row for each configuration)
column_names = ["Fault Profile", "Num Faults", "Faults", "Warning levels (>=)" , "Prediction Window (days)",
                "First Fault Warning", "Fault warnings",
                "F1 score", "Recall", "Miss rate", "Fall out", "Precision"]

for inv_name in inv_names:
    print("\n" + 120 * "-" + f"\n\t\t\t\t\t{inv_name}: Analysis of its warning and fault events\n" + 120 * "-")

    all_metrics = warning_metrics[inv_name]

    # Compute the metrics for each configuration
    performances = []
    for idk, config in enumerate(all_metrics):
        print(40 * "-" + f" CONFIGURATION {idk + 1}/{len(all_metrics)} " + 30 * "-" )
        # The warnings are selected with a level greater than or equal to the configured one
        print(f"WARNING LEVELS: >= {config['warning_levels']}\nPREDICTION WINDOW: - {config['prediction_window']} day(s)")
        print(f"FAULT PROFILE: {config['fault_profile']}\n"+ 80 * "-")
        print(80 * "-")

        # Retrieve the values of TP, TN, FP, FN (stored as (name, value) pairs)
        config_metrics = [value for name, value in config["metrics"]]
        true_positive, true_negative, false_positive, false_negative = config_metrics

        # Compute the metrics (i.e., recall, ...)
        recall, miss_rate, fall_out, precision, f1_score = utils.compute_metrics(true_positive, false_positive,
                                                                                 false_negative, true_negative,
                                                                                 verbose = True)
        # Create a Pandas Series for this configuration
        data = (config["fault_profile"], len(config['faults']), config['faults'],
                config['warning_levels'], config['prediction_window'],
                config["first_warning"], config["fault_warnings"],
                f1_score, recall, miss_rate, fall_out, precision)
        performances.append(pd.Series(data, index = column_names))

    # Create a Pandas Dataframe for the performance of all the fault profiles
    metrics_df = pd.DataFrame(performances)
    print("\n"+ "-"*40 + f" SOM PERFORMANCE: {inv_name} " + 40 * "-")
    display(metrics_df)

    # Select the saving subfolder
    if pre_trained:
        # NOTE(review): `pretrained_config_params` already starts with the trained version,
        # so the version string appears twice in this folder name -- confirm the intended folder
        subfolder = trained_version[inv_name] + pretrained_config_params
    else:
        subfolder = som_version[inv_name]

    # Save the metrics (the folder is created when missing, otherwise `to_csv` would fail)
    file_name = f"{inv_name}_performance.csv"
    file_path = path.join(saving_folder_path, saving_metrics_folder_name, subfolder)
    makedirs(file_path, exist_ok = True)
    metrics_df.to_csv(path.join(file_path, file_name))
    print(f"The perfomances for all the combinations (i.e., fault profiles, warning levels, time windows) "\
          f"\nhave been saved in '{saving_metrics_folder_name}/{subfolder}'")

In [None]:
# Focus on the loosest configuration: warning levels >= 1 and a prediction window of 7 days
cond_warning = metrics_df['Warning levels (>=)'].eq(1)
cond_days = metrics_df['Prediction Window (days)'].eq(7)

loosest_configuration = metrics_df.loc[cond_warning & cond_days]
display(loosest_configuration)
                  
# BEST (f1-score): 61%