In [1]:
# Make the parent directory importable so that the `_library` package resolves.
# `sys.path` is reached through the module (rather than `from sys import path`)
# because the import cell below binds the bare name `path` to `os.path`.
import sys
if '..' not in sys.path:
    sys.path.insert(0, '..')

In [2]:
import _library.som_outcome_utils as som_outcome_utils
from _library.fault_utils import load_priorities
import _library.utils as utils
import sys
import numpy as np
import pandas as pd
from os import path, makedirs
from collections import defaultdict, Counter
from tabulate import tabulate

In [3]:
# NOTE: absolute, machine-specific working directory; the relative "data/..." paths built below are resolved against it.
%cd /mnt/data/vieri/projects/SAMPLE/

/mnt/data/vieri/projects/SAMPLE


# The PV systems

In [4]:
# Overview of the PV system names and of the data sub-folders exposed by `utils`.
print(f"{utils.SYSTEM_NAMES} --> {utils.SUBFOLDERS}")
# --- 0 ---------- 1 ---------- 2 --------- 3 ---------- 4 -------- (for ALL) ------- (For SOL 1, SOL 2 & GALATINA) ---------

['Binetto 1', 'Binetto 2', 'Soleto 1', 'Soleto 2', 'Galatina'] --> ['Cleaned', '1-hour sampling', '1-hour averaged sampling', 'Residuals', 'Residuals_analytical', 'Failure events', None]


# Select the PV system and its path

In [5]:
# Pick the PV system to analyse (index into utils.SYSTEM_NAMES) and derive its data folder,
# which is nested twice under the upper-cased system name.
system_name = utils.SYSTEM_NAMES[4]
system_dir_name = system_name.upper()
system_path = path.join("data", system_dir_name, system_dir_name)
print("PV SYSTEM -->", system_dir_name)

PV SYSTEM --> GALATINA


# A) Visualize the findings of the GRID SEARCH
**PRE-PROCESSING STEPS**: 1hour_averaged_fullLinReg_detrended

## A.1) Select the version of the grid search

In [6]:
# Available result versions. The selected `file_version` is spliced into the names of the
# performance files read below and into the name of the output folder.
# The bracketed numbers are the list indices of the entries.
file_versions = [
    # [0] -- FINAL GRID SEARCH - with shuffling
    "1hour_averaged_fullReg_detrended",
    # [1] -- AutoEncoder
    "L2_N16", 
    # [2] -- FINAL GRID SEARCH - with shuffling
    "1hour_averaged_fullReg_detrended_Shuffling",
    # [3] -- FINAL GRID SEARCH - without shuffling
    "1hour_averaged_fullReg_detrended_noShuffle",
    # [4] -- OLD outcomes
    "old", 
    # [5]-[7] ---------- Test hyperparameter behaviours ------------------------
    "1hour_averaged_reg_FixedEpoch", "1hour_averaged_sigma_reg", "1hour_averaged_dimGrid_reg" 
]
file_version = file_versions[0]
print(f"VERSION: {file_version}")

VERSION: 1hour_averaged_fullReg_detrended


## A.2 ) Load the CSV file

In [7]:
# Tag embedded in the performance file names (f"som_{dataset_type}_performance_...") read and written below.
dataset_type = "folds"

In [8]:
# Metric key handed to som_outcome_utils.read_som_perfomance_file when loading each inverter's results.
variable_to_sort_out = "f1"

In [9]:
# Folder holding the grid-search performance files of the selected PV system
folder_path = path.join(system_path, "SOMs", "Trained SOM")

# Per-inverter results keyed "INV<n>"; inverters without a performance file are reported and left out
inv_som_performance = {}
for inv_number in range(1, 5):
    file_name = f"INV{inv_number}_som_{dataset_type}_performance_{file_version}_ALT.txt"
    file_path = path.join(folder_path, file_name)

    if path.exists(file_path):
        # The reader's result is unpacked as (performance, best configurations, best scores)
        perfomance_list = som_outcome_utils.read_som_perfomance_file(file_path, variable_to_sort_out)
        som_perfomance, best_configurations, best_score_performance = perfomance_list

        inv_som_performance[f"INV{inv_number}"] = dict(
            perfomance=som_perfomance,
            best_configs=best_configurations,
            best_scores=best_score_performance,
        )
        print(f"[INV {inv_number}] The SOM perfomance has been loaded: '{file_name}'")
    else:
        print(f"\n[INV {inv_number}] ISSUE: File not found. Sorry :/")

[INV 1] The SOM perfomance has been loaded: 'INV1_som_folds_performance_1hour_averaged_fullReg_detrended_ALT.txt'
[INV 2] The SOM perfomance has been loaded: 'INV2_som_folds_performance_1hour_averaged_fullReg_detrended_ALT.txt'
[INV 3] The SOM perfomance has been loaded: 'INV3_som_folds_performance_1hour_averaged_fullReg_detrended_ALT.txt'
[INV 4] The SOM perfomance has been loaded: 'INV4_som_folds_performance_1hour_averaged_fullReg_detrended_ALT.txt'


## A.3) Create the saving folder 

In [10]:
# Switches for persisting results: "view1" gates the per-inverter log files (and their folder),
# "view2" is forwarded to som_outcome_utils.visualize_weighted_som_configurations.
save_to_file = {"view1": True, "view2": True}

In [11]:
# Root folder for everything produced from this grid-search version
saving_folder_path = path.join(system_path, "SOMs", "Hyperparameter behaviour", file_version)
log_folder_name = "Best Configs"

# The log sub-folder is only needed when VIEW 1 is written to disk.
# `exist_ok=True` replaces the check-then-create sequence (no race, safe to re-run the cell).
if save_to_file["view1"]:
    log_folder_path = path.join(saving_folder_path, log_folder_name)
    makedirs(log_folder_path, exist_ok=True)

    print(f"The findings will be saved in a txt file...\n(folder: {log_folder_path}).")
else:
    print("Enjoy the awesome findings here :)")

The findings will be saved in a txt file...
(folder: data/GALATINA/GALATINA/SOMs/Hyperparameter behaviour/1hour_averaged_fullReg_detrended/Best Configs).


### A.4) [VIEW 1] Visualize the performance

In [12]:
# Inverter number to display (matched against "INV<n>"); -1 shows every loaded inverter.
inv_to_visualize = -1

In [13]:
# Intended number of leading configurations to report per inverter; -1 means all of them.
top_config_to_visualize = -1

In [14]:
def _delta_note(delta, digits, unit=""):
    """Return the 'delta from the best config' suffix, or '' when the delta is zero."""
    if abs(delta) > 0:
        return "--> delta from the best config: " + str(round(delta, digits)) + unit
    return ""


# Remember the real stdout: while a log file is open, print() is redirected into it.
console_stdout = sys.stdout

# Visualize the findings
for inv_name, som_perfomance in inv_som_performance.items():
    # Optional filter on a single inverter
    if inv_to_visualize != -1 and inv_name != f"INV{inv_to_visualize}":
        continue

    # Retrieve the ordered list of the configs and the best scores used as reference
    inv_best_configs = som_perfomance["best_configs"]
    best_quantiz, best_f1, best_recall, best_precision = som_perfomance["best_scores"]

    # Keep only the leading entries when a limit is set
    # (the slice has to be assigned, otherwise the limit has no effect)
    if top_config_to_visualize != -1:
        inv_best_configs = inv_best_configs[:top_config_to_visualize]

    # LOG FILE (txt file)
    log_file = None
    if save_to_file["view1"]:
        log_file_name = f"{inv_name}_top_som_{dataset_type}_performance" + ".txt"
        log_file_path = path.join(log_folder_path, log_file_name)
        log_file = open(log_file_path, mode="w+")

        print(f"[{inv_name}] The findings will be saved in a txt file (i.e., ./{log_folder_name}/{log_file_name})")
        sys.stdout = log_file

    try:
        # str() is required here: concatenating the integer limit directly raises TypeError
        scope_label = "TOP " + str(top_config_to_visualize) if top_config_to_visualize != -1 else "ALL"
        print("-" * 15 + f" [{system_name}] ({inv_name}) "
              f"{scope_label} "
              f"CONFIGURATIONS (Version: {file_version}) " + "-" * 15, "\n")

        for idk, (config, metrics) in enumerate(inv_best_configs):
            print(f"TOP {idk + 1}: " + config + "\n" + "-" * 70)

            # Retrieve all the metrics
            quantization_error, f1_score, recall, precision, fold_f1_scores, comp_times = metrics

            # Deltas from the best scores (the three ratios are shown as percentage points)
            delta_quantization = quantization_error - best_quantiz
            delta_f1 = (f1_score - best_f1) * 100
            delta_recall = (recall - best_recall) * 100
            delta_precision = (precision - best_precision) * 100

            # Visualize the performance
            fold_scores = ", ".join(map(str, fold_f1_scores))
            print(f"--> F1 score: {f1_score} ({fold_scores}) {_delta_note(delta_f1, 2, ' %')}")
            print(f"--> RECALL: {recall} {_delta_note(delta_recall, 2, ' %')}")
            print(f"--> PRECISION: {precision} {_delta_note(delta_precision, 2, ' %')}")
            print(f"--> QUANTIZATION ERROR: {quantization_error} {_delta_note(delta_quantization, 4)}")
            print(f"--> {comp_times[0]} --> {comp_times[1]}\n")
    finally:
        # Always hand stdout back to the console and release the file, even if printing failed
        if log_file is not None:
            sys.stdout = console_stdout
            log_file.close()

[INV1] The findings will be saved in a txt file (i.e., ./Best Configs/INV1_top_som_folds_performance.txt)
[INV2] The findings will be saved in a txt file (i.e., ./Best Configs/INV2_top_som_folds_performance.txt)
[INV3] The findings will be saved in a txt file (i.e., ./Best Configs/INV3_top_som_folds_performance.txt)
[INV4] The findings will be saved in a txt file (i.e., ./Best Configs/INV4_top_som_folds_performance.txt)


## A.5) Weighted performance: Find the best SOM configuration across all the inverters

### A.5.1) Set the threshold for selecting the best som configurations
1. **[0:1]**: Consider only the configurations included in the top k % (e.g., `0.3` keeps the top 30 %)
2. **(-1)**: Consider all the configurations

In [15]:
# Thresholds evaluated below: -1 keeps every configuration, a fraction such as 0.3 keeps the top 30 %.
threshold_top_configs = [-1, 0.3]

### A.5.2) Merge and weigh all the SOM configurations

In [16]:
# One merged and weighted result per threshold, keyed by the threshold value itself
list_perfomance = {}

for threshold in threshold_top_configs:
    merged_perfomance = som_outcome_utils.merge_and_weigh_som_perfomance(inv_som_performance, threshold)
    list_perfomance[threshold] = merged_perfomance

[INV1] Reading and merging som perfomance  (i.e., 1540 configs.)
[INV2] Reading and merging som perfomance  (i.e., 1540 configs.)
[INV3] Reading and merging som perfomance  (i.e., 1540 configs.)
[INV4] Reading and merging som perfomance  (i.e., 1540 configs.)

Consider only the configuration included in the 'top 30%
--------------------------------------------------------------------------------
[INV1] Reading and merging som perfomance with threshold 30% (i.e., 1540 configs.)
[INV2] Reading and merging som perfomance with threshold 30% (i.e., 1540 configs.)
[INV3] Reading and merging som perfomance with threshold 30% (i.e., 1540 configs.)
[INV4] Reading and merging som perfomance with threshold 30% (i.e., 1540 configs.)


### A.5.3) Compute an average score across all the F1 scores retrieved from the inverters

In [17]:
# Replace each merged result with its averaged version.
# NOTE(review): every entry of `list_perfomance` is overwritten with the output computed from
# itself, so re-running this cell feeds already-averaged data back into compute_average_scores.
# Re-run the merge cell first if this cell has to be executed again.
for threshold in threshold_top_configs:
    weighted_som_perfomance = list_perfomance[threshold]
    list_perfomance[threshold] = som_outcome_utils.compute_average_scores(weighted_som_perfomance, threshold)

### A.5.4) [View 2] Visualize the weighted som configurations 

In [18]:
# The output folder does not depend on the threshold: create it once, before the loop.
# `exist_ok=True` makes the cell safe to re-run.
allPerfomance_folder_path = path.join(saving_folder_path, log_folder_name, "Weighted perfomance")
makedirs(allPerfomance_folder_path, exist_ok=True)

for threshold in threshold_top_configs:
    weighted_som_perfomance = list_perfomance[threshold]

    # One label per inverter that was actually loaded (instead of a hard-coded list of four),
    # so systems with fewer inverters or missing files do not get surplus labels.
    # Rebuilt on every iteration so each call receives its own list.
    labels = list(inv_som_performance.keys())
    som_outcome_utils.visualize_weighted_som_configurations(weighted_som_perfomance, threshold, system_name, dataset_type,
                                                            labels, file_version, allPerfomance_folder_path,
                                                            save_to_file["view2"])

[Galatina] The findings will be saved in a txt file (i.e., weighted_som_folds_performance.txt)
[Galatina] The findings will be saved in a txt file (i.e., weighted_som_folds_performance_top30%.txt)


## A.5bis) Weighted performance: Find the best SOM configuration across all the PV systems [View 2.bis]

In [19]:
# PV systems included in the cross-system ranking (entries 2 to 4 of utils.SYSTEM_NAMES).
system_to_visualize = utils.SYSTEM_NAMES[2:5]

In [20]:
for threshold in threshold_top_configs:
    # Scores of every configuration, accumulated across all the PV systems.
    # Results returned by the library are always copied into this defaultdict (never aliased):
    # the returned lists are not mutated in place, and a configuration first seen in a later
    # PV system simply gets a new entry.
    weighted_general_som_perfomance = defaultdict(list)
    labels = []

    print(f"VERSION: SELECTING '{'THE TOP: '+ str(threshold*100) + '%' if threshold > 0 else 'ALL'} CONFIGS'")
    for pv_system_name in system_to_visualize:
        print("\n" + "-" * 30 + f" PV SYSTEM: {pv_system_name} " + "-" * 30 )

        # 0) FOLDERS
        pv_system_folder = path.join("data", pv_system_name.upper(), pv_system_name.upper())
        log_file_folder = path.join(pv_system_folder, "SOMs", "Trained SOM")

        # Short system tag used in the labels, e.g. "Soleto 1" -> "Sol1", "Galatina" -> "Gal"
        name_parts = pv_system_name.split(" ")
        if len(name_parts) == 1:
            simplified_sys_name = pv_system_name[:3]
        else:
            simplified_sys_name = name_parts[0][:3] + name_parts[1]

        # 1) READ THE PERFORMANCE FILES of all the inverters of the PV system
        print("-" * 20 + " (A.1) LOADING THE CSV FILES " +"-" * 20)
        pv_inv_som_performance = dict()
        for inv_number in [1, 2, 3, 4]:

            # Paths and file name
            perfomance_file_name = f"INV{inv_number}_" + f"som_{dataset_type}_performance_" + file_version + ".txt"
            perfomance_file_path = path.join(log_file_folder, perfomance_file_name)

            if not path.exists(perfomance_file_path):
                print(f"\n[INV {inv_number}] ISSUE: File not found. Sorry :/")
                continue

            # Read and save the SOM performance of each inverter
            som_perfomance, best_configurations, *_ = som_outcome_utils.read_som_perfomance_file(perfomance_file_path, 
                                                                                                 variable_to_sort_out = "f1")
            pv_inv_som_performance[f"INV{inv_number}"] = {"perfomance": som_perfomance, "best_configs": best_configurations}
            print(f"[INV{inv_number}] The SOM perfomance has been loaded")

            # A label is added only for the inverters that were actually loaded
            labels.append(f"{simplified_sys_name}:INV{inv_number}")

        # 2.1) Get the normalized f1 scores for the pv system
        print("\n" + "-" * 20 + " (A.2) Merging inverter perfomance " +"-" * 20)
        weighted_pvSystem_som_perfomance = som_outcome_utils.merge_and_weigh_som_perfomance(pv_inv_som_performance, threshold)

        # 2.2) Accumulate the normalized scores of this pv system
        for config, score_list in weighted_pvSystem_som_perfomance.items():
            weighted_general_som_perfomance[config].extend(score_list)

    # 3) Compute the average values
    print("\n" + "-" * 20 + " (B) Computing the average score values " +"-" * 20)
    avg_general_som_perfomance = som_outcome_utils.compute_average_scores(weighted_general_som_perfomance, threshold)

    # 4) Generate the ranked list
    # 4.1) Create the folder (exist_ok makes the step safe to repeat for every threshold)
    system_label = "All systems" 
    overall_perfomance_folder_path = path.join("data", "Overall SOM perfomance")
    makedirs(overall_perfomance_folder_path, exist_ok=True)

    # 4.2) Generate the list
    print("\n" + "-" * 20 + " (C) Generated the ranked list " +"-" * 20)
    som_outcome_utils.visualize_weighted_som_configurations(avg_general_som_perfomance, threshold, system_label, dataset_type,
                                                            labels, file_version, overall_perfomance_folder_path, 
                                                            save_to_file = True)

VERSION: SELECTING 'ALL CONFIGS'

------------------------------ PV SYSTEM: Soleto 1 ------------------------------
-------------------- (A.1) LOADING THE CSV FILES --------------------
[INV1] The SOM perfomance has been loaded
[INV2] The SOM perfomance has been loaded
[INV3] The SOM perfomance has been loaded
[INV4] The SOM perfomance has been loaded

-------------------- (A.2) Merging inverter perfomance --------------------
[INV1] Reading and merging som perfomance  (i.e., 1540 configs.)
[INV2] Reading and merging som perfomance  (i.e., 1540 configs.)
[INV3] Reading and merging som perfomance  (i.e., 1540 configs.)
[INV4] Reading and merging som perfomance  (i.e., 1540 configs.)

------------------------------ PV SYSTEM: Soleto 2 ------------------------------
-------------------- (A.1) LOADING THE CSV FILES --------------------
[INV1] The SOM perfomance has been loaded
[INV2] The SOM perfomance has been loaded

[INV 3] ISSUE: File not found. Sorry :/

[INV 4] ISSUE: File not found.

## A.6) Visualize the findings using some graphs 

### A.6.0) Some useful functions

### A.6.1)  [View 3 ] Generate and visualize the graphs

In [21]:
# When True, the graph generation below is skipped entirely.
skip_this_step = True

In [22]:
# Forwarded as `verbose=` to som_outcome_utils.plot_analysis.
visualize_graphs = False

In [23]:
if skip_this_step:
    print("This visualization has been skipped. Sorry :/")
else:
    for inv_name, inv_outcome in inv_som_performance.items():
        print("-" * 30 + f" {inv_name} " + "-" * 30)

        # Performance table of the inverter, without the per-fold F1 column
        som_perfomance = inv_outcome["perfomance"].drop(columns = ["Fold F1 scores"])

        # Report the available columns (i.e., metrics) and how many configurations the grid search tested
        columns = som_perfomance.columns.tolist()
        print(f"COLUMNS AVAILABLE ({len(columns)}): {', '.join(columns)}")
        print(f"CONFIGURATIONS TESTED OUT: {len(som_perfomance)}\n")

        # Create, visualize and save the graph
        # NOTE(review): columns and rows of the graph grid are both driven by "Dim grid" -- confirm this is intended
        som_outcome_utils.plot_analysis(som_perfomance, system_name, inv_name, saving_folder_path, dataset_type,
                                        var_panels = "Function", var_graphColums = "Dim grid", 
                                        var_graphRows = "Dim grid",
                                        verbose = visualize_graphs)
        print("-" * 66 + "\n")
    print("\n" + "-" * 50 + "\nGraph generation has been finished. Nice :)\n" + "-" * 50)

This visualization has been skipped. Sorry :/


# B) Comparison of the combinations of pre-processing steps

In [24]:
# When True, the alternative SOM configurations are selected and the metric files carrying the
# `only_failure_starts_prefix` tag are loaded (see the cells below).
consider_only_failure_starts = True

## B.1.1) SELECT: the 'SOM versions'

In [25]:
# SOM configurations selected for each PV system and inverter.
# Every list is consumed positionally further down (`som_configs[inv_name][idk_config]`), in the
# same order as `test_names`: [0] best for the inverter, [1] averaged across the inverters of the
# system, [2] averaged across the PV systems.
# The trailing comments give the rank of the configuration in the corresponding ranking.
# NOTE(review): Soleto 1 / INV4 lists the same configuration in slots 0 and 1 -- confirm.
SYSTEM_SOM_CONFIGS = {
    "Soleto 1": {
        "INV1": [
            "16grid_6Kepoch_0.001lr_9sigma_gaussianFunc", # TOP 1
            "18grid_5Kepoch_0.001lr_1sigma_gaussianFunc", # TOP 1
            "20grid_6Kepoch_0.001lr_1sigma_gaussianFunc" # TOP 1
        ],  
        "INV2": [
            "12grid_5Kepoch_0.001lr_1sigma_gaussianFunc", # TOP 11 (F1: - 6% from its TOP 1)
            "18grid_5Kepoch_0.001lr_1sigma_gaussianFunc", # TOP 1
            "20grid_6Kepoch_0.001lr_1sigma_gaussianFunc" # TOP 1
        ],
        "INV3": [
            "10grid_6Kepoch_0.001lr_1sigma_gaussianFunc", # TOP 1
            "18grid_5Kepoch_0.001lr_1sigma_gaussianFunc", # TOP 1
            "20grid_6Kepoch_0.001lr_1sigma_gaussianFunc" # TOP 1
        ],
        "INV4": [
            "18grid_5Kepoch_0.001lr_1sigma_gaussianFunc", # TOP 1
            "18grid_5Kepoch_0.001lr_1sigma_gaussianFunc", # TOP 1
            "20grid_6Kepoch_0.001lr_1sigma_gaussianFunc" # TOP 1
        ]
    },
    "Soleto 2": {
        "INV1": [
            "12grid_30Kepoch_0.001lr_1sigma_gaussianFunc", # TOP 1
            "18grid_7Kepoch_0.001lr_6sigma_gaussianFunc", # TOP 1 (AVG)
            "20grid_6Kepoch_0.001lr_1sigma_gaussianFunc" # TOP 1
        ],  
        "INV2": [
            "28grid_20Kepoch_0.01lr_10sigma_gaussianFunc", # TOP 2 (F1: -0.16 %)
            "18grid_7Kepoch_0.001lr_6sigma_gaussianFunc", # TOP 1 (AVG)
            "20grid_6Kepoch_0.001lr_1sigma_gaussianFunc" # TOP 1
        ],
    },
    "Galatina": {
        "INV1": [
            "26grid_10Kepoch_0.01lr_2sigma_gaussianFunc",  # TOP 1
            "26grid_8Kepoch_0.01lr_4sigma_gaussianFunc", # TOP 1 (AVG)
            "20grid_6Kepoch_0.001lr_1sigma_gaussianFunc" # TOP 1
        ],  
        "INV2":  [
            "30grid_30Kepoch_0.01lr_1sigma_gaussianFunc", # TOP 3 (F1: -1.65 %)
            "26grid_8Kepoch_0.01lr_4sigma_gaussianFunc", # TOP 1 (AVG)
            "20grid_6Kepoch_0.001lr_1sigma_gaussianFunc" # TOP 1
        ],
        "INV3":  [
            "26grid_2Kepoch_0.01lr_10sigma_gaussianFunc", # TOP 1
            "26grid_8Kepoch_0.01lr_4sigma_gaussianFunc", # TOP 1 (AVG)
            "20grid_6Kepoch_0.001lr_1sigma_gaussianFunc" # TOP 1
        ],
        "INV4":  [
            "30grid_8Kepoch_0.01lr_5sigma_gaussianFunc", # TOP 2 (F1: -17.22 %)
            "26grid_8Kepoch_0.01lr_4sigma_gaussianFunc", # TOP 1 (AVG)
            "20grid_6Kepoch_0.001lr_1sigma_gaussianFunc" # TOP 1
        ]
    }
}
# Only the systems listed above can be selected; any other `system_name` raises KeyError here.
som_configs = SYSTEM_SOM_CONFIGS[system_name]

# Alternative metrics (i.e., considering only the starting timestamps of failure events)

In [26]:
# Alternative SOM selections, used when only the starting timestamps of failure events are considered.
# NOTE: when the flag is set, this cell REBINDS both `SYSTEM_SOM_CONFIGS` and `som_configs`
# defined in the previous cell; the list positions keep the same meaning as there.
# The third entry of every list is an empty-string placeholder, so the folder name later built
# as `som_config + "_" + config` starts with "_" for that slot.
# NOTE(review): presumably the cross-system configuration has not been chosen yet -- confirm.
if consider_only_failure_starts:
    SYSTEM_SOM_CONFIGS = {
        "Soleto 1": {
            "INV1": [
                "30grid_3Kepoch_0.01lr_10sigma_gaussianFunc",
                "24grid_2Kepoch_0.01lr_10sigma_gaussianFunc", 
                "" 
            ],  
            "INV2": [
                "22grid_2Kepoch_0.01lr_9sigma_gaussianFunc",
                "24grid_2Kepoch_0.01lr_10sigma_gaussianFunc", 
                "" 
            ],
            "INV3": [
                "18grid_7Kepoch_0.001lr_1sigma_gaussianFunc",
                "24grid_2Kepoch_0.01lr_10sigma_gaussianFunc", 
                "" 
            ],
            "INV4": [
                "22grid_2Kepoch_0.01lr_8sigma_gaussianFunc",
                "24grid_2Kepoch_0.01lr_10sigma_gaussianFunc", 
                "" 
            ]
        },
        "Soleto 2": {
            "INV1": [
                "14grid_4Kepoch_0.001lr_1sigma_gaussianFunc",
                "12grid_4Kepoch_0.001lr_2sigma_gaussianFunc", 
                "" 
            ],  
            "INV2": [
                "10grid_10Kepoch_0.001lr_2sigma_gaussianFunc",
                "12grid_4Kepoch_0.001lr_2sigma_gaussianFunc", 
                "" 
            ],
        },
        "Galatina": {
            "INV1": [
                "26grid_8Kepoch_0.01lr_4sigma_gaussianFunc",
                "12grid_3Kepoch_0.001lr_6sigma_gaussianFunc", 
                "" 
            ],  
            "INV2":  [
                "16grid_9Kepoch_0.001lr_1sigma_gaussianFunc",
                "12grid_3Kepoch_0.001lr_6sigma_gaussianFunc", 
                "" 
            ],
            "INV3":  [
                "14grid_6Kepoch_0.001lr_8sigma_gaussianFunc",
                "12grid_3Kepoch_0.001lr_6sigma_gaussianFunc", 
                "" 
            ],
            "INV4":  [
                "10grid_4Kepoch_0.001lr_7sigma_gaussianFunc",
                "12grid_3Kepoch_0.001lr_6sigma_gaussianFunc", 
                "" 
            ]
        }
    }
    som_configs = SYSTEM_SOM_CONFIGS[system_name]
else:
    # The selection made in the previous cell stays in effect
    print(f"[{system_name}] using the tradictional approach")

## B.1.2) SELECT: the combination of pre-processing steps

In [27]:
# Combinations of pre-processing steps to compare; the steps of a combination are joined by "_"
pre_steps_configs = ["1hour_averaged_fullReg_detrended"]
print(f"COMBINATIONS CHOOSEN: {len(pre_steps_configs)}\n" + "-" * 60)
print('\n'.join(f"({position}) " + config.replace('_', ' || ')
                for position, config in enumerate(pre_steps_configs, start=1)))

COMBINATIONS CHOOSEN: 1
------------------------------------------------------------
(1) 1hour || averaged || fullReg || detrended


## B.2) Load all the CSV files

In [28]:
# Method whose metric files are loaded below; its name is also the sub-folder of the system path.
methods = ["SOMs", "AutoEncoder"]
selected_method = methods[0]
if selected_method == methods[1]:
    # The AutoEncoder results are stored under a single configuration name
    ae_config = 'L2_N16'
    print(f"METHOD: '{selected_method}: {ae_config}'")
else:
    # The closing quote was missing from this message
    print(f"METHOD: '{selected_method}'")

METHOD: 'SOMs


In [29]:
# Folder with the metric CSV files of the selected method.
# NOTE: rebinds `folder_path`, which earlier pointed to the "Trained SOM" folder.
folder_path = path.join(system_path, selected_method, "Metrics")

In [30]:
# Number of configurations listed for the first inverter; each position maps to one test name below
first_inv_configs = next(iter(som_configs.values()))
num_selected_configs = len(first_inv_configs)

test_names = [
    f"TEST(0): best_for_inverter ({system_name})",
    f"TEST(1): averaged_across_inverter ({system_name})",
    "TEST(2): averaged_across_pvSystems",
]

In [31]:
# Tag of the "failure starts only" metric files. Despite the name it is appended as a suffix:
# the file name becomes "<inverter>_performance_<tag>.csv".
only_failure_starts_prefix = 'ALT2'

In [32]:
# Loaded metrics, organised as {test name: {inverter: {pre-processing config: DataFrame}}}
all_system_configs_available = {}
for idk_config in range(num_selected_configs):
    test_name = test_names[idk_config]
    print("-" * 20 + f"Loading the metrics... [{test_name}]" + "-" * 20 + "\n")

    # Metrics of every inverter for the current test
    df_inv_metrics = {}
    for inv_name, inv_som_configs in som_configs.items():
        som_config = inv_som_configs[idk_config]

        # Metrics of every pre-processing combination for the current inverter
        df_metrics_configs = {}
        for config in pre_steps_configs:

            # SOM results live in "<som config>_<pre-processing config>", the other method in one folder
            full_config = f"{som_config}_{config}" if selected_method == methods[0] else ae_config

            # File name, tagged when only the failure starts are considered
            file_suffix = f"_{only_failure_starts_prefix}" if consider_only_failure_starts else ""
            file_name = f"{inv_name}_performance{file_suffix}"
            file_path = path.join(folder_path, full_config, file_name + ".csv")

            # (Try to) read the csv file with the metrics; a missing file is reported and skipped
            try:
                loaded_metrics = pd.read_csv(file_path, index_col = 0)
            except FileNotFoundError:
                print(f"ISSUE ({inv_name}): File not found!\n--> The configuration {config.split('_')} \n    with the SOM: "\
                      f"{som_config.split('_')}'\n")
            else:
                df_metrics_configs[config] = loaded_metrics
                df_inv_metrics[inv_name] = df_metrics_configs

        print(f"---------- {inv_name}: Hey, {len(df_metrics_configs)} configurations have been loaded. --------------\n")
    all_system_configs_available[test_name] = df_inv_metrics

--------------------Loading the metrics... [TEST(0): best_for_inverter (Galatina)]--------------------

---------- INV1: Hey, 1 configurations have been loaded. --------------

---------- INV2: Hey, 1 configurations have been loaded. --------------

---------- INV3: Hey, 1 configurations have been loaded. --------------

---------- INV4: Hey, 1 configurations have been loaded. --------------

--------------------Loading the metrics... [TEST(1): averaged_across_inverter (Galatina)]--------------------

---------- INV1: Hey, 1 configurations have been loaded. --------------

---------- INV2: Hey, 1 configurations have been loaded. --------------

---------- INV3: Hey, 1 configurations have been loaded. --------------

---------- INV4: Hey, 1 configurations have been loaded. --------------

--------------------Loading the metrics... [TEST(2): averaged_across_pvSystems]--------------------

ISSUE (INV1): File not found!
--> The configuration ['1hour', 'averaged', 'fullReg', 'detrended'] 
 

In [33]:
# Names of the tests whose metrics were loaded, listed with a 1-based position
configs_available = list(all_system_configs_available)
print(f"SYSTEM CONFIGS LOADED ({len(configs_available)})\n" + "-" * 40)
print('\n'.join(f'{position}) {config}' for position, config in enumerate(configs_available, start=1)))

SYSTEM CONFIGS LOADED (3)
----------------------------------------
1) TEST(0): best_for_inverter (Galatina)
2) TEST(1): averaged_across_inverter (Galatina)
3) TEST(2): averaged_across_pvSystems


## B.3) [View 4] Visualize a filtered metrics dataframe
- *Fault profiles* (default: only "General faults")
- *prediction window* of the warnings (default: 7 days in advance)

In [34]:
# When True, the performance sub-folder is created and visualize_perfomance redirects its output to a text file.
save_metrics_to_file = False

### B.3.1) SET: 'Prediction window' to visualize (i.e., days in advance of the warnings)

In [35]:
# Passed to som_outcome_utils.visualize_metrics; per the section text, the days in advance of the warnings.
prediction_window = 7

### B.3.2) SET: 'Fault profiles' to visualize

In [36]:
# Candidate fault-profile selections; the whole list is forwarded to som_outcome_utils.visualize_metrics.
fault_profiles_available = [
    ["General Fault", "Log - High"],
    ['General Fault']
]

### B.3.3) Create the saving sub-folder

In [37]:
# Name of the sub-folder (under `saving_folder_path`) that receives the metric reports.
perfomance_folder_name = 'Perfomance'

In [38]:
perfomance_saving_folder = path.join(saving_folder_path, perfomance_folder_name)

# The folder is only needed (and only created) when the metrics are written to disk
if save_metrics_to_file and not path.exists(perfomance_saving_folder):
    makedirs(perfomance_saving_folder)
    print(f"The folder '{perfomance_folder_name}' has been created")

## B.4) Comparison between different combinations of pre-processing steps

In [39]:
def visualize_perfomance(fault_profile, visualize_details = True):
    """Print the performance comparison of the loaded SOM configurations for every inverter.

    For each inverter in the notebook-level ``df_inv_metrics`` the function prints the
    SOM configurations under comparison and delegates the metric report to
    ``som_outcome_utils.visualize_metrics``. When the notebook-level flag
    ``save_metrics_to_file`` is True, everything printed after the initial notice is
    written to ``perfomance_configs_<profile>[_<prefix>].txt`` inside
    ``perfomance_saving_folder`` instead of the notebook output.

    Besides its parameters, the function reads these notebook-level names:
    ``save_metrics_to_file``, ``consider_only_failure_starts``,
    ``only_failure_starts_prefix``, ``perfomance_saving_folder``, ``df_inv_metrics``,
    ``system_name``, ``test_names``, ``som_configs``, ``num_selected_configs``,
    ``fault_profiles_available`` and ``prediction_window``.

    Parameters
    ----------
    fault_profile : list of str
        Event categories of the profile to analyse (e.g. ``["General Fault", "Log - High"]``).
    visualize_details : bool, default True
        Forwarded as ``verbose`` to ``som_outcome_utils.visualize_metrics``.

    Returns
    -------
    tuple
        ``(inv_ranked_configs_faultProfiles, fault_anticipation, inv_best_perfomance)``:
        a dict with a single key built from ``fault_profile`` (spaces removed, items
        joined by ``_``), a dict keyed by inverter name holding the non-empty
        anticipation outcomes, and a dict keyed by inverter name holding the third
        element returned by ``visualize_metrics``.
    """
    console_stdout = sys.stdout
    log_file = None

    inv_ranked_configs_faultProfiles = dict()
    fault_anticipation = dict()
    inv_best_perfomance = dict()
    # Defined up-front so that an empty `df_inv_metrics` yields None instead of a NameError.
    inv_ranked_configs = None

    if save_metrics_to_file:
        profile_name = '_'.join([''.join(word.capitalize() for word in profile.replace('-', '').split(' '))
                                 for profile in fault_profile])
        # The extension is appended exactly once, after the optional suffix.
        file_name = f"perfomance_configs_{profile_name}"
        if consider_only_failure_starts:
            file_name += "_" + only_failure_starts_prefix
        log_file_path = path.join(perfomance_saving_folder, file_name + ".txt")
        log_file = open(log_file_path,  mode="w+")
        print(f"The findings will be saved in a txt file --> {file_name}\n(folder: {perfomance_saving_folder}).\n")
        sys.stdout = log_file

    try:
        for inv_name in df_inv_metrics.keys():
            metrics_configs = df_inv_metrics[inv_name]

            print("\n" + "-" * 50 + f" PV SYSTEM: {system_name} " + "-" * 50)
            print("\n" + "-" * 45 + f" SOM CONFIGURATION " + "-" * 45)
            configs_pairs = list(zip(test_names, som_configs[inv_name][:num_selected_configs]))
            print('\n'.join([f"({idk + 1}) {config_type.upper()} --> { ' || '.join(config.split('_'))}\n" +"-" * 100
                             for idk, (config_type, config) in enumerate(configs_pairs)]))
            print("\n\n" + "-" * 30 + f" INVERTER: N°{inv_name[-1:]} (FAULT PROFILE: {' || '.join(fault_profile)})" + "-" * 30)

            # Visualize the metrics
            outcomes = som_outcome_utils.visualize_metrics(metrics_configs, fault_profile, fault_profiles_available,
                                                           prediction_window, save_metrics_to_file,
                                                           verbose = visualize_details)
            inv_ranked_configs, inv_fault_anticipation, inv_best_perfomance[inv_name] = outcomes

            # Keep the anticipation outcome only when there is something to keep
            if inv_fault_anticipation:
                fault_anticipation[inv_name] = inv_fault_anticipation

        # NOTE(review): only the ranking of the LAST inverter is stored here; confirm this
        # is intended (the assignment sits outside the loop in the original code as well).
        key_name = "_".join([item.replace(" ", "") for item in fault_profile])
        inv_ranked_configs_faultProfiles[key_name] = inv_ranked_configs
    finally:
        # Always give the console back and release the file, even if a report step failed.
        if log_file is not None:
            sys.stdout = console_stdout
            log_file.close()

    return inv_ranked_configs_faultProfiles, fault_anticipation, inv_best_perfomance

In [40]:
# inverter name --> {"<config>_<test label>": metrics}, merged over all the loaded tests.
df_inv_metrics = dict()
for test_available, system_metrics in all_system_configs_available.items():
    print("\n"+"-" * 30 + f" {test_available} " + "-" * 30)

    # A test without any loaded configuration contributes nothing.
    if not len(system_metrics.keys()):
        print("-" * 10 + " WARNING: There are not configurations available for this test " + "-" * 10)
        continue

    for inv_name, inv_metrics in system_metrics.items():
        print("-" * 10 + f" [{inv_name}]  Merging the metrics from all the SOM configs " + "-" * 10)

        # Suffix every configuration name with the test label, i.e. the text between
        # the ':' and an optional '(' of the test name.
        renamed_dict = dict()
        for config in inv_metrics.keys():
            full_config = config + "_" + test_available.split(':')[1].split('(')[0].strip()
            renamed_dict[full_config] = inv_metrics[config]

        # First test seen for this inverter: store the dict; otherwise merge into it.
        if inv_name in df_inv_metrics:
            df_inv_metrics[inv_name].update(renamed_dict)
        else:
            df_inv_metrics[inv_name] = renamed_dict


------------------------------ TEST(0): best_for_inverter (Galatina) ------------------------------
---------- [INV1]  Merging the metrics from all the SOM configs ----------
---------- [INV2]  Merging the metrics from all the SOM configs ----------
---------- [INV3]  Merging the metrics from all the SOM configs ----------
---------- [INV4]  Merging the metrics from all the SOM configs ----------

------------------------------ TEST(1): averaged_across_inverter (Galatina) ------------------------------
---------- [INV1]  Merging the metrics from all the SOM configs ----------
---------- [INV2]  Merging the metrics from all the SOM configs ----------
---------- [INV3]  Merging the metrics from all the SOM configs ----------
---------- [INV4]  Merging the metrics from all the SOM configs ----------

------------------------------ TEST(2): averaged_across_pvSystems ------------------------------


### B.4.1) [VIEW 5] General Fault & Alarm Logs with 'High priority'

In [41]:
# Profile [0]: general faults together with high-priority alarm logs.
fault_profile = fault_profiles_available[0]

In [42]:
# Placeholder; it is replaced below by the dict returned by visualize_perfomance.
inv_ranked_configs_faultProfiles = dict()

# Print the comparison for the selected fault profile and keep the three outcomes:
# ranked configs (keyed by fault profile), fault anticipation and best performance
# (both keyed by inverter name).
inv_ranked_configs, fault_anticipation, inv_best_perfomance = visualize_perfomance(fault_profile, visualize_details = False)
inv_ranked_configs_faultProfiles = inv_ranked_configs


-------------------------------------------------- PV SYSTEM: Galatina --------------------------------------------------

--------------------------------------------- SOM CONFIGURATION ---------------------------------------------
(1) TEST(0): BEST_FOR_INVERTER (GALATINA) --> 26grid || 8Kepoch || 0.01lr || 4sigma || gaussianFunc
----------------------------------------------------------------------------------------------------
(2) TEST(1): AVERAGED_ACROSS_INVERTER (GALATINA) --> 12grid || 3Kepoch || 0.001lr || 6sigma || gaussianFunc
----------------------------------------------------------------------------------------------------
(3) TEST(2): AVERAGED_ACROSS_PVSYSTEMS --> 
----------------------------------------------------------------------------------------------------


------------------------------ INVERTER: N°1 (FAULT PROFILE: General Fault || Log - High)------------------------------

---------------------------------------- A) COMPARISION OF THE PRE-PROCESSING STEPS ----

Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,,0.0,100.0,,0.0
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,,0.0,100.0,,0.0



-------------------------------------------------- F1 score (%) --------------------------------------------------

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> Recall (%): 0.0 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> [0.0%]

-------------------------------------------------- Precision (%) --------------------------------------------------

---------------------------------------- B) COMPARISION OF THE PRE-PROCESSING STEPS --------------------------------


Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,,0.0,100.0,,0.0
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,,0.0,100.0,,0.0



-------------------------------------------------- F1 score (%) --------------------------------------------------

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> Recall (%): 0.0 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> [0.0%]

-------------------------------------------------- Precision (%) --------------------------------------------------

---------------------------------------- C) COMPARISION OF THE PRE-PROCESSING STEPS --------------------------------


Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,26.67,16.67,83.33,66.67,0.06
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,,0.0,100.0,,0.0



-------------------------------------------------- F1 score (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> F1 score (%): 26.67 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> [nan%]

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> Recall (%): 16.67 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 1) 1h

Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,81.82,75.0,25.0,90.0,0.06
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,25.0,16.67,83.33,50.0,0.12



-------------------------------------------------- F1 score (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> F1 score (%): 81.82 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> [-56.82%]

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> Recall (%): 75.0 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||

Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,18.59,10.37,89.63,89.29,0.25
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,5.25,2.7,97.3,100.0,0.0



-------------------------------------------------- F1 score (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> F1 score (%): 18.59 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> [-13.34%]

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> Recall (%): 10.37 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour|

Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,24.47,14.32,85.68,84.15,0.54
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,15.33,8.3,91.7,100.0,0.0



-------------------------------------------------- F1 score (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> F1 score (%): 24.47 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> [-9.14%]

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> Recall (%): 14.32 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||

Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,37.86,24.27,75.73,86.03,0.79
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,25.23,14.52,85.48,95.89,0.13



-------------------------------------------------- F1 score (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> F1 score (%): 37.86 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> [-12.63%]

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> Recall (%): 24.27 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour|

Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,58.71,45.44,54.56,82.95,1.88
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,47.26,32.16,67.84,89.08,0.79



-------------------------------------------------- F1 score (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> F1 score (%): 58.71 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> [-11.45%]

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> Recall (%): 45.44 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour|

Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,44.44,28.57,71.43,100.0,0.0
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,,0.0,100.0,,0.0



-------------------------------------------------- F1 score (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> F1 score (%): 44.44 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> [nan%]

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> Recall (%): 28.57 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||av

Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,44.44,28.57,71.43,100.0,0.0
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,,0.0,100.0,,0.0



-------------------------------------------------- F1 score (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> F1 score (%): 44.44 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> [nan%]

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> Recall (%): 28.57 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||av

Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,44.44,28.57,71.43,100.0,0.0
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,32.0,19.05,80.95,100.0,0.0



-------------------------------------------------- F1 score (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> F1 score (%): 44.44 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> [-12.44%]

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> Recall (%): 28.57 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour|

Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,64.52,47.62,52.38,100.0,0.0
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,44.44,28.57,71.43,100.0,0.0



-------------------------------------------------- F1 score (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> F1 score (%): 64.52 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> [-20.08%]

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> Recall (%): 47.62 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour|

Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,63.16,46.15,53.85,100.0,0.0
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,32.26,19.23,80.77,100.0,0.0



-------------------------------------------------- F1 score (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> F1 score (%): 63.16 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> [-30.9%]

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> Recall (%): 46.15 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 1) 

Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,63.16,46.15,53.85,100.0,0.0
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,32.26,19.23,80.77,100.0,0.0



-------------------------------------------------- F1 score (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> F1 score (%): 63.16 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> [-30.9%]

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> Recall (%): 46.15 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 1) 

Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,63.16,46.15,53.85,100.0,0.0
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,37.5,23.08,76.92,100.0,0.0



-------------------------------------------------- F1 score (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> F1 score (%): 63.16 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> [-25.66%]

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> Recall (%): 46.15 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 1)

Unnamed: 0,F1 score (%),Recall (%),Miss rate (%),Precision (%),Fall out (%)
(CONFIG. 1) 1hour_averaged_fullReg_detrended_best_for_inverter,70.0,53.85,46.15,100.0,0.0
(CONFIG. 2) 1hour_averaged_fullReg_detrended_averaged_across_inverter,61.54,46.15,53.85,92.31,0.08



-------------------------------------------------- F1 score (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> F1 score (%): 70.0 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||averaged||fullReg||detrended||averaged||across||inverter --> [-8.46%]

-------------------------------------------------- Recall (%) --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
REFERENCE: (CONFIG. 1) 1hour||averaged||fullReg||detrended||best||for||inverter --> Recall (%): 53.85 %
--------------------------------------------------------------------------------------------------------------
--> (CONFIG. 2) 1hour||a

### B.4.2) [VIEW 5bis] General Fault

In [43]:
# Profile [1]: general faults only.
fault_profile = fault_profiles_available[1]

In [44]:
#inv_ranked_configs, *_ = visualize_perfomance(fault_profile, visualize_details = False)
#inv_ranked_configs_faultProfiles.update(inv_ranked_configs)

### B.5) Compute and visualize how many days in advance the failure events were detected

In [45]:
# When True, the anticipation report also lists the lead time of every single
# detected failure event, not only the average per warning level.
verbose = False

In [46]:
# Capture the current stdout inside this cell, so that restoring it at the end does
# not depend on a `console_stdout` name defined elsewhere.
console_stdout = sys.stdout
anticipation_file = None

if save_metrics_to_file:
    file_name = "anticipation_GeneralFault_LogHigh"
    if consider_only_failure_starts:
        file_name += "_" + only_failure_starts_prefix
    anticip_file_path = path.join(perfomance_saving_folder, file_name + ".txt")
    anticipation_file = open(anticip_file_path,  mode="w+")
    print(f"The findings will be saved in a txt file --> {file_name}\n(folder: {perfomance_saving_folder}).\n")
    sys.stdout = anticipation_file

try:
    # Pool the lead times of all the inverters, grouped by warning level.
    anticipation = defaultdict(list)
    for inv_name in df_inv_metrics.keys():
        # Inverters without any anticipation outcome are not stored in
        # `fault_anticipation`, hence the empty default.
        temporal_anticipation = fault_anticipation.get(inv_name, dict())

        for warning_level in temporal_anticipation.keys():
            anticipation[warning_level].extend(temporal_anticipation[warning_level])

    print("-" * 100 + f"\n\t\t\t\t\t\tPV SYSTEM: {system_name}")
    print(f"\t\t\t\tEVENTS: General Faults & Alarms with 'High priority'\n"+ "-" * 100)
    avg_anticipation = dict()
    for warning_level in anticipation.keys():
        deltas = anticipation[warning_level]
        # Nothing to average for this level (the mean of an empty list is not a time delta).
        if len(deltas) == 0:
            continue
        avg_delta = np.mean(deltas)

        avg_anticipation[warning_level] = avg_delta
        # assumes the deltas are pandas Timedelta objects (they expose `.components`) - TODO confirm
        days, hours, minutes, *_ = avg_delta.components
        print("-" * 55 + f"\nWARNING LEVEL >= {warning_level} (with {len(deltas)} correct warnings)\n" + "-" * 55)

        if verbose:
            for idk, delta in enumerate(deltas):
                delta_days, delta_hours, delta_minutes, *_ = delta.components
                print(f"Failure event detected ({'0' if idk < 9 else ''}{idk + 1}): "\
                      f"{delta_days} days, {delta_hours} hours and {delta_minutes} minutes")
            print("-" * 46)
        print(f"[AVG] TEMPORAL ADVANCE: {days} days, {hours} hours and {minutes} minutes.\n")
finally:
    # Always give the console back and release the file, even if the report failed.
    if anticipation_file is not None:
        sys.stdout = console_stdout
        anticipation_file.close()

----------------------------------------------------------------------------------------------------
						PV SYSTEM: Galatina
				EVENTS: General Faults & Alarms with 'High priority'
----------------------------------------------------------------------------------------------------
-------------------------------------------------------
-------------------------------------------------------
[AVG] TEMPORAL ADVANCE: 4 days, 1 hours and 21 minutes.

-------------------------------------------------------
-------------------------------------------------------
[AVG] TEMPORAL ADVANCE: 4 days, 3 hours and 48 minutes.



### B.6) Error analysis

#### B.6.0) Utils

In [47]:
def _split_failure_type(type_name):
    """Split a failure-type label into the two levels of the summary multi-index.

    A label such as 'MAIN(sub)detail' becomes ('MAIN', '(SUB)detail'): the text before the
    first '(' is stripped and the text inside the first pair of parentheses is upper-cased;
    everything after the first ')' is kept untouched. Labels without a parenthesised part
    (e.g. events whose subtype is empty) yield an empty second level instead of raising.
    """
    main_type, opening, remainder = type_name.partition('(')
    main_type = main_type.strip()
    if not opening:
        return main_type, ''
    tag, closing, detail = remainder.partition(')')
    if not closing:
        return main_type, '(' + remainder
    return main_type, '(' + tag.upper() + ')' + detail


def visualize_error_analysis(fault_profile, warning_levels = 1, prediction_window = 7, 
                             save_metrics_to_file = False, verbose = False):
    """Print the error analysis of one fault profile and return the summary table.

    For every inverter in `df_inv_metrics` and every SOM configuration type stored for it,
    the failure events ('Faults' column) and the correct warnings ('Fault warnings' column)
    are parsed from the metrics rows that match the requested fault profile, warning level
    and prediction window. A failure event counts as detected when a warning carries the
    same message and the same start/end timestamps. The share of missed events is computed
    per failure type and then averaged across the inverters.

    Besides its arguments the function reads these notebook-level names: `system_name`,
    `df_inv_metrics`, `test_names`, `som_configs`, `num_selected_configs`,
    `pre_steps_configs`, `consider_only_failure_starts`, `only_failure_starts_prefix` and
    `perfomance_saving_folder`.

    Parameters
    ----------
    fault_profile : sequence of str
        Fault profile to analyse; `str(fault_profile)` is compared with the
        "Fault Profile" column of the metrics.
    warning_levels : int, default 1
        Value selected in the 'Warning levels (>=)' column.
    prediction_window : int, default 7
        Value selected in the 'Prediction Window (days)' column.
    save_metrics_to_file : bool, default False
        If True, everything is printed into `error_analysis_<profile>.txt` inside
        `perfomance_saving_folder` (followed by a tabulated summary) instead of the
        notebook output; if False, the summary is shown with `display`.
    verbose : bool, default False
        If True, every parsed failure event and every detected event is printed.

    Returns
    -------
    pandas.DataFrame
        One row per failure type (two-level index: main type, subtype) with the column
        'Failure events' (occurrences summed over the inverters) and one
        '[Error %] <configuration>' column per configuration type.

    Raises
    ------
    IndexError
        If no metrics row matches the requested profile / warning level / window.
    """
    console_stdout = sys.stdout
    error_file = None
    dt_format = '%Y-%m-%d (%H:%M)'
    
    # Load priorities 
    alarms_with_priorities = load_priorities(system_name)
    alarm_names = alarms_with_priorities['High']
    
    if save_metrics_to_file:
        profile_name = '_'.join([''.join(word.capitalize() for word in profile.replace('-', '').split(' ')) 
                                 for profile in fault_profile])
        
        file_name = f"error_analysis_{profile_name}"
        if consider_only_failure_starts:
            file_name += "_" + only_failure_starts_prefix
        error_file_path = path.join(perfomance_saving_folder, file_name + ".txt")
        
        error_file = open(error_file_path,  mode="w+")
        print(f"The findings will be saved in a txt file --> {file_name}\n(folder: {perfomance_saving_folder}).\n")
        sys.stdout = error_file
  
    # From here on stdout may be the report file: try/finally guarantees that the console
    # stream is restored and the file closed even when the analysis raises.
    try:
        inv_types_counter = defaultdict(int)
        inv_errors = dict()
        for inv_name in df_inv_metrics.keys():
            metrics_configs = df_inv_metrics[inv_name]

            print("\n" + "-" * 50 + f" PV SYSTEM: {system_name} " + "-" * 50)
            print("\n" + "-" * 45 + f" SOM CONFIGURATION " + "-" * 45)
            configs_pairs = list(zip(test_names, som_configs[inv_name][:num_selected_configs]))
            print('\n'.join([f"({idk + 1}) {config_type.upper()} --> { ' || '.join(config.split('_'))}\n" +"-" * 100
                             for idk, (config_type, config) in enumerate(configs_pairs)]))
            print("\n\n" + "-" * 30 + f" INVERTER: N°{inv_name[-1:]} (FAULT PROFILE: {' || '.join(fault_profile)})" + "-" * 30)
            error_types = dict()
            for idk_type, (config_type, metrics_df) in enumerate(metrics_configs.items()):
                config_type_simplified_name = config_type.replace(pre_steps_configs[0], '').replace("_", ' ').strip().capitalize()
                print("\n" + "-" * 30 + f" TYPE {idk_type}: {config_type_simplified_name} " + "-" * 30)
                
                # Select only a fault profile 
                filtered_metrics = metrics_df[pd.array(metrics_df["Fault Profile"]) == str(fault_profile)]
                filtered_metrics = filtered_metrics[filtered_metrics['Warning levels (>=)']== warning_levels]
                filtered_metrics = filtered_metrics[filtered_metrics['Prediction Window (days)'] == prediction_window]
                if filtered_metrics.empty:
                    # Same exception type as the bare `unique()[0]` lookup, but self-explanatory
                    raise IndexError(f"No metrics found for inverter '{inv_name}' ({config_type}) with "
                                     f"fault profile {fault_profile}, warning levels >= {warning_levels} "
                                     f"and a prediction window of {prediction_window} days.")
                
                # Retrieve the faults/alarms 
                raw_failure_events = filtered_metrics['Faults'].unique()[0].split(',')
                failure_events = []
                failure_event_types = []
                print("-" * 10,"FAILURE EVENTS:", len(raw_failure_events), "-" * 10)
                for idk, raw_event in enumerate(raw_failure_events):
                    
                    # Extract information
                    event_type = raw_event.split(':')[0].strip("[' ")
                    event_message = ':'.join(raw_event.split(':')[1:]).strip().split('\\n')[0].strip()
                    period_parts = raw_event.split('FROM')[1].split('TO')
                    period = (pd.to_datetime(period_parts[0].strip("]'").strip(), format = dt_format), 
                              pd.to_datetime(period_parts[1].strip("]'").strip(), format = dt_format))
                    event_subType = [name.rstrip() for name in alarm_names if name in event_message]
                    event_subType = event_subType[0] if len(event_subType) > 0 else ""
                    
                    # Save the failure event
                    event = {'general_type': event_type,'subtype': event_subType, 'type': event_type + event_subType,
                             'message' : event_message, 'period': period} 
                    failure_events.append(event)
                    failure_event_types.append(event['type'])
                    
                    if verbose: 
                        print(f"{idk + 1}) ({event_type}\n\t--> {event_message}\n\t-->",
                              "\n\t--> ".join([ts.strftime(dt_format) for ts in period]))
                num_failure_events = len(failure_events)
                
                types_counter = Counter(failure_event_types)
                print("\t" + "-" * 30, "FAILURE TYPES:", len(types_counter.keys()), "-" * 30)
                print('\t' + '\n\t'.join([f"(x{counter}) {failure_type}" 
                                   for failure_type, counter in types_counter.items()]))
                
                # Update the general counter (once per inverter: the failure events are
                # counted only for the first configuration type)
                if idk_type == 0:
                    for failure_event, counter in types_counter.items():
                        inv_types_counter[failure_event] += counter
                    
                # Retrieve the warnings and parse them once, not once per failure event
                raw_warnings = filtered_metrics['Fault warnings'].unique()[0].split("('")[1:]
                parsed_warnings = []
                for raw_warning in raw_warnings:
                    warning_parts = raw_warning.split('_')
                    message = warning_parts[0].lstrip()
                    period = (pd.to_datetime(warning_parts[1].strip(",[").strip(), format = dt_format),
                              pd.to_datetime(warning_parts[2].split(',')[0].strip(",['"), format = dt_format))
                    parsed_warnings.append((message, period))
                
                print("\n" + "-" * 25, "TRUE WARNINGS/POSITIVE (i.e., failure events correctly detected)", "-" * 25)
                tp_counter = 0
                tp_types = []
                missed_types = []
                for failure_event in failure_events:
                    failure_message = failure_event['message']
                    failure_period = failure_event['period']
                    failure_type = failure_event['type']
                   
                    found = False
                    for message, period in parsed_warnings:
                        if failure_message == message and period == failure_period:
                            tp_counter += 1
                            tp_types.append(failure_type)
                            found = True
                            if verbose:
                                print(f"--> ({tp_counter}) FAILURE:", failure_message, "\n\t-->", 
                                      "\n\t--> ".join([ts.strftime(dt_format) for ts in failure_period]))
                            # A failure event is detected at most once: stop at the first
                            # matching warning so duplicates cannot inflate the counters
                            break
                    if not found:
                        missed_types.append(failure_type)
                    
                tp_type_counter = Counter(tp_types)
                missed_types_counter = Counter(missed_types)
                print("\n" + "-" * 35, "METRICS", "-" * 35)                
                print(f"\nFAILURE EVENTS CURRECTLY DETECTED: --> {tp_counter}/{num_failure_events} "
                      f"({round((tp_counter/num_failure_events)*100, 2)} %)\n" + 60 * "-")
                if len(tp_types) > 0:
                    print('-->', '\n--> '.join([f"(x{counter}) {failure_type}"
                                       for failure_type, counter in tp_type_counter.items()]))
                print(f"\nFAILURE EVENTS MISSED --> {num_failure_events - tp_counter}/{num_failure_events} "
                      f"({round(((num_failure_events - tp_counter)/num_failure_events)*100, 2)} %)\n"+ 60 * "-") 
                print('-->', '\n--> '.join([f"(x{counter}) {failure_type}" 
                                   for failure_type, counter in missed_types_counter.items()]))
                
                print("\n" + "-" * 80)
                print("\t\t\t\tERROR ANALYSIS")
                print("-" * 80)
                error_types[config_type_simplified_name] = dict()
                for idk, (failure_type, counter) in enumerate(types_counter.items()):
                    missed_counter = missed_types_counter[failure_type]
                    print(f"({idk + 1}) {failure_type} --> {missed_counter}/{counter} "
                          f"({round((missed_counter/counter)*100, 1)} %)")
                    error_types[config_type_simplified_name][failure_type] = missed_counter/counter
            
            # Save it 
            inv_errors[inv_name] = error_types
        
        # Change organization: configuration type --> failure type --> errors of all the inverters
        avg_inv_errors_faultProfiles = defaultdict(dict)
        for inv_name, config_errors in inv_errors.items():
            for config_type, errors in config_errors.items():
                for failure_event, error in errors.items():
                    avg_inv_errors_faultProfiles[config_type].setdefault(failure_event, []).append(error)
                            
        error_df = pd.DataFrame.from_dict(avg_inv_errors_faultProfiles)
        error_df.columns = ['[Error %] '+col for col in error_df.columns]
        # `DataFrame.map` supersedes the deprecated `applymap` in recent pandas releases
        elementwise = error_df.map if hasattr(pd.DataFrame, "map") else error_df.applymap
        avg_error_df = elementwise(lambda list_values: np.mean(list_values) * 100)
        
        # Append the total failure events as a new column (0 for an unknown failure type)
        failure_counts = [inv_types_counter.get(failure_event, 0) for failure_event in avg_error_df.index]
        avg_error_df.insert(loc = 0, column = 'Failure events', value = failure_counts)
        
        # Technical transformation
        avg_error_df = avg_error_df.round(decimals = 2)
        avg_error_df = avg_error_df.sort_index()

        # Create a multi-index: (main type, subtype)
        avg_error_df.index = pd.MultiIndex.from_tuples([_split_failure_type(name) for name in avg_error_df.index])
        
        # Sort values (a sort column missing from the table is skipped instead of raising)
        sort_columns = [col for col in ('[Error %] Best for inverter', 'Failure events') 
                        if col in avg_error_df.columns]
        avg_error_df = avg_error_df.sort_index().sort_values(by = sort_columns, ascending = False, kind = 'stable')
        
        if save_metrics_to_file: 
            print("\n\n" + '-' * 150 + "\n" + '-' * 150)
            print('-' * 71 + " SUMMARY " + '-' * 70)
            print('-' * 150 + "\n" + '-' * 150 + "\n")
            print(tabulate(avg_error_df, headers = 'keys', tablefmt = 'psql'))
    finally:
        if error_file is not None:
            sys.stdout = console_stdout
            error_file.close()

    if not save_metrics_to_file: 
        display(avg_error_df)

    return avg_error_df

#### B.6.1) Visualize the error analysis [in detail]

In [48]:
# Selection used by the error analysis: minimum warning level, prediction window (days)
warning_levels, prediction_window = 1, 7

In [49]:
# Run the error analysis on the first available fault profile
fault_profile = fault_profiles_available[0]
avg_error_df = visualize_error_analysis(
    fault_profile,
    warning_levels = warning_levels,
    prediction_window = prediction_window,
    save_metrics_to_file = save_metrics_to_file,
)


-------------------------------------------------- PV SYSTEM: Galatina --------------------------------------------------

--------------------------------------------- SOM CONFIGURATION ---------------------------------------------
(1) TEST(0): BEST_FOR_INVERTER (GALATINA) --> 26grid || 8Kepoch || 0.01lr || 4sigma || gaussianFunc
----------------------------------------------------------------------------------------------------
(2) TEST(1): AVERAGED_ACROSS_INVERTER (GALATINA) --> 12grid || 3Kepoch || 0.001lr || 6sigma || gaussianFunc
----------------------------------------------------------------------------------------------------
(3) TEST(2): AVERAGED_ACROSS_PVSYSTEMS --> 
----------------------------------------------------------------------------------------------------


------------------------------ INVERTER: N°1 (FAULT PROFILE: General Fault || Log - High)------------------------------

------------------------------ TYPE 0: Best for inverter ------------------------------


Unnamed: 0,Unnamed: 1,Failure events,[Error %] Best for inverter,[Error %] Averaged across inverter
LOG - HIGH,(ALLARME INVERTER)Intervento protezione esterna,258,56.59,72.09
LOG - HIGH,(ALLARME INVERTER)Tensione rete fuori dai limiti,28,39.46,65.54
LOG - HIGH,(ALLARME INVERTER)Mancanza comunicazione CAN,1,0.0,0.0
LOG - HIGH,(ALLARME INVERTER)Sovracorrente inverter,1,0.0,0.0


#### B.6.2) GIST: Error analysis

In [50]:
# Banner followed by the summary table of the error analysis
banner_rule = "-" * 120
banner_title = "\n\t\t\t\t\t\t\tError Analysis\n"
print(banner_rule, banner_title, banner_rule)
display(avg_error_df)

------------------------------------------------------------------------------------------------------------------------ 
							Error Analysis
 ------------------------------------------------------------------------------------------------------------------------


Unnamed: 0,Unnamed: 1,Failure events,[Error %] Best for inverter,[Error %] Averaged across inverter
LOG - HIGH,(ALLARME INVERTER)Intervento protezione esterna,258,56.59,72.09
LOG - HIGH,(ALLARME INVERTER)Tensione rete fuori dai limiti,28,39.46,65.54
LOG - HIGH,(ALLARME INVERTER)Mancanza comunicazione CAN,1,0.0,0.0
LOG - HIGH,(ALLARME INVERTER)Sovracorrente inverter,1,0.0,0.0


## Various stuff

# Various [OLD STUFF]