In [1]:
from sys import path
if '..' not in path:
    path.insert(0, '..')

In [2]:
from _library.utils import SYSTEM_NAMES, SUBFOLDERS, load_datasets
from os import path
from collections import Counter
from string import ascii_uppercase
from re import match
from tensorflow.keras.models import load_model 
from tensorflow import device
import _library.lstm_utils as lstm_utils
import pandas as pd
import json
import numpy as np

In [3]:
%cd /mnt/data/vieri/projects/SAMPLE/

/mnt/data/vieri/projects/SAMPLE


### [ONLY FOR DEBUG] Turnaround to use the CPU (instead of the GPU)

In [4]:
use_gpu = False

In [5]:
from os import environ
if not use_gpu:
    environ["CUDA_VISIBLE_DEVICES"] = ""

# A) The photovoltaic systems

In [6]:
print(SYSTEM_NAMES, "\nSUBFOLDERS: -->", SUBFOLDERS)
# --- 0 ---------- 1 ---------- 2 --------- 3 ---------- 4 -------

['Binetto 1', 'Binetto 2', 'Soleto 1', 'Soleto 2', 'Galatina'] 
SUBFOLDERS: --> ['Cleaned', '1-hour sampling', '1-hour averaged sampling', 'Residuals', 'Residuals_analytical', 'Failure events', None]


## A.1) Selecting the PV system

In [7]:
system_name = SYSTEM_NAMES[4]

In [8]:
system_path = path.join('data', system_name.upper(), system_name.upper())
print(f"PV SYSTEM --> {system_name}")

PV SYSTEM --> Galatina


## A.2) Load the failure logs

In [9]:
data_folder = 'Failure events'

In [10]:
#file_name = 'HighMedium_failureEvent_logs.csv'
file_name = 'Medium_failureEvent_logs.csv'

In [11]:
folder_path = path.join(system_path, 'Imported data' , data_folder)
lstm_folder_path =  path.join('data',system_name.upper(), system_name.upper(), "UC2 - LSTM")
fault_df, unique_events = lstm_utils.load_failure_logs(folder_path, file_name, system_name, verbose = True)

--------------------------------------------------------------------------------------------------------------
-------------------------------------------------- GALATINA --------------------------------------------------
--------------------------------------------------------------------------------------------------------------
Logs concerning failure events have been loaded.

---------------------------------------- DATA AVAILABLE ----------------------------------------
--> Inverter available (4):  1, 2, 3, 4
--> Unique events (1)
	--> 1) (MEDIUM) Corrente di stringa fuori range

--> Unique string names available (12): s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12

--> General Plant box available (23):
	--> QC1.I1
	--> QC1.I2
	--> QC1.I3
	--> QC1.I4
	--> QC2.I2
	--> QC2.I3
	--> QC2.I4
	--> QC3.I1
	--> QC3.I2
	--> QC3.I3
	--> QC3.I4
	--> QC4.I1
	--> QC4.I2
	--> QC4.I3
	--> QC4.I4
	--> QC5.I1
	--> QC5.I2
	--> QC5.I3
	--> QC5.I4
	--> QC6.I1
	--> QC6.I2
	--> QC6.I3
	--> QC6.I4


In [12]:
grouped_fault_df = fault_df.groupby(by = ['Inverter',  'Tipo', 'Messaggio'])['Durata'].agg(['count', 'sum', 'mean', 
                                                                                            'max', 'min'])
grouped_fault_df.rename(columns = {'sum': 'Summed period', 'count' : 'Total events', 'mean' : 'Average event duration',
                                   'median': 'Median event duration', 'max': 'Maxiumum event duration',
                                   'min': 'Minimum event duration'}, 
                        inplace = True)
display(grouped_fault_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Total events,Summed period,Average event duration,Maxiumum event duration,Minimum event duration
Inverter,Tipo,Messaggio,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Log_stringBox - Medium,Corrente di stringa fuori range,54735,1427 days 10:39:00,0 days 00:37:33.240887914,1 days 18:52:00,0 days 00:00:00
2,Log_stringBox - Medium,Corrente di stringa fuori range,22881,719 days 00:02:00,0 days 00:45:14.991477645,0 days 09:04:00,0 days 00:03:00
3,Log_stringBox - Medium,Corrente di stringa fuori range,28067,1196 days 01:22:00,0 days 01:01:21.879787650,1 days 10:56:00,0 days 00:00:00
4,Log_stringBox - Medium,Corrente di stringa fuori range,29627,701 days 08:46:00,0 days 00:34:05.362675937,0 days 09:00:00,0 days 00:00:00


# B) Data preparation

## Select only the relevant period (i.e., last 2 weeks)

In [13]:
minimum_days_required = 11 

In [14]:
# ONLY FOR TESTING: this re_set of the minimum_days_required is just for the disaligment between the alarm dataset and the inverter dataset 
# --> since there is a mismatch between: 
#     a) The alarm log dataset (last obs. available: 2021/09)
#     b) The inverter datasets (last obs. available: 2021/06)
minimum_days_required = 100

In [15]:
sorted_timestamps = sorted(fault_df['Inizio'].values)
starting_date = (sorted_timestamps[-1] - pd.Timedelta(minimum_days_required, unit = 'days')).date()
print(f"Minimum starting date: {starting_date}")

Minimum starting date: 2021-06-06


In [16]:
fault_df = fault_df[fault_df['Inizio'].dt.date >= starting_date]
print(f"Period selected: FROM '{pd.to_datetime(sorted(fault_df['Inizio'].values)[-1]).strftime('%Y-%m-%d')} "\
      f"TO '{pd.to_datetime(sorted(fault_df['Inizio'].values)[0]).strftime('%Y-%m-%d')}' --> "\
      f"{pd.to_datetime(sorted(fault_df['Inizio'].values)[-1]) - pd.to_datetime(sorted(fault_df['Inizio'].values)[0])}")

Period selected: FROM '2021-09-14 TO '2021-06-06' --> 100 days 08:02:00


## C) Create the new data space 

### Generate the timestamps (i.e., dataframe indexes)

In [17]:
timestamps = pd.date_range(
    start =  fault_df['Inizio'].iloc[0].strftime('%Y-%m-%d %H'), 
    end = pd.to_datetime(fault_df['Fine'].iloc[-1].strftime('%Y-%m-%d %H')) + pd.Timedelta(1, unit = 'hour'), 
    freq = "1H"
)

### C.1) Retrieve the number of strings for each string box

In [18]:
file_name = 'stringBoxes_config.json'
file_path = path.join(lstm_folder_path, 'Params', file_name)

In [19]:
try: 
    with open(file_path, 'r') as json_reader:
        stringBoxes_config = json.load(json_reader)
except FileNotFoundError:
    print(f'ISSUE! File not found. A configuration of the string boxes of {system_name} must be provided')
print("-" * 30,"\n" + '-' * 10, system_name, '-' * 10, "\n" +"-" * 30)
print('\n'.join([f"\t{sb}: {item['num_strings']} strings" for sb, item in stringBoxes_config.items()]))

------------------------------ 
---------- Galatina ---------- 
------------------------------
	QC1: 9 strings
	QC2: 12 strings
	QC3: 12 strings
	QC4: 12 strings
	QC5: 12 strings
	QC6: 12 strings


###  C.2) Generate the column names

In [20]:
num_generalBoxes = len(stringBoxes_config.keys())
max_num_strings = np.max([stringBoxes_config[sb]['num_strings'] for sb in stringBoxes_config.keys()])

In [21]:
prefix_gb = "QC"

In [22]:
names = [f'{prefix_gb}{gb}' for gb in range(1, num_generalBoxes + 1)]
names.extend([f'{prefix_gb}{gb}_strings_time' for gb in range(1, num_generalBoxes + 1)])
names.extend([f'{prefix_gb}{gb}_faulty_strings' for gb in range(1, num_generalBoxes + 1)])

# Add a column for the labels (i.e., array that contains the alarm high)
output_col_name = None # FOR TRAINING that was set as 'Labels'

### Set the input/output columns

In [23]:
if system_name == SYSTEM_NAMES[2]:
    input_classes = [
        'Corrente di stringa fuori range', 
        'String-box con produzione anomala'
    ]
    output_classes = [
        'Allarme fusibile su polo negativo', 
        'Allarme fusibile su polo positivo', 
        'Isolamento'
    ]
elif system_name == SYSTEM_NAMES[3]:
    input_classes = [
        'Corrente di stringa fuori range'
    ]
    output_classes = [
        'Isolamento', 
        'String-box con corrente a 0'
    ]
elif system_name == SYSTEM_NAMES[4]:
    input_classes = [
        'Corrente di stringa fuori range'
    ]
    output_classes = [
        'Allarme fusibile su polo negativo', 
        'Allarme fusibile su polo positivo', 
        'Isolamento', 
        'String-box con corrente a 0'
    ]
print("-" * 100 + "\n" + 45 * "-", system_name.upper(), 45 * "-" + "\n" + "-" * 100, "\n")
print("ALARMS used as INPUT\n" + "-" * 40 + "\n-->", '\n--> '.join(input_classes))
print("\nALARMS that will be predicted\n" + "-" * 40 + "\n-->", '\n--> '.join(output_classes))

----------------------------------------------------------------------------------------------------
--------------------------------------------- GALATINA ---------------------------------------------
---------------------------------------------------------------------------------------------------- 

ALARMS used as INPUT
----------------------------------------
--> Corrente di stringa fuori range

ALARMS that will be predicted
----------------------------------------
--> Allarme fusibile su polo negativo
--> Allarme fusibile su polo positivo
--> Isolamento
--> String-box con corrente a 0


### C.3) Generate the inverter names

In [24]:
prefix_inv_name = 'INV'

In [25]:
num_inverters = 4 if (system_name != 'Soleto 2') else 2
inv_names = [prefix_inv_name + str(inv_num) for inv_num in range(1, num_inverters + 1)]
print("[" + system_name.upper() + "] -->",', '.join(inv_names))

[GALATINA] --> INV1, INV2, INV3, INV4


### C.4) Fill the new data space (for each inverter)

In [26]:
inv_stringBoxes_data = dict()

for inv_name in inv_names.copy():
    print("\n" + "-" * 40, inv_name, "-" * 40)

    # 1) Retrieve the failure events concerning the inverter 
    inv_num = int(inv_name[-1])
    inv_alarms = fault_df[fault_df['Inverter'] == inv_num]
    
    # 2) Create the empty dataframe
    print("a) Generating the new space ...")
    inv_stringBoxes_data[inv_name] = pd.DataFrame(data = np.zeros(shape = (len(timestamps), len(names))), 
                                                  index = timestamps, columns = names)
    inv_stringBoxes_data[inv_name] = inv_stringBoxes_data[inv_name].applymap(lambda cell: np.zeros(len(input_classes), 
                                                                                                   dtype = int))
    faulty_cols = [col_name for col_name in names if 'faulty_strings' in col_name]
    inv_stringBoxes_data[inv_name].loc[:, faulty_cols] = inv_stringBoxes_data[inv_name][faulty_cols].applymap(lambda cell: 
                                                                                                              np.zeros(
            shape = (len(input_classes), max_num_strings), 
            dtype = int))
    
    # 3.2) Fill the new dataframe by iterating each alarm log
    print("b) Computing the new dataframe...")
    inv_alarms.apply(func = lambda alarm: 
                     lstm_utils.fill_generalized_stringBoxes_data(alarm, inv_stringBoxes_data[inv_name], input_classes, 
                                                                  output_classes,  output_col_name, prefix_gb, system_name, 
                                                                  verbose = False), 
                     axis = 1)
    
    # 3.2.2) Normalize faulty strings counters
    print("c) Normalizing the number of strings...")
    for col in faulty_cols:
        pre = inv_stringBoxes_data[inv_name][col].iloc[0].copy()
        inv_stringBoxes_data[inv_name].loc[:, col] = inv_stringBoxes_data[inv_name].apply(
            lambda df_row: lstm_utils.normalized_faulty_strings_counter(df_row, col, stringBoxes_config, verbose = False),
            axis = 1)
        print(f"\t|The value within the column '{col}' has been normalized \n\t"\
              f"--> E.g. {inv_stringBoxes_data[inv_name].index[0].strftime('%Y-%m-%d (%H:%M)')} "\
              f"\n\t\t--> FROM {pre.shape}: '{pre.tolist()}' \n\t\t--> TO {inv_stringBoxes_data[inv_name][col].iloc[0].shape}: "\
              f"'{inv_stringBoxes_data[inv_name][col].iloc[0].tolist()}'\n")

    # 4) Check the missing string Boxes
    summed_min = inv_stringBoxes_data[inv_name].iloc[:, 1:].sum(axis = 0).apply(np.sum)
    missing_stringBoxes = summed_min[summed_min == 0].index.tolist()
    print("-" * 80)
    print(f"\nData available for {len(inv_stringBoxes_data[inv_name].columns) - len(missing_stringBoxes)} string boxes "\
          f"({round(((len(inv_stringBoxes_data[inv_name].columns[1:]) - len(missing_stringBoxes))/len(inv_stringBoxes_data[inv_name].columns[1:])) * 100, 2)} %)"\
          f" out of {len(inv_stringBoxes_data[inv_name].columns[1:])} ")
    if len(missing_stringBoxes) > 0:
        print(f"Missing ones ({len(missing_stringBoxes)})\n\t-->", '\n\t--> '.join(missing_stringBoxes))


---------------------------------------- INV1 ----------------------------------------
a) Generating the new space ...
b) Computing the new dataframe...
c) Normalizing the number of strings...
	|The value within the column 'QC1_faulty_strings' has been normalized 
	--> E.g. 2021-06-06 (07:00) 
		--> FROM (1, 12): '[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]' 
		--> TO (1, 1): '[[0.0]]'

	|The value within the column 'QC2_faulty_strings' has been normalized 
	--> E.g. 2021-06-06 (07:00) 
		--> FROM (1, 12): '[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]' 
		--> TO (1, 1): '[[0.0]]'

	|The value within the column 'QC3_faulty_strings' has been normalized 
	--> E.g. 2021-06-06 (07:00) 
		--> FROM (1, 12): '[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]' 
		--> TO (1, 1): '[[0.0]]'

	|The value within the column 'QC4_faulty_strings' has been normalized 
	--> E.g. 2021-06-06 (07:00) 
		--> FROM (1, 12): '[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]' 
		--> TO (1, 1): '[[0.0]]'

	|The value within the column 'QC5_fau

### C.5) Integrate the inverter data 

#### Load the inverter data

In [27]:
dataset_name = '1-hour averaged sampling'

In [28]:
system_path, inv_data, *_ = load_datasets(system_name, subfolder = dataset_name)

-------------------------------------------------------------------------------- 
				PV SYSTEM --> GALATINA 
--------------------------------------------------------------------------------

Loading inverter data...
GALATINA: OK, component data loaded (4) --> INV1, INV2, INV3, INV4
-------------------------------------------------------------------------------- 
FINISHED!: All datasets have been loaded. (SYS: 4 - IRR FILE: 0)
--------------------------------------------------------------------------------
-------------------------------------------------------------------------------- 
EXAMPLE --> Galatina: INV1 (FROM '2019-11-27' TO '2021-06-30': 581 days).
--------------------------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8818 entries, 0 to 8817
Data columns (total 20 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   Date/Time  

#### Merge the data

In [29]:
selected_inv_columns = ['Cc 1 (A)', 'Vcc 1 (V)', 'Irradiance (W/mq)', 'Amb. Temp (°C)', 'Humidity (%)', 
                        'Atmospheric Pressure (hPa)', 'Rainfall (mm)']

In [30]:
fill_nan_values = True

In [31]:
verbose = False

In [32]:
for inv_name in inv_names:
    print("\n" + "-" * 110 + "\n" + "-" * 50, inv_name, "-" * 50 + "\n" + "-" * 110, "\n")
    
    # Retrieve the main dataset
    df = inv_stringBoxes_data[inv_name]
    print(f"ALARM LOGS: {len(df)} obs.")
    
    # Retrieve the inverter data
    inv_df = inv_data[inv_name]
    inv_df.index = inv_df['Date/Time']
    
    # Select only the relevant columns concerning the inverter
    inv_df = inv_df[selected_inv_columns]
    print(f"INVERTER DATA: {len(inv_df)} obs.")
    print(f"--> Selected columns ({len(selected_inv_columns)}):\n\t-->", '\n\t--> '.join(selected_inv_columns))
    
    # Merge the data
    merged_df = inv_df.merge(df, how = 'right', left_index = True, right_index = True)
 
    # Chech the NaN values
    empty_ts = set(merged_df[merged_df.isnull().values].index)
    empty_hours = np.array(sorted(Counter(item.strftime('%H') for item in empty_ts).most_common(), key = lambda item: item[0]))
    print(f"--> Missing inverter data: {len(empty_ts)} ({(round((len(empty_ts)/len(merged_df))* 100, 2))} %)")
    
    if verbose:
        plt.figure(figsize = (10, 3))
        sns.barplot(x = [int(hour) for hour in empty_hours[:, 0]], y =  [int(counter) for counter in empty_hours[:, 1]], 
                    color = 'orange')
        plt.title("Missing hours", fontsize = 40, y = 1.05)
        plt.xlabel('Daily hour', fontsize = 20)
        plt.ylabel('Missing instances', fontsize = 15)
        plt.grid()
        plt.show()
    
    # CASE 1: Fille nan values
    if fill_nan_values:
        merged_df.loc[:, 'Cc 1 (A)'].fillna(method = 'ffill', inplace = True) 
        merged_df.loc[:, 'Vcc 1 (V)'].fillna(method = 'ffill',  inplace = True)
        merged_df.loc[:, 'Irradiance (W/mq)'].fillna(method = 'ffill', inplace = True)
        merged_df.loc[:, 'Amb. Temp (°C)'].fillna(method = 'ffill', inplace = True)
        print("--> Filled the (inverter) missing values.")
    
    # Drop observations that was not filled 
    print(f"--> Observations with missing values ({len(merged_df[merged_df.isnull().values])}) have been dropped.\n")
    merged_df.dropna(inplace = True)
    
    if len(merged_df) == 0:
        print("ISSUE: There is not any inverter data!")
        continue
        
    merged_df.info()

    # Assign the merged_df to its inverter
    inv_stringBoxes_data[inv_name] = merged_df


--------------------------------------------------------------------------------------------------------------
-------------------------------------------------- INV1 --------------------------------------------------
-------------------------------------------------------------------------------------------------------------- 

ALARM LOGS: 2411 obs.
INVERTER DATA: 8818 obs.
--> Selected columns (7):
	--> Cc 1 (A)
	--> Vcc 1 (V)
	--> Irradiance (W/mq)
	--> Amb. Temp (°C)
	--> Humidity (%)
	--> Atmospheric Pressure (hPa)
	--> Rainfall (mm)
--> Missing inverter data: 1991 (82.58 %)
--> Filled the (inverter) missing values.
--> Observations with missing values (5973) have been dropped.

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 420 entries, 2021-06-06 07:00:00 to 2021-06-30 22:00:00
Data columns (total 25 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Cc 1 (A)                    420 non-null    f

### Unpack the new space: create features for each pair of class within the features

In [33]:
for inv_name in inv_names:
    print("-" * 110 + "\n" + "-" * 50, inv_name, "-" * 50 + "\n" + "-" * 110, "\n")
    
    # Retrieve the dataset
    df = inv_stringBoxes_data[inv_name]
    
    # The different cases
    stringBox_cols = [col for col in df.columns if (col not in selected_inv_columns)] # and (col != class_col)
    freq_cols = [col for col in df.columns if 'faulty_strings' in col] 
    
    # Split the data
    partial_dfs = []
    
    # Inverter features
    partial_dfs.append(df[selected_inv_columns])
    
    # B) Split the string box data
    for col in stringBox_cols:
        if 'faulty_strings' in col:
            df_data = df[col].apply(lambda arr: arr[:, 0]).tolist()
        else:
            df_data = df[col].tolist()
            
        partial_dfs.append(pd.DataFrame(
            data = df_data, 
            columns = [f'{col}: {class_name}' for class_name in input_classes], 
            index = df.index
        ))
        
    # C) Build the merged dataframe
    merged_splitted_df = pd.concat(partial_dfs, axis = 1)
    
    # Save the splitted df
    inv_stringBoxes_data[inv_name] = merged_splitted_df
    print("\t\t\t\t\tColumns have been unpacked\n")

--------------------------------------------------------------------------------------------------------------
-------------------------------------------------- INV1 --------------------------------------------------
-------------------------------------------------------------------------------------------------------------- 

					Columns have been unpacked

--------------------------------------------------------------------------------------------------------------
-------------------------------------------------- INV2 --------------------------------------------------
-------------------------------------------------------------------------------------------------------------- 

					Columns have been unpacked

--------------------------------------------------------------------------------------------------------------
-------------------------------------------------- INV3 --------------------------------------------------
------------------------------------------------------

### Remove reduntant features (i.e., unnecessary pairs)

In [34]:
remove_redundant_features = True

In [35]:
redundant_pairs = {
    'Corrente di stringa fuori range': r"^QC\d$",
    'String-box con produzione anomala': r"QC\d_.*" 
}

In [36]:
if remove_redundant_features:
    for inv_name in inv_names:
        print("-" * 110 + "\n" + "-" * 50, inv_name, "-" * 50 + "\n" + "-" * 110, "\n")

        # Retrieve the dataset
        df = inv_stringBoxes_data[inv_name]
    
        cols_to_remove = df.columns.tolist()
        print("TOTAL COLUMNS:", len(cols_to_remove))
        
        # Remove the inverter data
        [cols_to_remove.remove(col_name) for col_name in cols_to_remove.copy() if col_name in selected_inv_columns]
        
        # Select only the unnecessary features (i.e., pairs_to_remove)
        for full_col_name in cols_to_remove.copy():
            component_prefix, alarm = [item.strip() for item in full_col_name.split(':')]

            if alarm in redundant_pairs.keys():
                regex_prefix_to_check = redundant_pairs[alarm]
  
                # Select only features matching the prefix to discard 
                if not match(regex_prefix_to_check, component_prefix):
                    cols_to_remove.remove(full_col_name)
            else:
                # Select only features that is considered as pairs (i.e., pairs_to_remove)
                cols_to_remove.remove(full_col_name)

        # Remove the columns 
        if len(cols_to_remove) > 0:
            print(f"--> Removed {len(cols_to_remove)} columns that were unnecessary/artefacts!")
            print('\t--> ' + '\n\t--> '.join(cols_to_remove))
            print(f"\nCURRENT FEATURES: from {len(df.columns)} to {len(df.columns) - len(cols_to_remove)}\n")
            df.drop(columns = cols_to_remove, inplace = True)
        else:
            print("There are no unnecessary columns!\n")

--------------------------------------------------------------------------------------------------------------
-------------------------------------------------- INV1 --------------------------------------------------
-------------------------------------------------------------------------------------------------------------- 

TOTAL COLUMNS: 25
--> Removed 6 columns that were unnecessary/artefacts!
	--> QC1: Corrente di stringa fuori range
	--> QC2: Corrente di stringa fuori range
	--> QC3: Corrente di stringa fuori range
	--> QC4: Corrente di stringa fuori range
	--> QC5: Corrente di stringa fuori range
	--> QC6: Corrente di stringa fuori range

CURRENT FEATURES: from 25 to 19

--------------------------------------------------------------------------------------------------------------
-------------------------------------------------- INV2 --------------------------------------------------
--------------------------------------------------------------------------------------------

### Fill the empty timestamps

In [37]:
inv_stringBoxes_data[inv_name].head(24)

Unnamed: 0,Cc 1 (A),Vcc 1 (V),Irradiance (W/mq),Amb. Temp (°C),Humidity (%),Atmospheric Pressure (hPa),Rainfall (mm),QC1_strings_time: Corrente di stringa fuori range,QC2_strings_time: Corrente di stringa fuori range,QC3_strings_time: Corrente di stringa fuori range,QC4_strings_time: Corrente di stringa fuori range,QC5_strings_time: Corrente di stringa fuori range,QC6_strings_time: Corrente di stringa fuori range,QC1_faulty_strings: Corrente di stringa fuori range,QC2_faulty_strings: Corrente di stringa fuori range,QC3_faulty_strings: Corrente di stringa fuori range,QC4_faulty_strings: Corrente di stringa fuori range,QC5_faulty_strings: Corrente di stringa fuori range,QC6_faulty_strings: Corrente di stringa fuori range
2021-06-06 07:00:00,28.0,404.0,114.0,22.9,60.62,1011.69,0.0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2021-06-06 08:00:00,91.0,391.0,252.0,25.67,55.02,1011.35,0.0,0,0,204,154,127,127,0.0,0.0,0.25,0.1667,0.1667,0.1667
2021-06-06 09:00:00,150.0,395.0,366.0,27.58,47.21,1011.26,0.0,0,0,141,128,147,145,0.0,0.0,0.1667,0.1667,0.1667,0.1667
2021-06-06 10:00:00,266.0,395.0,731.0,29.05,39.99,1011.19,0.0,0,0,0,120,102,106,0.0,0.0,0.0,0.1667,0.1667,0.1667
2021-06-06 11:00:00,354.0,386.0,899.0,30.97,33.61,1010.93,0.0,0,0,0,120,113,68,0.0,0.0,0.0,0.1667,0.0833,0.1667
2021-06-06 12:00:00,437.0,378.0,929.0,31.82,33.26,1010.82,0.0,0,0,0,120,8,31,0.0,0.0,0.0,0.1667,0.0833,0.0833
2021-06-06 13:00:00,436.0,371.0,777.0,30.84,32.01,1010.72,0.0,0,0,0,100,0,33,0.0,0.0,0.0,0.1667,0.0,0.0833
2021-06-06 14:00:00,370.0,376.0,710.0,29.58,34.05,1010.33,0.0,0,0,0,172,110,26,0.0,0.0,0.0,0.1667,0.1667,0.1667
2021-06-06 15:00:00,290.0,389.0,505.0,31.1,29.93,1009.93,0.0,0,0,0,120,66,76,0.0,0.0,0.0,0.1667,0.1667,0.1667
2021-06-06 16:00:00,338.0,390.0,751.0,31.26,29.11,1009.6,0.0,0,0,0,110,55,55,0.0,0.0,0.0,0.1667,0.0833,0.0833


In [38]:
fill_empty_timestamps = True

In [39]:
if fill_empty_timestamps:
    for inv_name in inv_names:  
        print("-" * 40, inv_name, "-" * 40)
        
        inv_stringBoxes_data[inv_name] = lstm_utils.fill_empty_ts(inv_stringBoxes_data[inv_name], default_value = 0)
        inv_stringBoxes_data[inv_name].info()

---------------------------------------- INV1 ----------------------------------------
Missing timestamps 172 out of 420
--> SO: The dataframe has been filled with 172 (40.95 %) missing timestamps.

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 592 entries, 2021-06-06 07:00:00 to 2021-06-30 22:00:00
Data columns (total 19 columns):
 #   Column                                               Non-Null Count  Dtype  
---  ------                                               --------------  -----  
 0   Cc 1 (A)                                             592 non-null    float64
 1   Vcc 1 (V)                                            592 non-null    float64
 2   Irradiance (W/mq)                                    592 non-null    float64
 3   Amb. Temp (°C)                                       592 non-null    float64
 4   Humidity (%)                                         592 non-null    float64
 5   Atmospheric Pressure (hPa)                           592 non-null    float64
 6 

### C.8) Standardize data

In [40]:
merged_invs_config = False

#### Read the parameters for standardizing the data

In [41]:
inv_scaler_parameters = dict()

In [42]:
for inv_name in inv_names:
    
    if merged_invs_config:
        file_name = 'All inverters' + '_stdScaler_generalizedApproch.txt'
    else:
        file_name = inv_name + '_stdScaler_generalizedApproch.txt'
        
    params_file_path = path.join(lstm_folder_path, 'Params', file_name)
    
    if path.exists(params_file_path):
        with open(params_file_path, 'r') as params_file:
            lines = params_file.readlines()

            for idk, line in enumerate(lines): 
                if 'standard scaler' in line.lower():
                    if merged_invs_config:
                        inv_name = inv_name
                    else:
                        inv_name = line.strip().split("]")[1].strip()[1:]
                       
                    feature_names = lines[idk + 2].strip().split(",")
                    mean_values = [float(value) for value in lines[idk + 4].strip().split(",")]
                    variance_values = [float(value) for value in lines[idk + 6].strip().split(",")]

                    inv_scaler_parameters[inv_name] = {'features': feature_names, 'mean_values': mean_values, 
                                                       'variance_values': variance_values}
        print(f'[{inv_name}] The parameters have been loaded')
    else:
        print(f"\n[{inv_name}] ISSUE: Information not available.\n")

[INV1] The parameters have been loaded
[INV2] The parameters have been loaded
[INV3] The parameters have been loaded
[INV4] The parameters have been loaded


In [43]:
for inv_name in inv_scaler_parameters.keys(): 
    
    # retrieve the inverter data
    standard_scaler = inv_scaler_parameters[inv_name] 

    # Standardize each feature (i.e., columns )
    for idk_feature, feature in enumerate(standard_scaler['features']):
        if feature in df.columns:
            
            # Retrieve the mean & variance of the feature
            mean = standard_scaler['mean_values'][idk_feature]
            variance = standard_scaler['variance_values'][idk_feature]

            # Apply the standard scaler [z = (x - mean) / std]
            stdScaler = lambda value: (value - mean)/np.sqrt(variance) if np.sqrt(variance) != 0 else 0
            inv_stringBoxes_data[inv_name].loc[:, feature] = inv_stringBoxes_data[inv_name][feature].apply(stdScaler)
        else:
            print("ISSUE! The feature {feature} is not present in the dataset!")
    print(f"\n[{inv_name}] The data has been standardized ({len(standard_scaler['features'])} columns out of {len(df.columns)})")
    print("--> Skipped columns: \n\t" + '\n\t'.join([col for col in df.columns if col not in standard_scaler['features']]))


[INV1] The data has been standardized (13 columns out of 19)
--> Skipped columns: 
	QC1_faulty_strings: Corrente di stringa fuori range
	QC2_faulty_strings: Corrente di stringa fuori range
	QC3_faulty_strings: Corrente di stringa fuori range
	QC4_faulty_strings: Corrente di stringa fuori range
	QC5_faulty_strings: Corrente di stringa fuori range
	QC6_faulty_strings: Corrente di stringa fuori range

[INV2] The data has been standardized (13 columns out of 19)
--> Skipped columns: 
	QC1_faulty_strings: Corrente di stringa fuori range
	QC2_faulty_strings: Corrente di stringa fuori range
	QC3_faulty_strings: Corrente di stringa fuori range
	QC4_faulty_strings: Corrente di stringa fuori range
	QC5_faulty_strings: Corrente di stringa fuori range
	QC6_faulty_strings: Corrente di stringa fuori range

[INV3] The data has been standardized (13 columns out of 19)
--> Skipped columns: 
	QC1_faulty_strings: Corrente di stringa fuori range
	QC2_faulty_strings: Corrente di stringa fuori range
	QC3_f

### End data preparation

In [44]:
for inv_name in inv_names:
    print("\n" + "-" * 110 + "\n" + "-" * 49, "Data:", inv_name, "-" * 49 + "\n" + "-" * 110, "\n")
    inv_stringBoxes_data[inv_name].info()
    display(inv_stringBoxes_data[inv_name])
    # INV1: QC5: String-box con produzione anomala || QC5_strings_time: Corrente di stringa fuori range


--------------------------------------------------------------------------------------------------------------
------------------------------------------------- Data: INV1 -------------------------------------------------
-------------------------------------------------------------------------------------------------------------- 

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 592 entries, 2021-06-06 07:00:00 to 2021-06-30 22:00:00
Data columns (total 19 columns):
 #   Column                                               Non-Null Count  Dtype  
---  ------                                               --------------  -----  
 0   Cc 1 (A)                                             592 non-null    float64
 1   Vcc 1 (V)                                            592 non-null    float64
 2   Irradiance (W/mq)                                    592 non-null    float64
 3   Amb. Temp (°C)                                       592 non-null    float64
 4   Humidity (%)             

Unnamed: 0,Cc 1 (A),Vcc 1 (V),Irradiance (W/mq),Amb. Temp (°C),Humidity (%),Atmospheric Pressure (hPa),Rainfall (mm),QC1_strings_time: Corrente di stringa fuori range,QC2_strings_time: Corrente di stringa fuori range,QC3_strings_time: Corrente di stringa fuori range,QC4_strings_time: Corrente di stringa fuori range,QC5_strings_time: Corrente di stringa fuori range,QC6_strings_time: Corrente di stringa fuori range,QC1_faulty_strings: Corrente di stringa fuori range,QC2_faulty_strings: Corrente di stringa fuori range,QC3_faulty_strings: Corrente di stringa fuori range,QC4_faulty_strings: Corrente di stringa fuori range,QC5_faulty_strings: Corrente di stringa fuori range,QC6_faulty_strings: Corrente di stringa fuori range
2021-06-06 07:00:00,-0.547631,1.097973,-0.412181,1.013493,0.622575,0.767572,-0.096031,-0.435042,0,-0.121029,-0.374062,-0.437216,-0.369733,0.0000,0.0,0.0,0.0000,0.0000,0.0000
2021-06-06 08:00:00,-0.008712,1.068110,0.165718,1.266172,0.459094,0.766875,-0.096031,2.926606,0,-0.121029,0.956181,3.028603,4.992720,0.5556,0.0,0.0,0.1667,0.6667,0.4167
2021-06-06 09:00:00,0.426568,1.048202,0.568183,1.440402,0.231097,0.766690,-0.096031,2.373423,0,-0.121029,1.160834,3.094150,1.262318,0.5556,0.0,0.0,0.1667,0.4167,0.0833
2021-06-06 10:00:00,1.221128,1.033270,1.823738,1.574496,0.020323,0.766546,-0.096031,2.118108,0,-0.121029,0.526410,2.135519,0.718301,0.5556,0.0,0.0,0.1667,0.3333,0.0833
2021-06-06 11:00:00,1.856775,0.963589,2.394758,1.749638,-0.165928,0.766013,-0.096031,2.118108,0,-0.121029,0.147803,0.947473,0.465721,0.5556,0.0,0.0,0.0833,0.2500,0.0833
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-06-30 18:00:00,0.067289,0.928749,0.065961,2.117254,-0.194537,0.756697,-0.096031,-0.435042,0,-0.121029,-0.374062,-0.437216,-0.369733,0.0000,0.0,0.0,0.0000,0.0000,0.0000
2021-06-30 19:00:00,-0.250534,0.963589,-0.222988,1.923868,0.153443,0.757498,-0.096031,-0.435042,0,-0.121029,-0.374062,-0.437216,-0.369733,0.0000,0.0,0.0,0.0000,0.0000,0.0000
2021-06-30 20:00:00,-0.582177,0.973544,-0.515378,1.579057,1.000332,0.757662,-0.096031,-0.435042,0,-0.121029,-0.374062,-0.437216,-0.369733,0.0000,0.0,0.0,0.0000,0.0000,0.0000
2021-06-30 21:00:00,-0.637450,0.097558,-0.639213,1.448612,1.134036,0.757518,-0.096031,-0.435042,0,-0.121029,-0.374062,-0.437216,-0.369733,0.0000,0.0,0.0,0.0000,0.0000,0.0000



--------------------------------------------------------------------------------------------------------------
------------------------------------------------- Data: INV2 -------------------------------------------------
-------------------------------------------------------------------------------------------------------------- 

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 592 entries, 2021-06-06 07:00:00 to 2021-06-30 22:00:00
Data columns (total 19 columns):
 #   Column                                               Non-Null Count  Dtype  
---  ------                                               --------------  -----  
 0   Cc 1 (A)                                             592 non-null    float64
 1   Vcc 1 (V)                                            592 non-null    float64
 2   Irradiance (W/mq)                                    592 non-null    float64
 3   Amb. Temp (°C)                                       592 non-null    float64
 4   Humidity (%)             

Unnamed: 0,Cc 1 (A),Vcc 1 (V),Irradiance (W/mq),Amb. Temp (°C),Humidity (%),Atmospheric Pressure (hPa),Rainfall (mm),QC1_strings_time: Corrente di stringa fuori range,QC2_strings_time: Corrente di stringa fuori range,QC3_strings_time: Corrente di stringa fuori range,QC4_strings_time: Corrente di stringa fuori range,QC5_strings_time: Corrente di stringa fuori range,QC6_strings_time: Corrente di stringa fuori range,QC1_faulty_strings: Corrente di stringa fuori range,QC2_faulty_strings: Corrente di stringa fuori range,QC3_faulty_strings: Corrente di stringa fuori range,QC4_faulty_strings: Corrente di stringa fuori range,QC5_faulty_strings: Corrente di stringa fuori range,QC6_faulty_strings: Corrente di stringa fuori range
2021-06-06 07:00:00,-0.457623,1.244466,-0.280913,1.160454,0.796736,0.949650,-0.08429,-0.255138,-0.207951,-0.210433,-0.23621,-0.186174,-0.116392,0.0,0.0,0.0,0.0,0.0,0.0
2021-06-06 08:00:00,0.125238,1.214463,0.310916,1.409233,0.634977,0.948976,-0.08429,-0.255138,-0.207951,-0.210433,-0.23621,-0.186174,-0.116392,0.0,0.0,0.0,0.0,0.0,0.0
2021-06-06 09:00:00,0.591527,1.179459,0.723082,1.580773,0.409380,0.948797,-0.08429,-0.255138,-0.207951,-0.210433,-0.23621,-0.186174,-0.116392,0.0,0.0,0.0,0.0,0.0,0.0
2021-06-06 10:00:00,1.465818,1.129453,2.008901,1.712797,0.200827,0.948658,-0.08429,-0.255138,-0.207951,-0.210433,-0.23621,-0.186174,-0.116392,0.0,0.0,0.0,0.0,0.0,0.0
2021-06-06 11:00:00,2.172538,1.039442,2.593684,1.885235,0.016537,0.948143,-0.08429,-0.255138,-0.207951,-0.210433,-0.23621,-0.186174,-0.116392,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-06-30 18:00:00,0.205381,1.064445,0.208755,2.247177,-0.011771,0.939140,-0.08429,-0.255138,-0.207951,-0.210433,-0.23621,-0.186174,-0.116392,0.0,0.0,0.0,0.0,0.0,0.0
2021-06-30 19:00:00,-0.137050,1.089448,-0.073068,2.056776,0.332545,0.939913,-0.08429,-0.255138,-0.207951,-0.210433,-0.23621,-0.186174,-0.116392,0.0,0.0,0.0,0.0,0.0,0.0
2021-06-30 20:00:00,-0.508624,1.169458,-0.386596,1.717287,1.170516,0.940072,-0.08429,-0.255138,-0.207951,-0.210433,-0.23621,-0.186174,-0.116392,0.0,0.0,0.0,0.0,0.0,0.0
2021-06-30 21:00:00,-0.566910,0.294354,-0.513417,1.588856,1.302811,0.939933,-0.08429,-0.255138,-0.207951,-0.210433,-0.23621,-0.186174,-0.116392,0.0,0.0,0.0,0.0,0.0,0.0



--------------------------------------------------------------------------------------------------------------
------------------------------------------------- Data: INV3 -------------------------------------------------
-------------------------------------------------------------------------------------------------------------- 

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 592 entries, 2021-06-06 07:00:00 to 2021-06-30 22:00:00
Data columns (total 19 columns):
 #   Column                                               Non-Null Count  Dtype  
---  ------                                               --------------  -----  
 0   Cc 1 (A)                                             592 non-null    float64
 1   Vcc 1 (V)                                            592 non-null    float64
 2   Irradiance (W/mq)                                    592 non-null    float64
 3   Amb. Temp (°C)                                       592 non-null    float64
 4   Humidity (%)             

Unnamed: 0,Cc 1 (A),Vcc 1 (V),Irradiance (W/mq),Amb. Temp (°C),Humidity (%),Atmospheric Pressure (hPa),Rainfall (mm),QC1_strings_time: Corrente di stringa fuori range,QC2_strings_time: Corrente di stringa fuori range,QC3_strings_time: Corrente di stringa fuori range,QC4_strings_time: Corrente di stringa fuori range,QC5_strings_time: Corrente di stringa fuori range,QC6_strings_time: Corrente di stringa fuori range,QC1_faulty_strings: Corrente di stringa fuori range,QC2_faulty_strings: Corrente di stringa fuori range,QC3_faulty_strings: Corrente di stringa fuori range,QC4_faulty_strings: Corrente di stringa fuori range,QC5_faulty_strings: Corrente di stringa fuori range,QC6_faulty_strings: Corrente di stringa fuori range
2021-06-06 07:00:00,-0.508202,1.318491,-0.299405,1.111904,0.733822,0.889401,-0.087673,-0.207126,-0.113618,0,-0.213682,-0.202009,-0.245317,0.0,0.0,0.0,0.0000,0.0000,0.4167
2021-06-06 08:00:00,0.083171,1.159481,0.295457,1.361386,0.572390,0.888723,-0.087673,-0.207126,-0.113618,0,2.186950,8.819533,3.307742,0.0,0.0,0.0,0.0833,0.1667,0.5000
2021-06-06 09:00:00,0.498540,1.174869,0.692032,1.533412,0.347250,0.888544,-0.087673,-0.207126,-0.113618,0,3.771367,5.011739,3.582463,0.0,0.0,0.0,0.0833,0.0833,0.5000
2021-06-06 10:00:00,1.343358,1.144093,1.961768,1.665809,0.139119,0.888404,-0.087673,-0.207126,-0.113618,0,2.667076,3.019970,2.941447,0.0,0.0,0.0,0.0833,0.0833,0.4167
2021-06-06 11:00:00,2.005132,1.067153,2.546194,1.838736,-0.044799,0.887885,-0.087673,-0.207126,-0.113618,0,2.667076,-0.202009,2.941447,0.0,0.0,0.0,0.0833,0.0000,0.4167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-06-30 18:00:00,0.132452,1.067153,0.184138,2.201701,-0.073049,0.878830,-0.087673,-0.207126,-0.113618,0,-0.213682,-0.202009,1.787619,0.0,0.0,0.0,0.0000,0.0000,0.5000
2021-06-30 19:00:00,-0.212516,1.082541,-0.094160,2.010761,0.270570,0.879608,-0.087673,-0.207126,-0.113618,0,-0.213682,-0.202009,3.490890,0.0,0.0,0.0,0.0000,0.0000,0.5000
2021-06-30 20:00:00,-0.536363,1.128705,-0.403767,1.670312,1.106846,0.879768,-0.087673,-0.207126,-0.113618,0,-0.213682,-0.202009,3.490890,0.0,0.0,0.0,0.0000,0.0000,0.5000
2021-06-30 21:00:00,-0.592684,0.302882,-0.529001,1.541518,1.238874,0.879628,-0.087673,-0.207126,-0.113618,0,-0.213682,-0.202009,3.490890,0.0,0.0,0.0,0.0000,0.0000,0.5000



--------------------------------------------------------------------------------------------------------------
------------------------------------------------- Data: INV4 -------------------------------------------------
-------------------------------------------------------------------------------------------------------------- 

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 348 entries, 2021-06-06 07:00:00 to 2021-06-20 18:00:00
Data columns (total 19 columns):
 #   Column                                               Non-Null Count  Dtype  
---  ------                                               --------------  -----  
 0   Cc 1 (A)                                             348 non-null    float64
 1   Vcc 1 (V)                                            348 non-null    float64
 2   Irradiance (W/mq)                                    348 non-null    float64
 3   Amb. Temp (°C)                                       348 non-null    float64
 4   Humidity (%)             

Unnamed: 0,Cc 1 (A),Vcc 1 (V),Irradiance (W/mq),Amb. Temp (°C),Humidity (%),Atmospheric Pressure (hPa),Rainfall (mm),QC1_strings_time: Corrente di stringa fuori range,QC2_strings_time: Corrente di stringa fuori range,QC3_strings_time: Corrente di stringa fuori range,QC4_strings_time: Corrente di stringa fuori range,QC5_strings_time: Corrente di stringa fuori range,QC6_strings_time: Corrente di stringa fuori range,QC1_faulty_strings: Corrente di stringa fuori range,QC2_faulty_strings: Corrente di stringa fuori range,QC3_faulty_strings: Corrente di stringa fuori range,QC4_faulty_strings: Corrente di stringa fuori range,QC5_faulty_strings: Corrente di stringa fuori range,QC6_faulty_strings: Corrente di stringa fuori range
2021-06-06 07:00:00,-0.392724,1.181921,-0.235491,1.144198,0.695338,0.866092,-0.090443,-0.223758,0,-0.301603,-0.371439,-0.371673,-0.383361,0.0,0.0,0.0000,0.0000,0.0000,0.0000
2021-06-06 08:00:00,0.069536,1.112834,0.259810,1.400928,0.534955,0.865411,-0.090443,-0.223758,0,4.377121,1.631380,3.470594,2.529762,0.0,0.0,0.2500,0.1667,0.1667,0.1667
2021-06-06 09:00:00,0.502446,1.134091,0.668973,1.577952,0.311277,0.865231,-0.090443,-0.223758,0,2.932221,1.293242,4.075675,2.942646,0.0,0.0,0.1667,0.1667,0.1667,0.1667
2021-06-06 10:00:00,1.353592,1.134091,1.979010,1.714195,0.104497,0.865091,-0.090443,-0.223758,0,-0.301603,1.189199,2.714242,2.048065,0.0,0.0,0.0000,0.1667,0.1667,0.1667
2021-06-06 11:00:00,1.999288,1.086262,2.581986,1.892145,-0.078226,0.864571,-0.090443,-0.223758,0,-0.301603,1.189199,3.047037,1.176421,0.0,0.0,0.0000,0.1667,0.0833,0.1667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-06-20 14:00:00,-0.598173,-0.965101,-0.644654,-0.978229,-1.040814,-1.158791,-0.090443,-0.223758,0,-0.301603,-0.371439,-0.371673,-0.383361,0.0,0.0,0.0000,0.0000,0.0000,0.0000
2021-06-20 15:00:00,-0.598173,-0.965101,-0.644654,-0.978229,-1.040814,-1.158791,-0.090443,-0.223758,0,-0.301603,-0.371439,-0.371673,-0.383361,0.0,0.0,0.0000,0.0000,0.0000,0.0000
2021-06-20 16:00:00,-0.598173,-0.965101,-0.644654,-0.978229,-1.040814,-1.158791,-0.090443,-0.223758,0,-0.301603,-0.371439,-0.371673,-0.383361,0.0,0.0,0.0000,0.0000,0.0000,0.0000
2021-06-20 17:00:00,1.060093,1.128777,1.081724,2.095119,0.052945,0.855884,-0.090443,-0.223758,0,0.019486,-0.033301,1.806620,1.268173,0.0,0.0,0.1667,0.1667,0.1667,0.1667


# D) Load the trained model

## Configurations to load

### Select which type of configurations to load
- Best configurations for each inverter ('avg_config': False)
- Average configurations among the inverters  ('avg_config': True)
- Configurations trained using the data of all the inverters ('merged_invs_config':True)

In [45]:
avg_config = False

### Configurations

In [46]:
if system_name == SYSTEM_NAMES[2]:
    if avg_config:
        config_to_load = {inv_name : {'num_neurons': 64, 'window_length' : 72} for inv_name in inv_names}
    elif merged_invs_config:
         config_to_load = {'All inverters': {'num_neurons': 128, 'window_length': 72}}
    else:
        config_to_load = {
            'INV1': {
                'num_neurons': 92, 
                'window_length': 12
            },
            'INV2': {
                'num_neurons': 32, 
                'window_length':216
            },
            'INV4': {
                'num_neurons': 192, 
                'window_length': 264
            }
        }  
elif system_name == SYSTEM_NAMES[3]:
    if avg_config:
        config_to_load = {inv_name : {'num_neurons': 256, 'window_length' : 120} for inv_name in inv_names}
    elif merged_invs_config:
         config_to_load = {'All inverters': {'num_neurons': 64, 'window_length': 48}}
    else:
        config_to_load = {
            'INV1': {
                'num_neurons': 32, 
                'window_length': 168, 
            },
            'INV2': {
                'num_neurons': 256, 
                'window_length':120,
            }
        }
elif system_name == SYSTEM_NAMES[4]:
    if avg_config:
        config_to_load = {inv_name : {'num_neurons': 256, 'window_length' : 72} for inv_name in inv_names}
    elif merged_invs_config:
         config_to_load = {'All inverters': {'num_neurons': 256, 'window_length': 168}}
    else:
        config_to_load = {
            'INV1': {
                'num_neurons': 128, 
                'window_length':72,
            },
            'INV2': {
                'num_neurons': 128, 
                'window_length':96,
            },
            'INV3': {
                'num_neurons':92, 
                'window_length':216,
            },
            'INV4': {
                'num_neurons':92, 
                'window_length':72,
            }
    }
print("-" * 12, f"PV System: {system_name}", "-" * 12)
print("--> "+ '\n--> '.join([f"{inv_name}: {config['num_neurons']} starting neurons with {config['window_length']} hours" 
                             for inv_name, config in config_to_load.items()]))

------------ PV System: Galatina ------------
--> INV1: 128 starting neurons with 72 hours
--> INV2: 128 starting neurons with 96 hours
--> INV3: 92 starting neurons with 216 hours
--> INV4: 92 starting neurons with 72 hours


## D.2) Load the trained models for each inverter

In [47]:
folder_name = "Trained models - Generalized Version"

In [48]:
trained_inv_model = dict()
for inv_name in inv_names:
    print("-" * 110 + "\n" + "-" * 50, inv_name, "-" * 50 + "\n" + "-" * 110, "\n")

    # Build up the file name
    if merged_invs_config:
        model_name = list(config_to_load.keys())[0]
    else:
        if inv_name in config_to_load.keys():
            model_name = inv_name
        else:
            print(f"\t\t\t[{inv_name}] ISSUE: A trained model is not available.\n")
            continue
    trained_model_folder_name =  f'{model_name}_trained_model'
    
    trained_model_folder_name += "_" + str(config_to_load[model_name]["num_neurons"]) + "N"
    trained_model_folder_name += "_" + str(config_to_load[model_name]["window_length"]) + "H"
    
    # Load the model
    trained_model_folder = path.join(lstm_folder_path, folder_name, trained_model_folder_name)
    if path.exists(trained_model_folder):
        loaded_inv_model = load_model(trained_model_folder)
        loaded_inv_model.summary()

        # Save the loaded model for each inverter
        trained_inv_model[inv_name] = loaded_inv_model
        
        print(f"\n\t\t\t\tThe trained LSTM model (i.e., {model_name}) has been loaded.\n")
    else: 
        print(f"\t\t\t[{inv_name}] ISSUE: A trained model is not available.\n")

--------------------------------------------------------------------------------------------------------------
-------------------------------------------------- INV1 --------------------------------------------------
-------------------------------------------------------------------------------------------------------------- 

Model: "lstm"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_in (LSTM)               (None, 72, 128)           75776     
_________________________________________________________________
lstm_inner1 (LSTM)           (None, 72, 64)            49408     
_________________________________________________________________
lstm_out (LSTM)              (None, 32)                12416     
_________________________________________________________________
output_layer (Dense)         (None, 4)                 132       
Total params: 137,732
Trainable params: 137,732
Non-trainable

# E) Predicted high-priority alarms

In [49]:
last_k_hours = 12

## Detect the high-priority alarms by using the trained model

In [50]:
verbose = True

In [51]:
inv_predicted_classes = dict()
for inv_name in trained_inv_model.keys():
    print("-" * 110 + "\n" + "-" * 50, inv_name, "-" * 50 + "\n" + "-" * 110, "\n")
    
    # Retrieve the temporal window
    model_name = list(config_to_load.keys())[0] if merged_invs_config else inv_name
    model_temporal_window = config_to_load[model_name]['window_length']
    
    # Retrieve the dataset
    df = inv_stringBoxes_data[inv_name]
    print(f"DATA AVAILABLE FROM '{df.index[0].strftime('%Y-%m-%d (%H:%M)')}' TO '{df.index[-1].strftime('%Y-%m-%d (%H:%M)')}' "\
          f"|| {df.index[-1] - df.index[0]}")
    
    # Compute the minimum period -- 
    minimum_starting_ts = df.index[-last_k_hours] - pd.Timedelta(model_temporal_window - 1 , unit = 'hours')
    
    if df.index[0] > minimum_starting_ts:
        print("\n\t\t ISSUE: Data is not enough! Please increase the temporal window")
        print(f"Minimum timestamp required: {minimum_starting_ts} || Given: {df.index[0]}")
        continue
    else:
        df = df.loc[minimum_starting_ts:, :]
        #inv_stringBoxes_data[inv_name] = df

    print(f"--> Temporal window required for the machine-learning model: {model_temporal_window} hours "\
          f"({model_temporal_window//24} days)")
    print(f"--> Period selected: FROM '{df.index[0].strftime('%Y-%m-%d (%H:%M)')}' TO '{df.index[-1].strftime('%Y-%m-%d (%H:%M)')}'"\
          f"|| {df.index[-1] - df.index[0]}")
    
    # Retrieve the trained model
    model = trained_inv_model[inv_name]

    # Prepare the input data 
    input_data, timestamps = lstm_utils.generate_data_sequences_prod_version(df, model_temporal_window, verbose = False)

    if input_data.shape[0] <= 0:
        print("\n\t\t ISSUE: Data is not enough! Please increase the temporal window")
        continue

    # Get the output of the model
    predictions = model.predict(input_data, batch_size = 16, verbose = 0)
   
    if verbose:
        print("\t--> INPUT DATA:", input_data.shape)
        np.set_printoptions(suppress = True)
        print(f"\t--> RAW MODEL OUTPUT:", predictions.shape)
        display(pd.DataFrame(predictions, columns = output_classes, index = timestamps))
                 
    # Turn the probabilities into binary classes
    predicted_classes = np.where(predictions <= 0.5, 0, 1)
    
    # Build the dataframe
    inv_predicted_classes[inv_name] = pd.DataFrame(data = predicted_classes, columns = output_classes, index = timestamps)
    print(f"\n\t\t\t\t[{inv_name}] OK, predicted alarms have been computed.\n")

--------------------------------------------------------------------------------------------------------------
-------------------------------------------------- INV1 --------------------------------------------------
-------------------------------------------------------------------------------------------------------------- 

DATA AVAILABLE FROM '2021-06-06 (07:00)' TO '2021-06-30 (22:00)' || 24 days 15:00:00
--> Temporal window required for the machine-learning model: 72 hours (3 days)
--> Period selected: FROM '2021-06-27 (12:00)' TO '2021-06-30 (22:00)'|| 3 days 10:00:00
	--> INPUT DATA: (12, 72, 19)
	--> RAW MODEL OUTPUT: (12, 4)


Unnamed: 0,Allarme fusibile su polo negativo,Allarme fusibile su polo positivo,Isolamento,String-box con corrente a 0
2021-06-30 11:00:00,9.1e-05,7.2e-05,7.3e-05,0.008469
2021-06-30 12:00:00,9.5e-05,7.3e-05,7.6e-05,0.004681
2021-06-30 13:00:00,0.0001,7.6e-05,8e-05,0.002718
2021-06-30 14:00:00,0.000107,8e-05,8.4e-05,0.001526
2021-06-30 15:00:00,0.000108,8.5e-05,8.7e-05,0.002169
2021-06-30 16:00:00,0.000108,8.8e-05,8.8e-05,0.003318
2021-06-30 17:00:00,0.000108,8.9e-05,8.9e-05,0.003869
2021-06-30 18:00:00,0.00011,9e-05,8.9e-05,0.003484
2021-06-30 19:00:00,0.000111,9.1e-05,8.9e-05,0.003451
2021-06-30 20:00:00,0.000113,9.2e-05,9e-05,0.003872



				[INV1] OK, predicted alarms have been computed.

--------------------------------------------------------------------------------------------------------------
-------------------------------------------------- INV2 --------------------------------------------------
-------------------------------------------------------------------------------------------------------------- 

DATA AVAILABLE FROM '2021-06-06 (07:00)' TO '2021-06-30 (22:00)' || 24 days 15:00:00
--> Temporal window required for the machine-learning model: 96 hours (4 days)
--> Period selected: FROM '2021-06-26 (12:00)' TO '2021-06-30 (22:00)'|| 4 days 10:00:00
	--> INPUT DATA: (12, 96, 19)
	--> RAW MODEL OUTPUT: (12, 4)


Unnamed: 0,Allarme fusibile su polo negativo,Allarme fusibile su polo positivo,Isolamento,String-box con corrente a 0
2021-06-30 11:00:00,6.4e-05,6.1e-05,2.8e-05,0.002916
2021-06-30 12:00:00,6.6e-05,6.2e-05,2.8e-05,0.002537
2021-06-30 13:00:00,6.7e-05,6.3e-05,2.9e-05,0.002454
2021-06-30 14:00:00,6.8e-05,6.4e-05,3e-05,0.002529
2021-06-30 15:00:00,6.9e-05,6.4e-05,3e-05,0.002776
2021-06-30 16:00:00,6.9e-05,6.5e-05,3.1e-05,0.003079
2021-06-30 17:00:00,6.9e-05,6.5e-05,3.1e-05,0.003314
2021-06-30 18:00:00,6.9e-05,6.4e-05,3.1e-05,0.003433
2021-06-30 19:00:00,6.8e-05,6.3e-05,3e-05,0.003666
2021-06-30 20:00:00,6.7e-05,6.3e-05,3e-05,0.004104



				[INV2] OK, predicted alarms have been computed.

--------------------------------------------------------------------------------------------------------------
-------------------------------------------------- INV3 --------------------------------------------------
-------------------------------------------------------------------------------------------------------------- 

DATA AVAILABLE FROM '2021-06-06 (07:00)' TO '2021-06-30 (22:00)' || 24 days 15:00:00
--> Temporal window required for the machine-learning model: 216 hours (9 days)
--> Period selected: FROM '2021-06-21 (12:00)' TO '2021-06-30 (22:00)'|| 9 days 10:00:00
	--> INPUT DATA: (12, 216, 19)
	--> RAW MODEL OUTPUT: (12, 4)


Unnamed: 0,Allarme fusibile su polo negativo,Allarme fusibile su polo positivo,Isolamento,String-box con corrente a 0
2021-06-30 11:00:00,0.000413,0.002763,0.003881,0.771756
2021-06-30 12:00:00,0.000421,0.003431,0.003978,0.873263
2021-06-30 13:00:00,0.000397,0.003741,0.003875,0.894196
2021-06-30 14:00:00,0.000372,0.004064,0.003711,0.915194
2021-06-30 15:00:00,0.000346,0.004176,0.00353,0.919237
2021-06-30 16:00:00,0.000318,0.004257,0.003329,0.917443
2021-06-30 17:00:00,0.000295,0.0042,0.003157,0.908416
2021-06-30 18:00:00,0.000288,0.004482,0.00305,0.925627
2021-06-30 19:00:00,0.00027,0.004254,0.002934,0.90617
2021-06-30 20:00:00,0.000255,0.004003,0.00283,0.880016



				[INV3] OK, predicted alarms have been computed.

--------------------------------------------------------------------------------------------------------------
-------------------------------------------------- INV4 --------------------------------------------------
-------------------------------------------------------------------------------------------------------------- 

DATA AVAILABLE FROM '2021-06-06 (07:00)' TO '2021-06-20 (18:00)' || 14 days 11:00:00
--> Temporal window required for the machine-learning model: 72 hours (3 days)
--> Period selected: FROM '2021-06-17 (08:00)' TO '2021-06-20 (18:00)'|| 3 days 10:00:00
	--> INPUT DATA: (12, 72, 19)
	--> RAW MODEL OUTPUT: (12, 4)


Unnamed: 0,Allarme fusibile su polo negativo,Allarme fusibile su polo positivo,Isolamento,String-box con corrente a 0
2021-06-20 07:00:00,0.0001,9.7e-05,5.8e-05,0.000504
2021-06-20 08:00:00,0.00011,9.4e-05,5.6e-05,0.00025
2021-06-20 09:00:00,0.000114,9.4e-05,5.6e-05,0.000194
2021-06-20 10:00:00,0.000116,9.4e-05,5.6e-05,0.000176
2021-06-20 11:00:00,0.000117,9.4e-05,5.6e-05,0.000169
2021-06-20 12:00:00,0.000117,9.4e-05,5.5e-05,0.000166
2021-06-20 13:00:00,0.000117,9.5e-05,5.5e-05,0.000165
2021-06-20 14:00:00,0.000118,9.5e-05,5.6e-05,0.000167
2021-06-20 15:00:00,0.000117,9.5e-05,5.6e-05,0.000168
2021-06-20 16:00:00,0.000117,9.6e-05,5.6e-05,0.000169



				[INV4] OK, predicted alarms have been computed.



## F) Normalize the predictions

In [52]:
for inv_name in inv_predicted_classes.keys():
    print("-" * 110 + "\n" + "-" * 45, system_name.upper() + ":", inv_name, "-" * 49 + "\n" + "-" * 110)
    
    # Retrieve the predictions
    predicted_classes_df = inv_predicted_classes[inv_name]
    
    if len(predicted_classes_df) > 4:
        print("\n" + "-" * 70 + "\n" + "-" * 19, f"Overview alarms (last {len(predicted_classes_df)} hours)", "-" * 18 + "\n" + "-" * 70)
        total_alarms = predicted_classes_df.sum(axis = 0).to_frame().rename(columns = {0: 'Period counter'})
        display(total_alarms)
        print("\n" + "-" * 70 + "\n" + "-" * 17, f"Hourly timestamps (last {len(predicted_classes_df)} hours)", "-" * 18 + "\n" + "-" * 70)
    display(predicted_classes_df)

--------------------------------------------------------------------------------------------------------------
--------------------------------------------- GALATINA: INV1 -------------------------------------------------
--------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------------
------------------- Overview alarms (last 12 hours) ------------------
----------------------------------------------------------------------


Unnamed: 0,Period counter
Allarme fusibile su polo negativo,0
Allarme fusibile su polo positivo,0
Isolamento,0
String-box con corrente a 0,0



----------------------------------------------------------------------
----------------- Hourly timestamps (last 12 hours) ------------------
----------------------------------------------------------------------


Unnamed: 0,Allarme fusibile su polo negativo,Allarme fusibile su polo positivo,Isolamento,String-box con corrente a 0
2021-06-30 11:00:00,0,0,0,0
2021-06-30 12:00:00,0,0,0,0
2021-06-30 13:00:00,0,0,0,0
2021-06-30 14:00:00,0,0,0,0
2021-06-30 15:00:00,0,0,0,0
2021-06-30 16:00:00,0,0,0,0
2021-06-30 17:00:00,0,0,0,0
2021-06-30 18:00:00,0,0,0,0
2021-06-30 19:00:00,0,0,0,0
2021-06-30 20:00:00,0,0,0,0


--------------------------------------------------------------------------------------------------------------
--------------------------------------------- GALATINA: INV2 -------------------------------------------------
--------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------------
------------------- Overview alarms (last 12 hours) ------------------
----------------------------------------------------------------------


Unnamed: 0,Period counter
Allarme fusibile su polo negativo,0
Allarme fusibile su polo positivo,0
Isolamento,0
String-box con corrente a 0,0



----------------------------------------------------------------------
----------------- Hourly timestamps (last 12 hours) ------------------
----------------------------------------------------------------------


Unnamed: 0,Allarme fusibile su polo negativo,Allarme fusibile su polo positivo,Isolamento,String-box con corrente a 0
2021-06-30 11:00:00,0,0,0,0
2021-06-30 12:00:00,0,0,0,0
2021-06-30 13:00:00,0,0,0,0
2021-06-30 14:00:00,0,0,0,0
2021-06-30 15:00:00,0,0,0,0
2021-06-30 16:00:00,0,0,0,0
2021-06-30 17:00:00,0,0,0,0
2021-06-30 18:00:00,0,0,0,0
2021-06-30 19:00:00,0,0,0,0
2021-06-30 20:00:00,0,0,0,0


--------------------------------------------------------------------------------------------------------------
--------------------------------------------- GALATINA: INV3 -------------------------------------------------
--------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------------
------------------- Overview alarms (last 12 hours) ------------------
----------------------------------------------------------------------


Unnamed: 0,Period counter
Allarme fusibile su polo negativo,0
Allarme fusibile su polo positivo,0
Isolamento,0
String-box con corrente a 0,12



----------------------------------------------------------------------
----------------- Hourly timestamps (last 12 hours) ------------------
----------------------------------------------------------------------


Unnamed: 0,Allarme fusibile su polo negativo,Allarme fusibile su polo positivo,Isolamento,String-box con corrente a 0
2021-06-30 11:00:00,0,0,0,1
2021-06-30 12:00:00,0,0,0,1
2021-06-30 13:00:00,0,0,0,1
2021-06-30 14:00:00,0,0,0,1
2021-06-30 15:00:00,0,0,0,1
2021-06-30 16:00:00,0,0,0,1
2021-06-30 17:00:00,0,0,0,1
2021-06-30 18:00:00,0,0,0,1
2021-06-30 19:00:00,0,0,0,1
2021-06-30 20:00:00,0,0,0,1


--------------------------------------------------------------------------------------------------------------
--------------------------------------------- GALATINA: INV4 -------------------------------------------------
--------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------------
------------------- Overview alarms (last 12 hours) ------------------
----------------------------------------------------------------------


Unnamed: 0,Period counter
Allarme fusibile su polo negativo,0
Allarme fusibile su polo positivo,0
Isolamento,0
String-box con corrente a 0,1



----------------------------------------------------------------------
----------------- Hourly timestamps (last 12 hours) ------------------
----------------------------------------------------------------------


Unnamed: 0,Allarme fusibile su polo negativo,Allarme fusibile su polo positivo,Isolamento,String-box con corrente a 0
2021-06-20 07:00:00,0,0,0,0
2021-06-20 08:00:00,0,0,0,0
2021-06-20 09:00:00,0,0,0,0
2021-06-20 10:00:00,0,0,0,0
2021-06-20 11:00:00,0,0,0,0
2021-06-20 12:00:00,0,0,0,0
2021-06-20 13:00:00,0,0,0,0
2021-06-20 14:00:00,0,0,0,0
2021-06-20 15:00:00,0,0,0,0
2021-06-20 16:00:00,0,0,0,0
