Notebook corresponding to the "Approach-3" presented in the paper.

This is the same approach used in the ["Tinyml anomaly detection for industrial machines with periodic duty cycles" (Sensor Application Symposium 2024)](https://ieeexplore.ieee.org/abstract/document/10636584/), and serves as the baseline experiment.

Two experiments are carried on:
1) As in the SAS2024, the performance is evaluated in leave-one-month-out CV in the original 4 months (called DS1).
2) The generalization is evaluated using the whole DS1 for training and the whole DS2 for testing.

This approach uses two classifiers, one for the internal-states and another one for the duty-cycles.
Therefore, each experiment is divided in four part because some classifier have been trained with different seed initializers. The four combinations are:

 (1) Internal-state classifier with seed -> Duty-cycle classifier with seed.

 (2) Internal-state classifier without seed -> Duty-cycle classifier with seed.

 (3) Internal-state classifier with seed -> Duty-cycle classifier without seed.

 (4) Internal-state classifier without seed -> Duty-cycle classifier without seed.


 The predicted state labels are loaded from the files generated in the approach-2. This avoid train again the same classifiers.

In [1]:
from custom_functions import *

import datetime
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

# import data, extract feature and preprocessing

input data

In [2]:
directory="../../../data/"
#first 4 months of data (DS1)
data_csv_jun21 = read_month_data(directory+'Confidential_Drive_data_Jun2021.csv',1)
data_csv_okt21 = read_month_data(directory+'Confidential_Drive_data_Okt2021.csv',1)
data_csv_jan22 = read_month_data(directory+'Confidential_Drive_data_Jan2022.csv',1)
data_csv_april22 = read_month_data(directory+'Confidential_Drive_data_April2022.csv',1)
#new 4 months (DS2)
data_csv_jun23 = read_month_data(directory+'Confidential_Drive_data_June2023_Drift20.csv')
data_csv_aug23 = read_month_data(directory+'Confidential_Drive_data_Aug2023_Drift20.csv')
data_csv_okt23 = read_month_data(directory+'Confidential_Drive_data_Oct2023_Drift20.csv')
data_csv_dec23 = read_month_data(directory+'Confidential_Drive_data_Dec2023_Drift20.csv')

#re-order the column name to be consistent with the previous csv files
desired_order=["High-pressure","Low-pressure","Speed"]
data_csv_jun23=data_csv_jun23[desired_order]
data_csv_aug23=data_csv_aug23[desired_order]
data_csv_okt23=data_csv_okt23[desired_order]
data_csv_dec23=data_csv_dec23[desired_order]

#These data has duplicated entries
data_csv_okt23 = data_csv_okt23[~data_csv_okt23.index.duplicated(keep='first')]

# round to zero speed less than zero
data_csv_jun21.loc[data_csv_jun21['Speed'] < 0 , 'Speed'] = 0
data_csv_okt21.loc[data_csv_okt21['Speed'] < 0 , 'Speed'] = 0
data_csv_jan22.loc[data_csv_jan22['Speed'] < 0 , 'Speed'] = 0
data_csv_april22.loc[data_csv_april22['Speed'] < 0 , 'Speed'] = 0
data_csv_jun23.loc[data_csv_jun23['Speed'] < 0 , 'Speed'] = 0
data_csv_aug23.loc[data_csv_aug23['Speed'] < 0 , 'Speed'] = 0
data_csv_okt23.loc[data_csv_okt23['Speed'] < 0 , 'Speed'] = 0
data_csv_dec23.loc[data_csv_dec23['Speed'] < 0 , 'Speed'] = 0


# complete the dataset with missing values
full_timestamp = pd.date_range(start = data_csv_jun21.index[0], end = data_csv_jun21.index[-1],inclusive="both",freq="1min" )
data_csv_jun21 = data_csv_jun21.reindex(full_timestamp)

full_timestamp = pd.date_range(start = data_csv_okt21.index[0], end = data_csv_okt21.index[-1],inclusive="both",freq="1min" )
data_csv_okt21 = data_csv_okt21.reindex(full_timestamp)

full_timestamp = pd.date_range(start = data_csv_jan22.index[0], end = data_csv_jan22.index[-1],inclusive="both",freq="1min" )
data_csv_jan22 = data_csv_jan22.reindex(full_timestamp)

full_timestamp = pd.date_range(start = data_csv_april22.index[0], end = data_csv_april22.index[-1],inclusive="both",freq="1min" )
data_csv_april22 = data_csv_april22.reindex(full_timestamp)

full_timestamp = pd.date_range(start = data_csv_jun23.index[0], end = data_csv_jun23.index[-1],inclusive="both",freq="1min" )
data_csv_jun23 = data_csv_jun23.reindex(full_timestamp)

full_timestamp = pd.date_range(start = data_csv_aug23.index[0], end = data_csv_aug23.index[-1],inclusive="both",freq="1min" )
data_csv_aug23 = data_csv_aug23.reindex(full_timestamp)

full_timestamp = pd.date_range(start = data_csv_okt23.index[0], end = data_csv_okt23.index[-1],inclusive="both",freq="1min" )
data_csv_okt23 = data_csv_okt23.reindex(full_timestamp)

full_timestamp = pd.date_range(start = data_csv_dec23.index[0], end = data_csv_dec23.index[-1],inclusive="both",freq="1min" )
data_csv_dec23 = data_csv_dec23.reindex(full_timestamp)



#use linear interpolation for the NaN missing values
interpolate_values(data_csv_jun21)
interpolate_values(data_csv_okt21)
interpolate_values(data_csv_jan22)
interpolate_values(data_csv_april22)
interpolate_values(data_csv_jun23)
interpolate_values(data_csv_aug23)
interpolate_values(data_csv_okt23)
interpolate_values(data_csv_dec23)

del desired_order, directory, full_timestamp

compute features

In [3]:
list_data_csv = [data_csv_jun21,data_csv_okt21,data_csv_jan22,data_csv_april22,data_csv_jun23,data_csv_aug23,data_csv_okt23,data_csv_dec23]
for data in list_data_csv:
    extract_features(data)

ground truth reference

In [4]:
#read files from imagimob
directory="../../data/"
column_interest=['Time(Seconds)' , 'Length(Seconds)',"Label(string)"]

#read labels of states
file_imagimob_1 = pd.read_csv(directory+"April_2022/Label.label",usecols=column_interest)
file_imagimob_2 = pd.read_csv(directory+"Jan_2022/Label.label",usecols=column_interest)
file_imagimob_3 = pd.read_csv(directory+"Jun_2021/Label.label",usecols=column_interest)
file_imagimob_4 = pd.read_csv(directory+"Okt_2021/Label.label",usecols=column_interest)

timestamps_april2022 = df_timestamps(file_imagimob_1)
timestamps_jan2022 = df_timestamps(file_imagimob_2)
timestamps_jun2021 = df_timestamps(file_imagimob_3)
timestamps_okt2021 = df_timestamps(file_imagimob_4)

#read labels of duty-cycle
file_imagimob_1 = pd.read_csv(directory+"April_2022/Label_cycle.label",usecols=column_interest)
file_imagimob_2 = pd.read_csv(directory+"Jan_2022/Label_cycle.label",usecols=column_interest)
file_imagimob_3 = pd.read_csv(directory+"Jun_2021/Label_cycle.label",usecols=column_interest)
file_imagimob_4 = pd.read_csv(directory+"Okt_2021/Label_cycle.label",usecols=column_interest)

timestamps_cycle_april2022 = df_timestamps(file_imagimob_1)
timestamps_cycle_jan2022 = df_timestamps(file_imagimob_2)
timestamps_cycle_jun2021 = df_timestamps(file_imagimob_3)
timestamps_cycle_okt2021 = df_timestamps(file_imagimob_4)

In [5]:
#generate vector with the labels of reference (states)
downsampled_freq='1T'
true_label_april22 = ndarray_labels(datetime.datetime(2022, 4, 1),datetime.datetime(2022, 5, 1),timestamps_april2022,downsampled_freq)
true_label_jan22 = ndarray_labels(datetime.datetime(2021, 12, 21),datetime.datetime(2022, 1, 21),timestamps_jan2022,downsampled_freq)
true_label_jun21 = ndarray_labels(datetime.datetime(2021, 6, 1),datetime.datetime(2021, 7, 1),timestamps_jun2021,downsampled_freq)
true_label_okt21 = ndarray_labels(datetime.datetime(2021, 10, 1),datetime.datetime(2021, 11, 1),timestamps_okt2021,downsampled_freq)

#generate vector with the labels of reference (duty-cycle)
true_label_cycle_april22 = ndarray_labels(datetime.datetime(2022, 4, 1),datetime.datetime(2022, 5, 1),timestamps_cycle_april2022,downsampled_freq)
true_label_cycle_jan22 = ndarray_labels(datetime.datetime(2021, 12, 21),datetime.datetime(2022, 1, 21),timestamps_cycle_jan2022,downsampled_freq)
true_label_cycle_jun21 = ndarray_labels(datetime.datetime(2021, 6, 1),datetime.datetime(2021, 7, 1),timestamps_cycle_jun2021,downsampled_freq)
true_label_cycle_okt21 = ndarray_labels(datetime.datetime(2021, 10, 1),datetime.datetime(2021, 11, 1),timestamps_cycle_okt2021,downsampled_freq)

true_label_cycle_april22 = np.where(true_label_cycle_april22 == None, 'No_cycle', true_label_cycle_april22)
true_label_cycle_jan22 = np.where(true_label_cycle_jan22 == None, 'No_cycle', true_label_cycle_jan22)
true_label_cycle_jun21 = np.where(true_label_cycle_jun21 == None, 'No_cycle', true_label_cycle_jun21)
true_label_cycle_okt21 = np.where(true_label_cycle_okt21 == None, 'No_cycle', true_label_cycle_okt21)

imput ground-truth duty-cycle labels

In [6]:
#read files from imagimob
directory="../../data/"
#read labels of duty-cycle
labels_jun21 = import_cycle_labels(directory+"Jun_2021/Label_cycle.label")
labels_okt21 = import_cycle_labels(directory+"Okt_2021/Label_cycle.label")
labels_jan22 = import_cycle_labels(directory+"Jan_2022/Label_cycle.label")
labels_april22 = import_cycle_labels(directory+"April_2022/Label_cycle.label")
labels_jun23 = import_cycle_labels(directory+"June_23/Label_cycle.label")
labels_aug23 = import_cycle_labels(directory+"Aug_23/Label_cycle.label")
labels_okt23 = import_cycle_labels(directory+"Okt_23/Label_cycle.label")
labels_dec23 = import_cycle_labels(directory+"Dec_23/Label_cycle.label")

for data in [labels_jun23,labels_aug23,labels_okt23,labels_dec23]:
    replace_labels_cycles(data)

 Data preparation and pre-processing

In [7]:
true_label_jun21 [true_label_jun21=='E']='B'
true_label_okt21 [true_label_okt21=='E']='B'
true_label_jan22 [true_label_jan22=='E']='B'
true_label_april22 [true_label_april22=='E']='B'

data_DS1=[data_csv_jun21, data_csv_okt21,data_csv_jan22,data_csv_april22]
data_DS2=[data_csv_jun23, data_csv_aug23,data_csv_okt23,data_csv_dec23]

true_state_labels_DS1=[true_label_jun21, true_label_okt21,true_label_jan22,true_label_april22]
timestamps_cycle_DS1 = [timestamps_cycle_jun2021,timestamps_cycle_okt2021,timestamps_cycle_jan2022,timestamps_cycle_april2022]

df_testset_DS2= pd.concat(data_DS2)

file_name_states_DS1= ["jun2021_state.txt" ,"okt2021_state.txt","jan2022_state.txt","april2022_state.txt"]
file_name_cycles_DS1= ["jun2021_cycle.txt" ,"okt2021_cycle.txt","jan2022_cycle.txt","april2022_cycle.txt"]

scaler = MinMaxScaler()

dir_exp1 = "./results/approach3/DS1/"
dir_exp2 = "./results/approach3/DS2/"

flag_save_results=True

delete not requires variables

In [8]:
del timestamps_april2022, timestamps_jan2022, timestamps_jun2021, timestamps_okt2021
del file_imagimob_1,file_imagimob_2,file_imagimob_3,file_imagimob_4, column_interest, directory

In [9]:
#define multiples variables
delta = pd.Timedelta(minutes=3)

# Encode the non-consecutive labels and the labels for the classifier
le_cycle = LabelEncoder()
le_cycle.fit(['Normal','Abnormal'])
le_state = LabelEncoder()
le_state.fit(['Z','A','B','C','D','None'])

scaler_cycle = MinMaxScaler()

dir_exp1_approach2 = "./results/approach2/DS1/"
dir_exp2_approach2 = "./results/approach2/DS2/"
dir_exp1_approach3 = "./results/approach3/DS1/"
dir_exp2_approach3 = "./results/approach3/DS2/"

flag_save_results=True

# Experiment 1
Train/test on DS1 using leave-one-month CV

In [None]:
def leave_one_month_out_data_split(i,data_DS1,true_state_labels_DS1,delta):
    dfs_ref_state= pd.concat([data for j, data in enumerate(data_DS1) if j != i])
    dfs_ref_state["ref_label"]= np.concatenate([data for j, data in enumerate(true_state_labels_DS1) if j != i])

    dfs_ref_cycle = pd.concat([timestamps for j, timestamps in enumerate(timestamps_cycle_DS1) if j != i])

    train_sequences = find_non_consecutive_sequences(dfs_ref_state, dfs_ref_cycle,delta,column_name="ref_label")

    # # Determine the maximum sequence length for padding and add some extra value
    max_sequence_length = max(len(seq['non_consecutive_labels']) for seq in train_sequences)+3

    # # Encode the sequences for training
    train_features = [le_state.transform(seq['non_consecutive_labels']) for seq in train_sequences]
    X_res = pad_sequences(train_features, max_sequence_length)
    y_res = le_cycle.transform([seq['label'] for seq in train_sequences])

    unique_values, counts = np.unique(y_res, return_counts=True)
    value_counts = dict(zip(unique_values, counts))
    value_porcentages = dict(zip(unique_values, counts/sum(counts)*100))
    print("Value class-counts in Balanced dataset:",value_counts)
    print("Value class-porcentage in Balanced dataset:",value_porcentages)

    x_test=data_DS1[i].copy()
    x_test['detected_cycles']=x_test["Speed_order3"].apply(lambda x: detect_cycle(x))
    x_test["ref_state_label"]=true_state_labels_DS1[i]
    
    return X_res, y_res, x_test ,max_sequence_length

Since the results path depend on if the classifier use or not the seed parameters, there are 4 combination of classifiers: "classifiers_state" w/ and w/o seed and "classifier_cycle" w/ and w/o seed.

1) classifier_cycle with seed - classifier_state with seed

In [None]:
seeds=list(range(0,10))
classifiers=["rf","dt","xtree","mlp"]
for seed_state in seeds:
    for classifier_state in classifiers:
        folder_path = dir_exp1_approach2+classifier_state+"/"+str(seed_state)+"/"
        aux= df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[0],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_jun21 = ndarray_labels(data_DS1[0].index[0],data_DS1[0].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[1],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_okt21 = ndarray_labels(data_DS1[1].index[0],data_DS1[1].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[2],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_jan22 = ndarray_labels(data_DS1[2].index[0],data_DS1[2].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[3],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_april22 = ndarray_labels(data_DS1[3].index[0],data_DS1[3].index[-1],aux,downsampled_freq)
        recog_state_labels_DS1=[recog_label_jun21, recog_label_okt21,recog_label_jan22,recog_label_april22]                        

        for i in range(len(data_DS1)):
            x_res, y_res,x_test,max_sequence_length = leave_one_month_out_data_split(i,data_DS1,recog_state_labels_DS1,delta)

            for classifier_cycle in classifiers:
                for seed_cycle in seeds:
                    clf_duty = train_cycle_classifier(classifier_cycle,x_res, y_res,seed_cycle)
                    x_test["recognized_label"] = recog_state_labels_DS1[i]

                    detection_x_test = boundaries_cycles(x_test)

                    data_sequences = find_non_consecutive_sequences(x_test, detection_x_test,delta,column_name="recognized_label")
                    test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in data_sequences]
                    test_features_padded = pad_sequences(test_features, max_sequence_length)

                    #test
                    y_pred = clf_duty.predict(test_features_padded)

                    # save the results in a file
                    df_temp= pd.DataFrame()
                    df_temp["start"]=detection_x_test["start"]
                    df_temp["end"]=detection_x_test["end"]
                    df_temp["label"]=le_cycle.inverse_transform(y_pred)

                    if flag_save_results:
                        folder_path = dir_exp1_approach3+classifier_cycle+"/"+str(seed_cycle)+"/"+classifier_state+"/"+str(seed_state)+"/"
                        os.makedirs(folder_path, exist_ok=True)

                        create_reference_label_file(folder_path+file_name_cycles_DS1[i],df_temp)

Value class-counts in Balanced dataset: {0: 85, 1: 390}
Value class-porcentage in Unbalanced dataset: {0: 17.894736842105264, 1: 82.10526315789474}
Fitting 5 folds for each of 20 candidates, totalling 100 fits
File saved: ./results/approach3/DS1/rf/0/rf/0/jun2021_cycle.txt
Fitting 5 folds for each of 20 candidates, totalling 100 fits
File saved: ./results/approach3/DS1/rf/1/rf/0/jun2021_cycle.txt
Fitting 5 folds for each of 20 candidates, totalling 100 fits


KeyboardInterrupt: 

2) classifier_cycle with seed - classifier_state without seed

In [None]:
seeds=list(range(0,10))
classifiers=["rf","dt","xtree","mlp"]
classifiers_cycle=["rf","dt","xtree","mlp"]
classifiers_state=["xgboost","nb"]


for classifier_state in classifiers_state:
    folder_path = dir_exp1_approach2+classifier_state+"/"
    aux= df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[0],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_jun21 = ndarray_labels(data_DS1[0].index[0],data_DS1[0].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[1],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_okt21 = ndarray_labels(data_DS1[1].index[0],data_DS1[1].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[2],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_jan22 = ndarray_labels(data_DS1[2].index[0],data_DS1[2].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[3],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_april22 = ndarray_labels(data_DS1[3].index[0],data_DS1[3].index[-1],aux,downsampled_freq)
    recog_state_labels_DS1=[recog_label_jun21, recog_label_okt21,recog_label_jan22,recog_label_april22]                        

    for i in range(len(data_DS1)):
        x_res, y_res,x_test,max_sequence_length = leave_one_month_out_data_split(i,data_DS1,recog_state_labels_DS1,delta)
        
        for seed_cycle in seeds:
            for classifier_cycle in classifiers_cycle:
                clf_duty = train_cycle_classifier(classifier_cycle,x_res, y_res,seed_cycle)
                x_test["recognized_label"] = recog_state_labels_DS1[i]

                detection_x_test = boundaries_cycles(x_test)

                data_sequences = find_non_consecutive_sequences(x_test, detection_x_test,delta,column_name="recognized_label")
                test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in data_sequences]
                test_features_padded = pad_sequences(test_features, max_sequence_length)

                #test
                y_pred = clf_duty.predict(test_features_padded)

                # save the results in a file
                df_temp= pd.DataFrame()
                df_temp["start"]=detection_x_test["start"]
                df_temp["end"]=detection_x_test["end"]
                df_temp["label"]=le_cycle.inverse_transform(y_pred)

                if flag_save_results:
                    folder_path = dir_exp1_approach3+classifier_cycle+"/"+str(seed_cycle)+"/"+classifier_state+"/"
                    os.makedirs(folder_path, exist_ok=True)

                    create_reference_label_file(folder_path+file_name_cycles_DS1[i],df_temp)

Value class-counts in Balanced dataset: {0: 85, 1: 390}
Value class-porcentage in Unbalanced dataset: {0: 17.894736842105264, 1: 82.10526315789474}
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best parameters: {'max_depth': 4, 'n_estimators': 25}
Best score: 0.9169770739412535
Fitting 5 folds for each of 144 candidates, totalling 720 fits
Best parameters: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'min_samples_split': 20}
Best score: 0.9150701237623882
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Best parameters: {'max_depth': 4, 'n_estimators': 10}
Best score: 0.9127658341141345
Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best parameters: {'hidden_layer_sizes': (4,), 'learning_rate_init': 0.001}
Best score: 0.901636286812599
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best parameters: {'max_depth': 6, 'n_estimators': 10}
Best score: 0.9101235078110488
Fitting 5 folds for each of 144 candidates, 

3) classifier_cycle without seed - classifier_state with seed

In [None]:
seeds=list(range(0,10))
classifiers=["rf","dt","xtree","mlp"]
classifiers_cycle=["xgboost","nb"]
classifiers_state=["rf","dt","xtree","mlp"]

for seed in seeds:
    for classifier_state in classifiers_state:
        folder_path = dir_exp1_approach2+classifier_state+"/"+str(seed)+"/"
        aux= df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[0],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_jun21 = ndarray_labels(data_DS1[0].index[0],data_DS1[0].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[1],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_okt21 = ndarray_labels(data_DS1[1].index[0],data_DS1[1].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[2],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_jan22 = ndarray_labels(data_DS1[2].index[0],data_DS1[2].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[3],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_april22 = ndarray_labels(data_DS1[3].index[0],data_DS1[3].index[-1],aux,downsampled_freq)
        recog_state_labels_DS1=[recog_label_jun21, recog_label_okt21,recog_label_jan22,recog_label_april22]                        

        for i in range(len(data_DS1)):
            x_res, y_res,x_test,max_sequence_length = leave_one_month_out_data_split(i,data_DS1,recog_state_labels_DS1,delta)

            for classifier_cycle in classifiers_cycle:
                clf_duty = train_cycle_classifier(classifier_cycle,x_res, y_res)
                x_test["recognized_label"] = recog_state_labels_DS1[i]

                detection_x_test = boundaries_cycles(x_test)

                data_sequences = find_non_consecutive_sequences(x_test, detection_x_test,delta,column_name="recognized_label")
                test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in data_sequences]
                test_features_padded = pad_sequences(test_features, max_sequence_length)

                #test
                y_pred = clf_duty.predict(test_features_padded)

                # save the results in a file
                df_temp= pd.DataFrame()
                df_temp["start"]=detection_x_test["start"]
                df_temp["end"]=detection_x_test["end"]
                df_temp["label"]=le_cycle.inverse_transform(y_pred)

                if flag_save_results:
                    folder_path = dir_exp1_approach3+classifier_cycle+"/"+classifier_state+"/"+str(seed)+"/"
                    os.makedirs(folder_path, exist_ok=True)

                    create_reference_label_file(folder_path+file_name_cycles_DS1[i],df_temp)

Value class-counts in Balanced dataset: {0: 85, 1: 390}
Value class-porcentage in Unbalanced dataset: {0: 17.894736842105264, 1: 82.10526315789474}
Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best parameters: {'learning_rate': 0.2, 'max_depth': 4, 'n_estimators': 10}
Best score: 0.9178172186203375
Value class-counts in Balanced dataset: {0: 89, 1: 344}
Value class-porcentage in Unbalanced dataset: {0: 20.554272517321014, 1: 79.44572748267899}
Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best parameters: {'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 25}
Best score: 0.9078864314223812
Value class-counts in Balanced dataset: {0: 62, 1: 359}
Value class-porcentage in Unbalanced dataset: {0: 14.726840855106888, 1: 85.2731591448931}
Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best parameters: {'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 25}
Best score: 0.9335313496027782
Value class-counts in Balanced dataset: {0: 85, 1:

4) classifier_cycle without seed - classifier_state without seed

In [None]:
seeds=list(range(0,10))
classifiers=["xgboost","nb"]

for classifier_state in classifiers:
    folder_path = dir_exp1_approach2+classifier_state+"/"
    aux= df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[0],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_jun21 = ndarray_labels(data_DS1[0].index[0],data_DS1[0].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[1],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_okt21 = ndarray_labels(data_DS1[1].index[0],data_DS1[1].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[2],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_jan22 = ndarray_labels(data_DS1[2].index[0],data_DS1[2].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[3],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_april22 = ndarray_labels(data_DS1[3].index[0],data_DS1[3].index[-1],aux,downsampled_freq)
    recog_state_labels_DS1=[recog_label_jun21, recog_label_okt21,recog_label_jan22,recog_label_april22]                        

    for i in range(len(data_DS1)):
        x_res, y_res,x_test,max_sequence_length = leave_one_month_out_data_split(i,data_DS1,recog_state_labels_DS1,delta)

        for classifier_cycle in classifiers:
            clf_duty = train_cycle_classifier(classifier_cycle,x_res, y_res)
            x_test["recognized_label"] = recog_state_labels_DS1[i]

            detection_x_test = boundaries_cycles(x_test)

            data_sequences = find_non_consecutive_sequences(x_test, detection_x_test,delta,column_name="recognized_label")
            test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in data_sequences]
            test_features_padded = pad_sequences(test_features, max_sequence_length)

            #test
            y_pred = clf_duty.predict(test_features_padded)

            # save the results in a file
            df_temp= pd.DataFrame()
            df_temp["start"]=detection_x_test["start"]
            df_temp["end"]=detection_x_test["end"]
            df_temp["label"]=le_cycle.inverse_transform(y_pred)

            if flag_save_results:
                folder_path = dir_exp1_approach3+classifier_cycle+"/"+classifier_state+"/"
                os.makedirs(folder_path, exist_ok=True)

                create_reference_label_file(folder_path+file_name_cycles_DS1[i],df_temp)

Value class-counts in Balanced dataset: {0: 85, 1: 390}
Value class-porcentage in Unbalanced dataset: {0: 17.894736842105264, 1: 82.10526315789474}
Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best parameters: {'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 50}
Best score: 0.9069918262862959
Value class-counts in Balanced dataset: {0: 89, 1: 344}
Value class-porcentage in Unbalanced dataset: {0: 20.554272517321014, 1: 79.44572748267899}
Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best parameters: {'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 25}
Best score: 0.8944149409273192
Value class-counts in Balanced dataset: {0: 62, 1: 359}
Value class-porcentage in Unbalanced dataset: {0: 14.726840855106888, 1: 85.2731591448931}
Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best parameters: {'learning_rate': 0.2, 'max_depth': 4, 'n_estimators': 50}
Best score: 0.9172062132199207
Value class-counts in Balanced dataset: {0: 85, 1:

## performance

In [None]:
df_results_exp1 = pd.DataFrame(columns=['cycle_classifier', 'state_classifier', 
                                        'Abnormal mean F1-score','Abnormal mean Precision','Abnormal mean Recall',
                                        'Normal mean F1-score','Normal mean Precision','Normal mean Recall',
                                        'Overall mean F1-score','Overall mean Precision','Overall mean Recall',
                                        'Abnormal std F1-score','Abnormal std Precision','Abnormal std Recall',
                                        'Normal std F1-score','Normal std Precision','Normal std Recall',
                                        'Overall std F1-score','Overall std Precision','Overall std Recall'])

seeds=list(range(0,10))
classifiers=["xgboost","rf","dt","nb","xtree","mlp"]
classifiers_w_seed=["rf","dt","xtree","mlp"]
classifiers_wo_seed=["xgboost","nb"]
dir = './results/reference_cycle_labels/'
reference_path = dir
collar = 202.75


# 1) classifier_cycle with seed - classifier_state with seed
for classifier_cycle in classifiers_w_seed:
    for classifier_state in classifiers_w_seed:
        detection_files_abnormal, f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal = [],[],[],[]
        detection_files_normal, f1score_files_normal, precision_files_normal, recall_files_normal = [],[],[],[]
        f1score_files_overall, precision_files_overall, recall_files_overall = [],[],[]
        for seed_cycle in seeds:
            for seed_state in seeds:
                result_path = dir_exp1_approach3+classifier_cycle+"/"+str(seed_cycle)+"/"+classifier_state+"/"+str(seed_state)+"/"
                f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal, f1score_normal, precision_normal, recall_normal = compute_classification_sedeval(reference_path,result_path,collar)
                f1score_files_overall.append(f1score_file)
                precision_files_overall.append(precision_file)
                recall_files_overall.append(recall_file)
                f1score_files_abnormal.append(f1score_abnormal)
                precision_files_abnormal.append(precision_abnormal)
                recall_files_abnormal.append(recall_abnormal)
                f1score_files_normal.append(f1score_normal)
                precision_files_normal.append(precision_normal)
                recall_files_normal.append(recall_normal)

        dflocal = pd.DataFrame({'cycle_classifier':classifier_cycle, 'state_classifier':classifier_state, 
                            'Abnormal mean F1-score':np.mean(f1score_files_abnormal)*100,
                            'Abnormal mean Precision':np.mean(precision_files_abnormal)*100,
                            'Abnormal mean Recall':np.mean(recall_files_abnormal)*100,
                            'Normal mean F1-score':np.mean(f1score_files_normal)*100,
                            'Normal mean Precision':np.mean(precision_files_normal)*100,
                            'Normal mean Recall':np.mean(recall_files_normal)*100,
                            'Overall mean F1-score':np.mean(f1score_files_overall)*100,
                            'Overall mean Precision':np.mean(precision_files_overall)*100,
                            'Overall mean Recall':np.mean(recall_files_overall)*100,
                            'Abnormal std F1-score':np.std(f1score_files_abnormal)*100,
                            'Abnormal std Precision':np.std(precision_files_abnormal)*100,
                            'Abnormal std Recall':np.std(recall_files_abnormal)*100,
                            'Normal std F1-score':np.std(f1score_files_normal)*100,
                            'Normal std Precision':np.std(precision_files_normal)*100,
                            'Normal std Recall':np.std(recall_files_normal)*100,
                            'Overall std F1-score':np.std(f1score_files_overall)*100,
                            'Overall std Precision':np.std(precision_files_overall)*100,
                            'Overall std Recall':np.std(recall_files_overall)*100},index=[0])
        
        df_results_exp1 = pd.concat([df_results_exp1, dflocal], ignore_index=True)

# 2) classifier_cycle with seed - classifier_state without seed
for classifier_cycle in classifiers_w_seed:
    for classifier_state in classifiers_wo_seed:
        detection_files_abnormal, f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal = [],[],[],[]
        detection_files_normal, f1score_files_normal, precision_files_normal, recall_files_normal = [],[],[],[]
        f1score_files_overall, precision_files_overall, recall_files_overall = [],[],[]
        for seed_cycle in seeds:
            result_path = dir_exp1_approach3+classifier_cycle+"/"+str(seed_cycle)+"/"+classifier_state+"/"
            f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal, f1score_normal, precision_normal, recall_normal = compute_classification_sedeval(reference_path,result_path,collar)
            f1score_files_overall.append(f1score_file)
            precision_files_overall.append(precision_file)
            recall_files_overall.append(recall_file)
            f1score_files_abnormal.append(f1score_abnormal)
            precision_files_abnormal.append(precision_abnormal)
            recall_files_abnormal.append(recall_abnormal)
            f1score_files_normal.append(f1score_normal)
            precision_files_normal.append(precision_normal)
            recall_files_normal.append(recall_normal)

        dflocal = pd.DataFrame({'cycle_classifier':classifier_cycle, 'state_classifier':classifier_state, 
                            'Abnormal mean F1-score':np.mean(f1score_files_abnormal)*100,
                            'Abnormal mean Precision':np.mean(precision_files_abnormal)*100,
                            'Abnormal mean Recall':np.mean(recall_files_abnormal)*100,
                            'Normal mean F1-score':np.mean(f1score_files_normal)*100,
                            'Normal mean Precision':np.mean(precision_files_normal)*100,
                            'Normal mean Recall':np.mean(recall_files_normal)*100,
                            'Overall mean F1-score':np.mean(f1score_files_overall)*100,
                            'Overall mean Precision':np.mean(precision_files_overall)*100,
                            'Overall mean Recall':np.mean(recall_files_overall)*100,
                            'Abnormal std F1-score':np.std(f1score_files_abnormal)*100,
                            'Abnormal std Precision':np.std(precision_files_abnormal)*100,
                            'Abnormal std Recall':np.std(recall_files_abnormal)*100,
                            'Normal std F1-score':np.std(f1score_files_normal)*100,
                            'Normal std Precision':np.std(precision_files_normal)*100,
                            'Normal std Recall':np.std(recall_files_normal)*100,
                            'Overall std F1-score':np.std(f1score_files_overall)*100,
                            'Overall std Precision':np.std(precision_files_overall)*100,
                            'Overall std Recall':np.std(recall_files_overall)*100},index=[0])
        
        df_results_exp1 = pd.concat([df_results_exp1, dflocal], ignore_index=True)

# 3) classifier_cycle without seed - classifier_state with seed
for classifier_cycle in classifiers_wo_seed:
    for classifier_state in classifiers_w_seed:
        detection_files_abnormal, f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal = [],[],[],[]
        f1score_files_overall, precision_files_overall, recall_files_overall = [],[],[]
        for seed_state in seeds:
            result_path = dir_exp1_approach3+classifier_cycle+"/"+classifier_state+"/"+str(seed_state)+"/"
            f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal, f1score_normal, precision_normal, recall_normal = compute_classification_sedeval(reference_path,result_path,collar)
            f1score_files_overall.append(f1score_file)
            precision_files_overall.append(precision_file)
            recall_files_overall.append(recall_file)
            f1score_files_abnormal.append(f1score_abnormal)
            precision_files_abnormal.append(precision_abnormal)
            recall_files_abnormal.append(recall_abnormal)
            f1score_files_normal.append(f1score_normal)
            precision_files_normal.append(precision_normal)
            recall_files_normal.append(recall_normal)

        dflocal = pd.DataFrame({'cycle_classifier':classifier_cycle, 'state_classifier':classifier_state, 
                            'Abnormal mean F1-score':np.mean(f1score_files_abnormal)*100,
                            'Abnormal mean Precision':np.mean(precision_files_abnormal)*100,
                            'Abnormal mean Recall':np.mean(recall_files_abnormal)*100,
                            'Normal mean F1-score':np.mean(f1score_files_normal)*100,
                            'Normal mean Precision':np.mean(precision_files_normal)*100,
                            'Normal mean Recall':np.mean(recall_files_normal)*100,
                            'Overall mean F1-score':np.mean(f1score_files_overall)*100,
                            'Overall mean Precision':np.mean(precision_files_overall)*100,
                            'Overall mean Recall':np.mean(recall_files_overall)*100,
                            'Abnormal std F1-score':np.std(f1score_files_abnormal)*100,
                            'Abnormal std Precision':np.std(precision_files_abnormal)*100,
                            'Abnormal std Recall':np.std(recall_files_abnormal)*100,
                            'Normal std F1-score':np.std(f1score_files_normal)*100,
                            'Normal std Precision':np.std(precision_files_normal)*100,
                            'Normal std Recall':np.std(recall_files_normal)*100,
                            'Overall std F1-score':np.std(f1score_files_overall)*100,
                            'Overall std Precision':np.std(precision_files_overall)*100,
                            'Overall std Recall':np.std(recall_files_overall)*100},index=[0])
        
        df_results_exp1 = pd.concat([df_results_exp1, dflocal], ignore_index=True)

# 4) classifier_cycle without seed - classifier_state without seed
for classifier_cycle in classifiers_wo_seed:
    for classifier_state in classifiers_wo_seed:
        result_path = dir_exp1_approach3+classifier_cycle+"/"+classifier_state+"/"
        f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal, f1score_normal, precision_normal, recall_normal = compute_classification_sedeval(reference_path,result_path,collar)
        dflocal = pd.DataFrame({'cycle_classifier':classifier_cycle, 'state_classifier':classifier_state, 
                            'Abnormal mean F1-score':f1score_abnormal*100,
                                'Abnormal mean Precision':precision_abnormal*100,
                                'Abnormal mean Recall':recall_abnormal*100,
                                'Normal mean F1-score':(f1score_normal)*100,
                                'Normal mean Precision':(precision_normal)*100,
                                'Normal mean Recall':(recall_normal)*100,
                                'Overall mean F1-score':f1score_file*100,
                                'Overall mean Precision':precision_file*100,
                                'Overall mean Recall':recall_file*100},index=[0])
        
        df_results_exp1 = pd.concat([df_results_exp1, dflocal], ignore_index=True)

df_results_exp1.to_csv(dir_exp1_approach3 + 'experiment1_results.csv',index=False)

UnboundLocalError: local variable 'event_based_metrics' referenced before assignment

# Experiment 2
Train in DS1 and test in DS2

I need to train the duty-cycle classifier using the same process for training the state classifier, i.e. using the four mounts of data.
For testing, I will use the state_labels, previously classified.
Therefore, it will be multiple combinations of duty-cycle classification with state classification. Each duty-cycle classifier will be tested in all state classifier.

In [12]:
dfs_ref_state= pd.concat(data_DS1)
dfs_ref_cycle = pd.concat(timestamps_cycle_DS1)



Since the results path depend on if the classifier use or not the seed parameters, there are 4 combination of classifiers: "classifiers_state" w/ and w/o seed and "classifier_cycle" w/ and w/o seed.

1) classifier_cycle with seed - classifier_state with seed

In [None]:
seeds=list(range(0,10))
classifiers=["rf","dt","xtree","mlp"]

for seed_state in seeds:
    for classifier_state in classifiers:
        folder_path = dir_exp1_approach2+classifier_state+"/"+str(seed_state)+"/"
        aux= df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[0],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_jun21 = ndarray_labels(data_DS1[0].index[0],data_DS1[0].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[1],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_okt21 = ndarray_labels(data_DS1[1].index[0],data_DS1[1].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[2],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_jan22 = ndarray_labels(data_DS1[2].index[0],data_DS1[2].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[3],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_april22 = ndarray_labels(data_DS1[3].index[0],data_DS1[3].index[-1],aux,downsampled_freq)
        dfs_ref_state["recog_label"]=np.concatenate([recog_label_jun21, recog_label_okt21,recog_label_jan22,recog_label_april22])

        train_sequences = find_non_consecutive_sequences(dfs_ref_state, dfs_ref_cycle,delta,column_name="recog_label")

        # # Determine the maximum sequence length for padding and add some extra value
        max_sequence_length = max(len(seq['non_consecutive_labels']) for seq in train_sequences)+3

        # # Encode the sequences for training
        train_features = [le_state.transform(seq['non_consecutive_labels']) for seq in train_sequences]
        X_res = pad_sequences(train_features, max_sequence_length)
        y_res = le_cycle.transform([seq['label'] for seq in train_sequences])

        #import classified state_labels
        folder_path = dir_exp2_approach2+classifier_state+"/"+str(seed_state)
        aux = df_timestamps(pd.read_csv(folder_path+"/jun23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_jun23["recognized_label"] = ndarray_labels(data_csv_jun23.index[0],data_csv_jun23.index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+"/aug23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_aug23["recognized_label"] = ndarray_labels(data_csv_aug23.index[0],data_csv_aug23.index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+"/okt23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_okt23["recognized_label"] = ndarray_labels(data_csv_okt23.index[0],data_csv_okt23.index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+"/dec23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_dec23["recognized_label"] = ndarray_labels(data_csv_dec23.index[0],data_csv_dec23.index[-1],aux,downsampled_freq)


        #compute the detection of duty-cycle for each month
        data_csv_jun23['detected_cycles']=data_csv_jun23["Speed_order3"].apply(lambda x: detect_cycle(x))
        data_csv_aug23['detected_cycles']=data_csv_aug23["Speed_order3"].apply(lambda x: detect_cycle(x))
        data_csv_okt23['detected_cycles']=data_csv_okt23["Speed_order3"].apply(lambda x: detect_cycle(x))
        data_csv_dec23['detected_cycles']=data_csv_dec23["Speed_order3"].apply(lambda x: detect_cycle(x))

        detection_jun23 = boundaries_cycles(data_csv_jun23)
        detection_aug23 = boundaries_cycles(data_csv_aug23)
        detection_okt23 = boundaries_cycles(data_csv_okt23)
        detection_dec23 = boundaries_cycles(data_csv_dec23)


        #merge test data
        dfs_data = [data_csv_jun23,data_csv_aug23,data_csv_okt23,data_csv_dec23]
        dfs_detection_cycle = [detection_jun23,detection_aug23,detection_okt23,detection_dec23]

        file_name= ["jun23_cycle.txt" ,"aug23_cycle.txt","okt23_cycle.txt","dec23_cycle.txt"]


        for classifier_cycle in classifiers:
            for seed_cycle in seeds:
                #train
                clf_duty = train_cycle_classifier(classifier_cycle,X_res, y_res,seed_cycle)
                
                #compute the sequence for each month of data
                all_sequences = []
                for i in range(len(dfs_data)):
                    data_sequences = find_non_consecutive_sequences(dfs_data[i], dfs_detection_cycle[i],delta,column_name="recognized_label")
                    all_sequences.append(data_sequences)

                for i in range(len(dfs_data)):
                    test_sequences = all_sequences[i]

                    # Encode the sequences for testing
                    test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in test_sequences]
                    test_features_padded = pad_sequences(test_features, max_sequence_length)

                #test the duty-cycle classifier over the labels classifier with the 
                # different classifiers evaluated for recognize states

                    #test
                    y_pred = clf_duty.predict(test_features_padded)

                    # save the results in a file
                    df_temp= pd.DataFrame()
                    df_temp["start"]=dfs_detection_cycle[i]["start"]
                    df_temp["end"]=dfs_detection_cycle[i]["end"]
                    df_temp["label"]=le_cycle.inverse_transform(y_pred)

                    if flag_save_results:
                        folder_path = dir_exp2_approach3+classifier_cycle+"/"+str(seed_cycle)+"/"+classifier_state+"/"+str(seed_state)+"/"
                        os.makedirs(folder_path, exist_ok=True)

                        create_reference_label_file(folder_path+file_name[i],df_temp)


Fitting 5 folds for each of 20 candidates, totalling 100 fits
File saved in:  ./results/approach3/DS2/rf/0/rf/0/jun23_cycle.txt
File saved in:  ./results/approach3/DS2/rf/0/rf/0/aug23_cycle.txt
File saved in:  ./results/approach3/DS2/rf/0/rf/0/okt23_cycle.txt
File saved in:  ./results/approach3/DS2/rf/0/rf/0/dec23_cycle.txt
Fitting 5 folds for each of 20 candidates, totalling 100 fits
File saved in:  ./results/approach3/DS2/rf/1/rf/0/jun23_cycle.txt
File saved in:  ./results/approach3/DS2/rf/1/rf/0/aug23_cycle.txt
File saved in:  ./results/approach3/DS2/rf/1/rf/0/okt23_cycle.txt
File saved in:  ./results/approach3/DS2/rf/1/rf/0/dec23_cycle.txt
Fitting 5 folds for each of 20 candidates, totalling 100 fits
File saved in:  ./results/approach3/DS2/rf/2/rf/0/jun23_cycle.txt
File saved in:  ./results/approach3/DS2/rf/2/rf/0/aug23_cycle.txt
File saved in:  ./results/approach3/DS2/rf/2/rf/0/okt23_cycle.txt
File saved in:  ./results/approach3/DS2/rf/2/rf/0/dec23_cycle.txt
Fitting 5 folds for ea

KeyboardInterrupt: 

2) classifier_cycle with seed - classifier_state without seed

In [None]:
seeds=list(range(0,10))
classifiers_cycle=["rf","dt","xtree","mlp"]
classifiers_state=["xgboost","nb"]

for classifier_state in classifiers_state:
    folder_path = dir_exp1_approach2+classifier_state+"/"
    aux= df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[0],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_jun21 = ndarray_labels(data_DS1[0].index[0],data_DS1[0].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[1],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_okt21 = ndarray_labels(data_DS1[1].index[0],data_DS1[1].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[2],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_jan22 = ndarray_labels(data_DS1[2].index[0],data_DS1[2].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[3],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_april22 = ndarray_labels(data_DS1[3].index[0],data_DS1[3].index[-1],aux,downsampled_freq)
    dfs_ref_state["recog_label"]=np.concatenate([recog_label_jun21, recog_label_okt21,recog_label_jan22,recog_label_april22])

    train_sequences = find_non_consecutive_sequences(dfs_ref_state, dfs_ref_cycle,delta,column_name="recog_label")

    # # Determine the maximum sequence length for padding and add some extra value
    max_sequence_length = max(len(seq['non_consecutive_labels']) for seq in train_sequences)+3

    # # Encode the sequences for training
    train_features = [le_state.transform(seq['non_consecutive_labels']) for seq in train_sequences]
    X_res = pad_sequences(train_features, max_sequence_length)
    y_res = le_cycle.transform([seq['label'] for seq in train_sequences])

    #import classified state_labels
    folder_path = dir_exp2_approach2+classifier_state
    aux = df_timestamps(pd.read_csv(folder_path+"/jun23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_jun23["recognized_label"] = ndarray_labels(data_csv_jun23.index[0],data_csv_jun23.index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+"/aug23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_aug23["recognized_label"] = ndarray_labels(data_csv_aug23.index[0],data_csv_aug23.index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+"/okt23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_okt23["recognized_label"] = ndarray_labels(data_csv_okt23.index[0],data_csv_okt23.index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+"/dec23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_dec23["recognized_label"] = ndarray_labels(data_csv_dec23.index[0],data_csv_dec23.index[-1],aux,downsampled_freq)


    #compute the detection of duty-cycle for each month
    data_csv_jun23['detected_cycles']=data_csv_jun23["Speed_order3"].apply(lambda x: detect_cycle(x))
    data_csv_aug23['detected_cycles']=data_csv_aug23["Speed_order3"].apply(lambda x: detect_cycle(x))
    data_csv_okt23['detected_cycles']=data_csv_okt23["Speed_order3"].apply(lambda x: detect_cycle(x))
    data_csv_dec23['detected_cycles']=data_csv_dec23["Speed_order3"].apply(lambda x: detect_cycle(x))

    detection_jun23 = boundaries_cycles(data_csv_jun23)
    detection_aug23 = boundaries_cycles(data_csv_aug23)
    detection_okt23 = boundaries_cycles(data_csv_okt23)
    detection_dec23 = boundaries_cycles(data_csv_dec23)


    #merge test data
    dfs_data = [data_csv_jun23,data_csv_aug23,data_csv_okt23,data_csv_dec23]
    dfs_detection_cycle = [detection_jun23,detection_aug23,detection_okt23,detection_dec23]

    file_name= ["jun23_cycle.txt" ,"aug23_cycle.txt","okt23_cycle.txt","dec23_cycle.txt"]


    for seed_cycle in seeds:
        for classifier_cycle in classifiers_cycle:
            #train
            clf_duty = train_cycle_classifier(classifier_cycle,X_res, y_res,seed_cycle)
            
            #test the duty-cycle classifier over the labels classifier with the 
            # different classifiers evaluated for recognize states
        

            #compute the sequence for each month of data
            all_sequences = []
            for i in range(len(dfs_data)):
                data_sequences = find_non_consecutive_sequences(dfs_data[i], dfs_detection_cycle[i],delta,column_name="recognized_label")
                all_sequences.append(data_sequences)

            for i in range(len(dfs_data)):
                test_sequences = all_sequences[i]

                # Encode the sequences for testing
                test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in test_sequences]
                test_features_padded = pad_sequences(test_features, max_sequence_length)

                #test
                y_pred = clf_duty.predict(test_features_padded)

                # save the results in a file
                df_temp= pd.DataFrame()
                df_temp["start"]=dfs_detection_cycle[i]["start"]
                df_temp["end"]=dfs_detection_cycle[i]["end"]
                df_temp["label"]=le_cycle.inverse_transform(y_pred)

                if flag_save_results:
                    folder_path = dir_exp2_approach3+classifier_cycle+"/"+str(seed_cycle)+"/"+classifier_state+"/"
                    os.makedirs(folder_path, exist_ok=True)

                    create_reference_label_file(folder_path+file_name[i],df_temp)


Fitting 5 folds for each of 20 candidates, totalling 100 fits
File saved in:  ./results/approach3/DS2/rf/0/xgboost/jun23_cycle.txt
File saved in:  ./results/approach3/DS2/rf/0/xgboost/aug23_cycle.txt
File saved in:  ./results/approach3/DS2/rf/0/xgboost/okt23_cycle.txt
File saved in:  ./results/approach3/DS2/rf/0/xgboost/dec23_cycle.txt
Fitting 5 folds for each of 144 candidates, totalling 720 fits


exception calling callback for <Future at 0x7f8a17f53c70 state=finished raised BrokenProcessPool>
joblib.externals.loky.process_executor._RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/home/luciano/virtualenv/python3.8.10/lib/python3.8/site-packages/joblib/externals/loky/process_executor.py", line 391, in _process_worker
    call_item = call_queue.get(block=True, timeout=timeout)
  File "/usr/lib/python3.8/multiprocessing/queues.py", line 102, in get
    if not self._rlock.acquire(block, timeout):
KeyboardInterrupt
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/luciano/virtualenv/python3.8.10/lib/python3.8/site-packages/joblib/externals/loky/_base.py", line 26, in _invoke_callbacks
    callback(self)
  File "/home/luciano/virtualenv/python3.8.10/lib/python3.8/site-packages/joblib/parallel.py", line 385, in __call__
    self.parallel.dispatch_next()
  File "/home/luciano/virtualenv/python3.8.

KeyboardInterrupt: 

3) classifier_cycle without seed - classifier_state with seed

In [None]:
seeds=list(range(0,10))
classifiers_cycle=["nb","xgboost"]
classifiers_state=["rf","dt","xtree","mlp"]

for seed in seeds:
    for classifier_state in classifiers_state:
        folder_path = dir_exp1_approach2+classifier_state+"/"+str(seed)+"/"
        aux= df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[0],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_jun21 = ndarray_labels(data_DS1[0].index[0],data_DS1[0].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[1],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_okt21 = ndarray_labels(data_DS1[1].index[0],data_DS1[1].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[2],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_jan22 = ndarray_labels(data_DS1[2].index[0],data_DS1[2].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[3],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_april22 = ndarray_labels(data_DS1[3].index[0],data_DS1[3].index[-1],aux,downsampled_freq)
        dfs_ref_state["recog_label"]=np.concatenate([recog_label_jun21, recog_label_okt21,recog_label_jan22,recog_label_april22])

        train_sequences = find_non_consecutive_sequences(dfs_ref_state, dfs_ref_cycle,delta,column_name="recog_label")

        # # Determine the maximum sequence length for padding and add some extra value
        max_sequence_length = max(len(seq['non_consecutive_labels']) for seq in train_sequences)+3

        # # Encode the sequences for training
        train_features = [le_state.transform(seq['non_consecutive_labels']) for seq in train_sequences]
        X_res = pad_sequences(train_features, max_sequence_length)
        y_res = le_cycle.transform([seq['label'] for seq in train_sequences])

        #import classified state_labels
        folder_path = dir_exp2_approach2+classifier_state+"/"+str(seed)
        aux = df_timestamps(pd.read_csv(folder_path+"/jun23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_jun23["recognized_label"] = ndarray_labels(data_csv_jun23.index[0],data_csv_jun23.index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+"/aug23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_aug23["recognized_label"] = ndarray_labels(data_csv_aug23.index[0],data_csv_aug23.index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+"/okt23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_okt23["recognized_label"] = ndarray_labels(data_csv_okt23.index[0],data_csv_okt23.index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+"/dec23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_dec23["recognized_label"] = ndarray_labels(data_csv_dec23.index[0],data_csv_dec23.index[-1],aux,downsampled_freq)


        #compute the detection of duty-cycle for each month
        data_csv_jun23['detected_cycles']=data_csv_jun23["Speed_order3"].apply(lambda x: detect_cycle(x))
        data_csv_aug23['detected_cycles']=data_csv_aug23["Speed_order3"].apply(lambda x: detect_cycle(x))
        data_csv_okt23['detected_cycles']=data_csv_okt23["Speed_order3"].apply(lambda x: detect_cycle(x))
        data_csv_dec23['detected_cycles']=data_csv_dec23["Speed_order3"].apply(lambda x: detect_cycle(x))

        detection_jun23 = boundaries_cycles(data_csv_jun23)
        detection_aug23 = boundaries_cycles(data_csv_aug23)
        detection_okt23 = boundaries_cycles(data_csv_okt23)
        detection_dec23 = boundaries_cycles(data_csv_dec23)


        #merge test data
        dfs_data = [data_csv_jun23,data_csv_aug23,data_csv_okt23,data_csv_dec23]
        dfs_detection_cycle = [detection_jun23,detection_aug23,detection_okt23,detection_dec23]

        file_name= ["jun23_cycle.txt" ,"aug23_cycle.txt","okt23_cycle.txt","dec23_cycle.txt"]

        for classifier_cycle in classifiers_cycle:
            
            #train
            clf_duty = train_cycle_classifier(classifier_cycle,X_res, y_res)
    
    #test the duty-cycle classifier over the labels classifier with the 
    # different classifiers evaluated for recognize states            

            #compute the sequence for each month of data
            all_sequences = []
            for i in range(len(dfs_data)):
                data_sequences = find_non_consecutive_sequences(dfs_data[i], dfs_detection_cycle[i],delta,column_name="recognized_label")
                all_sequences.append(data_sequences)

            for i in range(len(dfs_data)):
                test_sequences = all_sequences[i]

                # Encode the sequences for testing
                test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in test_sequences]
                test_features_padded = pad_sequences(test_features, max_sequence_length)

                #test
                y_pred = clf_duty.predict(test_features_padded)

                # save the results in a file
                df_temp= pd.DataFrame()
                df_temp["start"]=dfs_detection_cycle[i]["start"]
                df_temp["end"]=dfs_detection_cycle[i]["end"]
                df_temp["label"]=le_cycle.inverse_transform(y_pred)

                if flag_save_results:
                    folder_path = dir_exp2_approach3+classifier_cycle+"/"+classifier_state+"/"+str(seed)+"/"
                    os.makedirs(folder_path, exist_ok=True)

                    create_reference_label_file(folder_path+file_name[i],df_temp)

Results saved in ./results/approach3/DS2/nb/rf/0/jun23_cycle.txt
Results saved in ./results/approach3/DS2/nb/rf/0/aug23_cycle.txt
Results saved in ./results/approach3/DS2/nb/rf/0/okt23_cycle.txt
Results saved in ./results/approach3/DS2/nb/rf/0/dec23_cycle.txt
Fitting 5 folds for each of 36 candidates, totalling 180 fits


KeyboardInterrupt: 

4) classifier_cycle without seed - classifier_state without seed

In [None]:
classifiers=["xgboost","nb"]
for classifier_state in classifiers:
    folder_path = dir_exp1_approach2+classifier_state+"/"
    aux= df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[0],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_jun21 = ndarray_labels(data_DS1[0].index[0],data_DS1[0].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[1],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_okt21 = ndarray_labels(data_DS1[1].index[0],data_DS1[1].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[2],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_jan22 = ndarray_labels(data_DS1[2].index[0],data_DS1[2].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[3],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_april22 = ndarray_labels(data_DS1[3].index[0],data_DS1[3].index[-1],aux,downsampled_freq)
    dfs_ref_state["recog_label"]=np.concatenate([recog_label_jun21, recog_label_okt21,recog_label_jan22,recog_label_april22])

    train_sequences = find_non_consecutive_sequences(dfs_ref_state, dfs_ref_cycle,delta,column_name="recog_label")

    # # Determine the maximum sequence length for padding and add some extra value
    max_sequence_length = max(len(seq['non_consecutive_labels']) for seq in train_sequences)+3

    # # Encode the sequences for training
    train_features = [le_state.transform(seq['non_consecutive_labels']) for seq in train_sequences]
    X_res = pad_sequences(train_features, max_sequence_length)
    y_res = le_cycle.transform([seq['label'] for seq in train_sequences])

#import classified state_labels
    folder_path = dir_exp2_approach2+classifier_state
    aux = df_timestamps(pd.read_csv(folder_path+"/jun23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_jun23["recognized_label"] = ndarray_labels(data_csv_jun23.index[0],data_csv_jun23.index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+"/aug23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_aug23["recognized_label"] = ndarray_labels(data_csv_aug23.index[0],data_csv_aug23.index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+"/okt23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_okt23["recognized_label"] = ndarray_labels(data_csv_okt23.index[0],data_csv_okt23.index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+"/dec23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_dec23["recognized_label"] = ndarray_labels(data_csv_dec23.index[0],data_csv_dec23.index[-1],aux,downsampled_freq)


    #compute the detection of duty-cycle for each month
    data_csv_jun23['detected_cycles']=data_csv_jun23["Speed_order3"].apply(lambda x: detect_cycle(x))
    data_csv_aug23['detected_cycles']=data_csv_aug23["Speed_order3"].apply(lambda x: detect_cycle(x))
    data_csv_okt23['detected_cycles']=data_csv_okt23["Speed_order3"].apply(lambda x: detect_cycle(x))
    data_csv_dec23['detected_cycles']=data_csv_dec23["Speed_order3"].apply(lambda x: detect_cycle(x))

    detection_jun23 = boundaries_cycles(data_csv_jun23)
    detection_aug23 = boundaries_cycles(data_csv_aug23)
    detection_okt23 = boundaries_cycles(data_csv_okt23)
    detection_dec23 = boundaries_cycles(data_csv_dec23)


    #merge test data
    dfs_data = [data_csv_jun23,data_csv_aug23,data_csv_okt23,data_csv_dec23]
    dfs_detection_cycle = [detection_jun23,detection_aug23,detection_okt23,detection_dec23]

    file_name= ["jun23_cycle.txt" ,"aug23_cycle.txt","okt23_cycle.txt","dec23_cycle.txt"]


    for classifier_cycle in classifiers:
        #train
        clf_duty = train_cycle_classifier(classifier_cycle,X_res, y_res)
    
    #test the duty-cycle classifier over the labels classifier with the 
    # different classifiers evaluated for recognize states

        #compute the sequence for each month of data
        all_sequences = []
        for i in range(len(dfs_data)):
            data_sequences = find_non_consecutive_sequences(dfs_data[i], dfs_detection_cycle[i],delta,column_name="recognized_label")
            all_sequences.append(data_sequences)

        for i in range(len(dfs_data)):
            test_sequences = all_sequences[i]

            # Encode the sequences for testing
            test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in test_sequences]
            test_features_padded = pad_sequences(test_features, max_sequence_length)

            # # Normalize features
            # test_features_padded = scaler_cycle.fit_transform(test_features_padded)

            #test
            y_pred = clf_duty.predict(test_features_padded)

            # save the results in a file
            df_temp= pd.DataFrame()
            df_temp["start"]=dfs_detection_cycle[i]["start"]
            df_temp["end"]=dfs_detection_cycle[i]["end"]
            df_temp["label"]=le_cycle.inverse_transform(y_pred)

            if flag_save_results:
                folder_path = dir_exp2_approach3+classifier_cycle+"/"+classifier_state+"/"
                os.makedirs(folder_path, exist_ok=True)

                create_reference_label_file(folder_path+file_name[i],df_temp)


Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best parameters: {'learning_rate': 0.2, 'max_depth': 6, 'n_estimators': 10}
Best score: 0.9135477784230002
Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best parameters: {'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 10}
Best score: 0.8183279575750971


## performance

In [None]:
df_results_exp2 = pd.DataFrame(columns=['cycle_classifier', 'state_classifier', 
                                        'Abnormal mean F1-score','Abnormal mean Precision','Abnormal mean Recall',
                                        'Normal mean F1-score','Normal mean Precision','Normal mean Recall',
                                        'Overall mean F1-score','Overall mean Precision','Overall mean Recall',
                                        'Abnormal std F1-score','Abnormal std Precision','Abnormal std Recall',
                                        'Normal std F1-score','Normal std Precision','Normal std Recall',
                                        'Overall std F1-score','Overall std Precision','Overall std Recall'])

seeds=list(range(0,10))
classifiers=["xgboost","rf","dt","nb","xtree","mlp"]
classifiers_w_seed=["rf","dt","xtree","mlp"]
classifiers_wo_seed=["xgboost","nb"]
dir = './results/reference_cycle_labels/'
reference_path = dir
collar = 202.75


# 1) classifier_cycle with seed - classifier_state with seed
for classifier_cycle in classifiers_w_seed:
    for classifier_state in classifiers_w_seed:
        detection_files_abnormal, f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal = [],[],[],[]
        detection_files_normal, f1score_files_normal, precision_files_normal, recall_files_normal = [],[],[],[]
        f1score_files_overall, precision_files_overall, recall_files_overall = [],[],[]
        for seed_cycle in seeds:
            for seed_state in seeds:
                result_path = dir_exp2_approach3+classifier_cycle+"/"+str(seed_cycle)+"/"+classifier_state+"/"+str(seed_state)+"/"
                f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal, f1score_normal, precision_normal, recall_normal = compute_classification_sedeval(reference_path,result_path,collar)
                check_nan=(f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal, f1score_normal, precision_normal, recall_normal)
                if all(check_nan):
                    f1score_files_overall.append(f1score_file)
                    precision_files_overall.append(precision_file)
                    recall_files_overall.append(recall_file)
                    f1score_files_abnormal.append(f1score_abnormal)
                    precision_files_abnormal.append(precision_abnormal)
                    recall_files_abnormal.append(recall_abnormal)
                    f1score_files_normal.append(f1score_normal)
                    precision_files_normal.append(precision_normal)
                    recall_files_normal.append(recall_normal)

        dflocal = pd.DataFrame({'cycle_classifier':classifier_cycle, 'state_classifier':classifier_state, 
                            'Abnormal mean F1-score':np.mean(f1score_files_abnormal)*100,
                            'Abnormal mean Precision':np.mean(precision_files_abnormal)*100,
                            'Abnormal mean Recall':np.mean(recall_files_abnormal)*100,
                            'Normal mean F1-score':np.mean(f1score_files_normal)*100,
                            'Normal mean Precision':np.mean(precision_files_normal)*100,
                            'Normal mean Recall':np.mean(recall_files_normal)*100,
                            'Overall mean F1-score':np.mean(f1score_files_overall)*100,
                            'Overall mean Precision':np.mean(precision_files_overall)*100,
                            'Overall mean Recall':np.mean(recall_files_overall)*100,
                            'Abnormal std F1-score':np.std(f1score_files_abnormal)*100,
                            'Abnormal std Precision':np.std(precision_files_abnormal)*100,
                            'Abnormal std Recall':np.std(recall_files_abnormal)*100,
                            'Normal std F1-score':np.std(f1score_files_normal)*100,
                            'Normal std Precision':np.std(precision_files_normal)*100,
                            'Normal std Recall':np.std(recall_files_normal)*100,
                            'Overall std F1-score':np.std(f1score_files_overall)*100,
                            'Overall std Precision':np.std(precision_files_overall)*100,
                            'Overall std Recall':np.std(recall_files_overall)*100},index=[0])
        
        df_results_exp2 = pd.concat([df_results_exp2, dflocal], ignore_index=True)

# 2) classifier_cycle with seed - classifier_state without seed
for classifier_cycle in classifiers_w_seed:
    for classifier_state in classifiers_wo_seed:
        detection_files_abnormal, f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal = [],[],[],[]
        detection_files_normal, f1score_files_normal, precision_files_normal, recall_files_normal = [],[],[],[]
        f1score_files_overall, precision_files_overall, recall_files_overall = [],[],[]
        for seed_cycle in seeds:
            result_path = dir_exp2_approach3+classifier_cycle+"/"+str(seed_cycle)+"/"+classifier_state+"/"
            f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal, f1score_normal, precision_normal, recall_normal = compute_classification_sedeval(reference_path,result_path,collar)
            check_nan=(f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal, f1score_normal, precision_normal, recall_normal)
            if all(check_nan):
                f1score_files_overall.append(f1score_file)
                precision_files_overall.append(precision_file)
                recall_files_overall.append(recall_file)
                f1score_files_abnormal.append(f1score_abnormal)
                precision_files_abnormal.append(precision_abnormal)
                recall_files_abnormal.append(recall_abnormal)
                f1score_files_normal.append(f1score_normal)
                precision_files_normal.append(precision_normal)
                recall_files_normal.append(recall_normal)

        dflocal = pd.DataFrame({'cycle_classifier':classifier_cycle, 'state_classifier':classifier_state, 
                            'Abnormal mean F1-score':np.mean(f1score_files_abnormal)*100,
                            'Abnormal mean Precision':np.mean(precision_files_abnormal)*100,
                            'Abnormal mean Recall':np.mean(recall_files_abnormal)*100,
                            'Normal mean F1-score':np.mean(f1score_files_normal)*100,
                            'Normal mean Precision':np.mean(precision_files_normal)*100,
                            'Normal mean Recall':np.mean(recall_files_normal)*100,
                            'Overall mean F1-score':np.mean(f1score_files_overall)*100,
                            'Overall mean Precision':np.mean(precision_files_overall)*100,
                            'Overall mean Recall':np.mean(recall_files_overall)*100,
                            'Abnormal std F1-score':np.std(f1score_files_abnormal)*100,
                            'Abnormal std Precision':np.std(precision_files_abnormal)*100,
                            'Abnormal std Recall':np.std(recall_files_abnormal)*100,
                            'Normal std F1-score':np.std(f1score_files_normal)*100,
                            'Normal std Precision':np.std(precision_files_normal)*100,
                            'Normal std Recall':np.std(recall_files_normal)*100,
                            'Overall std F1-score':np.std(f1score_files_overall)*100,
                            'Overall std Precision':np.std(precision_files_overall)*100,
                            'Overall std Recall':np.std(recall_files_overall)*100},index=[0])
        
        df_results_exp2 = pd.concat([df_results_exp2, dflocal], ignore_index=True)

# 3) classifier_cycle without seed - classifier_state with seed
for classifier_cycle in classifiers_wo_seed:
    for classifier_state in classifiers_w_seed:
        detection_files_abnormal, f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal = [],[],[],[]
        f1score_files_overall, precision_files_overall, recall_files_overall = [],[],[]
        for seed_state in seeds:
            result_path = dir_exp2_approach3+classifier_cycle+"/"+classifier_state+"/"+str(seed_state)+"/"
            f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal, f1score_normal, precision_normal, recall_normal = compute_classification_sedeval(reference_path,result_path,collar)
            check_nan=(f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal, f1score_normal, precision_normal, recall_normal)
            if all(check_nan):
                f1score_files_overall.append(f1score_file)
                precision_files_overall.append(precision_file)
                recall_files_overall.append(recall_file)
                f1score_files_abnormal.append(f1score_abnormal)
                precision_files_abnormal.append(precision_abnormal)
                recall_files_abnormal.append(recall_abnormal)
                f1score_files_normal.append(f1score_normal)
                precision_files_normal.append(precision_normal)
                recall_files_normal.append(recall_normal)

        dflocal = pd.DataFrame({'cycle_classifier':classifier_cycle, 'state_classifier':classifier_state, 
                            'Abnormal mean F1-score':np.mean(f1score_files_abnormal)*100,
                            'Abnormal mean Precision':np.mean(precision_files_abnormal)*100,
                            'Abnormal mean Recall':np.mean(recall_files_abnormal)*100,
                            'Normal mean F1-score':np.mean(f1score_files_normal)*100,
                            'Normal mean Precision':np.mean(precision_files_normal)*100,
                            'Normal mean Recall':np.mean(recall_files_normal)*100,
                            'Overall mean F1-score':np.mean(f1score_files_overall)*100,
                            'Overall mean Precision':np.mean(precision_files_overall)*100,
                            'Overall mean Recall':np.mean(recall_files_overall)*100,
                            'Abnormal std F1-score':np.std(f1score_files_abnormal)*100,
                            'Abnormal std Precision':np.std(precision_files_abnormal)*100,
                            'Abnormal std Recall':np.std(recall_files_abnormal)*100,
                            'Normal std F1-score':np.std(f1score_files_normal)*100,
                            'Normal std Precision':np.std(precision_files_normal)*100,
                            'Normal std Recall':np.std(recall_files_normal)*100,
                            'Overall std F1-score':np.std(f1score_files_overall)*100,
                            'Overall std Precision':np.std(precision_files_overall)*100,
                            'Overall std Recall':np.std(recall_files_overall)*100},index=[0])
        
        df_results_exp2 = pd.concat([df_results_exp2, dflocal], ignore_index=True)

# 4) classifier_cycle without seed - classifier_state without seed
for classifier_cycle in classifiers_wo_seed:
    for classifier_state in classifiers_wo_seed:
        result_path = dir_exp2_approach3+classifier_cycle+"/"+classifier_state+"/"
        f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal, f1score_normal, precision_normal, recall_normal = compute_classification_sedeval(reference_path,result_path,collar)
        check_nan=(f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal, f1score_normal, precision_normal, recall_normal)
        if all(check_nan):
            dflocal = pd.DataFrame({'cycle_classifier':classifier_cycle, 'state_classifier':classifier_state, 
                                'Abnormal mean F1-score':f1score_abnormal*100,
                                'Abnormal mean Precision':precision_abnormal*100,
                                'Abnormal mean Recall':recall_abnormal*100,
                                'Normal mean F1-score':(f1score_normal)*100,
                                'Normal mean Precision':(precision_normal)*100,
                                'Normal mean Recall':(recall_normal)*100,
                                'Overall mean F1-score':f1score_file*100,
                                'Overall mean Precision':precision_file*100,
                                'Overall mean Recall':recall_file*100},index=[0])
        
            df_results_exp2 = pd.concat([df_results_exp2, dflocal], ignore_index=True)

df_results_exp2.to_csv(dir_exp2_approach3+'experiment2_results.csv',index=False)

  cycle_classifier state_classifier  Abnormal mean F1-score   
0               rf               rf               66.204733  \

   Abnormal mean Precision  Abnormal mean Recall  Normal mean F1-score   
0                64.678531             67.857143             90.500717  \

   Normal mean Precision  Normal mean Recall  Overall mean F1-score   
0               91.14525           89.871314              85.035565  \

   Overall mean Precision  Overall mean Recall  Abnormal std F1-score   
0               85.035565            85.035565               1.701056  \

   Abnormal std Precision  Abnormal std Recall  Normal std F1-score   
0                 1.52342             2.661134               0.4378  \

   Normal std Precision  Normal std Recall  Overall std F1-score   
0              0.663614           0.705843              0.673013  \

   Overall std Precision  Overall std Recall  
0               0.673013            0.673013  


KeyboardInterrupt: 

# Deployment on MCU

Data preparation and pre-processing

In [10]:
df_dataset= pd.concat([data_csv_jun21, data_csv_okt21,data_csv_jan22,data_csv_april22])

df_dataset["ref_label"]= np.concatenate((true_label_jun21, true_label_okt21,true_label_jan22,true_label_april22), axis=0)    
df_dataset["ref_label_cycle"]= np.concatenate((true_label_cycle_jun21, true_label_cycle_okt21,true_label_cycle_jan22,true_label_cycle_april22), axis=0)    

removed_indices = df_dataset[df_dataset['ref_label'].isnull()].index.tolist()
df_dataset = df_dataset[df_dataset['ref_label'].notnull()]
df_dataset=df_dataset.reset_index()

# remove the recognized_label column added in the experiment1 or experiment2
if 'recognized_label' in df_dataset.columns:
    df_dataset = df_dataset.drop('recognized_label', axis=1)

x = df_dataset[df_dataset.columns[1:-2]]
y_cycle = df_dataset[df_dataset.columns[-1]]
y_state = df_dataset[df_dataset.columns[-2]]

y_state [y_state=='E']='B'

# normalize feature to range [0;1]
scaler = MinMaxScaler(clip=True)
scaler.fit(x)
x_train = pd.DataFrame(scaler.transform(x), columns=x.columns)

y_state [y_state=='E']='B'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y_state [y_state=='E']='B'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y_state [y_state=='E']='B'


balance ds1 for training

In [11]:
x_train_balanced, y_train_balanced,_ = balance_dataset(x_train,y_state)

# Print balanced dataset
unique_values, counts = np.unique(y_state, return_counts=True)
value_counts = dict(zip(unique_values, counts))
value_porcentages = dict(zip(unique_values, counts/sum(counts)*100))
print("Value class-counts in Unbalanced dataset:",value_counts)
print("Value class-porcentage in Unbalanced dataset:",value_porcentages)

unique_values, counts = np.unique(y_train_balanced, return_counts=True)
value_counts = dict(zip(unique_values, counts))
value_porcentages = dict(zip(unique_values, counts/sum(counts)*100))
print("Value class-counts in Balanced dataset:",value_counts)
print("Value class-porcentage in Unbalanced dataset:",value_porcentages)

del unique_values,counts,value_counts,value_porcentages

Value class-counts in Unbalanced dataset: {'A': 10566, 'B': 123236, 'C': 11444, 'D': 24298}
Value class-porcentage in Unbalanced dataset: {'A': 6.232010569527674, 'B': 72.68673618647667, 'C': 6.7498702401736415, 'D': 14.331383003822019}
Value class-counts in Balanced dataset: {0: 21132, 1: 30809, 2: 22888, 3: 24298}
Value class-porcentage in Unbalanced dataset: {0: 21.31810707476268, 1: 31.080331292180734, 2: 23.089571963239077, 3: 24.511989669817506}


In [12]:
print("SCALED VALUES TO BE ADDED IN THE C-CODE")
print(f"Minimun: ",scaler.data_min_)
print(f"Maximun: ",scaler.data_max_)

SCALED VALUES TO BE ADDED IN THE C-CODE
Minimun:  [ 0.          0.          0.          0.          0.          0.
 -1.34333333  0.          0.          0.         -1.194      -1.37      ]
Maximun:  [155.93        41.73        45.55        45.39333333 155.35333333
  41.31333333 120.82333333  45.36       154.672       40.976
 119.466      123.51      ]


train and test data

In [13]:
#test data (DS2)
df_testset= pd.concat([data_csv_jun23, data_csv_aug23,data_csv_okt23,data_csv_dec23])
x_test_float = pd.DataFrame(scaler.transform(df_testset), columns=df_testset.columns,index=df_testset.index)

x_train_float= x_train_balanced.copy()
x_train_float.describe()

Unnamed: 0,High-pressure,Low-pressure,Speed,Speed_order3,High-pressure_order3,Low-pressure_order3,Diff-pressure_order3,Speed_order5,High-pressure_order5,Low-pressure_order5,Diff-pressure_order5,Diff-pressure
count,99127.0,99127.0,99127.0,99127.0,99127.0,99127.0,99127.0,99127.0,99127.0,99127.0,99127.0,99127.0
mean,0.332104,0.568258,0.436464,0.437485,0.333488,0.574119,0.240925,0.435835,0.335079,0.578888,0.242838,0.235758
std,0.284168,0.319134,0.482399,0.478376,0.284036,0.321885,0.299536,0.47349,0.283725,0.323944,0.301045,0.294641
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.196146,0.484783,0.0,0.0,0.201605,0.490399,0.058445,0.0,0.202642,0.494607,0.057998,0.057335
50%,0.247804,0.675533,0.0,0.0,0.24971,0.682266,0.084011,0.0,0.252483,0.687964,0.090356,0.075673
75%,0.410463,0.834891,0.987706,0.990454,0.413747,0.842908,0.243192,0.99052,0.418893,0.849619,0.254031,0.234625
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [14]:
dfs_ref_state= pd.concat(data_DS1)
dfs_ref_cycle = pd.concat(timestamps_cycle_DS1)

data_csv_jun23_float=pd.DataFrame(scaler.transform(data_csv_jun23), columns=data_csv_jun23.columns,index=data_csv_jun23.index)
data_csv_aug23_float=pd.DataFrame(scaler.transform(data_csv_aug23), columns=data_csv_aug23.columns,index=data_csv_aug23.index)
data_csv_okt23_float=pd.DataFrame(scaler.transform(data_csv_okt23), columns=data_csv_okt23.columns,index=data_csv_okt23.index)
data_csv_dec23_float=pd.DataFrame(scaler.transform(data_csv_dec23), columns=data_csv_dec23.columns,index=data_csv_dec23.index)

## mlp + xtree

In [15]:
import tensorflow as tf
from tensorflow.keras import activations
from tensorflow.keras import layers,metrics
print(tf.__version__)
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '-1' 
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

2025-06-23 13:11:22.121935: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-23 13:11:22.146828: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-06-23 13:11:22.321251: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-06-23 13:11:22.323038: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2.13.0


In [16]:
classifier_state="mlp"
classifier_cycle="xtree"
seed=0

dir_model_pred = "./results/approach3/MCU/"
model_path = "./results/c_code/approach3/"
TF_MODEL = "mlp_float.keras"
TF_MODEL_I8="mlp_int8.tflite"

tf.random.set_seed(seed)

In [17]:
clf_state_float = train_state_supervised_classifier(classifier_state,x_train_float, y_train_balanced,seed)
clf_state_float

In [18]:
# Reproduce in TensorFlow the MLP obtained with Sklearn
model = tf.keras.Sequential()
model.add(layers.Dense(12, input_dim=len(x_train_float.columns), activation='relu')) 
model.add(layers.Dense(4, activation='softmax'))
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=optimizer,loss='SparseCategoricalCrossentropy',
              metrics=['sparse_categorical_accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 12)                156       
                                                                 
 dense_1 (Dense)             (None, 4)                 52        
                                                                 
Total params: 208 (832.00 Byte)
Trainable params: 208 (832.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


2025-06-23 13:14:55.261157: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2025-06-23 13:14:55.261281: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:168] retrieving CUDA diagnostic information for host: luciano-Precision-3660
2025-06-23 13:14:55.261293: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:175] hostname: luciano-Precision-3660
2025-06-23 13:14:55.261525: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:199] libcuda reported version is: 535.183.1
2025-06-23 13:14:55.261561: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:203] kernel reported version is: 535.183.1
2025-06-23 13:14:55.261570: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:309] kernel version seems to match DSO: 535.183.1


In [19]:
NUM_EPOCHS=50
history = model.fit(x=x_train_float, y=y_train_balanced,epochs=NUM_EPOCHS,shuffle=True)
model.save(model_path+TF_MODEL)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### float32

convert model

In [24]:
model_load = tf.keras.models.load_model(model_path+TF_MODEL)

In [26]:
#conversion without quantization
TFL_MODEL_FILE= model_path +"mlp_float.tflite"
converter = tf.lite.TFLiteConverter.from_keras_model(model_load)
tflite_model = converter.convert()
open(TFL_MODEL_FILE, "wb").write(tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmpa9mvmcg0/assets


INFO:tensorflow:Assets written to: /tmp/tmpa9mvmcg0/assets
2025-06-23 13:19:00.800666: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2025-06-23 13:19:00.800689: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2025-06-23 13:19:00.801222: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpa9mvmcg0
2025-06-23 13:19:00.801630: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2025-06-23 13:19:00.801637: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: /tmp/tmpa9mvmcg0
2025-06-23 13:19:00.802930: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:375] MLIR V1 optimization pass is not enabled
2025-06-23 13:19:00.803325: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2025-06-23 13:19:00.821818: I tensorflow/cc/saved_model/loader.cc:215] Running initializatio

2520

In [27]:
interpreter = tf.lite.Interpreter(model_path=TFL_MODEL_FILE)
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
interpreter.allocate_tensors()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [28]:
output_data = []

x_test_float32 = x_test_float.to_numpy().astype("float32")

for x in x_test_float32:
  interpreter.set_tensor(input_details[0]['index'], x.reshape(input_details[0]['shape']))
  interpreter.invoke()
  output_data.append(interpreter.get_tensor(output_details[0]['index'])[0])

y_test_pred_lite = np.array(output_data)

In [31]:
print(input_details[0])
print(output_details[0])

{'name': 'serving_default_dense_input:0', 'index': 0, 'shape': array([ 1, 12], dtype=int32), 'shape_signature': array([-1, 12], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}
{'name': 'StatefulPartitionedCall:0', 'index': 7, 'shape': array([1, 4], dtype=int32), 'shape_signature': array([-1,  4], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}


classify states

In [32]:
#test state-cycles
y_predict = np.argmax(y_test_pred_lite, axis=1)
y_pred_smoothed = smooth_labels(y_predict,3)     #apply 3rd median filter
y_recognized=le_state.inverse_transform(y_pred_smoothed.astype(int))
df_temp=df_testset.copy()
df_temp["recognized_label"]=y_recognized
df_temp=df_temp.reset_index()

#classify duty-cycle
df_recognized_states_jun23 = create_segments_state(datetime.datetime(2023, 6, 1),datetime.datetime(2023, 7, 1),df_temp)
df_recognized_states_aug23 = create_segments_state(datetime.datetime(2023, 8, 1),datetime.datetime(2023, 9, 1),df_temp)
df_recognized_states_okt23 = create_segments_state(datetime.datetime(2023, 10, 1),datetime.datetime(2023, 11, 1),df_temp)
df_recognized_states_dec23 = create_segments_state(datetime.datetime(2023, 12, 1),datetime.datetime(2024, 1, 1),df_temp)

folder_path = dir_model_pred+"/float"
os.makedirs(folder_path, exist_ok=True)

if flag_save_results:
    create_reference_label_file(folder_path+"/jun23_state.txt",df_recognized_states_jun23)
    create_reference_label_file(folder_path+"/aug23_state.txt",df_recognized_states_aug23)
    create_reference_label_file(folder_path+"/okt23_state.txt",df_recognized_states_okt23)
    create_reference_label_file(folder_path+"/dec23_state.txt",df_recognized_states_dec23)        

classify cycle

In [33]:
folder_path_train_labels = dir_exp1_approach2+classifier_state+"/"+str(seed)+"/"
aux= df_timestamps(pd.read_csv(folder_path_train_labels+file_name_states_DS1[0],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
recog_label_jun21 = ndarray_labels(data_DS1[0].index[0],data_DS1[0].index[-1],aux,downsampled_freq)
aux = df_timestamps(pd.read_csv(folder_path_train_labels+file_name_states_DS1[1],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
recog_label_okt21 = ndarray_labels(data_DS1[1].index[0],data_DS1[1].index[-1],aux,downsampled_freq)
aux = df_timestamps(pd.read_csv(folder_path_train_labels+file_name_states_DS1[2],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
recog_label_jan22 = ndarray_labels(data_DS1[2].index[0],data_DS1[2].index[-1],aux,downsampled_freq)
aux = df_timestamps(pd.read_csv(folder_path_train_labels+file_name_states_DS1[3],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
recog_label_april22 = ndarray_labels(data_DS1[3].index[0],data_DS1[3].index[-1],aux,downsampled_freq)
dfs_ref_state["recog_label"]=np.concatenate([recog_label_jun21, recog_label_okt21,recog_label_jan22,recog_label_april22])

train_sequences = find_non_consecutive_sequences(dfs_ref_state, dfs_ref_cycle,delta,column_name="recog_label")

# # Determine the maximum sequence length for padding and add some extra value
max_sequence_length = max(len(seq['non_consecutive_labels']) for seq in train_sequences)+3

# # Encode the sequences for training
train_features = [le_state.transform(seq['non_consecutive_labels']) for seq in train_sequences]
train_features_padded = pad_sequences(train_features, max_sequence_length)
train_labels = le_cycle.transform([seq['label'] for seq in train_sequences])
X_res, y_res = train_features_padded, train_labels

#import classified state_labels
aux = df_timestamps(pd.read_csv(folder_path+"/jun23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
data_csv_jun23_float["recognized_label"] = ndarray_labels(data_csv_jun23_float.index[0],data_csv_jun23_float.index[-1],aux,downsampled_freq)
aux = df_timestamps(pd.read_csv(folder_path+"/aug23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
data_csv_aug23_float["recognized_label"] = ndarray_labels(data_csv_aug23_float.index[0],data_csv_aug23_float.index[-1],aux,downsampled_freq)
aux = df_timestamps(pd.read_csv(folder_path+"/okt23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
data_csv_okt23_float["recognized_label"] = ndarray_labels(data_csv_okt23_float.index[0],data_csv_okt23_float.index[-1],aux,downsampled_freq)
aux = df_timestamps(pd.read_csv(folder_path+"/dec23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
data_csv_dec23_float["recognized_label"] = ndarray_labels(data_csv_dec23_float.index[0],data_csv_dec23_float.index[-1],aux,downsampled_freq)

In [34]:
#compute the detection of duty-cycle for each month
threshold_speed = 2.5
threshold_speed_normalized = (threshold_speed-scaler.data_min_[2])/scaler.data_range_[2]
# threshold_speed_quant = (threshold_speed_normalized*(2**32 - 1)).astype('float')
data_csv_jun23_float.loc[data_csv_jun23_float['Speed_order3'] > threshold_speed_normalized ,'detected_cycles'] = 'Cycle'
data_csv_aug23_float.loc[data_csv_aug23_float['Speed_order3'] > threshold_speed_normalized ,'detected_cycles'] = 'Cycle'
data_csv_okt23_float.loc[data_csv_okt23_float['Speed_order3'] > threshold_speed_normalized ,'detected_cycles'] = 'Cycle'
data_csv_dec23_float.loc[data_csv_dec23_float['Speed_order3'] > threshold_speed_normalized ,'detected_cycles'] = 'Cycle'

detection_jun23 = boundaries_cycles(data_csv_jun23_float)
detection_aug23 = boundaries_cycles(data_csv_aug23_float)
detection_okt23 = boundaries_cycles(data_csv_okt23_float)
detection_dec23 = boundaries_cycles(data_csv_dec23_float)

#merge test data
dfs_data = [data_csv_jun23_float,data_csv_aug23_float,data_csv_okt23_float,data_csv_dec23_float]
dfs_detection_cycle = [detection_jun23,detection_aug23,detection_okt23,detection_dec23]

file_name= ["jun23_cycle.txt" ,"aug23_cycle.txt", 
            "okt23_cycle.txt","dec23_cycle.txt"]

#train
clf_duty_float = train_cycle_classifier(classifier_cycle,X_res, y_res,seed)

#compute the sequence for each month of data
all_sequences = []
for i in range(len(dfs_data)):
    data_sequences = find_non_consecutive_sequences(dfs_data[i], dfs_detection_cycle[i],delta,column_name="recognized_label")
    all_sequences.append(data_sequences)

for i in range(len(dfs_data)):
    test_sequences = all_sequences[i]

    # Encode the sequences for testing
    test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in test_sequences]
    test_features_padded = pad_sequences(test_features, max_sequence_length)

    #test
    y_pred = clf_duty_float.predict(test_features_padded)

    # save the results in a file
    df_temp= pd.DataFrame()
    df_temp["start"]=dfs_detection_cycle[i]["start"]
    df_temp["end"]=dfs_detection_cycle[i]["end"]
    df_temp["label"]=le_cycle.inverse_transform(y_pred)

    if flag_save_results:
        create_reference_label_file(folder_path+"/"+file_name[i],df_temp)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


convert model to C

In [35]:
!xxd -i ./results/c_code/approach3/mlp_float.tflite > ./results/c_code/approach3/mlp_float.h
!cat ./results/c_code/approach3/mlp_float.h

unsigned char __results_c_code_approach3_mlp_float_tflite[] = {
  0x1c, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x14, 0x00, 0x20, 0x00,
  0x1c, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00,
  0x08, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
  0x90, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x14, 0x05, 0x00, 0x00,
  0x24, 0x05, 0x00, 0x00, 0x74, 0x09, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
  0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00,
  0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
  0x0c, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
  0x0f, 0x00, 0x00, 0x00, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x5f,
  0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x00, 0x01, 0x00, 0x00, 0x00,
  0x04, 0x00, 0x00, 0x00, 0x98, 0xff, 0xff, 0xff, 0x07, 0x00, 0x00, 0x00,
  0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x64, 0x65, 0x6e, 0x73,
  0x65, 0x5f, 0x31, 0x00, 0x01, 0x00, 0x00, 0x00

In [37]:
from emlearn import convert

cmodel = convert(clf_duty_float, method='inline',dtype='float')
cmodel.save(file = model_path+"clf_duty_float.h", name='clf_duty')

'\n\n\n    // !!! This file is generated using emlearn !!!\n\n    #include <eml_trees.h>\n    \n\nEmlTreesNode clf_duty_nodes[978] = {\n  { 1, 2.8516892655061974, 1, 19 },\n  { 5, 1.9123220834472785, 2, 15 },\n  { 0, 2.856760682937112, 3, 8 },\n  { 3, 1.5818645318885634, 4, 10 },\n  { 0, 1.285560169390198, 5, 8 },\n  { 4, 1.62462162721186, 6, 9 },\n  { 4, 0.8876280337048825, 7, 9 },\n  { 2, 0.5317330260443189, 8, 8 },\n  { -1, 0, -1, -1 },\n  { -1, 1, -1, -1 },\n  { 1, 1.4275219759100684, 11, 13 },\n  { 2, 2.8778161014792585, 12, 8 },\n  { 0, 0.20944428360529443, 8, 9 },\n  { 4, 1.993628840424879, 14, 9 },\n  { 4, 0.10130490367361573, 9, 9 },\n  { 6, 0.7027177641646554, 8, 16 },\n  { 8, 0.37898516998579035, 17, 8 },\n  { 6, 2.7813649353484458, 18, 9 },\n  { 7, 1.3027019627870535, 8, 8 },\n  { 4, 0.08220052909208486, 20, 8 },\n  { 0, 1.518249496127595, 8, 21 },\n  { 3, 0.3926442728343626, 8, 9 },\n  { 3, 1.8131194369928536, 23, 31 },\n  { 2, 1.150271020899653, 8, 24 },\n  { 2, 2.3136512

### int8

In [38]:
def representative_data_gen():
  for i_value in tf.data.Dataset.from_tensor_slices(x_test_float).batch(1).take(int(len(x_test_float)/2)):
    i_value_f32 = tf.dtypes.cast(i_value, tf.float32)
    yield [i_value_f32]

In [39]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_load)
converter.representative_dataset = tf.lite.RepresentativeDataset(representative_data_gen)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

In [40]:
tflite_model_int_quant = converter.convert()
TF_MODEL_I8= model_path + "mlp_int8.tflite"
open(TF_MODEL_I8, "wb").write(tflite_model_int_quant)

INFO:tensorflow:Assets written to: /tmp/tmprcwbd4zi/assets


INFO:tensorflow:Assets written to: /tmp/tmprcwbd4zi/assets
2025-06-23 13:35:37.979069: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2025-06-23 13:35:37.979087: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2025-06-23 13:35:37.979216: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmprcwbd4zi
2025-06-23 13:35:37.979592: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2025-06-23 13:35:37.979600: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: /tmp/tmprcwbd4zi
2025-06-23 13:35:37.980703: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2025-06-23 13:35:37.996546: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /tmp/tmprcwbd4zi
2025-06-23 13:35:38.001449: I tensorflow/cc/saved_model/loader.cc:314] SavedModel

2240

In [41]:
interpreter = tf.lite.Interpreter(model_path=TF_MODEL_I8)
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
interpreter.allocate_tensors()

# Here we manually quantize the float32 data to provide int8 inputs
input_scale, input_zero_point = input_details[0]['quantization']
np_x_test = x_test_float32 / input_scale + input_zero_point

output_data = []

for x in np_x_test:
  interpreter.set_tensor(input_details[0]['index'], x.reshape(input_details[0]['shape']).astype("int8"))
  interpreter.invoke()
  output_data.append(interpreter.get_tensor(output_details[0]['index'])[0])

y_test_pred_lite_iquant = np.array(output_data)

In [43]:
print(input_details[0])
print(output_details[0])

{'name': 'serving_default_dense_input:0', 'index': 0, 'shape': array([ 1, 12], dtype=int32), 'shape_signature': array([-1, 12], dtype=int32), 'dtype': <class 'numpy.int8'>, 'quantization': (0.003921568859368563, -128), 'quantization_parameters': {'scales': array([0.00392157], dtype=float32), 'zero_points': array([-128], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}
{'name': 'StatefulPartitionedCall:0', 'index': 7, 'shape': array([1, 4], dtype=int32), 'shape_signature': array([-1,  4], dtype=int32), 'dtype': <class 'numpy.int8'>, 'quantization': (0.00390625, -128), 'quantization_parameters': {'scales': array([0.00390625], dtype=float32), 'zero_points': array([-128], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}


classify states

In [44]:
#test state-cycles
y_predict = np.argmax(y_test_pred_lite_iquant, axis=1)
y_pred_smoothed = smooth_labels(y_predict,3)     #apply 3rd median filter
y_recognized=le_state.inverse_transform(y_pred_smoothed.astype(int))
df_temp=df_testset.copy()
df_temp["recognized_label"]=y_recognized
df_temp=df_temp.reset_index()

#classify duty-cycle
df_recognized_states_jun23 = create_segments_state(datetime.datetime(2023, 6, 1),datetime.datetime(2023, 7, 1),df_temp)
df_recognized_states_aug23 = create_segments_state(datetime.datetime(2023, 8, 1),datetime.datetime(2023, 9, 1),df_temp)
df_recognized_states_okt23 = create_segments_state(datetime.datetime(2023, 10, 1),datetime.datetime(2023, 11, 1),df_temp)
df_recognized_states_dec23 = create_segments_state(datetime.datetime(2023, 12, 1),datetime.datetime(2024, 1, 1),df_temp)

folder_path = dir_model_pred+"int8"
os.makedirs(folder_path, exist_ok=True)

if True:
    create_reference_label_file(folder_path+"/jun23_state.txt",df_recognized_states_jun23)
    create_reference_label_file(folder_path+"/aug23_state.txt",df_recognized_states_aug23)
    create_reference_label_file(folder_path+"/okt23_state.txt",df_recognized_states_okt23)
    create_reference_label_file(folder_path+"/dec23_state.txt",df_recognized_states_dec23)        

classify cycle

In [45]:
folder_path_train_labels = dir_exp1_approach2+classifier_state+"/"+str(seed)+"/"
aux= df_timestamps(pd.read_csv(folder_path_train_labels+file_name_states_DS1[0],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
recog_label_jun21 = ndarray_labels(data_DS1[0].index[0],data_DS1[0].index[-1],aux,downsampled_freq)
aux = df_timestamps(pd.read_csv(folder_path_train_labels+file_name_states_DS1[1],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
recog_label_okt21 = ndarray_labels(data_DS1[1].index[0],data_DS1[1].index[-1],aux,downsampled_freq)
aux = df_timestamps(pd.read_csv(folder_path_train_labels+file_name_states_DS1[2],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
recog_label_jan22 = ndarray_labels(data_DS1[2].index[0],data_DS1[2].index[-1],aux,downsampled_freq)
aux = df_timestamps(pd.read_csv(folder_path_train_labels+file_name_states_DS1[3],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
recog_label_april22 = ndarray_labels(data_DS1[3].index[0],data_DS1[3].index[-1],aux,downsampled_freq)
dfs_ref_state["recog_label"]=np.concatenate([recog_label_jun21, recog_label_okt21,recog_label_jan22,recog_label_april22])

train_sequences = find_non_consecutive_sequences(dfs_ref_state, dfs_ref_cycle,delta,column_name="recog_label")

# # Determine the maximum sequence length for padding and add some extra value
max_sequence_length = max(len(seq['non_consecutive_labels']) for seq in train_sequences)+3

# # Encode the sequences for training
train_features = [le_state.transform(seq['non_consecutive_labels']) for seq in train_sequences]
train_features_padded = pad_sequences(train_features, max_sequence_length)
train_labels = le_cycle.transform([seq['label'] for seq in train_sequences])
X_res, y_res = train_features_padded, train_labels

#import classified state_labels
aux = df_timestamps(pd.read_csv(folder_path+"/jun23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
data_csv_jun23_float["recognized_label"] = ndarray_labels(data_csv_jun23_float.index[0],data_csv_jun23_float.index[-1],aux,downsampled_freq)
aux = df_timestamps(pd.read_csv(folder_path+"/aug23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
data_csv_aug23_float["recognized_label"] = ndarray_labels(data_csv_aug23_float.index[0],data_csv_aug23_float.index[-1],aux,downsampled_freq)
aux = df_timestamps(pd.read_csv(folder_path+"/okt23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
data_csv_okt23_float["recognized_label"] = ndarray_labels(data_csv_okt23_float.index[0],data_csv_okt23_float.index[-1],aux,downsampled_freq)
aux = df_timestamps(pd.read_csv(folder_path+"/dec23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
data_csv_dec23_float["recognized_label"] = ndarray_labels(data_csv_dec23_float.index[0],data_csv_dec23_float.index[-1],aux,downsampled_freq)

In [46]:
#compute the detection of duty-cycle for each month
threshold_speed = 2.5
threshold_speed_normalized = (threshold_speed-scaler.data_min_[2])/scaler.data_range_[2]
# threshold_speed_quant = (threshold_speed_normalized*(2**32 - 1)).astype('float')
data_csv_jun23_float.loc[data_csv_jun23_float['Speed_order3'] > threshold_speed_normalized ,'detected_cycles'] = 'Cycle'
data_csv_aug23_float.loc[data_csv_aug23_float['Speed_order3'] > threshold_speed_normalized ,'detected_cycles'] = 'Cycle'
data_csv_okt23_float.loc[data_csv_okt23_float['Speed_order3'] > threshold_speed_normalized ,'detected_cycles'] = 'Cycle'
data_csv_dec23_float.loc[data_csv_dec23_float['Speed_order3'] > threshold_speed_normalized ,'detected_cycles'] = 'Cycle'

detection_jun23 = boundaries_cycles(data_csv_jun23_float)
detection_aug23 = boundaries_cycles(data_csv_aug23_float)
detection_okt23 = boundaries_cycles(data_csv_okt23_float)
detection_dec23 = boundaries_cycles(data_csv_dec23_float)

#merge test data
dfs_data = [data_csv_jun23_float,data_csv_aug23_float,data_csv_okt23_float,data_csv_dec23_float]
dfs_detection_cycle = [detection_jun23,detection_aug23,detection_okt23,detection_dec23]

file_name= ["jun23_cycle.txt" ,"aug23_cycle.txt", 
            "okt23_cycle.txt","dec23_cycle.txt"]

#train
clf_duty_uint8 = train_cycle_classifier(classifier_cycle,X_res, y_res,seed)

#compute the sequence for each month of data
all_sequences = []
for i in range(len(dfs_data)):
    data_sequences = find_non_consecutive_sequences(dfs_data[i], dfs_detection_cycle[i],delta,column_name="recognized_label")
    all_sequences.append(data_sequences)

for i in range(len(dfs_data)):
    test_sequences = all_sequences[i]

    # Encode the sequences for testing
    test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in test_sequences]
    test_features_padded = pad_sequences(test_features, max_sequence_length)

    #test
    y_pred = clf_duty_uint8.predict(test_features_padded)

    # save the results in a file
    df_temp= pd.DataFrame()
    df_temp["start"]=dfs_detection_cycle[i]["start"]
    df_temp["end"]=dfs_detection_cycle[i]["end"]
    df_temp["label"]=le_cycle.inverse_transform(y_pred)

    if flag_save_results:
        folder_path = dir_model_pred + "int8/"
        os.makedirs(folder_path, exist_ok=True)

        create_reference_label_file(folder_path+file_name[i],df_temp)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


convert model to C

In [47]:
!xxd -i  ./results/c_code/approach3/mlp_int8.tflite > ./results/c_code/approach3/mlp_int8.h

In [48]:
from emlearn import convert

cmodel = convert(clf_duty_uint8, method='inline',dtype='uint8_t')
cmodel.save(file = model_path + "clf_duty_uint8.h", name='clf_duty')

tmp/myinlinetree.c: In function ‘classify’:
    6 |         printf("%d,%f\n", row, (float)myinlinetree_predict(values, length));
      |                                                            ^~~~~~
      |                                                            |
      |                                                            const float *
In file included from tmp/myinlinetree.c:2:
tmp/myinlinetree.h:5153:45: note: expected ‘const uint8_t *’ {aka ‘const unsigned char *’} but argument is of type ‘const float *’
 5153 | int32_t myinlinetree_predict(const uint8_t *features, int32_t features_length) {
      |                              ~~~~~~~~~~~~~~~^~~~~~~~


'\n\n\n    // !!! This file is generated using emlearn !!!\n\n    #include <eml_trees.h>\n    \n\nEmlTreesNode clf_duty_nodes[978] = {\n  { 1, 2.8516892655061974, 1, 19 },\n  { 5, 1.9123220834472785, 2, 15 },\n  { 0, 2.856760682937112, 3, 8 },\n  { 3, 1.5818645318885634, 4, 10 },\n  { 0, 1.285560169390198, 5, 8 },\n  { 4, 1.62462162721186, 6, 9 },\n  { 4, 0.8876280337048825, 7, 9 },\n  { 2, 0.5317330260443189, 8, 8 },\n  { -1, 0, -1, -1 },\n  { -1, 1, -1, -1 },\n  { 1, 1.4275219759100684, 11, 13 },\n  { 2, 2.8778161014792585, 12, 8 },\n  { 0, 0.20944428360529443, 8, 9 },\n  { 4, 1.993628840424879, 14, 9 },\n  { 4, 0.10130490367361573, 9, 9 },\n  { 6, 0.7027177641646554, 8, 16 },\n  { 8, 0.37898516998579035, 17, 8 },\n  { 6, 2.7813649353484458, 18, 9 },\n  { 7, 1.3027019627870535, 8, 8 },\n  { 4, 0.08220052909208486, 20, 8 },\n  { 0, 1.518249496127595, 8, 21 },\n  { 3, 0.3926442728343626, 8, 9 },\n  { 3, 1.8131194369928536, 23, 31 },\n  { 2, 1.150271020899653, 8, 24 },\n  { 2, 2.3136512

### performance

In [49]:
df_results = pd.DataFrame(columns=['quantization','state_classifier', "detection mean",
                                    'Abnormal mean F1-score','Normal mean F1-score','Overall mean F1-score'])

dir = './results/reference_cycle_labels/'
reference_path = dir
collar = 202.75

classifier_states=["mlp"]
classifier_cycles=["xtree"]
quantizations=["float","int8"]
for classifier_state,classifier_cycle in zip(classifier_states,classifier_cycles):
    for q in quantizations:
        result_path = dir_model_pred +q+"/"
        f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal , f1score_normal, precision_normal, recall_normal= compute_classification_sedeval(reference_path,result_path,collar)
        detection_file_overall = compute_detection_sedeval(reference_path,result_path,collar)
        print("---------- "+classifier_state+ " - "+q+" ----------")
        print("DETECTION: "+ str(detection_file_overall*100) )
        print("ABNORMAL:")
        print("F1-score: "+ str(f1score_abnormal*100) )
        print("Precision: "+ str(precision_abnormal*100) )
        print("Recall: "+ str(recall_abnormal*100) )
        print("NORMAL:")
        print("F1-score: "+ str(f1score_normal*100) )
        print("Precision: "+ str(precision_normal*100) )
        print("Recall: "+ str(recall_normal*100) )
        print("OVERALL:")
        print("F1-score: "+ str(f1score_file*100)  )
        print("Precision: "+ str(precision_file*100) )
        print("Recall: "+ str(recall_file*100) )

        dflocal = pd.DataFrame({'quantization':q,
                                'state_classifier':classifier_state, 
                                "detection mean": np.mean(detection_file_overall)*100,
                                'Abnormal mean F1-score':np.mean(f1score_abnormal)*100,
                                'Normal mean F1-score':np.mean(f1score_normal)*100,
                                'Overall mean F1-score':np.mean(f1score_file)*100},index=[0])
            
        df_results = pd.concat([df_results, dflocal], ignore_index=True)

df_results.to_csv(dir_model_pred + 'results_mcu.csv',index=False)

---------- mlp - float ----------
DETECTION: 99.16317991631799
ABNORMAL:
F1-score: 68.04979253112033
Precision: 60.29411764705882
Recall: 78.0952380952381
NORMAL:
F1-score: 89.5104895104895
Precision: 93.56725146198829
Recall: 85.79088471849866
OVERALL:
F1-score: 84.10041841004184
Precision: 84.10041841004184
Recall: 84.10041841004184
---------- mlp - int8 ----------
DETECTION: 99.16317991631799
ABNORMAL:
F1-score: 62.10045662100456
Precision: 59.64912280701754
Recall: 64.76190476190476
NORMAL:
F1-score: 88.7381275440977
Precision: 89.83516483516483
Recall: 87.66756032171583
OVERALL:
F1-score: 82.63598326359832
Precision: 82.63598326359832
Recall: 82.63598326359832
