Notebook corresponding to the "Approach-3" presented in the paper.

This is the same approach used in the ["Tinyml anomaly detection for industrial machines with periodic duty cycles" (Sensor Application Symposium 2024)](https://ieeexplore.ieee.org/abstract/document/10636584/), and serves as the baseline experiment.

Two experiments are carried out:
1) As in the SAS 2024 paper, the performance is evaluated with leave-one-month-out cross-validation (CV) on the original 4 months (called DS1).
2) The generalization is evaluated using the whole DS1 for training and the whole DS2 for testing.

This approach uses two classifiers, one for the internal-states and another one for the duty-cycles.
Therefore, each experiment is divided into four parts, because some classifiers have been trained with different seed initializers while others take no seed. The four combinations are:

 (1) Internal-state classifier with seed -> Duty-cycle classifier with seed.

 (2) Internal-state classifier without seed -> Duty-cycle classifier with seed.

 (3) Internal-state classifier with seed -> Duty-cycle classifier without seed.

 (4) Internal-state classifier without seed -> Duty-cycle classifier without seed.


 The predicted state labels are loaded from the files generated in approach-2. This avoids training the same classifiers again.

In [None]:
from custom_functions import *

import datetime
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

# Import data, extract features and pre-process

input data

In [None]:
# ---------------------------------------------------------------------------
# Load the eight monthly recordings, harmonise them and fill the gaps.
# ---------------------------------------------------------------------------
directory = "../../../data/"

# DS1: the first four months. These calls pass an extra positional argument
# (1) to read_month_data; its meaning is defined in custom_functions.
data_csv_jun21 = read_month_data(directory + 'Confidential_Drive_data_Jun2021.csv', 1)
data_csv_okt21 = read_month_data(directory + 'Confidential_Drive_data_Okt2021.csv', 1)
data_csv_jan22 = read_month_data(directory + 'Confidential_Drive_data_Jan2022.csv', 1)
data_csv_april22 = read_month_data(directory + 'Confidential_Drive_data_April2022.csv', 1)

# DS2: the four new months.
data_csv_jun23 = read_month_data(directory + 'Confidential_Drive_data_June2023_Drift20.csv')
data_csv_aug23 = read_month_data(directory + 'Confidential_Drive_data_Aug2023_Drift20.csv')
data_csv_okt23 = read_month_data(directory + 'Confidential_Drive_data_Oct2023_Drift20.csv')
data_csv_dec23 = read_month_data(directory + 'Confidential_Drive_data_Dec2023_Drift20.csv')

# Put the DS2 columns in the same order as the previous (DS1) csv files.
desired_order = ["High-pressure", "Low-pressure", "Speed"]
data_csv_jun23, data_csv_aug23, data_csv_okt23, data_csv_dec23 = (
    frame[desired_order]
    for frame in (data_csv_jun23, data_csv_aug23, data_csv_okt23, data_csv_dec23)
)

# The October 2023 data has duplicated index entries: keep the first of each.
data_csv_okt23 = data_csv_okt23[~data_csv_okt23.index.duplicated(keep='first')]

# Negative speeds are set to zero (in place, on every month).
for _frame in (data_csv_jun21, data_csv_okt21, data_csv_jan22, data_csv_april22,
               data_csv_jun23, data_csv_aug23, data_csv_okt23, data_csv_dec23):
    _frame.loc[_frame['Speed'] < 0, 'Speed'] = 0


def _on_minute_grid(frame):
    """Reindex *frame* on a 1-minute grid spanning its first to last index value.

    Timestamps missing from the original index become rows of NaN.
    """
    full_timestamp = pd.date_range(start=frame.index[0], end=frame.index[-1],
                                   inclusive="both", freq="1min")
    return frame.reindex(full_timestamp)


# Complete each dataset with the missing timestamps.
data_csv_jun21 = _on_minute_grid(data_csv_jun21)
data_csv_okt21 = _on_minute_grid(data_csv_okt21)
data_csv_jan22 = _on_minute_grid(data_csv_jan22)
data_csv_april22 = _on_minute_grid(data_csv_april22)
data_csv_jun23 = _on_minute_grid(data_csv_jun23)
data_csv_aug23 = _on_minute_grid(data_csv_aug23)
data_csv_okt23 = _on_minute_grid(data_csv_okt23)
data_csv_dec23 = _on_minute_grid(data_csv_dec23)

# Fill the NaN rows created above. interpolate_values is called for its effect
# on the frame (its return value is not used); per the original comment it
# uses linear interpolation.
for _frame in (data_csv_jun21, data_csv_okt21, data_csv_jan22, data_csv_april22,
               data_csv_jun23, data_csv_aug23, data_csv_okt23, data_csv_dec23):
    interpolate_values(_frame)

# Drop the temporaries of this cell.
del desired_order, directory, _frame, _on_minute_grid

compute features

In [None]:
# Run the feature extraction on every monthly frame (DS1 first, then DS2).
# extract_features is called for its effect on the frame; its return value is
# not used here.
list_data_csv = [
    data_csv_jun21, data_csv_okt21, data_csv_jan22, data_csv_april22,  # DS1
    data_csv_jun23, data_csv_aug23, data_csv_okt23, data_csv_dec23,    # DS2
]
for data in list_data_csv:
    extract_features(data)

ground truth reference

In [None]:
# Annotation files exported from Imagimob, one folder per DS1 month.
directory = "../../data/"
column_interest = ['Time(Seconds)', 'Length(Seconds)', "Label(string)"]

# Internal-state annotations, converted to timestamp tables by df_timestamps.
file_imagimob_1, file_imagimob_2, file_imagimob_3, file_imagimob_4 = (
    pd.read_csv(directory + label_file, usecols=column_interest)
    for label_file in (
        "April_2022/Label.label",
        "Jan_2022/Label.label",
        "Jun_2021/Label.label",
        "Okt_2021/Label.label",
    )
)
timestamps_april2022, timestamps_jan2022, timestamps_jun2021, timestamps_okt2021 = map(
    df_timestamps,
    (file_imagimob_1, file_imagimob_2, file_imagimob_3, file_imagimob_4),
)

# Duty-cycle annotations: same months, same processing. The file_imagimob_*
# names are reused and end up holding the duty-cycle tables.
file_imagimob_1, file_imagimob_2, file_imagimob_3, file_imagimob_4 = (
    pd.read_csv(directory + label_file, usecols=column_interest)
    for label_file in (
        "April_2022/Label_cycle.label",
        "Jan_2022/Label_cycle.label",
        "Jun_2021/Label_cycle.label",
        "Okt_2021/Label_cycle.label",
    )
)
(timestamps_cycle_april2022, timestamps_cycle_jan2022,
 timestamps_cycle_jun2021, timestamps_cycle_okt2021) = map(
    df_timestamps,
    (file_imagimob_1, file_imagimob_2, file_imagimob_3, file_imagimob_4),
)

In [None]:
# Sampling frequency string handed to ndarray_labels for every label vector.
downsampled_freq = '1T'

# (start, end) datetimes passed to ndarray_labels for each DS1 month.
# Note that the "Jan 2022" recording runs from 2021-12-21 to 2022-01-21.
_window_april22 = (datetime.datetime(2022, 4, 1), datetime.datetime(2022, 5, 1))
_window_jan22 = (datetime.datetime(2021, 12, 21), datetime.datetime(2022, 1, 21))
_window_jun21 = (datetime.datetime(2021, 6, 1), datetime.datetime(2021, 7, 1))
_window_okt21 = (datetime.datetime(2021, 10, 1), datetime.datetime(2021, 11, 1))

# Reference label vectors for the internal states.
true_label_april22 = ndarray_labels(*_window_april22, timestamps_april2022, downsampled_freq)
true_label_jan22 = ndarray_labels(*_window_jan22, timestamps_jan2022, downsampled_freq)
true_label_jun21 = ndarray_labels(*_window_jun21, timestamps_jun2021, downsampled_freq)
true_label_okt21 = ndarray_labels(*_window_okt21, timestamps_okt2021, downsampled_freq)

# Reference label vectors for the duty-cycles.
true_label_cycle_april22 = ndarray_labels(*_window_april22, timestamps_cycle_april2022, downsampled_freq)
true_label_cycle_jan22 = ndarray_labels(*_window_jan22, timestamps_cycle_jan2022, downsampled_freq)
true_label_cycle_jun21 = ndarray_labels(*_window_jun21, timestamps_cycle_jun2021, downsampled_freq)
true_label_cycle_okt21 = ndarray_labels(*_window_okt21, timestamps_cycle_okt2021, downsampled_freq)

# Entries equal to None are relabelled 'No_cycle'. The `== None` comparison is
# kept on purpose: it is evaluated element-wise by np.where, which `is None`
# would not do (assumes ndarray_labels returns a numpy array - TODO confirm).
(true_label_cycle_april22, true_label_cycle_jan22,
 true_label_cycle_jun21, true_label_cycle_okt21) = (
    np.where(cycle_labels == None, 'No_cycle', cycle_labels)
    for cycle_labels in (true_label_cycle_april22, true_label_cycle_jan22,
                         true_label_cycle_jun21, true_label_cycle_okt21)
)

Import ground-truth duty-cycle labels

In [None]:
# Duty-cycle annotation files exported from Imagimob, for DS1 and DS2.
directory = "../../data/"

(labels_jun21, labels_okt21, labels_jan22, labels_april22,
 labels_jun23, labels_aug23, labels_okt23, labels_dec23) = (
    import_cycle_labels(directory + cycle_file)
    for cycle_file in (
        "Jun_2021/Label_cycle.label",
        "Okt_2021/Label_cycle.label",
        "Jan_2022/Label_cycle.label",
        "April_2022/Label_cycle.label",
        "June_23/Label_cycle.label",
        "Aug_23/Label_cycle.label",
        "Okt_23/Label_cycle.label",
        "Dec_23/Label_cycle.label",
    )
)

# Only the DS2 tables go through replace_labels_cycles, which is called for
# its effect on the table (its return value is not used).
for data in (labels_jun23, labels_aug23, labels_okt23, labels_dec23):
    replace_labels_cycles(data)

 Data preparation and pre-processing

In [None]:
# Month-ordered containers (jun21, okt21, jan22, april22 / jun23, aug23, okt23, dec23).
data_DS1 = [data_csv_jun21, data_csv_okt21, data_csv_jan22, data_csv_april22]
data_DS2 = [data_csv_jun23, data_csv_aug23, data_csv_okt23, data_csv_dec23]

true_state_labels_DS1 = [true_label_jun21, true_label_okt21, true_label_jan22, true_label_april22]
timestamps_cycle_DS1 = [
    timestamps_cycle_jun2021,
    timestamps_cycle_okt2021,
    timestamps_cycle_jan2022,
    timestamps_cycle_april2022,
]

# State 'E' is merged into state 'B' in the reference labels. The arrays are
# modified in place, so the true_label_* names see the change as well.
for _state_labels in true_state_labels_DS1:
    _state_labels[_state_labels == 'E'] = 'B'

# All DS2 months stacked into a single test frame.
df_testset_DS2 = pd.concat(data_DS2)

# Per-month file names of the label files, in the same order as data_DS1.
file_name_states_DS1 = ["jun2021_state.txt", "okt2021_state.txt", "jan2022_state.txt", "april2022_state.txt"]
file_name_cycles_DS1 = ["jun2021_cycle.txt", "okt2021_cycle.txt", "jan2022_cycle.txt", "april2022_cycle.txt"]

scaler = MinMaxScaler()

# Output folders of this approach, one per experiment.
dir_exp1 = "./results/approach3/DS1/"
dir_exp2 = "./results/approach3/DS2/"

# When True, the experiment cells write their predictions to disk.
flag_save_results = True

Delete variables that are no longer required

In [None]:
# Free the intermediates of the annotation-reading cells. The state timestamp
# tables are no longer needed once the true_label_* vectors exist; the
# duty-cycle timestamp tables are kept.
del (
    timestamps_april2022, timestamps_jan2022, timestamps_jun2021, timestamps_okt2021,
    file_imagimob_1, file_imagimob_2, file_imagimob_3, file_imagimob_4,
    column_interest, directory,
)

In [None]:
# Variables shared by all experiment cells.

# Time tolerance forwarded to find_non_consecutive_sequences.
delta = pd.Timedelta(minutes=3)

# Label encoders: one for the duty-cycle classes (classifier target) and one
# for the state symbols that make up the non-consecutive label sequences.
# LabelEncoder.fit returns the fitted encoder itself.
le_cycle = LabelEncoder().fit(['Normal', 'Abnormal'])
le_state = LabelEncoder().fit(['Z', 'A', 'B', 'C', 'D', 'None'])

scaler_cycle = MinMaxScaler()

# Input folders (state predictions produced by approach-2) and output folders
# (cycle predictions of this approach), one pair per experiment.
dir_exp1_approach2 = "./results/approach2/DS1/"
dir_exp2_approach2 = "./results/approach2/DS2/"
dir_exp1_approach3 = "./results/approach3/DS1/"
dir_exp2_approach3 = "./results/approach3/DS2/"

# When True, the experiment cells write their predictions to disk.
flag_save_results = True

# Experiment 1
Train/test on DS1 using leave-one-month-out CV

In [None]:
def leave_one_month_out_data_split(i,data_DS1,true_state_labels_DS1,delta,timestamps_cycle=None):
    """Build the training and test material of one leave-one-month-out fold.

    Month ``i`` is held out for testing; every other month is used to build
    the training sequences of the duty-cycle classifier.

    Parameters
    ----------
    i : int
        Position of the held-out month in ``data_DS1``.
    data_DS1 : list of pandas.DataFrame
        One frame per month. The held-out frame must provide a
        ``"Speed_order3"`` column.
    true_state_labels_DS1 : list of array-like
        State labels of each month, in the same order as ``data_DS1`` and with
        one entry per row of the corresponding frame.
    delta : pandas.Timedelta
        Forwarded unchanged to ``find_non_consecutive_sequences``.
    timestamps_cycle : list of pandas.DataFrame, optional
        Duty-cycle annotation tables, one per month, in the same order as
        ``data_DS1``. When omitted, the module-level ``timestamps_cycle_DS1``
        is used, which is what the function always did before this parameter
        existed.

    Returns
    -------
    X_res
        Encoded training sequences padded to ``max_sequence_length``.
    y_res
        Encoded duty-cycle label of each training sequence.
    x_test : pandas.DataFrame
        Copy of the held-out frame with the extra columns
        ``"detected_cycles"`` and ``"ref_state_label"``.
    max_sequence_length : int
        Length of the longest training sequence plus 3.

    Raises
    ------
    ValueError
        If no training sequence is found in the training months.

    Notes
    -----
    The encoders ``le_state`` and ``le_cycle`` are read from module scope.
    """
    if timestamps_cycle is None:
        # Backward-compatible default: the tables defined at notebook level.
        timestamps_cycle = timestamps_cycle_DS1

    # Training material: every month except the held-out one.
    dfs_ref_state = pd.concat([frame for j, frame in enumerate(data_DS1) if j != i])
    dfs_ref_state["ref_label"] = np.concatenate(
        [labels for j, labels in enumerate(true_state_labels_DS1) if j != i]
    )
    dfs_ref_cycle = pd.concat([table for j, table in enumerate(timestamps_cycle) if j != i])

    train_sequences = find_non_consecutive_sequences(dfs_ref_state, dfs_ref_cycle,delta,column_name="ref_label")
    if len(train_sequences) == 0:
        # The original failed here with an opaque "max() arg is an empty
        # sequence"; keep the exception type but say what went wrong.
        raise ValueError(
            "No training sequence found for the fold that holds out month index %d" % i
        )

    # Maximum sequence length used for padding, plus some extra room.
    max_sequence_length = max(len(seq['non_consecutive_labels']) for seq in train_sequences)+3

    # Encode the sequences (features) and the cycle labels (target).
    train_features = [le_state.transform(seq['non_consecutive_labels']) for seq in train_sequences]
    X_res = pad_sequences(train_features, max_sequence_length)
    y_res = le_cycle.transform([seq['label'] for seq in train_sequences])

    # Report the class distribution of the training target.
    unique_values, counts = np.unique(y_res, return_counts=True)
    value_counts = dict(zip(unique_values, counts))
    value_porcentages = dict(zip(unique_values, counts/sum(counts)*100))
    print("Value class-counts in Balanced dataset:",value_counts)
    print("Value class-porcentage in Balanced dataset:",value_porcentages)

    # Held-out month: work on a copy so that the caller's frame is untouched.
    x_test=data_DS1[i].copy()
    x_test['detected_cycles']=x_test["Speed_order3"].apply(lambda x: detect_cycle(x))
    x_test["ref_state_label"]=true_state_labels_DS1[i]

    return X_res, y_res, x_test ,max_sequence_length

Since the results path depends on whether or not each classifier uses the seed parameter, there are 4 combinations of classifiers: "classifier_state" w/ and w/o seed and "classifier_cycle" w/ and w/o seed.

1) classifier_cycle with seed - classifier_state with seed

In [None]:
# Experiment 1, combination 1: seeded state classifier -> seeded cycle classifier.
# For every (state classifier, state seed) pair the state predictions saved by
# approach-2 are loaded; then, for every held-out month, every
# (cycle classifier, cycle seed) pair is trained and its predictions are saved.
seeds=list(range(0,10))
classifiers=["rf","dt","xtree","mlp"]
state_file_columns = ["Time(Seconds)","Length(Seconds)","Label(string)"]

for seed_state in seeds:
    for classifier_state in classifiers:
        # Load the recognized state labels of the four DS1 months.
        state_labels_dir = dir_exp1_approach2+classifier_state+"/"+str(seed_state)+"/"
        recog_state_labels_DS1 = []
        for month_data, state_file in zip(data_DS1, file_name_states_DS1):
            aux = df_timestamps(pd.read_csv(state_labels_dir+state_file,sep='\t',names=state_file_columns))
            recog_state_labels_DS1.append(
                ndarray_labels(month_data.index[0],month_data.index[-1],aux,downsampled_freq)
            )
        # Per-month names kept for cells that may still refer to them.
        recog_label_jun21, recog_label_okt21, recog_label_jan22, recog_label_april22 = recog_state_labels_DS1

        for i in range(len(data_DS1)):
            x_res, y_res,x_test,max_sequence_length = leave_one_month_out_data_split(i,data_DS1,recog_state_labels_DS1,delta)

            # The test-side features depend only on the held-out month and on
            # the state labels, not on the cycle classifier or its seed, so
            # they are built once per fold instead of once per trained model.
            x_test["recognized_label"] = recog_state_labels_DS1[i]
            detection_x_test = boundaries_cycles(x_test)
            data_sequences = find_non_consecutive_sequences(x_test, detection_x_test,delta,column_name="recognized_label")
            test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in data_sequences]
            test_features_padded = pad_sequences(test_features, max_sequence_length)

            for classifier_cycle in classifiers:
                for seed_cycle in seeds:
                    clf_duty = train_cycle_classifier(classifier_cycle,x_res, y_res,seed_cycle)

                    #test
                    y_pred = clf_duty.predict(test_features_padded)

                    # One row per detected cycle: boundaries and predicted class.
                    df_temp= pd.DataFrame()
                    df_temp["start"]=detection_x_test["start"]
                    df_temp["end"]=detection_x_test["end"]
                    df_temp["label"]=le_cycle.inverse_transform(y_pred)

                    if flag_save_results:
                        # <cycle classifier>/<cycle seed>/<state classifier>/<state seed>/
                        folder_path = dir_exp1_approach3+classifier_cycle+"/"+str(seed_cycle)+"/"+classifier_state+"/"+str(seed_state)+"/"
                        os.makedirs(folder_path, exist_ok=True)

                        create_reference_label_file(folder_path+file_name_cycles_DS1[i],df_temp)

2) classifier_cycle with seed - classifier_state without seed

In [None]:
# Experiment 1, combination 2: unseeded state classifier -> seeded cycle classifier.
# The state predictions of approach-2 are stored directly under the state
# classifier folder (no seed sub-folder).
seeds=list(range(0,10))
classifiers=["rf","dt","xtree","mlp"]
classifiers_cycle=["rf","dt","xtree","mlp"]
classifiers_state=["xgboost","nb"]
state_file_columns = ["Time(Seconds)","Length(Seconds)","Label(string)"]


for classifier_state in classifiers_state:
    # Load the recognized state labels of the four DS1 months.
    state_labels_dir = dir_exp1_approach2+classifier_state+"/"
    recog_state_labels_DS1 = []
    for month_data, state_file in zip(data_DS1, file_name_states_DS1):
        aux = df_timestamps(pd.read_csv(state_labels_dir+state_file,sep='\t',names=state_file_columns))
        recog_state_labels_DS1.append(
            ndarray_labels(month_data.index[0],month_data.index[-1],aux,downsampled_freq)
        )
    # Per-month names kept for cells that may still refer to them.
    recog_label_jun21, recog_label_okt21, recog_label_jan22, recog_label_april22 = recog_state_labels_DS1

    for i in range(len(data_DS1)):
        x_res, y_res,x_test,max_sequence_length = leave_one_month_out_data_split(i,data_DS1,recog_state_labels_DS1,delta)

        # The test-side features depend only on the held-out month and on the
        # state labels, not on the cycle classifier or its seed, so they are
        # built once per fold instead of once per trained model.
        x_test["recognized_label"] = recog_state_labels_DS1[i]
        detection_x_test = boundaries_cycles(x_test)
        data_sequences = find_non_consecutive_sequences(x_test, detection_x_test,delta,column_name="recognized_label")
        test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in data_sequences]
        test_features_padded = pad_sequences(test_features, max_sequence_length)

        for seed_cycle in seeds:
            for classifier_cycle in classifiers_cycle:
                clf_duty = train_cycle_classifier(classifier_cycle,x_res, y_res,seed_cycle)

                #test
                y_pred = clf_duty.predict(test_features_padded)

                # One row per detected cycle: boundaries and predicted class.
                df_temp= pd.DataFrame()
                df_temp["start"]=detection_x_test["start"]
                df_temp["end"]=detection_x_test["end"]
                df_temp["label"]=le_cycle.inverse_transform(y_pred)

                if flag_save_results:
                    # <cycle classifier>/<cycle seed>/<state classifier>/
                    folder_path = dir_exp1_approach3+classifier_cycle+"/"+str(seed_cycle)+"/"+classifier_state+"/"
                    os.makedirs(folder_path, exist_ok=True)

                    create_reference_label_file(folder_path+file_name_cycles_DS1[i],df_temp)

3) classifier_cycle without seed - classifier_state with seed

In [None]:
# Experiment 1, combination 3: seeded state classifier -> unseeded cycle classifier.
# `seed` is the seed of the state classifier; the cycle classifier is trained
# without a seed argument.
seeds=list(range(0,10))
classifiers=["rf","dt","xtree","mlp"]
classifiers_cycle=["xgboost","nb"]
classifiers_state=["rf","dt","xtree","mlp"]
state_file_columns = ["Time(Seconds)","Length(Seconds)","Label(string)"]

for seed in seeds:
    for classifier_state in classifiers_state:
        # Load the recognized state labels of the four DS1 months.
        state_labels_dir = dir_exp1_approach2+classifier_state+"/"+str(seed)+"/"
        recog_state_labels_DS1 = []
        for month_data, state_file in zip(data_DS1, file_name_states_DS1):
            aux = df_timestamps(pd.read_csv(state_labels_dir+state_file,sep='\t',names=state_file_columns))
            recog_state_labels_DS1.append(
                ndarray_labels(month_data.index[0],month_data.index[-1],aux,downsampled_freq)
            )
        # Per-month names kept for cells that may still refer to them.
        recog_label_jun21, recog_label_okt21, recog_label_jan22, recog_label_april22 = recog_state_labels_DS1

        for i in range(len(data_DS1)):
            x_res, y_res,x_test,max_sequence_length = leave_one_month_out_data_split(i,data_DS1,recog_state_labels_DS1,delta)

            # The test-side features depend only on the held-out month and on
            # the state labels, not on the cycle classifier, so they are built
            # once per fold instead of once per trained model.
            x_test["recognized_label"] = recog_state_labels_DS1[i]
            detection_x_test = boundaries_cycles(x_test)
            data_sequences = find_non_consecutive_sequences(x_test, detection_x_test,delta,column_name="recognized_label")
            test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in data_sequences]
            test_features_padded = pad_sequences(test_features, max_sequence_length)

            for classifier_cycle in classifiers_cycle:
                clf_duty = train_cycle_classifier(classifier_cycle,x_res, y_res)

                #test
                y_pred = clf_duty.predict(test_features_padded)

                # One row per detected cycle: boundaries and predicted class.
                df_temp= pd.DataFrame()
                df_temp["start"]=detection_x_test["start"]
                df_temp["end"]=detection_x_test["end"]
                df_temp["label"]=le_cycle.inverse_transform(y_pred)

                if flag_save_results:
                    # <cycle classifier>/<state classifier>/<state seed>/
                    folder_path = dir_exp1_approach3+classifier_cycle+"/"+classifier_state+"/"+str(seed)+"/"
                    os.makedirs(folder_path, exist_ok=True)

                    create_reference_label_file(folder_path+file_name_cycles_DS1[i],df_temp)

4) classifier_cycle without seed - classifier_state without seed

In [None]:
# Experiment 1, combination 4: unseeded state classifier -> unseeded cycle classifier.
# The same classifier list is used for both stages.
seeds=list(range(0,10))
classifiers=["xgboost","nb"]
state_file_columns = ["Time(Seconds)","Length(Seconds)","Label(string)"]

for classifier_state in classifiers:
    # Load the recognized state labels of the four DS1 months.
    state_labels_dir = dir_exp1_approach2+classifier_state+"/"
    recog_state_labels_DS1 = []
    for month_data, state_file in zip(data_DS1, file_name_states_DS1):
        aux = df_timestamps(pd.read_csv(state_labels_dir+state_file,sep='\t',names=state_file_columns))
        recog_state_labels_DS1.append(
            ndarray_labels(month_data.index[0],month_data.index[-1],aux,downsampled_freq)
        )
    # Per-month names kept for cells that may still refer to them.
    recog_label_jun21, recog_label_okt21, recog_label_jan22, recog_label_april22 = recog_state_labels_DS1

    for i in range(len(data_DS1)):
        x_res, y_res,x_test,max_sequence_length = leave_one_month_out_data_split(i,data_DS1,recog_state_labels_DS1,delta)

        # The test-side features depend only on the held-out month and on the
        # state labels, not on the cycle classifier, so they are built once
        # per fold instead of once per trained model.
        x_test["recognized_label"] = recog_state_labels_DS1[i]
        detection_x_test = boundaries_cycles(x_test)
        data_sequences = find_non_consecutive_sequences(x_test, detection_x_test,delta,column_name="recognized_label")
        test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in data_sequences]
        test_features_padded = pad_sequences(test_features, max_sequence_length)

        for classifier_cycle in classifiers:
            clf_duty = train_cycle_classifier(classifier_cycle,x_res, y_res)

            #test
            y_pred = clf_duty.predict(test_features_padded)

            # One row per detected cycle: boundaries and predicted class.
            df_temp= pd.DataFrame()
            df_temp["start"]=detection_x_test["start"]
            df_temp["end"]=detection_x_test["end"]
            df_temp["label"]=le_cycle.inverse_transform(y_pred)

            if flag_save_results:
                # <cycle classifier>/<state classifier>/
                folder_path = dir_exp1_approach3+classifier_cycle+"/"+classifier_state+"/"
                os.makedirs(folder_path, exist_ok=True)

                create_reference_label_file(folder_path+file_name_cycles_DS1[i],df_temp)

## performance

In [None]:
# Result table of experiment 1: one row per (cycle classifier, state
# classifier) pair. The metric columns are named "<class> <statistic> <metric>"
# and are ordered with all the "mean" columns first, then the "std" columns.
_metric_columns = [
    f"{group} {statistic} {metric}"
    for statistic in ("mean", "std")
    for group in ("Abnormal", "Normal", "Overall")
    for metric in ("F1-score", "Precision", "Recall")
]
df_results_exp1 = pd.DataFrame(
    columns=['cycle_classifier', 'state_classifier'] + _metric_columns
)

# Seeds and classifier names, split by whether the classifier takes a seed.
seeds = list(range(10))
classifiers = ["xgboost", "rf", "dt", "nb", "xtree", "mlp"]
classifiers_w_seed = ["rf", "dt", "xtree", "mlp"]
classifiers_wo_seed = ["xgboost", "nb"]

# Folder of the reference duty-cycle labels.
# NOTE: the name `dir` shadows the Python builtin; it is kept because other
# cells may rely on it.
dir = './results/reference_cycle_labels/'
reference_path = dir

# Collar value handed to compute_classification_sedeval.
collar = 202.75


# 1) classifier_cycle with seed - classifier_state with seed
# For each classifier pair, the scores of every (cycle seed, state seed) run
# are collected and summarised as mean and standard deviation, in percent.
for classifier_cycle in classifiers_w_seed:
    for classifier_state in classifiers_w_seed:
        # Fresh accumulators for this classifier pair.
        f1score_files_overall, precision_files_overall, recall_files_overall = [],[],[]
        detection_files_abnormal, f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal = [],[],[],[]
        detection_files_normal, f1score_files_normal, precision_files_normal, recall_files_normal = [],[],[],[]

        for seed_cycle in seeds:
            for seed_state in seeds:
                result_path = dir_exp1_approach3+classifier_cycle+"/"+str(seed_cycle)+"/"+classifier_state+"/"+str(seed_state)+"/"
                (f1score_file, precision_file, recall_file,
                 f1score_abnormal, precision_abnormal, recall_abnormal,
                 f1score_normal, precision_normal, recall_normal) = compute_classification_sedeval(reference_path,result_path,collar)

                # File each score of this run into its accumulator.
                for run_scores, score in (
                    (f1score_files_overall, f1score_file),
                    (precision_files_overall, precision_file),
                    (recall_files_overall, recall_file),
                    (f1score_files_abnormal, f1score_abnormal),
                    (precision_files_abnormal, precision_abnormal),
                    (recall_files_abnormal, recall_abnormal),
                    (f1score_files_normal, f1score_normal),
                    (precision_files_normal, precision_normal),
                    (recall_files_normal, recall_normal),
                ):
                    run_scores.append(score)

        # Summary row: all the "mean" columns first, then all the "std" ones.
        summary_row = {'cycle_classifier':classifier_cycle, 'state_classifier':classifier_state}
        for statistic_name, statistic in (('mean', np.mean), ('std', np.std)):
            for group_name, f1_runs, precision_runs, recall_runs in (
                ('Abnormal', f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal),
                ('Normal', f1score_files_normal, precision_files_normal, recall_files_normal),
                ('Overall', f1score_files_overall, precision_files_overall, recall_files_overall),
            ):
                column_prefix = group_name + ' ' + statistic_name + ' '
                summary_row[column_prefix + 'F1-score'] = statistic(f1_runs)*100
                summary_row[column_prefix + 'Precision'] = statistic(precision_runs)*100
                summary_row[column_prefix + 'Recall'] = statistic(recall_runs)*100

        dflocal = pd.DataFrame(summary_row,index=[0])

        df_results_exp1 = pd.concat([df_results_exp1, dflocal], ignore_index=True)

# 2) classifier_cycle with seed - classifier_state without seed
# For each classifier pair, the scores of every cycle-seed run are collected
# and summarised as mean and standard deviation, in percent.
for classifier_cycle in classifiers_w_seed:
    for classifier_state in classifiers_wo_seed:
        # Fresh accumulators for this classifier pair.
        f1score_files_overall, precision_files_overall, recall_files_overall = [],[],[]
        detection_files_abnormal, f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal = [],[],[],[]
        detection_files_normal, f1score_files_normal, precision_files_normal, recall_files_normal = [],[],[],[]

        for seed_cycle in seeds:
            result_path = dir_exp1_approach3+classifier_cycle+"/"+str(seed_cycle)+"/"+classifier_state+"/"
            (f1score_file, precision_file, recall_file,
             f1score_abnormal, precision_abnormal, recall_abnormal,
             f1score_normal, precision_normal, recall_normal) = compute_classification_sedeval(reference_path,result_path,collar)

            # File each score of this run into its accumulator.
            for run_scores, score in (
                (f1score_files_overall, f1score_file),
                (precision_files_overall, precision_file),
                (recall_files_overall, recall_file),
                (f1score_files_abnormal, f1score_abnormal),
                (precision_files_abnormal, precision_abnormal),
                (recall_files_abnormal, recall_abnormal),
                (f1score_files_normal, f1score_normal),
                (precision_files_normal, precision_normal),
                (recall_files_normal, recall_normal),
            ):
                run_scores.append(score)

        # Summary row: all the "mean" columns first, then all the "std" ones.
        summary_row = {'cycle_classifier':classifier_cycle, 'state_classifier':classifier_state}
        for statistic_name, statistic in (('mean', np.mean), ('std', np.std)):
            for group_name, f1_runs, precision_runs, recall_runs in (
                ('Abnormal', f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal),
                ('Normal', f1score_files_normal, precision_files_normal, recall_files_normal),
                ('Overall', f1score_files_overall, precision_files_overall, recall_files_overall),
            ):
                column_prefix = group_name + ' ' + statistic_name + ' '
                summary_row[column_prefix + 'F1-score'] = statistic(f1_runs)*100
                summary_row[column_prefix + 'Precision'] = statistic(precision_runs)*100
                summary_row[column_prefix + 'Recall'] = statistic(recall_runs)*100

        dflocal = pd.DataFrame(summary_row,index=[0])

        df_results_exp1 = pd.concat([df_results_exp1, dflocal], ignore_index=True)

# 3) classifier_cycle without seed - classifier_state with seed
# The duty-cycle classifier takes no seed here, so the saved results vary only
# with the seed of the state classifier: one scoring call per seed, summarised
# as one mean/std row per classifier pair.
for classifier_cycle in classifiers_wo_seed:
    for classifier_state in classifiers_w_seed:
        # Reset EVERY accumulator for this classifier pair, including the
        # "normal" ones; otherwise the scores appended for earlier pairs leak
        # into this pair's Normal mean/std.
        detection_files_abnormal, f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal = [],[],[],[]
        detection_files_normal, f1score_files_normal, precision_files_normal, recall_files_normal = [],[],[],[]
        f1score_files_overall, precision_files_overall, recall_files_overall = [],[],[]
        for seed_state in seeds:
            # Results are stored as <cycle classifier>/<state classifier>/<state seed>/
            result_path = dir_exp1_approach3+classifier_cycle+"/"+classifier_state+"/"+str(seed_state)+"/"
            f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal, f1score_normal, precision_normal, recall_normal = compute_classification_sedeval(reference_path,result_path,collar)
            f1score_files_overall.append(f1score_file)
            precision_files_overall.append(precision_file)
            recall_files_overall.append(recall_file)
            f1score_files_abnormal.append(f1score_abnormal)
            precision_files_abnormal.append(precision_abnormal)
            recall_files_abnormal.append(recall_abnormal)
            f1score_files_normal.append(f1score_normal)
            precision_files_normal.append(precision_normal)
            recall_files_normal.append(recall_normal)

        # One summary row per pair; scores are scaled by 100 before being stored.
        dflocal = pd.DataFrame({'cycle_classifier':classifier_cycle, 'state_classifier':classifier_state, 
                            'Abnormal mean F1-score':np.mean(f1score_files_abnormal)*100,
                            'Abnormal mean Precision':np.mean(precision_files_abnormal)*100,
                            'Abnormal mean Recall':np.mean(recall_files_abnormal)*100,
                            'Normal mean F1-score':np.mean(f1score_files_normal)*100,
                            'Normal mean Precision':np.mean(precision_files_normal)*100,
                            'Normal mean Recall':np.mean(recall_files_normal)*100,
                            'Overall mean F1-score':np.mean(f1score_files_overall)*100,
                            'Overall mean Precision':np.mean(precision_files_overall)*100,
                            'Overall mean Recall':np.mean(recall_files_overall)*100,
                            'Abnormal std F1-score':np.std(f1score_files_abnormal)*100,
                            'Abnormal std Precision':np.std(precision_files_abnormal)*100,
                            'Abnormal std Recall':np.std(recall_files_abnormal)*100,
                            'Normal std F1-score':np.std(f1score_files_normal)*100,
                            'Normal std Precision':np.std(precision_files_normal)*100,
                            'Normal std Recall':np.std(recall_files_normal)*100,
                            'Overall std F1-score':np.std(f1score_files_overall)*100,
                            'Overall std Precision':np.std(precision_files_overall)*100,
                            'Overall std Recall':np.std(recall_files_overall)*100},index=[0])
        
        df_results_exp1 = pd.concat([df_results_exp1, dflocal], ignore_index=True)

# 4) classifier_cycle without seed - classifier_state without seed
# Neither classifier takes a seed, so each pair is scored from a single call.
# Only the "mean" columns are set for these rows; no "std" keys are written.
for classifier_cycle in classifiers_wo_seed:
    for classifier_state in classifiers_wo_seed:
        # Results are stored as <cycle classifier>/<state classifier>/
        result_path = dir_exp1_approach3 + classifier_cycle + "/" + classifier_state + "/"
        (f1score_file, precision_file, recall_file,
         f1score_abnormal, precision_abnormal, recall_abnormal,
         f1score_normal, precision_normal, recall_normal) = compute_classification_sedeval(
            reference_path, result_path, collar)

        # Assemble the row key by key (scores scaled by 100), then wrap it in a
        # one-row frame so it can be appended to the running results table.
        row = {'cycle_classifier': classifier_cycle, 'state_classifier': classifier_state}
        row['Abnormal mean F1-score'] = f1score_abnormal * 100
        row['Abnormal mean Precision'] = precision_abnormal * 100
        row['Abnormal mean Recall'] = recall_abnormal * 100
        row['Normal mean F1-score'] = f1score_normal * 100
        row['Normal mean Precision'] = precision_normal * 100
        row['Normal mean Recall'] = recall_normal * 100
        row['Overall mean F1-score'] = f1score_file * 100
        row['Overall mean Precision'] = precision_file * 100
        row['Overall mean Recall'] = recall_file * 100
        dflocal = pd.DataFrame(row, index=[0])

        df_results_exp1 = pd.concat([df_results_exp1, dflocal], ignore_index=True)

# Write the full experiment-1 table next to the approach-3 result folders.
df_results_exp1.to_csv(dir_exp1_approach3 + 'experiment1_results.csv', index=False)

# Experiment 2
Train in DS1 and test in DS2

The duty-cycle classifier must be trained with the same process used for the state classifier, i.e. using the four months of data in DS1.
For testing, the previously classified state labels (`state_labels`) are used.
Therefore, there are multiple combinations of duty-cycle classifier and state classifier: each duty-cycle classifier is tested on the output of every state classifier.

In [None]:
# Stack the DS1 entries into single frames used for training:
# `data_DS1` holds the state data and `timestamps_cycle_DS1` the duty-cycle
# reference timestamps (presumably one entry per DS1 month - confirm upstream).
dfs_ref_state = pd.concat(objs=data_DS1)
dfs_ref_cycle = pd.concat(objs=timestamps_cycle_DS1)



Since the results path depends on whether or not each classifier uses a seed parameter, there are 4 combinations of classifiers: "classifier_state" w/ and w/o seed and "classifier_cycle" w/ and w/o seed.

1) classifier_cycle with seed - classifier_state with seed

In [None]:
seeds=list(range(0,10))
classifiers=["rf","dt","xtree","mlp"]

# Combination 1: seeded state classifier feeding a seeded duty-cycle classifier.
for seed_state in seeds:
    for classifier_state in classifiers:
        # Training labels: DS1 state labels as predicted by this state
        # classifier/seed, read from the approach-2 result files (one per month).
        folder_path = dir_exp1_approach2+classifier_state+"/"+str(seed_state)+"/"
        aux= df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[0],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_jun21 = ndarray_labels(data_DS1[0].index[0],data_DS1[0].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[1],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_okt21 = ndarray_labels(data_DS1[1].index[0],data_DS1[1].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[2],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_jan22 = ndarray_labels(data_DS1[2].index[0],data_DS1[2].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[3],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_april22 = ndarray_labels(data_DS1[3].index[0],data_DS1[3].index[-1],aux,downsampled_freq)
        dfs_ref_state["recog_label"]=np.concatenate([recog_label_jun21, recog_label_okt21,recog_label_jan22,recog_label_april22])

        train_sequences = find_non_consecutive_sequences(dfs_ref_state, dfs_ref_cycle,delta,column_name="recog_label")

        # Determine the maximum sequence length for padding and add some extra value
        max_sequence_length = max(len(seq['non_consecutive_labels']) for seq in train_sequences)+3

        # Encode the sequences for training
        train_features = [le_state.transform(seq['non_consecutive_labels']) for seq in train_sequences]
        X_res = pad_sequences(train_features, max_sequence_length)
        y_res = le_cycle.transform([seq['label'] for seq in train_sequences])

        # Import the classified state labels of the four test months
        folder_path = dir_exp2_approach2+classifier_state+"/"+str(seed_state)
        aux = df_timestamps(pd.read_csv(folder_path+"/jun23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_jun23["recognized_label"] = ndarray_labels(data_csv_jun23.index[0],data_csv_jun23.index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+"/aug23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_aug23["recognized_label"] = ndarray_labels(data_csv_aug23.index[0],data_csv_aug23.index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+"/okt23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_okt23["recognized_label"] = ndarray_labels(data_csv_okt23.index[0],data_csv_okt23.index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+"/dec23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_dec23["recognized_label"] = ndarray_labels(data_csv_dec23.index[0],data_csv_dec23.index[-1],aux,downsampled_freq)


        # Compute the detection of duty-cycles for each month
        data_csv_jun23['detected_cycles']=data_csv_jun23["Speed_order3"].apply(lambda x: detect_cycle(x))
        data_csv_aug23['detected_cycles']=data_csv_aug23["Speed_order3"].apply(lambda x: detect_cycle(x))
        data_csv_okt23['detected_cycles']=data_csv_okt23["Speed_order3"].apply(lambda x: detect_cycle(x))
        data_csv_dec23['detected_cycles']=data_csv_dec23["Speed_order3"].apply(lambda x: detect_cycle(x))

        detection_jun23 = boundaries_cycles(data_csv_jun23)
        detection_aug23 = boundaries_cycles(data_csv_aug23)
        detection_okt23 = boundaries_cycles(data_csv_okt23)
        detection_dec23 = boundaries_cycles(data_csv_dec23)


        # Merge test data (same month order in all three lists)
        dfs_data = [data_csv_jun23,data_csv_aug23,data_csv_okt23,data_csv_dec23]
        dfs_detection_cycle = [detection_jun23,detection_aug23,detection_okt23,detection_dec23]

        file_name= ["jun23_cycle.txt" ,"aug23_cycle.txt","okt23_cycle.txt","dec23_cycle.txt"]

        # The test sequences depend only on the state labels loaded above, not
        # on the duty-cycle classifier or its seed. They are therefore
        # extracted, encoded and padded once here instead of once per
        # (classifier_cycle, seed_cycle) pair. This assumes
        # find_non_consecutive_sequences is deterministic for the same inputs.
        all_sequences = []
        test_features_by_month = []
        for month_data, month_cycles in zip(dfs_data, dfs_detection_cycle):
            data_sequences = find_non_consecutive_sequences(month_data, month_cycles,delta,column_name="recognized_label")
            all_sequences.append(data_sequences)

            # Encode the sequences for testing
            test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in data_sequences]
            test_features_by_month.append(pad_sequences(test_features, max_sequence_length))

        for classifier_cycle in classifiers:
            for seed_cycle in seeds:
                # Train the duty-cycle classifier on the DS1 sequences
                clf_duty = train_cycle_classifier(classifier_cycle,X_res, y_res,seed_cycle)

                # Test it on every test month, using the state labels
                # recognised by the current state classifier
                for i, test_features_padded in enumerate(test_features_by_month):
                    y_pred = clf_duty.predict(test_features_padded)

                    # Save the results in a file
                    df_temp= pd.DataFrame()
                    df_temp["start"]=dfs_detection_cycle[i]["start"]
                    df_temp["end"]=dfs_detection_cycle[i]["end"]
                    df_temp["label"]=le_cycle.inverse_transform(y_pred)

                    if flag_save_results:
                        folder_path = dir_exp2_approach3+classifier_cycle+"/"+str(seed_cycle)+"/"+classifier_state+"/"+str(seed_state)+"/"
                        os.makedirs(folder_path, exist_ok=True)

                        create_reference_label_file(folder_path+file_name[i],df_temp)


2) classifier_cycle with seed - classifier_state without seed

In [None]:
seeds=list(range(0,10))
classifiers_cycle=["rf","dt","xtree","mlp"]
classifiers_state=["xgboost","nb"]

# Combination 2: unseeded state classifier feeding a seeded duty-cycle classifier.
for classifier_state in classifiers_state:
    # Training labels: DS1 state labels as predicted by this state classifier,
    # read from the approach-2 result files (one per month, no seed folder).
    folder_path = dir_exp1_approach2+classifier_state+"/"
    aux= df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[0],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_jun21 = ndarray_labels(data_DS1[0].index[0],data_DS1[0].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[1],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_okt21 = ndarray_labels(data_DS1[1].index[0],data_DS1[1].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[2],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_jan22 = ndarray_labels(data_DS1[2].index[0],data_DS1[2].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[3],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_april22 = ndarray_labels(data_DS1[3].index[0],data_DS1[3].index[-1],aux,downsampled_freq)
    dfs_ref_state["recog_label"]=np.concatenate([recog_label_jun21, recog_label_okt21,recog_label_jan22,recog_label_april22])

    train_sequences = find_non_consecutive_sequences(dfs_ref_state, dfs_ref_cycle,delta,column_name="recog_label")

    # Determine the maximum sequence length for padding and add some extra value
    max_sequence_length = max(len(seq['non_consecutive_labels']) for seq in train_sequences)+3

    # Encode the sequences for training
    train_features = [le_state.transform(seq['non_consecutive_labels']) for seq in train_sequences]
    X_res = pad_sequences(train_features, max_sequence_length)
    y_res = le_cycle.transform([seq['label'] for seq in train_sequences])

    # Import the classified state labels of the four test months
    folder_path = dir_exp2_approach2+classifier_state
    aux = df_timestamps(pd.read_csv(folder_path+"/jun23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_jun23["recognized_label"] = ndarray_labels(data_csv_jun23.index[0],data_csv_jun23.index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+"/aug23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_aug23["recognized_label"] = ndarray_labels(data_csv_aug23.index[0],data_csv_aug23.index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+"/okt23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_okt23["recognized_label"] = ndarray_labels(data_csv_okt23.index[0],data_csv_okt23.index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+"/dec23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_dec23["recognized_label"] = ndarray_labels(data_csv_dec23.index[0],data_csv_dec23.index[-1],aux,downsampled_freq)


    # Compute the detection of duty-cycles for each month
    data_csv_jun23['detected_cycles']=data_csv_jun23["Speed_order3"].apply(lambda x: detect_cycle(x))
    data_csv_aug23['detected_cycles']=data_csv_aug23["Speed_order3"].apply(lambda x: detect_cycle(x))
    data_csv_okt23['detected_cycles']=data_csv_okt23["Speed_order3"].apply(lambda x: detect_cycle(x))
    data_csv_dec23['detected_cycles']=data_csv_dec23["Speed_order3"].apply(lambda x: detect_cycle(x))

    detection_jun23 = boundaries_cycles(data_csv_jun23)
    detection_aug23 = boundaries_cycles(data_csv_aug23)
    detection_okt23 = boundaries_cycles(data_csv_okt23)
    detection_dec23 = boundaries_cycles(data_csv_dec23)


    # Merge test data (same month order in all three lists)
    dfs_data = [data_csv_jun23,data_csv_aug23,data_csv_okt23,data_csv_dec23]
    dfs_detection_cycle = [detection_jun23,detection_aug23,detection_okt23,detection_dec23]

    file_name= ["jun23_cycle.txt" ,"aug23_cycle.txt","okt23_cycle.txt","dec23_cycle.txt"]

    # The test sequences depend only on the state labels loaded above, not on
    # the duty-cycle classifier or its seed. They are therefore extracted,
    # encoded and padded once here instead of once per
    # (seed_cycle, classifier_cycle) pair. This assumes
    # find_non_consecutive_sequences is deterministic for the same inputs.
    all_sequences = []
    test_features_by_month = []
    for month_data, month_cycles in zip(dfs_data, dfs_detection_cycle):
        data_sequences = find_non_consecutive_sequences(month_data, month_cycles,delta,column_name="recognized_label")
        all_sequences.append(data_sequences)

        # Encode the sequences for testing
        test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in data_sequences]
        test_features_by_month.append(pad_sequences(test_features, max_sequence_length))

    for seed_cycle in seeds:
        for classifier_cycle in classifiers_cycle:
            # Train the duty-cycle classifier on the DS1 sequences
            clf_duty = train_cycle_classifier(classifier_cycle,X_res, y_res,seed_cycle)

            # Test it on every test month, using the state labels recognised
            # by the current state classifier
            for i, test_features_padded in enumerate(test_features_by_month):
                y_pred = clf_duty.predict(test_features_padded)

                # Save the results in a file
                df_temp= pd.DataFrame()
                df_temp["start"]=dfs_detection_cycle[i]["start"]
                df_temp["end"]=dfs_detection_cycle[i]["end"]
                df_temp["label"]=le_cycle.inverse_transform(y_pred)

                if flag_save_results:
                    folder_path = dir_exp2_approach3+classifier_cycle+"/"+str(seed_cycle)+"/"+classifier_state+"/"
                    os.makedirs(folder_path, exist_ok=True)

                    create_reference_label_file(folder_path+file_name[i],df_temp)


3) classifier_cycle without seed - classifier_state with seed

In [None]:
seeds=list(range(0,10))
classifiers_cycle=["nb","xgboost"]
classifiers_state=["rf","dt","xtree","mlp"]

# Combination 3: seeded state classifier feeding an unseeded duty-cycle classifier.
for seed in seeds:
    for classifier_state in classifiers_state:
        # Training labels: DS1 state labels as predicted by this state
        # classifier/seed, read from the approach-2 result files (one per month).
        folder_path = dir_exp1_approach2+classifier_state+"/"+str(seed)+"/"
        aux= df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[0],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_jun21 = ndarray_labels(data_DS1[0].index[0],data_DS1[0].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[1],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_okt21 = ndarray_labels(data_DS1[1].index[0],data_DS1[1].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[2],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_jan22 = ndarray_labels(data_DS1[2].index[0],data_DS1[2].index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[3],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        recog_label_april22 = ndarray_labels(data_DS1[3].index[0],data_DS1[3].index[-1],aux,downsampled_freq)
        dfs_ref_state["recog_label"]=np.concatenate([recog_label_jun21, recog_label_okt21,recog_label_jan22,recog_label_april22])

        train_sequences = find_non_consecutive_sequences(dfs_ref_state, dfs_ref_cycle,delta,column_name="recog_label")

        # Determine the maximum sequence length for padding and add some extra value
        max_sequence_length = max(len(seq['non_consecutive_labels']) for seq in train_sequences)+3

        # Encode the sequences for training
        train_features = [le_state.transform(seq['non_consecutive_labels']) for seq in train_sequences]
        X_res = pad_sequences(train_features, max_sequence_length)
        y_res = le_cycle.transform([seq['label'] for seq in train_sequences])

        # Import the classified state labels of the four test months
        folder_path = dir_exp2_approach2+classifier_state+"/"+str(seed)
        aux = df_timestamps(pd.read_csv(folder_path+"/jun23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_jun23["recognized_label"] = ndarray_labels(data_csv_jun23.index[0],data_csv_jun23.index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+"/aug23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_aug23["recognized_label"] = ndarray_labels(data_csv_aug23.index[0],data_csv_aug23.index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+"/okt23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_okt23["recognized_label"] = ndarray_labels(data_csv_okt23.index[0],data_csv_okt23.index[-1],aux,downsampled_freq)
        aux = df_timestamps(pd.read_csv(folder_path+"/dec23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
        data_csv_dec23["recognized_label"] = ndarray_labels(data_csv_dec23.index[0],data_csv_dec23.index[-1],aux,downsampled_freq)


        # Compute the detection of duty-cycles for each month
        data_csv_jun23['detected_cycles']=data_csv_jun23["Speed_order3"].apply(lambda x: detect_cycle(x))
        data_csv_aug23['detected_cycles']=data_csv_aug23["Speed_order3"].apply(lambda x: detect_cycle(x))
        data_csv_okt23['detected_cycles']=data_csv_okt23["Speed_order3"].apply(lambda x: detect_cycle(x))
        data_csv_dec23['detected_cycles']=data_csv_dec23["Speed_order3"].apply(lambda x: detect_cycle(x))

        detection_jun23 = boundaries_cycles(data_csv_jun23)
        detection_aug23 = boundaries_cycles(data_csv_aug23)
        detection_okt23 = boundaries_cycles(data_csv_okt23)
        detection_dec23 = boundaries_cycles(data_csv_dec23)


        # Merge test data (same month order in all three lists)
        dfs_data = [data_csv_jun23,data_csv_aug23,data_csv_okt23,data_csv_dec23]
        dfs_detection_cycle = [detection_jun23,detection_aug23,detection_okt23,detection_dec23]

        file_name= ["jun23_cycle.txt" ,"aug23_cycle.txt","okt23_cycle.txt","dec23_cycle.txt"]

        # The test sequences depend only on the state labels loaded above, not
        # on the duty-cycle classifier. They are therefore extracted, encoded
        # and padded once here instead of once per classifier_cycle. This
        # assumes find_non_consecutive_sequences is deterministic for the
        # same inputs.
        all_sequences = []
        test_features_by_month = []
        for month_data, month_cycles in zip(dfs_data, dfs_detection_cycle):
            data_sequences = find_non_consecutive_sequences(month_data, month_cycles,delta,column_name="recognized_label")
            all_sequences.append(data_sequences)

            # Encode the sequences for testing
            test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in data_sequences]
            test_features_by_month.append(pad_sequences(test_features, max_sequence_length))

        for classifier_cycle in classifiers_cycle:
            # Train the duty-cycle classifier on the DS1 sequences (no seed argument)
            clf_duty = train_cycle_classifier(classifier_cycle,X_res, y_res)

            # Test it on every test month, using the state labels recognised
            # by the current state classifier
            for i, test_features_padded in enumerate(test_features_by_month):
                y_pred = clf_duty.predict(test_features_padded)

                # Save the results in a file
                df_temp= pd.DataFrame()
                df_temp["start"]=dfs_detection_cycle[i]["start"]
                df_temp["end"]=dfs_detection_cycle[i]["end"]
                df_temp["label"]=le_cycle.inverse_transform(y_pred)

                if flag_save_results:
                    folder_path = dir_exp2_approach3+classifier_cycle+"/"+classifier_state+"/"+str(seed)+"/"
                    os.makedirs(folder_path, exist_ok=True)

                    create_reference_label_file(folder_path+file_name[i],df_temp)

4) classifier_cycle without seed - classifier_state without seed

In [None]:
classifiers=["xgboost","nb"]
# Combination 4: neither classifier takes a seed; the same list of classifier
# names is used for the state classifier and for the duty-cycle classifier.
for classifier_state in classifiers:
    # Training labels: DS1 state labels as predicted by this state classifier,
    # read from the approach-2 result files (one per month, no seed folder).
    folder_path = dir_exp1_approach2+classifier_state+"/"
    aux= df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[0],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_jun21 = ndarray_labels(data_DS1[0].index[0],data_DS1[0].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[1],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_okt21 = ndarray_labels(data_DS1[1].index[0],data_DS1[1].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[2],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_jan22 = ndarray_labels(data_DS1[2].index[0],data_DS1[2].index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+file_name_states_DS1[3],sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    recog_label_april22 = ndarray_labels(data_DS1[3].index[0],data_DS1[3].index[-1],aux,downsampled_freq)
    dfs_ref_state["recog_label"]=np.concatenate([recog_label_jun21, recog_label_okt21,recog_label_jan22,recog_label_april22])

    train_sequences = find_non_consecutive_sequences(dfs_ref_state, dfs_ref_cycle,delta,column_name="recog_label")

    # Determine the maximum sequence length for padding and add some extra value
    max_sequence_length = max(len(seq['non_consecutive_labels']) for seq in train_sequences)+3

    # Encode the sequences for training
    train_features = [le_state.transform(seq['non_consecutive_labels']) for seq in train_sequences]
    X_res = pad_sequences(train_features, max_sequence_length)
    y_res = le_cycle.transform([seq['label'] for seq in train_sequences])

    # Import the classified state labels of the four test months
    folder_path = dir_exp2_approach2+classifier_state
    aux = df_timestamps(pd.read_csv(folder_path+"/jun23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_jun23["recognized_label"] = ndarray_labels(data_csv_jun23.index[0],data_csv_jun23.index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+"/aug23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_aug23["recognized_label"] = ndarray_labels(data_csv_aug23.index[0],data_csv_aug23.index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+"/okt23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_okt23["recognized_label"] = ndarray_labels(data_csv_okt23.index[0],data_csv_okt23.index[-1],aux,downsampled_freq)
    aux = df_timestamps(pd.read_csv(folder_path+"/dec23_state.txt",sep='\t',names=["Time(Seconds)","Length(Seconds)","Label(string)"]))
    data_csv_dec23["recognized_label"] = ndarray_labels(data_csv_dec23.index[0],data_csv_dec23.index[-1],aux,downsampled_freq)


    # Compute the detection of duty-cycles for each month
    data_csv_jun23['detected_cycles']=data_csv_jun23["Speed_order3"].apply(lambda x: detect_cycle(x))
    data_csv_aug23['detected_cycles']=data_csv_aug23["Speed_order3"].apply(lambda x: detect_cycle(x))
    data_csv_okt23['detected_cycles']=data_csv_okt23["Speed_order3"].apply(lambda x: detect_cycle(x))
    data_csv_dec23['detected_cycles']=data_csv_dec23["Speed_order3"].apply(lambda x: detect_cycle(x))

    detection_jun23 = boundaries_cycles(data_csv_jun23)
    detection_aug23 = boundaries_cycles(data_csv_aug23)
    detection_okt23 = boundaries_cycles(data_csv_okt23)
    detection_dec23 = boundaries_cycles(data_csv_dec23)


    # Merge test data (same month order in all three lists)
    dfs_data = [data_csv_jun23,data_csv_aug23,data_csv_okt23,data_csv_dec23]
    dfs_detection_cycle = [detection_jun23,detection_aug23,detection_okt23,detection_dec23]

    file_name= ["jun23_cycle.txt" ,"aug23_cycle.txt","okt23_cycle.txt","dec23_cycle.txt"]

    # The test sequences depend only on the state labels loaded above, not on
    # the duty-cycle classifier. They are therefore extracted, encoded and
    # padded once here instead of once per classifier_cycle. This assumes
    # find_non_consecutive_sequences is deterministic for the same inputs.
    all_sequences = []
    test_features_by_month = []
    for month_data, month_cycles in zip(dfs_data, dfs_detection_cycle):
        data_sequences = find_non_consecutive_sequences(month_data, month_cycles,delta,column_name="recognized_label")
        all_sequences.append(data_sequences)

        # Encode the sequences for testing
        test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in data_sequences]
        test_features_by_month.append(pad_sequences(test_features, max_sequence_length))

    for classifier_cycle in classifiers:
        # Train the duty-cycle classifier on the DS1 sequences (no seed argument)
        clf_duty = train_cycle_classifier(classifier_cycle,X_res, y_res)

        # Test it on every test month, using the state labels recognised by
        # the current state classifier
        for i, test_features_padded in enumerate(test_features_by_month):
            y_pred = clf_duty.predict(test_features_padded)

            # Save the results in a file
            df_temp= pd.DataFrame()
            df_temp["start"]=dfs_detection_cycle[i]["start"]
            df_temp["end"]=dfs_detection_cycle[i]["end"]
            df_temp["label"]=le_cycle.inverse_transform(y_pred)

            if flag_save_results:
                folder_path = dir_exp2_approach3+classifier_cycle+"/"+classifier_state+"/"
                os.makedirs(folder_path, exist_ok=True)

                create_reference_label_file(folder_path+file_name[i],df_temp)


## performance

In [None]:
# Empty result table for experiment 2. After the two identifying columns come
# the nine "mean" columns and then the nine "std" columns, each set ordered
# Abnormal / Normal / Overall x F1-score / Precision / Recall.
result_columns = ['cycle_classifier', 'state_classifier'] + [
    f'{group} {stat} {metric}'
    for stat in ('mean', 'std')
    for group in ('Abnormal', 'Normal', 'Overall')
    for metric in ('F1-score', 'Precision', 'Recall')
]
df_results_exp2 = pd.DataFrame(columns=result_columns)

# Seeds and classifier names used to locate the saved result folders.
seeds = list(range(10))
classifiers = ["xgboost", "rf", "dt", "nb", "xtree", "mlp"]
classifiers_w_seed = ["rf", "dt", "xtree", "mlp"]
classifiers_wo_seed = ["xgboost", "nb"]

# Folder with the reference duty-cycle label files.
# NOTE(review): `dir` shadows the Python builtin; it is kept because other
# cells may read it - consider renaming once all usages are checked.
dir = './results/reference_cycle_labels/'
reference_path = dir

# Collar value handed to compute_classification_sedeval.
collar = 202.75


# 1) classifier_cycle with seed - classifier_state with seed
# Both classifiers are seeded, so each classifier pair is scored over the full
# seed_cycle x seed_state grid of saved results and summarised in one row.
for classifier_cycle in classifiers_w_seed:
    for classifier_state in classifiers_w_seed:
        # Fresh accumulators for this classifier pair
        detection_files_abnormal, detection_files_normal = [], []
        f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal = [], [], []
        f1score_files_normal, precision_files_normal, recall_files_normal = [], [], []
        f1score_files_overall, precision_files_overall, recall_files_overall = [], [], []

        for seed_cycle in seeds:
            for seed_state in seeds:
                # Results are stored as <cycle clf>/<cycle seed>/<state clf>/<state seed>/
                result_path = (dir_exp2_approach3 + classifier_cycle + "/" + str(seed_cycle) + "/"
                               + classifier_state + "/" + str(seed_state) + "/")
                (f1score_file, precision_file, recall_file,
                 f1score_abnormal, precision_abnormal, recall_abnormal,
                 f1score_normal, precision_normal, recall_normal) = compute_classification_sedeval(
                    reference_path, result_path, collar)
                check_nan = (f1score_file, precision_file, recall_file,
                             f1score_abnormal, precision_abnormal, recall_abnormal,
                             f1score_normal, precision_normal, recall_normal)

                # Skip the whole run unless every score is truthy.
                # NOTE(review): all() tests truthiness, so this drops runs with
                # a 0.0 or None score but does NOT drop NaN (NaN is truthy).
                # Confirm this matches the intent behind the name `check_nan`.
                if not all(check_nan):
                    continue

                # Accumulators listed in the same order as `check_nan`
                score_lists = (f1score_files_overall, precision_files_overall, recall_files_overall,
                               f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal,
                               f1score_files_normal, precision_files_normal, recall_files_normal)
                for score_list, score in zip(score_lists, check_nan):
                    score_list.append(score)

        # Mean and standard deviation over the kept runs, scaled by 100
        summary = {
            'cycle_classifier': classifier_cycle,
            'state_classifier': classifier_state,
            'Abnormal mean F1-score': np.mean(f1score_files_abnormal) * 100,
            'Abnormal mean Precision': np.mean(precision_files_abnormal) * 100,
            'Abnormal mean Recall': np.mean(recall_files_abnormal) * 100,
            'Normal mean F1-score': np.mean(f1score_files_normal) * 100,
            'Normal mean Precision': np.mean(precision_files_normal) * 100,
            'Normal mean Recall': np.mean(recall_files_normal) * 100,
            'Overall mean F1-score': np.mean(f1score_files_overall) * 100,
            'Overall mean Precision': np.mean(precision_files_overall) * 100,
            'Overall mean Recall': np.mean(recall_files_overall) * 100,
            'Abnormal std F1-score': np.std(f1score_files_abnormal) * 100,
            'Abnormal std Precision': np.std(precision_files_abnormal) * 100,
            'Abnormal std Recall': np.std(recall_files_abnormal) * 100,
            'Normal std F1-score': np.std(f1score_files_normal) * 100,
            'Normal std Precision': np.std(precision_files_normal) * 100,
            'Normal std Recall': np.std(recall_files_normal) * 100,
            'Overall std F1-score': np.std(f1score_files_overall) * 100,
            'Overall std Precision': np.std(precision_files_overall) * 100,
            'Overall std Recall': np.std(recall_files_overall) * 100,
        }
        dflocal = pd.DataFrame(summary, index=[0])

        df_results_exp2 = pd.concat([df_results_exp2, dflocal], ignore_index=True)

# 2) classifier_cycle with seed - classifier_state without seed
# One result row per (cycle, state) classifier pair: the metrics of every
# cycle seed are collected, then summarised as mean / std in percent.
for classifier_cycle in classifiers_w_seed:
    for classifier_state in classifiers_wo_seed:
        # Fresh accumulators for this classifier pair.
        detection_files_abnormal, detection_files_normal = [], []
        f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal = [], [], []
        f1score_files_normal, precision_files_normal, recall_files_normal = [], [], []
        f1score_files_overall, precision_files_overall, recall_files_overall = [], [], []

        # Same order as the values unpacked from compute_classification_sedeval.
        score_accumulators = (
            f1score_files_overall, precision_files_overall, recall_files_overall,
            f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal,
            f1score_files_normal, precision_files_normal, recall_files_normal,
        )

        for seed_cycle in seeds:
            result_path = dir_exp2_approach3 + classifier_cycle + "/" + str(seed_cycle) + "/" + classifier_state + "/"
            (f1score_file, precision_file, recall_file,
             f1score_abnormal, precision_abnormal, recall_abnormal,
             f1score_normal, precision_normal, recall_normal) = compute_classification_sedeval(reference_path, result_path, collar)
            check_nan = (f1score_file, precision_file, recall_file,
                         f1score_abnormal, precision_abnormal, recall_abnormal,
                         f1score_normal, precision_normal, recall_normal)
            # The run is kept only when every metric is truthy.
            # NOTE(review): all() drops 0.0 / None values but NOT NaN (NaN is
            # truthy in Python) - confirm this is the intended filter.
            if not all(check_nan):
                continue
            for accumulator, score in zip(score_accumulators, check_nan):
                accumulator.append(score)

        metric_groups = (
            ('Abnormal', f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal),
            ('Normal', f1score_files_normal, precision_files_normal, recall_files_normal),
            ('Overall', f1score_files_overall, precision_files_overall, recall_files_overall),
        )
        # Column order: all "mean" columns first, then all "std" columns.
        summary_row = {'cycle_classifier': classifier_cycle, 'state_classifier': classifier_state}
        for stat_label, stat_fn in (('mean', np.mean), ('std', np.std)):
            for group_label, f1_runs, precision_runs, recall_runs in metric_groups:
                summary_row[group_label + ' ' + stat_label + ' F1-score'] = stat_fn(f1_runs) * 100
                summary_row[group_label + ' ' + stat_label + ' Precision'] = stat_fn(precision_runs) * 100
                summary_row[group_label + ' ' + stat_label + ' Recall'] = stat_fn(recall_runs) * 100
        dflocal = pd.DataFrame(summary_row, index=[0])

        df_results_exp2 = pd.concat([df_results_exp2, dflocal], ignore_index=True)

# 3) classifier_cycle without seed - classifier_state with seed
# One result row per (cycle, state) classifier pair: the metrics of every
# state seed are collected, then summarised as mean / std in percent.
for classifier_cycle in classifiers_wo_seed:
    for classifier_state in classifiers_w_seed:
        # Reset ALL accumulators for this classifier pair. The "normal" lists
        # were previously not reset here, so their statistics kept mixing in
        # values appended for earlier classifier pairs.
        detection_files_abnormal, f1score_files_abnormal, precision_files_abnormal, recall_files_abnormal = [],[],[],[]
        detection_files_normal, f1score_files_normal, precision_files_normal, recall_files_normal = [],[],[],[]
        f1score_files_overall, precision_files_overall, recall_files_overall = [],[],[]
        for seed_state in seeds:
            result_path = dir_exp2_approach3+classifier_cycle+"/"+classifier_state+"/"+str(seed_state)+"/"
            f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal, f1score_normal, precision_normal, recall_normal = compute_classification_sedeval(reference_path,result_path,collar)
            check_nan=(f1score_file, precision_file, recall_file, f1score_abnormal, precision_abnormal, recall_abnormal, f1score_normal, precision_normal, recall_normal)
            # Keep the run only when every metric is truthy.
            # NOTE(review): all() drops 0.0 / None values but NOT NaN (NaN is
            # truthy in Python) - confirm this is the intended filter.
            if all(check_nan):
                f1score_files_overall.append(f1score_file)
                precision_files_overall.append(precision_file)
                recall_files_overall.append(recall_file)
                f1score_files_abnormal.append(f1score_abnormal)
                precision_files_abnormal.append(precision_abnormal)
                recall_files_abnormal.append(recall_abnormal)
                f1score_files_normal.append(f1score_normal)
                precision_files_normal.append(precision_normal)
                recall_files_normal.append(recall_normal)

        # Mean / standard deviation over the kept seeds, expressed in percent.
        dflocal = pd.DataFrame({'cycle_classifier':classifier_cycle, 'state_classifier':classifier_state, 
                            'Abnormal mean F1-score':np.mean(f1score_files_abnormal)*100,
                            'Abnormal mean Precision':np.mean(precision_files_abnormal)*100,
                            'Abnormal mean Recall':np.mean(recall_files_abnormal)*100,
                            'Normal mean F1-score':np.mean(f1score_files_normal)*100,
                            'Normal mean Precision':np.mean(precision_files_normal)*100,
                            'Normal mean Recall':np.mean(recall_files_normal)*100,
                            'Overall mean F1-score':np.mean(f1score_files_overall)*100,
                            'Overall mean Precision':np.mean(precision_files_overall)*100,
                            'Overall mean Recall':np.mean(recall_files_overall)*100,
                            'Abnormal std F1-score':np.std(f1score_files_abnormal)*100,
                            'Abnormal std Precision':np.std(precision_files_abnormal)*100,
                            'Abnormal std Recall':np.std(recall_files_abnormal)*100,
                            'Normal std F1-score':np.std(f1score_files_normal)*100,
                            'Normal std Precision':np.std(precision_files_normal)*100,
                            'Normal std Recall':np.std(recall_files_normal)*100,
                            'Overall std F1-score':np.std(f1score_files_overall)*100,
                            'Overall std Precision':np.std(precision_files_overall)*100,
                            'Overall std Recall':np.std(recall_files_overall)*100},index=[0])
        
        df_results_exp2 = pd.concat([df_results_exp2, dflocal], ignore_index=True)

# 4) classifier_cycle without seed - classifier_state without seed
# Neither classifier is seeded, so each pair has a single run: its metrics
# are stored directly in the "mean" columns and no "std" column is set.
for classifier_cycle in classifiers_wo_seed:
    for classifier_state in classifiers_wo_seed:
        result_path = dir_exp2_approach3 + classifier_cycle + "/" + classifier_state + "/"
        (f1score_file, precision_file, recall_file,
         f1score_abnormal, precision_abnormal, recall_abnormal,
         f1score_normal, precision_normal, recall_normal) = compute_classification_sedeval(reference_path, result_path, collar)
        check_nan = (f1score_file, precision_file, recall_file,
                     f1score_abnormal, precision_abnormal, recall_abnormal,
                     f1score_normal, precision_normal, recall_normal)
        # Skip the pair unless every metric is truthy.
        if not all(check_nan):
            continue

        single_run_row = {'cycle_classifier': classifier_cycle, 'state_classifier': classifier_state}
        for group_label, f1_value, precision_value, recall_value in (
            ('Abnormal', f1score_abnormal, precision_abnormal, recall_abnormal),
            ('Normal', f1score_normal, precision_normal, recall_normal),
            ('Overall', f1score_file, precision_file, recall_file),
        ):
            single_run_row[group_label + ' mean F1-score'] = f1_value * 100
            single_run_row[group_label + ' mean Precision'] = precision_value * 100
            single_run_row[group_label + ' mean Recall'] = recall_value * 100
        dflocal = pd.DataFrame(single_run_row, index=[0])

        df_results_exp2 = pd.concat([df_results_exp2, dflocal], ignore_index=True)

# Persist the experiment-2 table next to the experiment-2 predictions.
df_results_exp2.to_csv(dir_exp2_approach3 + 'experiment2_results.csv', index=False)

# Deployment on MCU

Data preparation and pre-processing

In [None]:
# Assemble the four DS1 months into one training frame and attach the
# reference state / duty-cycle labels.
df_dataset = pd.concat([data_csv_jun21, data_csv_okt21, data_csv_jan22, data_csv_april22])

df_dataset["ref_label"] = np.concatenate((true_label_jun21, true_label_okt21, true_label_jan22, true_label_april22), axis=0)
df_dataset["ref_label_cycle"] = np.concatenate((true_label_cycle_jun21, true_label_cycle_okt21, true_label_cycle_jan22, true_label_cycle_april22), axis=0)

# Drop the samples without a reference state label (their index values are
# kept in removed_indices).
removed_indices = df_dataset[df_dataset['ref_label'].isnull()].index.tolist()
df_dataset = df_dataset[df_dataset['ref_label'].notnull()]
df_dataset = df_dataset.reset_index()

# remove the recognized_label column added in the experiment1 or experiment2
if 'recognized_label' in df_dataset.columns:
    df_dataset = df_dataset.drop('recognized_label', axis=1)

# Column layout after reset_index(): [index, features..., state label, cycle label].
state_column = df_dataset.columns[-2]
cycle_column = df_dataset.columns[-1]

# Merge state 'E' into state 'B'. This is done once, with .loc on the frame
# itself, instead of twice through boolean assignment on a Series taken out of
# the frame (chained assignment, whose effect on the frame depends on pandas'
# copy semantics).
df_dataset.loc[df_dataset[state_column] == 'E', state_column] = 'B'

x = df_dataset[df_dataset.columns[1:-2]]
y_cycle = df_dataset[cycle_column]
y_state = df_dataset[state_column]

# normalize feature to range [0;1]
# clip=True keeps transformed values inside [0, 1] even when later data fall
# outside the range seen during fit.
scaler = MinMaxScaler(clip=True)
scaler.fit(x)
x_train = pd.DataFrame(scaler.transform(x), columns=x.columns)

Balance DS1 for training

In [None]:
# Balance the scaled DS1 features with respect to the internal-state labels.
x_train_balanced, y_train_balanced,_ = balance_dataset(x_train,y_state)

# Print the class distribution before balancing
unique_values, counts = np.unique(y_state, return_counts=True)
value_counts = dict(zip(unique_values, counts))
value_porcentages = dict(zip(unique_values, counts/sum(counts)*100))
print("Value class-counts in Unbalanced dataset:",value_counts)
print("Value class-porcentage in Unbalanced dataset:",value_porcentages)

# Print the class distribution after balancing
unique_values, counts = np.unique(y_train_balanced, return_counts=True)
value_counts = dict(zip(unique_values, counts))
value_porcentages = dict(zip(unique_values, counts/sum(counts)*100))
print("Value class-counts in Balanced dataset:",value_counts)
# Fixed label: these percentages describe the balanced set, not the unbalanced one.
print("Value class-porcentage in Balanced dataset:",value_porcentages)

# Remove the reporting temporaries from the notebook namespace.
del unique_values,counts,value_counts,value_porcentages

In [None]:
# Show the per-feature minimum / maximum learned by the scaler so they can be
# copied into the C implementation.
print("SCALED VALUES TO BE ADDED IN THE C-CODE")
print("Minimun: ", scaler.data_min_)
print("Maximun: ", scaler.data_max_)

Build the scaled test set (DS2) and the float training set

In [None]:
#test data (DS2)
# The four DS2 months are stacked and scaled with the scaler fitted on DS1.
df_testset = pd.concat([data_csv_jun23, data_csv_aug23, data_csv_okt23, data_csv_dec23])
x_test_float = pd.DataFrame(
    scaler.transform(df_testset),
    index=df_testset.index,
    columns=df_testset.columns,
)

# Independent copy of the balanced training features used by the float pipeline.
x_train_float = x_train_balanced.copy()
# Bare expression: shown as the notebook cell output.
x_train_float.describe()

In [None]:
# DS1 reference data (state samples and duty-cycle timestamps) as single frames.
dfs_ref_state = pd.concat(data_DS1)
dfs_ref_cycle = pd.concat(timestamps_cycle_DS1)

# Scaled copy of every DS2 month, keeping the original columns and index.
(data_csv_jun23_float,
 data_csv_aug23_float,
 data_csv_okt23_float,
 data_csv_dec23_float) = [
    pd.DataFrame(scaler.transform(raw_month), columns=raw_month.columns, index=raw_month.index)
    for raw_month in (data_csv_jun23, data_csv_aug23, data_csv_okt23, data_csv_dec23)
]

## mlp + xtree

In [None]:
import os

# These switches are set BEFORE TensorFlow is imported so the library sees
# them from the start; when they are set after the import they may have no
# effect (NOTE(review): confirm for the TensorFlow version in use).
os.environ["CUDA_VISIBLE_DEVICES"] = '-1'  # hide all GPUs (CPU-only run)
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'  # switch off oneDNN custom operations

import tensorflow as tf
from tensorflow.keras import activations
from tensorflow.keras import layers,metrics
print(tf.__version__)

In [None]:
# Classifier pair deployed on the MCU and the seed used for its training.
classifier_state = "mlp"
classifier_cycle = "xtree"
seed = 0

# Output locations: predictions of the deployed models / generated model files.
dir_model_pred = "./results/approach3/MCU/"
model_path = "./results/c_code/approach3/"

# File names of the Keras model and of its int8 TFLite version.
TF_MODEL = "mlp_float.keras"
TF_MODEL_I8 = "mlp_int8.tflite"

# Seed TensorFlow's global random generator.
tf.random.set_seed(seed)

In [None]:
# Train the internal-state classifier on the balanced, scaled DS1 features
# (the training helper is defined outside this section).
clf_state_float = train_state_supervised_classifier(classifier_state,x_train_float, y_train_balanced,seed)
# Bare expression: shows the returned object as the notebook cell output.
clf_state_float

In [None]:
# Reproduce in TensorFlow the MLP obtained with Sklearn
# Architecture: one hidden layer of 12 ReLU units and a 4-way softmax output.
model = tf.keras.Sequential([
    layers.Dense(12, input_dim=len(x_train_float.columns), activation='relu'),
    layers.Dense(4, activation='softmax'),
])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(
    optimizer=optimizer,
    loss='SparseCategoricalCrossentropy',
    metrics=['sparse_categorical_accuracy'],
)

model.summary()

In [None]:
# Train the Keras MLP on the balanced DS1 data and store it on disk.
NUM_EPOCHS=50
history = model.fit(x=x_train_float, y=y_train_balanced,epochs=NUM_EPOCHS,shuffle=True)
# Make sure the export directory exists before saving (no-op when it is
# already there); nothing visible in this section creates it.
os.makedirs(model_path, exist_ok=True)
model.save(model_path+TF_MODEL)

### float32

convert model

In [None]:
# Reload the Keras model from model_path + TF_MODEL (the file written by the training cell).
model_load = tf.keras.models.load_model(model_path+TF_MODEL)

In [None]:
#conversion without quantization
TFL_MODEL_FILE= model_path +"mlp_float.tflite"
converter = tf.lite.TFLiteConverter.from_keras_model(model_load)
tflite_model = converter.convert()
# Write through a context manager so the file is flushed and closed before
# the interpreter opens it in the next cell.
with open(TFL_MODEL_FILE, "wb") as tflite_file:
    tflite_file.write(tflite_model)

In [None]:
# Load the float TFLite model and keep its input/output tensor descriptions
# for the inference loop below.
interpreter = tf.lite.Interpreter(model_path=TFL_MODEL_FILE)
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Allocate the tensors before any set_tensor / invoke call.
interpreter.allocate_tensors()

In [None]:
# Run the float TFLite model sample by sample over the scaled test set.
x_test_float32 = x_test_float.to_numpy().astype("float32")

# Tensor handles and the expected input shape are the same for every sample.
float_input_index = input_details[0]['index']
float_input_shape = input_details[0]['shape']
float_output_index = output_details[0]['index']

output_data = []
for x in x_test_float32:
    interpreter.set_tensor(float_input_index, x.reshape(float_input_shape))
    interpreter.invoke()
    # Keep the first row of the output tensor for this sample.
    output_data.append(interpreter.get_tensor(float_output_index)[0])

y_test_pred_lite = np.array(output_data)

In [None]:
# Show the description of the first input / output tensor of the float model.
print(input_details[0])
print(output_details[0])

classify states

In [None]:
#test state-cycles
# Most probable class per sample, smoothed, then mapped back to state names.
y_predict = np.argmax(y_test_pred_lite, axis=1)
y_pred_smoothed = smooth_labels(y_predict, 3)     #apply 3rd median filter
y_recognized = le_state.inverse_transform(y_pred_smoothed.astype(int))
df_temp = df_testset.assign(recognized_label=y_recognized).reset_index()

#classify duty-cycle
# One segment table per DS2 month, delimited by [first day, first day of next month].
(df_recognized_states_jun23,
 df_recognized_states_aug23,
 df_recognized_states_okt23,
 df_recognized_states_dec23) = [
    create_segments_state(datetime.datetime(*window_start), datetime.datetime(*window_end), df_temp)
    for window_start, window_end in (
        ((2023, 6, 1), (2023, 7, 1)),
        ((2023, 8, 1), (2023, 9, 1)),
        ((2023, 10, 1), (2023, 11, 1)),
        ((2023, 12, 1), (2024, 1, 1)),
    )
]

folder_path = dir_model_pred + "/float"
os.makedirs(folder_path, exist_ok=True)

# NOTE(review): the following cell reads these *_state.txt files back, so with
# flag_save_results False it would use files from an earlier run (or fail).
if flag_save_results:
    for month_tag, month_segments in (
        ("jun23", df_recognized_states_jun23),
        ("aug23", df_recognized_states_aug23),
        ("okt23", df_recognized_states_okt23),
        ("dec23", df_recognized_states_dec23),
    ):
        create_reference_label_file(folder_path + "/" + month_tag + "_state.txt", month_segments)

classify cycle

In [None]:
# Column names of the tab-separated label files.
label_file_columns = ["Time(Seconds)", "Length(Seconds)", "Label(string)"]

# State labels recognised on DS1 (files generated in approach-2): they are the
# training input of the duty-cycle classifier.
folder_path_train_labels = dir_exp1_approach2 + classifier_state + "/" + str(seed) + "/"
ds1_recognized_labels = []
for month_idx in range(4):
    aux = df_timestamps(pd.read_csv(folder_path_train_labels + file_name_states_DS1[month_idx], sep='\t', names=label_file_columns))
    ds1_month = data_DS1[month_idx]
    ds1_recognized_labels.append(ndarray_labels(ds1_month.index[0], ds1_month.index[-1], aux, downsampled_freq))
recog_label_jun21, recog_label_okt21, recog_label_jan22, recog_label_april22 = ds1_recognized_labels
dfs_ref_state["recog_label"] = np.concatenate(ds1_recognized_labels)

train_sequences = find_non_consecutive_sequences(dfs_ref_state, dfs_ref_cycle, delta, column_name="recog_label")

# Longest training sequence plus a margin of 3, used as padding length.
max_sequence_length = max(len(seq['non_consecutive_labels']) for seq in train_sequences) + 3

# Encode the sequences for training
train_features = [le_state.transform(seq['non_consecutive_labels']) for seq in train_sequences]
train_features_padded = pad_sequences(train_features, max_sequence_length)
train_labels = le_cycle.transform([seq['label'] for seq in train_sequences])
X_res, y_res = train_features_padded, train_labels

#import classified state_labels
# Read back the state files written by the previous cell and attach them to
# the scaled monthly frames (modified in place).
for month_tag, month_frame in (
    ("jun23", data_csv_jun23_float),
    ("aug23", data_csv_aug23_float),
    ("okt23", data_csv_okt23_float),
    ("dec23", data_csv_dec23_float),
):
    aux = df_timestamps(pd.read_csv(folder_path + "/" + month_tag + "_state.txt", sep='\t', names=label_file_columns))
    month_frame["recognized_label"] = ndarray_labels(month_frame.index[0], month_frame.index[-1], aux, downsampled_freq)

In [None]:
#compute the detection of duty-cycle for each month
threshold_speed = 2.5
# Raw threshold expressed in scaled units, using scaler feature index 2
# (NOTE(review): presumably the 'Speed_order3' feature - confirm).
threshold_speed_normalized = (threshold_speed - scaler.data_min_[2]) / scaler.data_range_[2]

#merge test data
dfs_data = [data_csv_jun23_float, data_csv_aug23_float, data_csv_okt23_float, data_csv_dec23_float]

# Mark every sample whose scaled speed exceeds the threshold.
for month_frame in dfs_data:
    month_frame.loc[month_frame['Speed_order3'] > threshold_speed_normalized, 'detected_cycles'] = 'Cycle'

dfs_detection_cycle = [boundaries_cycles(month_frame) for month_frame in dfs_data]
detection_jun23, detection_aug23, detection_okt23, detection_dec23 = dfs_detection_cycle

file_name = ["jun23_cycle.txt", "aug23_cycle.txt",
             "okt23_cycle.txt", "dec23_cycle.txt"]

#train
clf_duty_float = train_cycle_classifier(classifier_cycle, X_res, y_res, seed)

#compute the sequence for each month of data
all_sequences = [
    find_non_consecutive_sequences(month_frame, month_cycles, delta, column_name="recognized_label")
    for month_frame, month_cycles in zip(dfs_data, dfs_detection_cycle)
]

for i, test_sequences in enumerate(all_sequences):
    # Encode the sequences for testing
    test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in test_sequences]
    test_features_padded = pad_sequences(test_features, max_sequence_length)

    #test
    y_pred = clf_duty_float.predict(test_features_padded)

    # save the results in a file
    df_temp = dfs_detection_cycle[i][["start", "end"]].copy()
    df_temp["label"] = le_cycle.inverse_transform(y_pred)

    if flag_save_results:
        create_reference_label_file(folder_path + "/" + file_name[i], df_temp)

convert model to C

In [None]:
# Notebook shell commands: dump the float TFLite model as a C array header
# with xxd, then print the generated header.
!xxd -i ./results/c_code/approach3/mlp_float.tflite > ./results/c_code/approach3/mlp_float.h
!cat ./results/c_code/approach3/mlp_float.h

In [None]:
from emlearn import convert

# Convert the float duty-cycle classifier with emlearn and save it as a header
# file in model_path under the name 'clf_duty'.
cmodel = convert(clf_duty_float, method='inline',dtype='float')
cmodel.save(file = model_path+"clf_duty_float.h", name='clf_duty')

### int8

In [None]:
def representative_data_gen():
    """Yield single-sample float32 batches used to calibrate the quantization.

    Only the first half of ``x_test_float`` is used, one sample per batch.

    NOTE(review): the calibration samples come from the test set, not from the
    training set - confirm this is intended.
    """
    calibration_count = int(len(x_test_float) / 2)
    single_sample_batches = tf.data.Dataset.from_tensor_slices(x_test_float).batch(1)
    for sample_batch in single_sample_batches.take(calibration_count):
        yield [tf.dtypes.cast(sample_batch, tf.float32)]

In [None]:
# Configure the converter for int8 quantization of the reloaded Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(model_load)
# Calibration samples are produced by representative_data_gen.
converter.representative_dataset = tf.lite.RepresentativeDataset(representative_data_gen)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Restrict the converted model to int8 builtin operators.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# The model input and output tensors are int8 as well.
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

In [None]:
# Convert with the int8 settings and store the model in model_path.
tflite_model_int_quant = converter.convert()
# From here on TF_MODEL_I8 holds the full path of the int8 model file.
TF_MODEL_I8= model_path + "mlp_int8.tflite"
# Write through a context manager so the file is flushed and closed before
# the interpreter opens it in the next cell.
with open(TF_MODEL_I8, "wb") as tflite_int8_file:
    tflite_int8_file.write(tflite_model_int_quant)

In [None]:
# Load the int8 TFLite model and run it sample by sample over the test set.
interpreter = tf.lite.Interpreter(model_path=TF_MODEL_I8)
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
interpreter.allocate_tensors()

# Here we manually quantize the float32 data to provide int8 inputs
input_scale, input_zero_point = input_details[0]['quantization']
np_x_test = x_test_float32 / input_scale + input_zero_point

# Tensor handles and the expected input shape are the same for every sample.
int8_input_index = input_details[0]['index']
int8_input_shape = input_details[0]['shape']
int8_output_index = output_details[0]['index']

output_data = []
for x in np_x_test:
    # NOTE(review): astype("int8") truncates toward zero and does not clip;
    # confirm this matches the conversion done in the MCU firmware.
    quantized_sample = x.reshape(int8_input_shape).astype("int8")
    interpreter.set_tensor(int8_input_index, quantized_sample)
    interpreter.invoke()
    output_data.append(interpreter.get_tensor(int8_output_index)[0])

y_test_pred_lite_iquant = np.array(output_data)

In [None]:
# Show the description of the first input / output tensor of the int8 model.
print(input_details[0])
print(output_details[0])

classify states

In [None]:
#test state-cycles
# Most probable class per sample (int8 outputs), smoothed, then mapped back to
# state names.
y_predict = np.argmax(y_test_pred_lite_iquant, axis=1)
y_pred_smoothed = smooth_labels(y_predict, 3)     #apply 3rd median filter
y_recognized = le_state.inverse_transform(y_pred_smoothed.astype(int))
df_temp = df_testset.assign(recognized_label=y_recognized).reset_index()

#classify duty-cycle
# One segment table per DS2 month, delimited by [first day, first day of next month].
(df_recognized_states_jun23,
 df_recognized_states_aug23,
 df_recognized_states_okt23,
 df_recognized_states_dec23) = [
    create_segments_state(datetime.datetime(*window_start), datetime.datetime(*window_end), df_temp)
    for window_start, window_end in (
        ((2023, 6, 1), (2023, 7, 1)),
        ((2023, 8, 1), (2023, 9, 1)),
        ((2023, 10, 1), (2023, 11, 1)),
        ((2023, 12, 1), (2024, 1, 1)),
    )
]

folder_path = dir_model_pred + "int8"
os.makedirs(folder_path, exist_ok=True)

# The state files are always written: the next cell reads them back.
for month_tag, month_segments in (
    ("jun23", df_recognized_states_jun23),
    ("aug23", df_recognized_states_aug23),
    ("okt23", df_recognized_states_okt23),
    ("dec23", df_recognized_states_dec23),
):
    create_reference_label_file(folder_path + "/" + month_tag + "_state.txt", month_segments)

classify cycle

In [None]:
# Column names of the tab-separated label files.
label_file_columns = ["Time(Seconds)", "Length(Seconds)", "Label(string)"]

# State labels recognised on DS1 (files generated in approach-2): they are the
# training input of the duty-cycle classifier.
folder_path_train_labels = dir_exp1_approach2 + classifier_state + "/" + str(seed) + "/"
ds1_recognized_labels = []
for month_idx in range(4):
    aux = df_timestamps(pd.read_csv(folder_path_train_labels + file_name_states_DS1[month_idx], sep='\t', names=label_file_columns))
    ds1_month = data_DS1[month_idx]
    ds1_recognized_labels.append(ndarray_labels(ds1_month.index[0], ds1_month.index[-1], aux, downsampled_freq))
recog_label_jun21, recog_label_okt21, recog_label_jan22, recog_label_april22 = ds1_recognized_labels
dfs_ref_state["recog_label"] = np.concatenate(ds1_recognized_labels)

train_sequences = find_non_consecutive_sequences(dfs_ref_state, dfs_ref_cycle, delta, column_name="recog_label")

# Longest training sequence plus a margin of 3, used as padding length.
max_sequence_length = max(len(seq['non_consecutive_labels']) for seq in train_sequences) + 3

# Encode the sequences for training
train_features = [le_state.transform(seq['non_consecutive_labels']) for seq in train_sequences]
train_features_padded = pad_sequences(train_features, max_sequence_length)
train_labels = le_cycle.transform([seq['label'] for seq in train_sequences])
X_res, y_res = train_features_padded, train_labels

#import classified state_labels
# Read back the int8 state files written by the previous cell and attach them
# to the scaled monthly frames (modified in place; this overwrites the labels
# stored by the float pipeline).
for month_tag, month_frame in (
    ("jun23", data_csv_jun23_float),
    ("aug23", data_csv_aug23_float),
    ("okt23", data_csv_okt23_float),
    ("dec23", data_csv_dec23_float),
):
    aux = df_timestamps(pd.read_csv(folder_path + "/" + month_tag + "_state.txt", sep='\t', names=label_file_columns))
    month_frame["recognized_label"] = ndarray_labels(month_frame.index[0], month_frame.index[-1], aux, downsampled_freq)

In [None]:
#compute the detection of duty-cycle for each month
threshold_speed = 2.5
# Raw threshold expressed in scaled units, using scaler feature index 2
# (NOTE(review): presumably the 'Speed_order3' feature - confirm).
threshold_speed_normalized = (threshold_speed - scaler.data_min_[2]) / scaler.data_range_[2]

#merge test data
dfs_data = [data_csv_jun23_float, data_csv_aug23_float, data_csv_okt23_float, data_csv_dec23_float]

# Mark every sample whose scaled speed exceeds the threshold.
for month_frame in dfs_data:
    month_frame.loc[month_frame['Speed_order3'] > threshold_speed_normalized, 'detected_cycles'] = 'Cycle'

dfs_detection_cycle = [boundaries_cycles(month_frame) for month_frame in dfs_data]
detection_jun23, detection_aug23, detection_okt23, detection_dec23 = dfs_detection_cycle

file_name = ["jun23_cycle.txt", "aug23_cycle.txt",
             "okt23_cycle.txt", "dec23_cycle.txt"]

#train
clf_duty_uint8 = train_cycle_classifier(classifier_cycle, X_res, y_res, seed)

#compute the sequence for each month of data
all_sequences = [
    find_non_consecutive_sequences(month_frame, month_cycles, delta, column_name="recognized_label")
    for month_frame, month_cycles in zip(dfs_data, dfs_detection_cycle)
]

# Output folder of the int8 duty-cycle predictions (prepared once, only when
# results are saved).
if flag_save_results:
    folder_path = dir_model_pred + "int8/"
    os.makedirs(folder_path, exist_ok=True)

for i, test_sequences in enumerate(all_sequences):
    # Encode the sequences for testing
    test_features = [le_state.transform(seq['non_consecutive_labels']) for seq in test_sequences]
    test_features_padded = pad_sequences(test_features, max_sequence_length)

    #test
    y_pred = clf_duty_uint8.predict(test_features_padded)

    # save the results in a file
    df_temp = dfs_detection_cycle[i][["start", "end"]].copy()
    df_temp["label"] = le_cycle.inverse_transform(y_pred)

    if flag_save_results:
        create_reference_label_file(folder_path + file_name[i], df_temp)

convert model to C

In [None]:
# Notebook shell command: dump the int8 TFLite model as a C array header with xxd.
!xxd -i  ./results/c_code/approach3/mlp_int8.tflite > ./results/c_code/approach3/mlp_int8.h

In [None]:
from emlearn import convert

# Convert the duty-cycle classifier trained in the int8 pipeline with emlearn,
# using the 'uint8_t' dtype, and save it as a header file in model_path under
# the name 'clf_duty'.
cmodel = convert(clf_duty_uint8, method='inline',dtype='uint8_t')
cmodel.save(file = model_path + "clf_duty_uint8.h", name='clf_duty')

### performance

In [None]:
# Evaluate the duty-cycle predictions of the float and int8 deployments
# against the reference labels and export one summary row per quantization.
result_columns = ['quantization', 'state_classifier', "detection mean",
                  'Abnormal mean F1-score', 'Normal mean F1-score', 'Overall mean F1-score']

# Assigned directly: the former intermediate variable shadowed the builtin dir().
reference_path = './results/reference_cycle_labels/'
collar = 202.75

classifier_states = ["mlp"]
classifier_cycles = ["xtree"]
quantizations = ["float", "int8"]

# Rows are collected in a list and turned into a frame once, instead of
# concatenating onto an initially empty DataFrame on every iteration.
result_rows = []
for classifier_state, classifier_cycle in zip(classifier_states, classifier_cycles):
    for q in quantizations:
        result_path = dir_model_pred + q + "/"
        (f1score_file, precision_file, recall_file,
         f1score_abnormal, precision_abnormal, recall_abnormal,
         f1score_normal, precision_normal, recall_normal) = compute_classification_sedeval(reference_path, result_path, collar)
        detection_file_overall = compute_detection_sedeval(reference_path, result_path, collar)

        # Console report, all values in percent.
        print(f"---------- {classifier_state} - {q} ----------")
        print(f"DETECTION: {detection_file_overall*100}")
        print("ABNORMAL:")
        print(f"F1-score: {f1score_abnormal*100}")
        print(f"Precision: {precision_abnormal*100}")
        print(f"Recall: {recall_abnormal*100}")
        print("NORMAL:")
        print(f"F1-score: {f1score_normal*100}")
        print(f"Precision: {precision_normal*100}")
        print(f"Recall: {recall_normal*100}")
        print("OVERALL:")
        print(f"F1-score: {f1score_file*100}")
        print(f"Precision: {precision_file*100}")
        print(f"Recall: {recall_file*100}")

        result_rows.append({'quantization': q,
                            'state_classifier': classifier_state,
                            "detection mean": np.mean(detection_file_overall)*100,
                            'Abnormal mean F1-score': np.mean(f1score_abnormal)*100,
                            'Normal mean F1-score': np.mean(f1score_normal)*100,
                            'Overall mean F1-score': np.mean(f1score_file)*100})

df_results = pd.DataFrame(result_rows, columns=result_columns)

df_results.to_csv(dir_model_pred + 'results_mcu.csv', index=False)