In [2]:
import datetime
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Recall, Precision, AUC
from sklearn.metrics import confusion_matrix, recall_score, precision_score,f1_score,roc_auc_score, precision_recall_curve
import seaborn as sns
from matplotlib import pyplot as plt
from hyperopt import hp, fmin, tpe, Trials
from sklearn.model_selection import TimeSeriesSplit
from sklearn.utils.class_weight import compute_class_weight

In [3]:
# from importlib import reload
# import EuroTruck.ProcessEuroTruckData.process_ets_data
# reload(EuroTruck.ProcessEuroTruckData.process_ets_data)
from EuroTruck.ProcessEuroTruckData.process_ets_data import data_rq_cc
from EuroTruck.ProcessEuroTruckData.process_ets_data import data_michele_cc
from EuroTruck.ProcessEuroTruckData.process_ets_data import data_sara_cc

In [4]:
np.random.seed(78)

In [15]:
def seperate_ground_truth(data, size, step):
    awake_window = []
    light_drowsy_window = []
    drowsy_window = []
    general_window = []
    time = next(iter(data.table))
    while time + datetime.timedelta(seconds=size) <= next(reversed(data.table)):
        awake_count = 0
        light_drowsy_count = 0
        drowsy_count = 0
        for i in range(size):
            check_time = time + datetime.timedelta(seconds=i)
            if check_time not in data.table:
                break
            if data.table[check_time]["groud_truth"] == [1]:
                awake_count += 1
            if data.table[check_time]["groud_truth"] == [2] or data.table[check_time]["groud_truth"] == [3]:
                light_drowsy_count += 1
            if data.table[check_time]["groud_truth"] == [4]:
                drowsy_count += 1

        if awake_count == size:
            general_window.append([time + datetime.timedelta(seconds=i) for i in range(size)])
            awake_window.append([time + datetime.timedelta(seconds=i) for i in range(size)])
        if light_drowsy_count == size:
            general_window.append([time + datetime.timedelta(seconds=i) for i in range(size)])
            light_drowsy_window.append([time + datetime.timedelta(seconds=i) for i in range(size)])
        if drowsy_count == size:
            general_window.append([time + datetime.timedelta(seconds=i) for i in range(size)])
            drowsy_window.append([time + datetime.timedelta(seconds=i) for i in range(size)])

        step_seconds = datetime.timedelta(seconds=step)
        time += step_seconds
    
    # 因为这里awake——window只会用来决定某个窗口的ground truth，所以转换成set可以方便查找    
    awake_window=set([tuple(window) for window in awake_window])
    light_drowsy_window=set([tuple(window) for window in light_drowsy_window])
    drowsy_window=set([tuple(window) for window in drowsy_window])
    
    # drowsy window is a list of list of datetime objects which is the key
    return awake_window, light_drowsy_window, drowsy_window, general_window


def define_feature_matrix(data, awake_window, light_drowsy_window, drowsy_window, general_window,sampling_rate,size):
    if general_window == []:
        return np.empty((0, sampling_rate*size, 4))
    feature_matrix = []
    label = []
    for window in general_window:
        feature_matrix_per_window = []

        SWA_colunm = []
        for time in window:
            SWA_colunm.extend(data.table[time]["SWA_data"])
        feature_matrix_per_window.append(SWA_colunm)

        SWV_column = []
        for time in window:
            SWV_column.extend(data.table[time]["SWV_data"])
        feature_matrix_per_window.append(SWV_column)

        LD_column = []
        for time in window:
            LD_column.extend(data.table[time]["lateral_displacement_data"])
        feature_matrix_per_window.append(LD_column)

        LA_column = []
        for time in window:
            LA_column.extend(data.table[time]["lateral_acceleration_data"])
        feature_matrix_per_window.append(LA_column)

        feature_matrix_per_window = np.transpose(np.array(feature_matrix_per_window))
        feature_matrix.append(feature_matrix_per_window)
        if tuple(window) in awake_window:
            label.append(0)
        if tuple(window) in light_drowsy_window:
            label.append(1)
        if tuple(window) in drowsy_window:
            label.append(2)
        
    feature_matrix = np.array(feature_matrix)
    label = np.array(label)
    return feature_matrix, label

size= 10
step = 1
sample_rate = data_rq_cc.get_min_sampling_rate()

X_rq,y_rq = define_feature_matrix(data_rq_cc, *seperate_ground_truth(data_rq_cc, size, step), sample_rate, size)
_, _, _, general_window_michele = seperate_ground_truth(data_michele_cc, size, step)
X_michele,y_michele = define_feature_matrix(data_michele_cc, *seperate_ground_truth(data_michele_cc, size, step), sample_rate, size)
X_sara,y_sara = define_feature_matrix(data_sara_cc, *seperate_ground_truth(data_sara_cc, size, step), sample_rate, size)
X_train = np.concatenate((X_rq,X_sara),axis=0)
y_train = np.concatenate((y_rq,y_sara),axis=0)
X_rest = X_michele
y_rest = y_michele
X_test=X_rest[:int(X_rest.shape[0]/2)]
y_test=y_rest[:int(y_rest.shape[0]/2)]
X_val=X_rest[int(X_rest.shape[0]/2):]
y_val=y_rest[int(y_rest.shape[0]/2):]


classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
class_weight_dict = dict(zip(classes, class_weights))

space = {
    'conv_filters_1': hp.choice('conv_filters_1', [32, 64, 128]),
    'conv_filters_2': hp.choice('conv_filters_2', [32, 64, 128]),
    'conv_filters_3': hp.choice('conv_filters_3', [32, 64, 128]),
    'conv_filters_4': hp.choice('conv_filters_4', [32, 64, 128]),
    'mini_batch_size': hp.choice('mini_batch_size', [32, 64, 128]),
    'learning_rate': hp.choice('learning_rate', [0.001, 0.005, 0.01]),
    'pooling_size_hyperopt': hp.choice('pooling_size_hyperopt', [2,3]),
    'neurons_lstm': hp.choice('neurons_lstm', [32, 64, 128]),
    'neurons_gru': hp.choice('neurons_gru', [32, 64, 128])
}

space_params = {
    'conv_filters_1': [32, 64, 128],
    'conv_filters_2': [32, 64, 128],
    'conv_filters_3': [32, 64, 128],
    'conv_filters_4': [32, 64, 128],
    'learning_rate': [0.001, 0.005, 0.01],
    'mini_batch_size': [32, 64, 128],
    'pooling_size_hyperopt': [2,3],
    'neurons_lstm': [32, 64, 128],
    'neurons_gru': [32, 64, 128]
}


def confusion_matrix_on_test_data(model,model_name, X_test, y_test):
    # labels=[0,1,2]
    labels=[0,1]
    y_pred=[]
    for row in model.predict(X_test):
        y_pred.append(np.argmax(row))
    y_pred=np.array(y_pred) 
    cm = confusion_matrix(y_test, y_pred,labels=labels)
    # sns.heatmap(cm, annot=True, cmap='Blues', fmt='g', xticklabels=['Class 1', 'Class 2', 'Class 3'], yticklabels=['Class 1', 'Class 2', 'Class 3'])
    sns.heatmap(cm, annot=True, cmap='Blues', fmt='g', xticklabels=['Class 1', 'Class 2'], yticklabels=['Class 1', 'Class 2'])
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.title('Confusion Matrix of '+model_name)
    plt.show()

In [6]:

def make_model(space,best_params):
    Input_layer = tf.keras.layers.Input(shape=(X_train.shape[1], X_train.shape[2]))
    model= tf.keras.models.Sequential([
        Input_layer,
        # filters, kernel_size, activation, input_shape
        tf.keras.layers.Conv1D(space['conv_filters_1'][best_params['conv_filters_1']], 3, activation=None,kernel_initializer='he_normal'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        # window size
        tf.keras.layers.MaxPooling1D(space['pooling_size_hyperopt'][best_params['pooling_size_hyperopt']]),
        
        tf.keras.layers.Conv1D(space['conv_filters_2'][best_params['conv_filters_2']], 3, activation=None,kernel_initializer='he_normal'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling1D(space['pooling_size_hyperopt'][best_params['pooling_size_hyperopt']]),
        
        tf.keras.layers.Conv1D(space['conv_filters_3'][best_params['conv_filters_3']], 3, activation=None,kernel_initializer='he_normal'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling1D(space['pooling_size_hyperopt'][best_params['pooling_size_hyperopt']]),
        
        # tf.keras.layers.Conv1D(space['conv_filters_4'][best_params['conv_filters_4']], 3, activation=None),
        # tf.keras.layers.BatchNormalization(),
        # tf.keras.layers.Activation('relu'),
        # tf.keras.layers.MaxPooling1D(space['pooling_size_hyperopt'][best_params['pooling_size_hyperopt']]),
        
        # tf.keras.layers.GRU(space['neurons_gru'][best_params['neurons_gru']]),
        tf.keras.layers.LSTM(space['neurons_lstm'][best_params['neurons_lstm']]),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(2, activation='softmax',kernel_initializer='he_normal')
    ])
    
    model.compile(optimizer=Adam(learning_rate=space['learning_rate'][best_params['learning_rate']]), loss=tf.keras.losses.CategoricalFocalCrossentropy(alpha=list(class_weight_dict), gamma=2), metrics=['categorical_accuracy'])
    return model

In [124]:

def objective(params):
    Input_layer = tf.keras.layers.Input(shape=(X_train.shape[1], X_train.shape[2]))
    model = tf.keras.models.Sequential([
        Input_layer,
        tf.keras.layers.Conv1D(params['conv_filters_1'], 3, activation=None,kernel_initializer='he_normal'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling1D(params['pooling_size_hyperopt']),
        
        tf.keras.layers.Conv1D(params['conv_filters_2'], 3, activation=None,kernel_initializer='he_normal'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling1D(params['pooling_size_hyperopt']),
        
        tf.keras.layers.Conv1D(params['conv_filters_3'], 3, activation=None,kernel_initializer='he_normal'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling1D(params['pooling_size_hyperopt']),
        
        # tf.keras.layers.Conv1D(params['conv_filters_4'], 3, activation=None),
        # tf.keras.layers.BatchNormalization(),
        # tf.keras.layers.Activation('relu'),
        # tf.keras.layers.MaxPooling1D(params['pooling_size_hyperopt']),
        
        tf.keras.layers.GRU(params['neurons_gru']),
        # tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(2, activation='softmax',kernel_initializer='he_normal')
    ])
    
    model.compile(optimizer=Adam(learning_rate=params['learning_rate']), loss=tf.keras.losses.CategoricalFocalCrossentropy(alpha=0.25, gamma=2), metrics=['categorical_accuracy'])
    
    # auc_roc = []
    # valid_folds = 0
    # tscv = TimeSeriesSplit(n_splits=10)
    # for train_index, test_index in tscv.split(X_rest):
    #     X_train_train, X_train_test = X_rest[train_index], X_rest[test_index]
    #     y_train_train, y_train_test = y_rest[train_index], y_rest[test_index]
    #     
    #     # 检查y_train_test中是否包含两个类别
    #     if len(np.unique(y_train_test)) < 2:
    #         continue
    #     
    #     y_train_train_one_hot = tf.keras.utils.to_categorical(y_train_train, num_classes=2)
    #     y_train_test_one_hot = tf.keras.utils.to_categorical(y_train_test, num_classes=2)
    #     
    #     model.fit(X_train_train, y_train_train_one_hot, epochs=100, batch_size=params['mini_batch_size'], validation_data=(X_train_test, y_train_test_one_hot), verbose=0, callbacks=[tf.keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True)])
    #     
    #     y_pred_prob = model.predict(X_train_test, verbose=0)
    #     
    #     auc_roc.append(roc_auc_score(y_train_test, y_pred_prob[:, 1]))
    #     valid_folds += 1
    # 
    # # 如果没有有效的折叠，则返回一个较差的分数
    # if valid_folds == 0:
    #     return {'loss': 1, 'status': 'fail'}
    
    model.fit(X_train, tf.keras.utils.to_categorical(y_train, num_classes=2), epochs=100, batch_size=params['mini_batch_size'], validation_data=(X_val, tf.keras.utils.to_categorical(y_val, num_classes=2)), verbose=0, callbacks=[tf.keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True)])
    auc_roc = roc_auc_score(y_val, model.predict(X_val,verbose=0)[:, 1])
    # f1=f1_score(y_val, np.argmax(model.predict(X_val,verbose=0),axis=1))
    
    return {'loss': -auc_roc, 'status': 'ok'}

# 定义Trials对象
trials = Trials()

# 运行超参数优化
best_params = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=10, trials=trials)

print("Best hyperparameters:", best_params)
# good results I got here are 0.75
#best loss: -0.7492643194955335
#Best hyperparameters: {'conv_filters_1': 2, 'conv_filters_2': 0, 'conv_filters_3': 0, 'conv_filters_4': 0, 'learning_rate': 0, 'mini_batch_size': 2, 'neurons_gru': 1, 'neurons_lstm': 2, 'pooling_size_hyperopt': 1}

100%|██████████| 10/10 [09:07<00:00, 54.76s/trial, best loss: -0.7492643194955335]
Best hyperparameters: {'conv_filters_1': 2, 'conv_filters_2': 0, 'conv_filters_3': 0, 'conv_filters_4': 0, 'learning_rate': 0, 'mini_batch_size': 2, 'neurons_gru': 1, 'neurons_lstm': 2, 'pooling_size_hyperopt': 1}


In [10]:
best_params= {'conv_filters_1': 2, 'conv_filters_2': 0, 'conv_filters_3': 0, 'conv_filters_4': 0, 'learning_rate': 0, 'mini_batch_size': 2, 'neurons_gru': 1, 'neurons_lstm': 2, 'pooling_size_hyperopt': 1}
best_model = make_model(space_params, best_params)
best_model.fit(X_train, tf.keras.utils.to_categorical(y_train, num_classes=2), epochs=200, batch_size=space_params['mini_batch_size'][best_params['mini_batch_size']], validation_data=(X_val, tf.keras.utils.to_categorical(y_val, num_classes=2)), verbose=0, callbacks=[tf.keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True)])
auc_roc = roc_auc_score(y_val, best_model.predict(X_val,verbose=0)[:, 1])

# confusion_matrix_on_test_data(best_model, 'best_weighted_model', X_test, y_test)

In [186]:
#best till now could be 0.75
def objective_1(params):
    Input_layer = tf.keras.layers.Input(shape=(X_train.shape[1], X_train.shape[2]))
    model = tf.keras.models.Sequential([
        Input_layer,
        tf.keras.layers.Conv1D(params['conv_filters_1'], 3, activation=None),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling1D(params['pooling_size_hyperopt']),
        
        tf.keras.layers.Conv1D(params['conv_filters_2'], 3, activation=None),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling1D(params['pooling_size_hyperopt']),
        
        tf.keras.layers.Conv1D(params['conv_filters_3'], 3, activation=None),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling1D(params['pooling_size_hyperopt']),
        
        # tf.keras.layers.Conv1D(params['conv_filters_4'], 3, activation=None),
        # tf.keras.layers.BatchNormalization(),
        # tf.keras.layers.Activation('relu'),
        # tf.keras.layers.MaxPooling1D(params['pooling_size_hyperopt']),
        
        tf.keras.layers.GRU(params['neurons_gru']),
        # tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(2, activation='softmax')
    ])
    
    model.compile(optimizer=Adam(learning_rate=params['learning_rate']), loss=tf.keras.losses.CategoricalFocalCrossentropy(alpha=list(class_weight_dict), gamma=2), metrics=['categorical_accuracy'])
    
    model.fit(X_train, tf.keras.utils.to_categorical(y_train, num_classes=2), epochs=100, batch_size=params['mini_batch_size'], validation_data=(X_val, tf.keras.utils.to_categorical(y_val, num_classes=2)), verbose=0, callbacks=[tf.keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True)])
    auc_roc = roc_auc_score(y_val, model.predict(X_val,verbose=0)[:, 1])
    # f1=f1_score(y_val, np.argmax(model.predict(X_val,verbose=0),axis=1))
    
    return {'loss': -auc_roc, 'status': 'ok'}

# 定义Trials对象
trials = Trials()

# 运行超参数优化
best_params_1 = fmin(fn=objective_1, space=space, algo=tpe.suggest, max_evals=20, trials=trials)

print("Best hyperparameters:", best_params_1)

best_model_1 = make_model(space_params, best_params_1)
best_model_1.fit(X_train, tf.keras.utils.to_categorical(y_train, num_classes=2), epochs=100, batch_size=space_params['mini_batch_size'][best_params['mini_batch_size']], validation_data=(X_val, tf.keras.utils.to_categorical(y_val, num_classes=2)), verbose=0, callbacks=[tf.keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True)])

100%|██████████| 20/20 [25:17<00:00, 75.88s/trial, best loss: -0.6718274038737858] 
Best hyperparameters: {'conv_filters_1': 2, 'conv_filters_2': 2, 'conv_filters_3': 2, 'conv_filters_4': 0, 'learning_rate': 0, 'mini_batch_size': 2, 'neurons_gru': 1, 'neurons_lstm': 2, 'pooling_size_hyperopt': 1}


<keras.src.callbacks.history.History at 0x78c79b790>

In [None]:
def objective_2(params):
    Input_layer = tf.keras.layers.Input(shape=(X_train.shape[1], X_train.shape[2]))
    model = tf.keras.models.Sequential([
        Input_layer,
        tf.keras.layers.Conv1D(params['conv_filters_1'], 3, activation=None),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling1D(params['pooling_size_hyperopt']),
        
        tf.keras.layers.Conv1D(params['conv_filters_2'], 3, activation=None),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling1D(params['pooling_size_hyperopt']),
        
        tf.keras.layers.Conv1D(params['conv_filters_3'], 3, activation=None),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling1D(params['pooling_size_hyperopt']),
        
        # tf.keras.layers.Conv1D(params['conv_filters_4'], 3, activation=None),
        # tf.keras.layers.BatchNormalization(),
        # tf.keras.layers.Activation('relu'),
        # tf.keras.layers.MaxPooling1D(params['pooling_size_hyperopt']),
        
        tf.keras.layers.LSTM(params['neurons_lstm']),
        tf.keras.layers.Flatten(),
        
        tf.keras.layers.Dense(2, activation='softmax')
    ])
    
    model.compile(optimizer=Adam(learning_rate=params['learning_rate']), loss=tf.keras.losses.CategoricalFocalCrossentropy(alpha=list(class_weight_dict), gamma=2), metrics=['categorical_accuracy'])
    
    model.fit(X_train, tf.keras.utils.to_categorical(y_train, num_classes=2), epochs=150, batch_size=params['mini_batch_size'], validation_data=(X_val, tf.keras.utils.to_categorical(y_val, num_classes=2)), verbose=0, callbacks=[tf.keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True)])
    auc_roc = roc_auc_score(y_val, model.predict(X_val,verbose=0)[:, 1])
    # f1=f1_score(y_val, np.argmax(model.predict(X_val,verbose=0),axis=1))
    
    return {'loss': -auc_roc, 'status': 'ok'}

# 定义Trials对象
trials = Trials()

# 运行超参数优化
best_params_2 = fmin(fn=objective_2, space=space, algo=tpe.suggest, max_evals=50, trials=trials)

In [8]:

print("Best hyperparameters:", best_params_2)

Best hyperparameters: {'conv_filters_1': 0, 'conv_filters_2': 1, 'conv_filters_3': 0, 'conv_filters_4': 0, 'learning_rate': 0, 'mini_batch_size': 0, 'neurons_gru': 2, 'neurons_lstm': 2, 'pooling_size_hyperopt': 1}


In [10]:

best_model_2 = make_model(space_params, best_params_2)
best_model_2.fit(X_train, tf.keras.utils.to_categorical(y_train, num_classes=2), epochs=150, batch_size=space_params['mini_batch_size'][best_params_2['mini_batch_size']], validation_data=(X_val, tf.keras.utils.to_categorical(y_val, num_classes=2)), verbose=0, callbacks=[tf.keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True)])

<keras.src.callbacks.history.History at 0x3a5354d10>

In [11]:

y_pred_proba = best_model.predict(X_val, verbose=0)[:, 1]  # 获取正例的概率

# 计算不同阈值下的 precision 和 recall
precision, recall, thresholds = precision_recall_curve(y_val, y_pred_proba)

precision = precision[:-3]  # 去掉最后一个元素
recall = recall[:-3]  # 去掉最后一个元素

f1_scores = 2 * precision * recall / (precision + recall)


# 找到 F1-score 最大的阈值
best_threshold = thresholds[np.argmax(f1_scores)]

print(f'Best threshold: {best_threshold}')

# 根据最佳阈值进行分类
y_pred = (y_pred_proba >= best_threshold).astype(int)

# 计算混淆矩阵和其他指标
cm = confusion_matrix(y_val, y_pred)
precision_best = precision_score(y_val, y_pred)
recall_best = recall_score(y_val, y_pred)
f1_best = f1_score(y_val, y_pred)
roc_auc = roc_auc_score(y_val, y_pred_proba)

print('Confusion Matrix:\n', cm)
print('Precision: ', precision_best)
print('Recall: ', recall_best)
print('F1 Score: ', f1_best)
print('AUC-ROC: ', roc_auc)


Best threshold: 0.9996359348297119
Confusion Matrix:
 [[627  65]
 [ 76  34]]
Precision:  0.3434343434343434
Recall:  0.3090909090909091
F1 Score:  0.3253588516746411
AUC-ROC:  0.6660667367314766


In [12]:
y_pred_proba_test = best_model.predict(X_test, verbose=0)[:, 1]  # 获取正例的概率

# 计算不同阈值下的 precision 和 recall
precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba_test)

precision = precision[:-3]  # 去掉最后一个元素
recall = recall[:-3]  # 去掉最后一个元素

f1_scores = 2 * precision * recall / (precision + recall)


# 找到 F1-score 最大的阈值
best_threshold = thresholds[np.argmax(f1_scores)]

print(f'Best threshold: {best_threshold}')

# 根据最佳阈值进行分类
y_pred_test = (y_pred_proba_test >= best_threshold).astype(int)

# 计算混淆矩阵和其他指标

print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred_test))
print('Precision: ', precision_score(y_test, y_pred_test))
print('Recall: ', recall_score(y_test, y_pred_test))
print('F1 Score: ', f1_score(y_test, y_pred_test))
print('AUC-ROC: ', roc_auc_score(y_test, y_pred_proba_test))

Best threshold: 0.9991941452026367
Confusion Matrix:
 [[ 41 507]
 [  2 252]]
Precision:  0.33201581027667987
Recall:  0.9921259842519685
F1 Score:  0.49753208292201384
AUC-ROC:  0.508621185125582


In [37]:
y_pred_proba_val = best_model.predict(X_val, verbose=0)[:, 1]
y_pred_proba_test = best_model.predict(X_test, verbose=0)[:, 1]
threshold=0.9991941452026367
y_pred_val = (y_pred_proba_val >= threshold).astype(int)
y_pred_test = (y_pred_proba_test >= threshold).astype(int)
y_pred=np.concatenate((y_pred_val,y_pred_test),axis=0)

timestamps=[window[0] for window in general_window_michele]

import plotly.express as px
import pandas as pd

df = pd.DataFrame({
    'Timestamp': timestamps,
    'Predicted': y_pred,
    'Ground Truth': y_rest
})
df.set_index('Timestamp', inplace=True)
full_timestamps= pd.date_range(start=timestamps[0], end=timestamps[-1], freq='s')
df_full = df.reindex(full_timestamps)

fig = px.line(df_full, x=df_full.index, y=['Predicted', 'Ground Truth'])

fig.show()
