In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [2]:
import pandas as pd

df = pd.read_parquet('../data/dataset.parquet', engine='pyarrow')

In [3]:
df.drop(columns=['session_counter', 'time_to_failure'], inplace=True)
df.set_index('Timestamp', inplace=True)

In [4]:
import numpy as np

label = np.array(['alert_11'])
features = np.array(df.columns.difference(label))

In [5]:
X = df[features].to_numpy()
y = df[label].to_numpy().flatten()

In [6]:
import numpy as np

x_wins_shape = None

def window(X_data, y_data, width: int, shift: int):
    
    X_wins, y_wins = [], []

    for index, (X, y) in enumerate(zip(X_data, y_data)):
        if (index + width + shift) <= X_data.shape[0]:

            window = slice((index + width), (index + width + shift))

            X_wins.append(X_data[index: index + width])

            y_values_shift = y_data[window]
            y_wins.append(int(np.any(y_values_shift == 1)))

    X_wins = np.array(X_wins)
    x_wins_shape = X_wins.shape
    y_wins = np.array(y_wins)
    return X_wins.reshape(X_wins.shape[0], -1), y_wins.flatten()

In [7]:
X_wins, y_wins = window(X, y, width=120, shift=180)

In [8]:
from imblearn.under_sampling import RandomUnderSampler

rus = RandomUnderSampler(random_state=0)

X_res, y_res = rus.fit_resample(X_wins, y_wins)
X_res = X_res.reshape(X_res.shape[0], 1, X_res.shape[1])
print(X_res.shape)

(6648, 1, 1560)


## K-Fold Cross Validation

In [9]:
from sklearn.model_selection import StratifiedKFold

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold_metrics = []

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, BatchNormalization, Dense, Dropout
from tensorflow.keras.regularizers import L2
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import Precision, Recall

for fold, (train_idx, val_idx) in enumerate(kf.split(X_res, y_res)):
    print(f"Training fold {fold+1}...")

    X_train, X_val = X_res[train_idx], X_res[val_idx]
    y_train, y_val = y_res[train_idx], y_res[val_idx]

    model = Sequential()
    model.add(Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=L2(0.001)), input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(BatchNormalization())
    model.add(Bidirectional(LSTM(64, return_sequences=True, kernel_regularizer=L2(0.001))))
    model.add(BatchNormalization())
    model.add(Bidirectional(LSTM(128, return_sequences=False, kernel_regularizer=L2(0.001))))
    model.add(BatchNormalization())
    model.add(Dense(units=64, activation="relu", kernel_regularizer=L2(0.001)))
    model.add(Dropout(0.3))
    model.add(Dense(units=32, activation="relu", kernel_regularizer=L2(0.001)))
    model.add(Dense(units=1, activation="sigmoid"))

    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy', Precision(), Recall()]
    )
    
    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    history = model.fit(
        X_train, y_train,
        epochs=10,
        batch_size=32,
        validation_data=(X_val, y_val),
        callbacks=[early_stop],
        verbose=1
    )
    
    score = model.evaluate(X_val, y_val, batch_size=32, verbose=0)
    fold_metrics.append({
        'fold': fold + 1,
        'loss': score[0],
        'accuracy': score[1],
        'precision': score[2],
        'recall': score[3]
    })

Training fold 1...
Epoch 1/10

In [None]:
metrics_df = pd.DataFrame(fold_metrics)

print("\nCross-Validation Results:")
print(metrics_df)
print("\nAverage metrics across all folds:")
print(metrics_df.mean())