In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [2]:
import pandas as pd

df = pd.read_parquet('../data/dataset.parquet', engine='pyarrow')

In [3]:
df.drop(columns=['session_counter', 'time_to_failure'], inplace=True)
df.set_index('Timestamp', inplace=True)

In [4]:
import numpy as np

label = np.array(['alert_11'])
features = np.array(df.columns.difference(label))

In [5]:
X = df[features].to_numpy()
y = df[label].to_numpy().flatten()

In [6]:
import numpy as np

x_wins_shape = None

def window(X_data, y_data, width: int, shift: int):
    
    X_wins, y_wins = [], []

    for index, (X, y) in enumerate(zip(X_data, y_data)):
        if (index + width + shift) <= X_data.shape[0]:

            window = slice((index + width), (index + width + shift))

            X_wins.append(X_data[index: index + width])

            y_values_shift = y_data[window]
            y_wins.append(int(np.any(y_values_shift == 1)))

    X_wins = np.array(X_wins)
    x_wins_shape = X_wins.shape
    y_wins = np.array(y_wins)
    return X_wins.reshape(X_wins.shape[0], -1), y_wins.flatten()

In [7]:
X_wins, y_wins = window(X, y, width=120, shift=180)

In [8]:
from imblearn.under_sampling import RandomUnderSampler

rus = RandomUnderSampler(random_state=0)

X_res, y_res = rus.fit_resample(X_wins, y_wins)
X_res = X_res.reshape(X_res.shape[0], 1, X_res.shape[1])
print(X_res.shape)

(6648, 1, 1560)


In [9]:
train_size = int(1 - 0.3 * y_res.shape[0])
X_train, X_test = np.array(X_res[0: train_size]), np.array(X_res[train_size:])
y_train, y_test = y_res[0: train_size], y_res[train_size:]

print(X_train.shape)

(4655, 1, 1560)


In [14]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

model = Sequential()
model.add(LSTM(units=128, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(units=64, return_sequences=True))
model.add(LSTM(units=64, return_sequences=False))
model.add(Dense(units=1))

In [15]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [16]:
model.fit(X_train, y_train, epochs=1, batch_size=1, verbose=3)

<keras.callbacks.History at 0x75caaa3f3880>

In [17]:
score = model.evaluate(X_test, y_test, batch_size=32)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

Test loss: 0.786829948425293
Test accuracy: 0.47165077924728394
