## Preprocessing

In [1]:
import pandas as pd
# Load the whole dataset from the Parquet file (path is relative to the notebook)
# using the pyarrow engine.
df = pd.read_parquet('../data/dataset.parquet', engine='pyarrow')

Remove **time_to_failure** and **session_counter** and set **Timestamp** as the index.

In [2]:
# Drop the two unused columns and index the frame by its Timestamp column.
# The result is rebound to `df` as one chained expression instead of two
# in-place mutations.
df = (
    df
    .drop(columns=['session_counter', 'time_to_failure'])
    .set_index('Timestamp')
)

## Modelling

In [3]:
import numpy as np

# Target column, kept as a one-element array so it can be used for column selection.
label = np.array(['alert_11'])
# Every remaining column of the frame is treated as an input feature.
features = np.array(df.columns.difference(label))

# Summary of the label / feature names and how many of each there are.
print(
    f"-> Label:\t{label.shape}",
    f"\t{label}",
    f"-> Features:\t{features.shape}",
    f"\t{features}",
    sep="\n",
)

-> Label:	(1,)
	['alert_11']
-> Features:	(13,)
	['Current speed cart [%]' 'Flag roping' 'Lifting motor speed [RPM]'
 'Lifting speed rotation [M/MIN]' 'Platform Motor frequency [HZ]'
 'Platform Position [°]' 'Platform motor speed [%]'
 'Platform rotation speed [RPM]' 'Slave rotation speed [M/MIN]'
 'Temperature hoist drive [°C]' 'Temperature platform drive [°C]'
 'Temperature slave drive [°C]' 'Tensione totale film [%]']


In [4]:
# Feature matrix: one row per timestamp, one column per feature.
X = df[features].to_numpy()
# Label vector: the single label column flattened to one dimension.
y = df[label].to_numpy().flatten()

print(f"-> X:\t{X.shape}", f"-> y:\t{y.shape}", sep="\n")

-> X:	(679045, 13)
-> y:	(679045,)


## MTS Sliding Window

In [5]:
import numpy as np 

def window(X_data, y_data, width: int, shift: int):
    """Build flattened sliding windows and look-ahead binary labels.

    For every start position ``i`` (stride 1) for which both the input window
    and the look-ahead horizon fit inside ``X_data``:

    * the sample is ``X_data[i : i + width]`` flattened into a single row;
    * the label is ``1`` if any value of
      ``y_data[i + width : i + width + shift]`` equals ``1``, else ``0``.

    Parameters
    ----------
    X_data : numpy.ndarray
        Observations ordered along the first axis.
    y_data : numpy.ndarray
        Per-observation labels aligned with ``X_data``.
    width : int
        Number of consecutive observations in each input window.
    shift : int
        Number of observations after the window that are inspected for a
        positive label.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X_wins`` with one flattened window per row and ``y_wins`` with one
        0/1 label per window. When no complete window fits, empty arrays of
        shape ``(0, width * n_features)`` and ``(0,)`` are returned instead of
        failing inside ``reshape``.
    """
    # A start position is valid only if window + horizon fit inside X_data;
    # the count is additionally capped by the shorter of the two inputs.
    n_windows = max(
        0,
        min(len(X_data), len(y_data), X_data.shape[0] - width - shift + 1),
    )

    if n_windows == 0:
        # Size of one flattened window (1-D inputs count as a single feature).
        flat_width = width * int(np.prod(X_data.shape[1:]))
        return np.empty((0, flat_width), dtype=X_data.dtype), np.zeros(0, dtype=int)

    X_wins = np.array([X_data[start: start + width] for start in range(n_windows)])
    y_wins = np.array([
        int(np.any(y_data[start + width: start + width + shift] == 1))
        for start in range(n_windows)
    ])

    # Collapse (width, n_features) into one row per window.
    return X_wins.reshape(n_windows, -1), y_wins

In [6]:
# Each sample is 120 consecutive rows of X; its label is 1 when an alert occurs
# anywhere in the 180 rows that follow the window.
X_wins, y_wins = window(X, y, width=120, shift=180)

In [7]:
# Shapes of the windowed samples and labels.
print(f"-> X:\t{X_wins.shape}", f"-> y:\t{y_wins.shape}", sep="\n")

-> X:	(678746, 1560)
-> y:	(678746,)


## Undersampling

In [8]:
from imblearn.under_sampling import RandomUnderSampler

# Random undersampler with a fixed seed so the resampling is repeatable.
rus = RandomUnderSampler(random_state=0)

# Resample the windowed data; the class counts printed further down show both
# classes ending up with the same number of samples.
X_res, y_res = rus.fit_resample(X_wins, y_wins)

In [9]:
# Distinct label values after resampling and how many samples carry each one.
classes, distribution = np.unique(y_res, return_counts=True)

In [10]:
# Shapes of the undersampled samples and labels.
print(f"-> X_res:\t{X_res.shape}", f"-> y_res:\t{y_res.shape}", sep="\n")

-> X_res:	(6648, 1560)
-> y_res:	(6648,)


In [11]:
# One line per class actually present, instead of hard-coded indices 0 and 1:
# this neither raises IndexError when only one class exists nor silently
# omits additional classes.
for class_value, class_count in zip(classes, distribution):
    print(f"-> {class_value}:\t{class_count}")

-> 0:	3324
-> 1:	3324


## Models

Random Forest

In [12]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 100 trees; the fixed seed keeps the fitted forest repeatable.
rf = RandomForestClassifier(n_estimators=100, random_state=1234)

SVM (Support Vector Machine)

In [13]:
from sklearn.svm import SVC

# Support vector classifier with C=1 and the library's default kernel.
# NOTE(review): the features are passed in unscaled anywhere in this notebook;
# SVMs are usually sensitive to feature scale, so consider standardising first.
# NOTE(review): per the scikit-learn docs random_state is only used when
# probability=True, so it presumably has no effect here — confirm.
svc = SVC(random_state=1234, C=1)

LSTM (Long Short-Term Memory)

In [None]:
from sklearn.model_selection import train_test_split

X = df[features].to_numpy()
y = df[label].to_numpy().flatten()

# Same windowing and undersampling as used for the classical models.
width = 120
X_wins, y_wins = window(X, y, width=width, shift=180)
X_res, y_res = rus.fit_resample(X_wins, y_wins)

# window() flattens every (width, n_features) window into one row. The LSTM
# needs the time axis back, i.e. (samples, timesteps, features). X_res itself
# is left 2-D because the scikit-learn models below still consume it.
X_seq = X_res.reshape(X_res.shape[0], width, -1)

# 70 % train / 30 % test.
# * `int(1 - 0.3 * n)` evaluates to a negative number and only produced a
#   roughly 70/30 cut by accident through negative slicing.
# * The undersampler returns its rows grouped by class, so slicing
#   sequentially leaves the test part with a single class. A shuffled,
#   stratified split keeps both classes in both parts.
# NOTE(review): windows are built with stride 1 and therefore overlap heavily;
# a random split can place near-identical windows in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X_seq, y_res, test_size=0.3, stratify=y_res, random_state=1234
)

In [None]:
# Shapes of the train / test partitions.
print(
    f"-> X_train, X_test:\t{X_train.shape}, {X_test.shape}",
    f"-> y_train, y_test:\t{y_train.shape}, {y_test.shape}",
    sep="\n",
)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense


# Stacked LSTM binary classifier: input is (timesteps, features) per sample,
# output is a single alert probability per window.
model = Sequential()
model.add(LSTM(units=128, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))

model.add(LSTM(units=64, return_sequences=True))
# The last recurrent layer returns only its final state. With
# return_sequences=True the Dense layer would emit one value per time step,
# which cannot be matched against the one-label-per-window target.
model.add(LSTM(units=64))

# Sigmoid keeps the prediction in [0, 1], matching the 0/1 target.
model.add(Dense(units=1, activation='sigmoid'))

In [None]:
# Train with mean squared error and the Adam optimizer; no extra metrics are
# configured, so evaluate() yields a single loss value.
# NOTE(review): the target is a 0/1 alert flag, so 'binary_crossentropy' would be
# the more conventional loss. If it is changed, the "MSE"/"RMSE" wording in the
# evaluation cell has to change with it.
model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
# Fit for 100 epochs, logging one line per epoch (verbose=2).
# NOTE(review): batch_size=1 means one weight update per sample, which is likely
# to be very slow on this dataset — consider a larger batch size.
model.fit(X_train, y_train, epochs=100, batch_size=1, verbose=2)

In [None]:
# `math` is used below for the square root but is not imported anywhere else in
# the notebook; without this import the cell fails with NameError.
import math

# evaluate() returns the compiled loss (mean squared error) as a single float
# because the model is compiled without additional metrics.
trainScore = model.evaluate(X_train, y_train, verbose=0)
print('Train Score: %.2f MSE (%.2f RMSE)' % (trainScore, math.sqrt(trainScore)))
testScore = model.evaluate(X_test, y_test, verbose=0)
print('Test Score: %.2f MSE (%.2f RMSE)' % (testScore, math.sqrt(testScore)))

## K-Fold Cross Validation

In [33]:
from sklearn.model_selection import cross_val_score, StratifiedKFold

# 5-fold stratified cross validation; the same splitter is shared by all models
# so their scores are computed on identical folds.
skf = StratifiedKFold(n_splits=5)

# Evaluate each scikit-learn estimator the same way and report its mean score.
for model_name, estimator in (("RF", rf), ("SVC", svc)):
    scores = cross_val_score(estimator, X_res, y_res, cv=skf)
    print(f"{model_name}:\t{scores.mean()}")

# TODO: the LSTM is not cross-validated here. No `lstm` estimator is defined in
# this notebook, and the Keras `model` would presumably need a
# scikit-learn-compatible wrapper (and 3-D input) before it could be passed to
# cross_val_score.

RF:	0.5670844153272572
SVC:	0.5914213298483229
