## Preprocessing

In [1]:
import pandas as pd

# Load the dataset from a Parquet file (path is relative to the notebook's
# working directory), reading it with the pyarrow engine.
df = pd.read_parquet('../data/dataset.parquet', engine='pyarrow')

Remove **time_to_failure**, **session_counter** and **Timestamp**. The original integer index is kept; setting **Timestamp** as the index is currently disabled.

In [2]:
# Drop the columns that must not reach the feature matrix: window() flattens
# every remaining non-label column into the model input.
# `Timestamp` is dropped too, so the frame keeps its existing integer index.
# Rebinding (instead of `inplace=True`) leaves the loaded frame object unmodified.
df = df.drop(columns=['session_counter', 'time_to_failure', 'Timestamp'])

In [3]:
# `info()` prints column dtypes and non-null counts; `describe()` is the last
# expression of the cell, so its summary-statistics table is rendered as output.
df.info()
df.describe()

<class 'pandas.core.frame.DataFrame'>
Index: 679045 entries, 0 to 8796486
Data columns (total 14 columns):
 #   Column                           Non-Null Count   Dtype  
---  ------                           --------------   -----  
 0   Flag roping                      679045 non-null  float64
 1   Platform Position [°]            679045 non-null  float64
 2   Platform Motor frequency [HZ]    679045 non-null  float64
 3   Temperature platform drive [°C]  679045 non-null  float64
 4   Temperature slave drive [°C]     679045 non-null  float64
 5   Temperature hoist drive [°C]     679045 non-null  float64
 6   Tensione totale film [%]         679045 non-null  float64
 7   Current speed cart [%]           679045 non-null  float64
 8   Platform motor speed [%]         679045 non-null  float64
 9   Lifting motor speed [RPM]        679045 non-null  float64
 10  Platform rotation speed [RPM]    679045 non-null  float64
 11  Slave rotation speed [M/MIN]     679045 non-null  float64
 12  Liftin

Unnamed: 0,Flag roping,Platform Position [°],Platform Motor frequency [HZ],Temperature platform drive [°C],Temperature slave drive [°C],Temperature hoist drive [°C],Tensione totale film [%],Current speed cart [%],Platform motor speed [%],Lifting motor speed [RPM],Platform rotation speed [RPM],Slave rotation speed [M/MIN],Lifting speed rotation [M/MIN],alert_11
count,679045.0,679045.0,679045.0,679045.0,679045.0,679045.0,679045.0,679045.0,679045.0,679045.0,679045.0,679045.0,679045.0,679045.0
mean,30.999361,163.937436,694.531602,23.766837,23.444327,21.303289,165.357471,2.150718,13.383201,116.676833,10.785796,11.759514,0.933471,0.002051
std,0.140758,111.206953,1578.580228,11.435006,9.969298,9.974128,211.047433,12.512806,30.393794,685.563601,25.029436,32.598768,5.634515,0.045246
min,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,31.0,62.0,0.0,15.0,16.0,13.0,144.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,31.0,165.0,0.0,22.0,22.0,19.0,179.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,31.0,260.0,0.0,32.0,30.0,28.0,201.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,31.0,359.0,5200.0,61.0,55.0,55.0,9900.0,100.0,100.0,5500.0,88.0,184.0,73.0,1.0


## Modelling

In [4]:
# Index labels (not rows) of every record whose alert flag equals 1.
df_1 = df.loc[df['alert_11'] == 1].index
print(df_1)

Index([  45902,   45904,   45906,   45908,   45910,   45912,   45914,   45916,
         45918,   45920,
       ...
       7554522, 7554528, 7554535, 7554542, 7554547, 7554553, 7554561, 7554565,
       7554568, 7554571],
      dtype='int64', length=1393)


In [5]:
import numpy as np

def window(data: pd.DataFrame, width: int, shift: int, label='alert_11'):
    """Slice ``data`` into overlapping, flattened feature windows with one label each.

    A window starts at every row position (stride 1) for which both the
    feature window and the label span fit inside ``data``.

    Args:
        data: Frame holding the feature columns plus the ``label`` column.
        width: Number of consecutive rows in each feature window.
        shift: Number of rows following the window that are also inspected
            when computing the label.
        label: Name of the label column; it is excluded from the features.

    Returns:
        A tuple ``(X, y)``. ``X`` is a list of 1-D arrays, each the row-major
        flattening of ``width`` rows of feature values. ``y`` is a NumPy array
        holding 1 where the maximum of the label column over the window plus
        the following ``shift`` rows equals 1, and 0 otherwise.

    Note:
        Every window is materialised as its own array, so memory grows with
        ``number_of_windows * width * number_of_feature_columns``.
    """
    # Separate features and labels once. Dropping the label column inside the
    # loop copied the whole frame for every single window.
    features = data.drop(columns=[label]).values
    labels = data[label]

    # A start position is usable only when the feature window (width rows) and
    # the label span (width + shift rows) both end inside the data.
    num_starts = len(data) - width - max(shift, 0) + 1

    X, y = [], []
    for start in range(max(num_starts, 0)):
        X.append(features[start:start + width].flatten())

        # The label covers the window itself AND the next `shift` rows.
        span_max = labels.iloc[start:start + width + shift].max()
        y.append(1 if span_max == 1 else 0)

    return X, np.array(y)

In [6]:
# Smoke-test window() on a 4-row positional slice: with width=2 and shift=2
# the slice has room for exactly one window.
df_debug = df.iloc[17253:17257]
print(window(df_debug, 2, 2))


([array([ 31.,  25.,   0.,  40.,  39.,  28., 204.,   0.,   0.,   0.,   0.,
         0.,   0.,  31.,  25.,   0.,  40.,  39.,  28., 204.,   0.,   0.,
         0.,   0.,   0.,   0.])], array([1]))


In [None]:
# Build the supervised dataset: 180-row feature windows, each labelled over
# the window itself plus the following 120 rows.
X, y = window(df, width=180, shift=120)

## Train and Test

In [None]:
from sklearn.model_selection import train_test_split

# Consecutive windows start one row apart and therefore share almost all of
# their rows. A shuffled split would put near-duplicate samples on both sides
# and inflate the test score, so split in order instead: the first 70% of the
# windows train the model and the last 30% are held out.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)

## Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with the library's default hyperparameters; only the seed is
# fixed so that repeated runs build the same forest.
model = RandomForestClassifier(random_state=42)

## K-Fold Cross Validation

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation over the full windowed dataset, scored with the
# estimator's default metric.
# NOTE(review): positives look rare in `alert_11`, so a default accuracy-style
# score may be high even for a trivial model — consider an explicit `scoring=`.
scores = cross_val_score(model, X, y, cv=5)
mean_score = np.average(scores)
print(mean_score)