# Model training on the Hydraulic Systems Dataset

## Load the dataset

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifierCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score

from sktime.transformations.panel.rocket import MiniRocket
from sktime.transformations.panel.rocket import MiniRocketMultivariate
from sktime.datatypes import convert_to
from sktime.datasets import load_from_tsfile

from timeit import default_timer as timer
from datetime import timedelta
import warnings

warnings.simplefilter('ignore')
%matplotlib inline
SEED=42

### Load the features

In [15]:
# Relative path to the dataset directory; also reused when loading the targets.
data_path = "../data/Predictive-Maintenance-Of-Hydraulics-System/"

# Time-series features stored as a .ts file, read with sktime's loader below.
features_path = "hydraulic-systems-monitoring/hydraulic-systems-monitoring.ts"

# return_y=False: only the features are loaded here; the targets come from a
# separate CSV file.
X = load_from_tsfile(full_file_path_and_name=data_path+features_path, return_y=False)
X.head(3)

Unnamed: 0,dim_0,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6,dim_7,dim_8,dim_9,dim_10,dim_11,dim_12,dim_13,dim_14,dim_15,dim_16
0,0 30.363 1 30.375 2 30.367 3 3...,0 35.570 1 35.492 2 35.469 3 3...,0 40.961 1 40.949 2 40.965 3 4...,0 9.84138 1 9.83686 2 9.83819 3 ...,0 176.8996 1 184.6420 2 189.8996 3...,0 9.96442 1 9.95969 2 9.96422 3 ...,0 68.039 1 0.000 2 0.000 3 ...,0 0.18954 1 0.00000 2 0.00000 3 ...,0 38.320 1 38.332 2 38.320 3 3...,0 0.604 1 0.605 2 0.611 3 0.60...,0 2.184 1 2.184 2 2.184 3 2.18...,0 47.202 1 47.273 2 47.250 3 4...,0 0.0 1 0.0 2 0.0 3 0.0 4 ...,0 10.1691 1 10.1704 2 10.1641 3 ...,0 1.0413 1 0.0007 2 0.0023 3 0...,0 11.86685 1 0.18786 2 0.0213...,0 2613.468 1 2917.282 2 2861.840 3...
1,0 33.648 1 33.723 2 33.723 3 3...,0 36.156 1 36.094 2 35.992 3 3...,0 41.258 1 41.277 2 41.262 3 4...,0 9.60095 1 9.59831 2 9.58497 3 ...,0 174.2707 1 191.3529 2 191.3024 3...,0 9.71038 1 9.70451 2 9.69738 3 ...,0 68.264 1 0.000 2 0.000 3 ...,0 0.18663 1 0.00000 2 0.00000 3 ...,0 38.680 1 38.672 2 38.668 3 3...,0 0.590 1 0.610 2 0.626 3 0.62...,0 1.414 1 1.384 2 1.385 3 1.38...,0 29.208 1 28.822 2 28.805 3 2...,0 0.0 1 0.0 2 0.0 3 0.0 4 ...,0 10.4113 1 10.4047 2 10.3989 3 ...,0 1.0458 1 0.0024 2 0.0006 3 0...,0 12.28756 1 0.22033 2 0.0241...,0 2582.128 1 2939.828 2 2944.482 3...
2,0 35.113 1 35.121 2 35.098 3 3...,0 37.488 1 37.391 2 37.340 3 3...,0 42.129 1 42.105 2 42.117 3 4...,0 9.49861 1 9.48868 2 9.47973 3 ...,0 174.0188 1 191.2384 2 191.2183 3...,0 9.60278 1 9.59102 2 9.58121 3 ...,0 68.595 1 0.000 2 0.000 3 ...,0 0.17820 1 0.00000 2 0.00000 3 ...,0 39.234 1 39.246 2 39.246 3 3...,0 0.578 1 0.603 2 0.638 3 0.65...,0 1.159 1 1.157 2 1.157 3 1.13...,0 23.554 1 23.521 2 23.527 3 2...,0 0.0 1 0.0 2 0.0 3 0.0 4 ...,0 10.4085 1 10.4089 2 10.3852 3 ...,0 1.0418 1 0.0019 2 0.0006 3 0...,0 11.82779 1 0.14628 2 0.0004...,0 2569.696 1 2928.872 2 2934.702 3...


### Load the target variables

```
1: Cooler condition / %:
        3: close to total failure
        20: reduced efficiency
        100: full efficiency
2: Valve condition / %:
        100: optimal switching behavior
        90: small lag
        80: severe lag
        73: close to total failure
3: Internal pump leakage:
        0: no leakage
        1: weak leakage
        2: severe leakage
4: Hydraulic accumulator / bar:
        130: optimal pressure
        115: slightly reduced pressure
        100: severely reduced pressure
        90: close to total failure
5: stable flag:
        0: conditions were stable
        1: static conditions might not have been reached yet
```

In [16]:
# The target variables live in a CSV file inside the same dataset directory.
targets_path = "profile_targets.csv"
targets = pd.read_csv(data_path+targets_path)
targets.head()

Unnamed: 0,target_cooler_condition,target_valve_condition,target_internal_pump_leakage,target_hydraulic_accumulator_pressure,target_stable_flag
0,3,100,0,130,1
1,3,100,0,130,1
2,3,100,0,130,1
3,3,100,0,130,1
4,3,100,0,130,1


In [17]:
# Classification target: the hydraulic accumulator pressure
# (item 4 in the list of target variables above).
y = targets["target_hydraulic_accumulator_pressure"]
y

0       130
1       130
2       130
3       130
4       130
       ... 
2200     90
2201     90
2202     90
2203     90
2204     90
Name: target_hydraulic_accumulator_pressure, Length: 2205, dtype: int64

### Data splitting

In [18]:
# Hold out 20% of the examples for testing; stratify on y so both splits keep
# the class proportions, and seed the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    shuffle=True,
    random_state=SEED,
)

# The frame shape is (examples, channels); the shape of the first cell is
# appended to report the series length as the third dimension.
train_shape = X_train.shape + X_train.iloc[0, 0].shape
test_shape = X_test.shape + X_test.iloc[0, 0].shape

print("Data shape: (examples, channels, series_length)\n")
print("X_train shape:\t", train_shape)
print("y_train shape:\t", y_train.shape)
print("X_test shape:\t", test_shape)
print("y_test shape:\t", y_test.shape)

Data shape: (examples, channels, series_length)

X_train shape:	 (1764, 17, 60)
y_train shape:	 (1764,)
X_test shape:	 (441, 17, 60)
y_test shape:	 (441,)


## Helper methods

In [9]:
def transform_data(X_train, X_test):
    """Fit MiniRocketMultivariate on the training data and transform both splits.

    The transformer is fitted on ``X_train`` only and then applied to
    ``X_train`` and ``X_test``. The elapsed time of each step is printed in
    whole seconds.

    Parameters
    ----------
    X_train
        Training data used to fit the transformer and then transformed.
    X_test
        Test data transformed with the transformer fitted on ``X_train``.

    Returns
    -------
    tuple
        ``(X_train_transform, X_test_transform)``.
    """
    print("Starting data transformation ...")

    # Fitting (the timer also covers constructing the transformer).
    fit_started = timer()
    rocket = MiniRocketMultivariate(n_jobs=4, random_state=SEED)
    rocket.fit(X_train)
    fit_elapsed = timedelta(seconds=timer() - fit_started)
    print("Elapsed time for MiniRocket fitting:", fit_elapsed.seconds, "seconds")

    # Transform the training split.
    train_started = timer()
    train_features = rocket.transform(X_train)
    train_elapsed = timedelta(seconds=timer() - train_started)
    print("Elapsed time for the transformation of X_train:", train_elapsed.seconds, "seconds")

    # Transform the test split with the already fitted transformer.
    test_started = timer()
    test_features = rocket.transform(X_test)
    test_elapsed = timedelta(seconds=timer() - test_started)
    print("Elapsed time for the transformation of X_test:", test_elapsed.seconds, "seconds \n")

    return train_features, test_features


def get_model_name(model):
    """Return the class name of ``model`` as a string."""
    model_class = model.__class__
    return model_class.__name__


def train_model(model, X_train, y_train):
    """Fit ``model`` on the training data and print how long it took.

    Parameters
    ----------
    model
        Estimator exposing ``fit(X, y)``.
    X_train
        Training features passed to ``model.fit``.
    y_train
        Training labels passed to ``model.fit``.

    Returns
    -------
    The value returned by ``model.fit(X_train, y_train)``.
    """
    model_name = get_model_name(model)
    print(f"Starting {model_name} model training ...")

    fit_started = timer()
    fitted_model = model.fit(X_train, y_train)
    elapsed = timedelta(seconds=timer() - fit_started)

    # Only the whole-seconds part of the elapsed time is reported.
    print(f"Elapsed time for the {model_name} model training:",
          elapsed.seconds,
          "seconds \n")
    return fitted_model

def evaluate_model(model, X_test, y_test):
    """Predict on the given test features and print classification metrics.

    Prints the prediction time in whole seconds, the classification report,
    and the micro, macro and weighted F1 scores rounded to four decimals.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict``.
    X_test
        Features passed to ``model.predict``.
    y_test
        True labels the predictions are compared against.

    Returns
    -------
    None
        The results are only printed.
    """
    model_name = get_model_name(model)
    print(f"Starting {model_name} model evaluation ...")

    start = timer()
    # Predict on the X_test argument. The previous version read the
    # notebook-level X_test_transform variable, so the parameter was ignored
    # and the function only worked while that global happened to exist.
    y_pred = model.predict(X_test)
    print(f"Elapsed time for the {model_name} model prediction:",
      (timedelta(seconds=timer()-start)).seconds,
      "seconds \n")

    print(f"Classification Report for the {model_name} model \n")
    print(classification_report(y_test, y_pred, digits=4))

    print("")
    print("F1 micro:\t", round(f1_score(y_test, y_pred, average="micro"), 4))
    print("F1 macro:\t", round(f1_score(y_test, y_pred, average="macro"), 4))
    print("F1 weighted:\t", round(f1_score(y_test, y_pred, average="weighted"), 4))
    print("")

## Data transformation using MiniROCKET

In [10]:
%%time 

# Fit MiniROCKET on the training split and transform both splits (see transform_data).
X_train_transform, X_test_transform = transform_data(X_train, X_test)

Starting data transformation ...
Elapsed time for MiniRocket fitting: 17 seconds
Elapsed time for the transformation of X_train: 20 seconds
Elapsed time for the transformation of X_test: 4 seconds 

CPU times: user 49.9 s, sys: 70.4 ms, total: 50 s
Wall time: 43.1 s


In [11]:
# Preview the first rows of the transformed training features.
X_train_transform.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,9986,9987,9988,9989,9990,9991,9992,9993,9994,9995
0,0.366667,0.783333,0.15,0.533333,0.9,0.283333,0.683333,0.05,0.416667,0.833333,...,0.7,0.0,0.25,1.0,0.216667,0.6,1.0,0.0,1.0,0.0
1,0.383333,0.783333,0.15,0.566667,0.9,0.3,0.683333,0.05,0.433333,0.833333,...,0.683333,0.0,0.25,1.0,0.233333,0.6,0.916667,0.0,1.0,0.0
2,0.383333,0.766667,0.15,0.533333,0.9,0.3,0.683333,0.05,0.433333,0.833333,...,0.7,0.0,0.0,1.0,0.233333,0.6,0.916667,0.0,1.0,0.0
3,0.383333,0.766667,0.15,0.5,0.9,0.3,0.666667,0.05,0.45,0.8,...,0.7,0.0,0.0,1.0,0.25,0.583333,0.916667,0.0,1.0,0.0
4,0.383333,0.783333,0.15,0.533333,0.9,0.283333,0.683333,0.066667,0.433333,0.833333,...,0.7,0.0,0.0,1.0,0.233333,0.6,1.0,0.0,1.0,0.0


## MiniROCKET features + RidgeClassifierCV

In [110]:
%%time 

# Ridge classifier with the regularisation strength chosen from 10
# log-spaced alphas between 1e-3 and 1e3.
# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0 and
# removed in 1.2, so this call presumably fails on newer versions -- confirm
# the installed version. The suggested replacement is an explicit scaling step
# (e.g. StandardScaler(with_mean=False) in a pipeline); verify before switching,
# since results may differ slightly.
model = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
model = train_model(model, X_train_transform, y_train)
evaluate_model(model, X_test_transform, y_test)


Starting RidgeClassifierCV model training ...
Elapsed time for the RidgeClassifierCV model training: 4 seconds

Starting RidgeClassifierCV model evaluation ...
Elapsed time for the RidgeClassifierCV model prediction: 0 seconds

Classification Report for the RidgeClassifierCV model 

              precision    recall  f1-score   support

          90     0.9755    0.9876    0.9815       161
         100     0.9740    0.9375    0.9554        80
         115     0.9756    1.0000    0.9877        80
         130     1.0000    0.9917    0.9958       120

    accuracy                         0.9819       441
   macro avg     0.9813    0.9792    0.9801       441
weighted avg     0.9819    0.9819    0.9818       441


F1 micro:	 0.9819
F1 macro:	 0.9801
F1 weighted:	 0.9818

CPU times: user 18.5 s, sys: 12 s, total: 30.5 s
Wall time: 4.48 s


## MiniROCKET features + RandomForestClassifier

In [111]:
%%time 

# Random forest with 1000 trees trained on the same MiniROCKET features;
# seeded with SEED and fitted using 4 parallel jobs.
model = RandomForestClassifier(n_estimators=1000, random_state=SEED, n_jobs=4)
model = train_model(model, X_train_transform, y_train)
evaluate_model(model, X_test_transform, y_test)


Starting RandomForestClassifier model training ...
Elapsed time for the RandomForestClassifier model training: 5 seconds

Starting RandomForestClassifier model evaluation ...
Elapsed time for the RandomForestClassifier model prediction: 0 seconds

Classification Report for the RandomForestClassifier model 

              precision    recall  f1-score   support

          90     0.9699    1.0000    0.9847       161
         100     1.0000    0.9125    0.9542        80
         115     0.9756    1.0000    0.9877        80
         130     1.0000    1.0000    1.0000       120

    accuracy                         0.9841       441
   macro avg     0.9864    0.9781    0.9817       441
weighted avg     0.9846    0.9841    0.9839       441


F1 micro:	 0.9841
F1 macro:	 0.9817
F1 weighted:	 0.9839

CPU times: user 18.6 s, sys: 1.35 s, total: 19.9 s
Wall time: 5.81 s
