# Functions

In [None]:

import matplotlib.pyplot as plt
import random
import datetime, os, time
import io
import seaborn as sns
import pickle
import pandas as pd
import numpy as np

from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report, confusion_matrix
import tensorflow as tf
from keras import backend as K
from keras.models import Model
from keras.initializers import glorot_normal,  he_normal
from tensorflow import keras
from tensorflow.keras.layers import SimpleRNN, Flatten, Input, Dense, LSTM, RepeatVector, Bidirectional, Masking, Dropout, Layer, BatchNormalization, Conv1D
from google.colab import drive

from tensorflow.python.framework.ops import disable_eager_execution
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, GradientBoostingClassifier
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import cross_val_score, GroupShuffleSplit, GridSearchCV, RandomizedSearchCV,RepeatedStratifiedKFold, train_test_split
from sklearn.svm import SVC

# Mount Google Drive (Colab only); the dataset directory used below lives under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from utils import plot_rul_one_engine
from utils import get_predictions_ensemble
from utils import  get_predictions_ensemble1
from utils import  plot_sorted
from utils import  rul_col_series
from utils import  add_operating_condition
from utils import  add_health_index
from utils import  condition_scaler
from utils import  exponential_smoothing
from utils import  gen_train_data
from utils import  gen_labels
from utils import  gen_test_data
from utils import  gen_data_wrapper
from utils import  gen_label_wrapper
from utils import  process_data_standard

In [None]:
from utils import process_data_standard2

# Get train and val data

In [None]:
# Column names, in order: engine id, cycle counter, the three `opc*`
# columns, then the 21 sensor channels.
# NOTE(review): presumably the layout of the raw C-MAPSS text files — confirm.
columns = [
    "engine_no", "cycles",
    "opc1", "opc2", "opc3",
    "tfan_in_tot", "tLPC_out_tot", "tHPC_out_tot", "tLPT_out_tot",
    "pfan_in", "pbypass_tot", "pHPC_out_tot",
    "fan_speed_physical", "core_speed_physical",
    "P50_P2_ratio", "pHPC_out_stat", "fuel_flow_to_Ps30",
    "fan_speed_corrected", "core_speed_corrected",
    "bypass_ratio", "burnerfuel_air_ratio", "bleed_enthalpy",
    "fan_speed_demanded", "fan_speed_demanded_corrected",
    "HPT_coolant_bleed", "LPT_coolant_bleed",
]

In [None]:
# Sub-dataset identifier and the Drive folder holding the data files.
# NOTE(review): `dataset` is not used by the loading calls in this notebook,
# which pass the dataset name as a literal.
dataset = 'FD002'
dir_path = '/content/drive/MyDrive/CMAPSSData/'

# All 21 sensor channels, in file order.
sensor_names = [
    "tfan_in_tot", "tLPC_out_tot", "tHPC_out_tot", "tLPT_out_tot",
    "pfan_in", "pbypass_tot", "pHPC_out_tot",
    "fan_speed_physical", "core_speed_physical",
    "P50_P2_ratio", "pHPC_out_stat", "fuel_flow_to_Ps30",
    "fan_speed_corrected", "core_speed_corrected",
    "bypass_ratio", "burnerfuel_air_ratio", "bleed_enthalpy",
    "fan_speed_demanded", "fan_speed_demanded_corrected",
    "HPT_coolant_bleed", "LPT_coolant_bleed",
]

# The 15 sensors actually used as model features: every channel except the
# six listed here (order of `sensor_names` is preserved).
sensors = [
    channel
    for channel in sensor_names
    if channel not in {
        "tfan_in_tot",
        "pfan_in",
        "P50_P2_ratio",
        "burnerfuel_air_ratio",
        "fan_speed_demanded",
        "fan_speed_demanded_corrected",
    }
]


# FD002, operating condition 1 ('35.0_0.84_100.0'): train/val/test splits
# plus the two extra `*_pre` frames returned by process_data_standard2.
(
    x_train_cl1, y_train_cl1,
    x_val_cl1, y_val_cl1,
    x_test_cl1, y_test_cl1,
    X_train_pre_cl1, X_test_pre_cl1,
) = process_data_standard2(
    'FD002',
    sensors,
    dir_path,
    condition='35.0_0.84_100.0',
    algo='clf',
    clust=-1,
    get_by_cond=True,
    sequence_length=1,
)

In [None]:
# FD002, operating condition 2 ('42.0_0.84_100.0').
(
    x_train_cl2, y_train_cl2,
    x_val_cl2, y_val_cl2,
    x_test_cl2, y_test_cl2,
    X_train_pre_cl2, X_test_pre_cl2,
) = process_data_standard2(
    'FD002',
    sensors,
    dir_path,
    condition='42.0_0.84_100.0',
    algo='clf',
    clust=-1,
    get_by_cond=True,
    sequence_length=1,
)

In [None]:
# FD002, operating condition 3 ('25.0_0.62_60.0').
(
    x_train_cl3, y_train_cl3,
    x_val_cl3, y_val_cl3,
    x_test_cl3, y_test_cl3,
    X_train_pre_cl3, X_test_pre_cl3,
) = process_data_standard2(
    'FD002',
    sensors,
    dir_path,
    condition='25.0_0.62_60.0',
    algo='clf',
    clust=-1,
    get_by_cond=True,
    sequence_length=1,
)


In [None]:
# FD002, operating condition 4 ('20.0_0.7_100.0').
(
    x_train_cl4, y_train_cl4,
    x_val_cl4, y_val_cl4,
    x_test_cl4, y_test_cl4,
    X_train_pre_cl4, X_test_pre_cl4,
) = process_data_standard2(
    'FD002',
    sensors,
    dir_path,
    condition='20.0_0.7_100.0',
    algo='clf',
    clust=-1,
    get_by_cond=True,
    sequence_length=1,
)


In [None]:
# FD002, operating condition 5 ('0.0_0.0_100.0').
(
    x_train_cl5, y_train_cl5,
    x_val_cl5, y_val_cl5,
    x_test_cl5, y_test_cl5,
    X_train_pre_cl5, X_test_pre_cl5,
) = process_data_standard2(
    'FD002',
    sensors,
    dir_path,
    condition='0.0_0.0_100.0',
    algo='clf',
    clust=-1,
    get_by_cond=True,
    sequence_length=1,
)


In [None]:
# FD002, operating condition 6 ('10.0_0.25_100.0').
(
    x_train_cl6, y_train_cl6,
    x_val_cl6, y_val_cl6,
    x_test_cl6, y_test_cl6,
    X_train_pre_cl6, X_test_pre_cl6,
) = process_data_standard2(
    'FD002',
    sensors,
    dir_path,
    condition='10.0_0.25_100.0',
    algo='clf',
    clust=-1,
    get_by_cond=True,
    sequence_length=1,
)


In [None]:
# Whole FD001 dataset (no per-condition or per-cluster split requested).
# NOTE(review): `condition` is still passed although `get_by_cond` is not —
# presumably ignored by process_data_standard2 in this mode; confirm.
(
    x_train1, y_train1,
    x_val1, y_val1,
    x_test1, y_test1,
    X_train_pre1, X_test_pre1,
) = process_data_standard2(
    'FD001',
    sensors,
    dir_path,
    condition='10.0_0.25_100.0',
    algo='clf',
    clust=-1,
)


In [None]:
# Whole FD002 dataset (no per-condition or per-cluster split requested).
# NOTE(review): `condition` is still passed although `get_by_cond` is not —
# presumably ignored by process_data_standard2 in this mode; confirm.
(
    x_train2, y_train2,
    x_val2, y_val2,
    x_test2, y_test2,
    X_train_pre2, X_test_pre2,
) = process_data_standard2(
    'FD002',
    sensors,
    dir_path,
    condition='10.0_0.25_100.0',
    algo='clf',
    clust=-1,
)


In [None]:
# Whole FD003 dataset (no per-condition or per-cluster split requested).
# NOTE(review): `condition` is still passed although `get_by_cond` is not —
# presumably ignored by process_data_standard2 in this mode; confirm.
(
    x_train3, y_train3,
    x_val3, y_val3,
    x_test3, y_test3,
    X_train_pre3, X_test_pre3,
) = process_data_standard2(
    'FD003',
    sensors,
    dir_path,
    condition='10.0_0.25_100.0',
    algo='clf',
    clust=-1,
)


In [None]:
# Whole FD004 dataset (no per-condition or per-cluster split requested).
# NOTE(review): `condition` is still passed although `get_by_cond` is not —
# presumably ignored by process_data_standard2 in this mode; confirm.
(
    x_train4, y_train4,
    x_val4, y_val4,
    x_test4, y_test4,
    X_train_pre4, X_test_pre4,
) = process_data_standard2(
    'FD004',
    sensors,
    dir_path,
    condition='10.0_0.25_100.0',
    algo='clf',
    clust=-1,
)


In [None]:
# FD003 restricted to fault-mode cluster 0 (clust=0).
# NOTE(review): `condition` is passed without `get_by_cond` — presumably
# unused here; confirm against process_data_standard2.
(
    x_train_f0, y_train_f0,
    x_val_f0, y_val_f0,
    x_test_f0, y_test_f0,
    X_train_pre_f0, X_test_pre_f0,
) = process_data_standard2(
    'FD003',
    sensors,
    dir_path,
    condition='10.0_0.25_100.0',
    algo='clf',
    sequence_length=1,
    clust=0,
)


In [None]:
# FD003 restricted to fault-mode cluster 1 (clust=1).
# NOTE(review): `condition` is passed without `get_by_cond` — presumably
# unused here; confirm against process_data_standard2.
(
    x_train_f1, y_train_f1,
    x_val_f1, y_val_f1,
    x_test_f1, y_test_f1,
    X_train_pre_f1, X_test_pre_f1,
) = process_data_standard2(
    'FD003',
    sensors,
    dir_path,
    condition='10.0_0.25_100.0',
    algo='clf',
    sequence_length=1,
    clust=1,
)

In [None]:
# Convert every test target returned by process_data_standard2 into the
# `health` column produced by add_health_index (as a NumPy array).
# NOTE(review): y_test1..y_test4 are overwritten with their own transform, so
# this cell must not be re-run without re-running the loading cells first.
(
    y_test1, y_test2, y_test3, y_test4,
    y_testf1, y_testf0,
    y_testcl1, y_testcl2, y_testcl3, y_testcl4, y_testcl5, y_testcl6,
) = [
    add_health_index(targets).health.values
    for targets in (
        y_test1, y_test2, y_test3, y_test4,
        y_test_f1, y_test_f0,
        y_test_cl1, y_test_cl2, y_test_cl3, y_test_cl4, y_test_cl5, y_test_cl6,
    )
]

# For every test set keep only the last element of each sample
# (presumably the final time step of each window — confirm).
(
    x_test11, x_test12, x_test13, x_test14,
    x_test1f1, x_test1f0,
    x_test1cl1, x_test1cl2, x_test1cl3, x_test1cl4, x_test1cl5, x_test1cl6,
) = [
    [sample[-1] for sample in samples]
    for samples in (
        x_test1, x_test2, x_test3, x_test4,
        x_test_f1, x_test_f0,
        x_test_cl1, x_test_cl2, x_test_cl3, x_test_cl4, x_test_cl5, x_test_cl6,
    )
]


# Random Forest


In [None]:
from utils import classif_rf

### Create a Random Forest classifier for each of the 4 main datasets.

In [None]:
# Untuned random forest on whole FD001; classif_rf prints an accuracy score, cross-validation
# scores and a classification report. rf1 is the fitted model (used with .predict below);
# rft1 / rfp1 are presumably train / validation predictions — confirm in utils.classif_rf.
rf1, rft1, rfp1 = classif_rf(x_train1, y_train1, x_val1, y_val1)

Model accuracy score with 10 decision-trees : 0.7060

The scores for cross validation are:
 [0.70773639 0.78223496 0.78223496 0.65329513 0.72492837 0.64183381
 0.76790831 0.67908309 0.61604585 0.52148997]

The mean score is: 0.6877

              precision    recall  f1-score   support

         0.0       0.89      0.83      0.86      1020
         1.0       0.66      0.80      0.72      1479
         2.0       0.59      0.52      0.55       837
         3.0       0.00      0.00      0.00       154

    accuracy                           0.71      3490
   macro avg       0.53      0.54      0.53      3490
weighted avg       0.68      0.71      0.69      3490



In [None]:
# Prediction on the FD001 test set (last element of each test sample).
rfp1_t = rf1.predict(x_test11)

In [None]:
# Test accuracy of the untuned FD001 forest against the health-index labels.
accuracy_score(y_test1, rfp1_t)

0.73

In [None]:
# Untuned random forest on whole FD002 (same procedure as for FD001).
rf2, rft2, rfp2 = classif_rf(x_train2, y_train2, x_val2, y_val2)

Model accuracy score with 10 decision-trees : 0.6910

The scores for cross validation are:
 [0.64505119 0.70648464 0.76791809 0.6894198  0.72468714 0.70079636
 0.73037543 0.72437358 0.7118451  0.64123007]

The mean score is: 0.7042

              precision    recall  f1-score   support

         0.0       0.90      0.79      0.84      2652
         1.0       0.65      0.77      0.71      3820
         2.0       0.53      0.53      0.53      1944
         3.0       0.25      0.00      0.01       371

    accuracy                           0.69      8787
   macro avg       0.58      0.52      0.52      8787
weighted avg       0.68      0.69      0.68      8787



In [None]:
# Test-set predictions of the FD002 forest.
rfp2_t = rf2.predict(x_test12)

In [None]:
# Untuned random forest on whole FD003.
rf3, rft3, rfp3 = classif_rf(x_train3, y_train3, x_val3, y_val3)

Model accuracy score with 10 decision-trees : 0.6393

The scores for cross validation are:
 [0.66828087 0.52058111 0.61016949 0.69249395 0.6779661  0.29539952
 0.72154964 0.66343826 0.65048544 0.45873786]

The mean score is: 0.5959

              precision    recall  f1-score   support

         0.0       0.92      0.85      0.88      1020
         1.0       0.63      0.75      0.69      1500
         2.0       0.43      0.35      0.38      1029
         3.0       0.52      0.50      0.51       579

    accuracy                           0.64      4128
   macro avg       0.62      0.61      0.61      4128
weighted avg       0.64      0.64      0.63      4128



In [None]:
# Test-set predictions of the FD003 forest.
rfp3_t = rf3.predict(x_test13)

In [None]:
# Untuned random forest on whole FD004.
rf4, rft4, rfp4 = classif_rf(x_train4, y_train4, x_val4, y_val4)

Model accuracy score with 10 decision-trees : 0.5833

The scores for cross validation are:
 [0.51569933 0.53282588 0.43958135 0.5432921  0.54804948 0.59714286
 0.64380952 0.55428571 0.55333333 0.5       ]

The mean score is: 0.5428

              precision    recall  f1-score   support

         0.0       0.83      0.81      0.82      2550
         1.0       0.54      0.71      0.61      3680
         2.0       0.43      0.32      0.36      2509
         3.0       0.51      0.37      0.43      1766

    accuracy                           0.58     10505
   macro avg       0.58      0.55      0.56     10505
weighted avg       0.58      0.58      0.57     10505



In [None]:
# Test-set predictions of the FD004 forest.
rfp4_t = rf4.predict(x_test14)

### Create a Random Forest classifier for each of the 6 sub-datasets (one per operating condition) of dataset 2

In [None]:
# Untuned random forest on the FD002 subset for operating condition 1 ('35.0_0.84_100.0').
rfs1, rfopct1, rfopcp1 = classif_rf(x_train_cl1, y_train_cl1, x_val_cl1, y_val_cl1)

Model accuracy score with 10 decision-trees : 0.6212

The scores for cross validation are:
 [0.64       0.65333333 0.59333333 0.68       0.72483221 0.6442953
 0.74496644 0.6442953  0.65771812 0.59731544]

The mean score is: 0.6580

              precision    recall  f1-score   support

         0.0       0.88      0.77      0.82       375
         1.0       0.61      0.58      0.59       574
         2.0       0.50      0.69      0.58       436
         3.0       0.40      0.07      0.12       109

    accuracy                           0.62      1494
   macro avg       0.60      0.53      0.53      1494
weighted avg       0.63      0.62      0.61      1494



In [None]:
# Test-set predictions for operating condition 1.
rfopcp1_t = rfs1.predict(x_test1cl1)

In [None]:
# Untuned random forest on the FD002 subset for operating condition 2 ('42.0_0.84_100.0').
rfs2, rfopct2, rfopcp2 = classif_rf(x_train_cl2, y_train_cl2, x_val_cl2, y_val_cl2)

Model accuracy score with 10 decision-trees : 0.6324

The scores for cross validation are:
 [0.61302682 0.63461538 0.75384615 0.59230769 0.69230769 0.62307692
 0.65384615 0.68076923 0.61538462 0.57692308]

The mean score is: 0.6436

              precision    recall  f1-score   support

         0.0       0.90      0.78      0.83       684
         1.0       0.60      0.59      0.60       948
         2.0       0.52      0.71      0.60       768
         3.0       0.20      0.02      0.04       201

    accuracy                           0.63      2601
   macro avg       0.56      0.53      0.52      2601
weighted avg       0.62      0.63      0.62      2601



In [None]:
# Test-set predictions for operating condition 2.
rfopcp2_t = rfs2.predict(x_test1cl2)

In [None]:
# Untuned random forest on the FD002 subset for operating condition 3 ('25.0_0.62_60.0').
rfs3, rfopct3, rfopcp3 = classif_rf(x_train_cl3, y_train_cl3, x_val_cl3, y_val_cl3)

Model accuracy score with 10 decision-trees : 0.6347

The scores for cross validation are:
 [0.61538462 0.65384615 0.60645161 0.62580645 0.72258065 0.61290323
 0.70967742 0.66451613 0.74193548 0.58709677]

The mean score is: 0.6540

              precision    recall  f1-score   support

         0.0       0.89      0.76      0.82       406
         1.0       0.60      0.63      0.61       598
         2.0       0.53      0.70      0.60       435
         3.0       0.20      0.01      0.02       113

    accuracy                           0.63      1552
   macro avg       0.55      0.52      0.51      1552
weighted avg       0.63      0.63      0.62      1552



In [None]:
# Test-set predictions for operating condition 3.
rfopcp3_t = rfs3.predict(x_test1cl3)

In [None]:
# Untuned random forest on the FD002 subset for operating condition 4 ('20.0_0.7_100.0').
rfs4, rfopct4, rfopcp4 = classif_rf(x_train_cl4, y_train_cl4, x_val_cl4, y_val_cl4)

Model accuracy score with 10 decision-trees : 0.6623

The scores for cross validation are:
 [0.67924528 0.67295597 0.7672956  0.64150943 0.71698113 0.63522013
 0.68553459 0.62025316 0.62025316 0.63291139]

The mean score is: 0.6672

              precision    recall  f1-score   support

         0.0       0.91      0.81      0.86       420
         1.0       0.66      0.61      0.63       600
         2.0       0.54      0.73      0.62       454
         3.0       0.32      0.11      0.16       113

    accuracy                           0.66      1587
   macro avg       0.61      0.56      0.57      1587
weighted avg       0.67      0.66      0.66      1587



In [None]:
# Test-set predictions for operating condition 4.
rfopcp4_t = rfs4.predict(x_test1cl4)

In [None]:
# Untuned random forest on the FD002 subset for operating condition 5 ('0.0_0.0_100.0').
rfs5, rfopct5, rfopcp5 = classif_rf(x_train_cl5, y_train_cl5, x_val_cl5, y_val_cl5)

Model accuracy score with 10 decision-trees : 0.6304

The scores for cross validation are:
 [0.56493506 0.5974026  0.68831169 0.61688312 0.66233766 0.66233766
 0.62987013 0.60784314 0.67973856 0.62745098]

The mean score is: 0.6337

              precision    recall  f1-score   support

         0.0       0.90      0.79      0.84       393
         1.0       0.64      0.59      0.62       609
         2.0       0.47      0.70      0.56       408
         3.0       0.50      0.08      0.14       127

    accuracy                           0.63      1537
   macro avg       0.63      0.54      0.54      1537
weighted avg       0.65      0.63      0.62      1537



In [None]:
# Test-set predictions for operating condition 5.
rfopcp5_t = rfs5.predict(x_test1cl5)

In [None]:
# Untuned random forest on the FD002 subset for operating condition 6 ('10.0_0.25_100.0').
rfs6, rfopct6, rfopcp6 = classif_rf(x_train_cl6, y_train_cl6, x_val_cl6, y_val_cl6)

Model accuracy score with 10 decision-trees : 0.6522

The scores for cross validation are:
 [0.5751634  0.60130719 0.69281046 0.64705882 0.73026316 0.64473684
 0.67105263 0.66447368 0.66447368 0.57894737]

The mean score is: 0.6470

              precision    recall  f1-score   support

         0.0       0.91      0.78      0.84       374
         1.0       0.62      0.67      0.64       571
         2.0       0.57      0.68      0.62       462
         3.0       0.24      0.08      0.12       117

    accuracy                           0.65      1524
   macro avg       0.58      0.55      0.55      1524
weighted avg       0.65      0.65      0.64      1524



In [None]:
# Test-set predictions for operating condition 6.
rfopcp6_t = rfs6.predict(x_test1cl6)

### Create a Random Forest classifier for each of the 2 sub-datasets of dataset 3, split by fault mode.


In [None]:
# Untuned random forest on the FD003 subset with fault-mode label 0 (clust=0).
rff0, rfcl0t, rfcl0p = classif_rf(x_train_f0, y_train_f0, x_val_f0, y_val_f0)

Model accuracy score with 10 decision-trees : 0.6019

The scores for cross validation are:
 [0.68965517 0.55603448 0.42241379 0.36206897 0.5        0.51293103
 0.5974026  0.71428571 0.77922078 0.47619048]

The mean score is: 0.5610

              precision    recall  f1-score   support

         0.0       0.79      0.90      0.84       510
         1.0       0.51      0.67      0.58       750
         2.0       0.54      0.45      0.49       686
         3.0       0.72      0.33      0.45       370

    accuracy                           0.60      2316
   macro avg       0.64      0.59      0.59      2316
weighted avg       0.61      0.60      0.59      2316



In [None]:
# Test-set predictions for fault-mode label 0.
rfcl0p_t = rff0.predict(x_test1f0)

In [None]:
# Untuned random forest on the FD003 subset with fault-mode label 1 (clust=1).
rff1, rfcl1t, rfcl1p = classif_rf(x_train_f1, y_train_f1, x_val_f1, y_val_f1)

Model accuracy score with 10 decision-trees : 0.6147

The scores for cross validation are:
 [0.68032787 0.50819672 0.29098361 0.6352459  0.68032787 0.52868852
 0.73360656 0.4691358  0.63786008 0.64609053]

The mean score is: 0.5810

              precision    recall  f1-score   support

         0.0       0.81      0.90      0.85       612
         1.0       0.53      0.73      0.61       896
         2.0       0.55      0.40      0.46       695
         3.0       0.83      0.08      0.15       234

    accuracy                           0.61      2437
   macro avg       0.68      0.53      0.52      2437
weighted avg       0.63      0.61      0.59      2437



In [None]:
# Test-set predictions for fault-mode label 1.
rfcl1p_t = rff1.predict(x_test1f1)

## Hyperparameter tuning.

In [None]:
from utils import parameter_tuning

In [None]:
# Random-forest hyperparameter search space passed to parameter_tuning
# (4 * 3 * 3 * 2 = 72 combinations).
param_grid = dict(
    n_estimators=[10, 50, 100, 150],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5, 10],
    max_features=['sqrt', 'log2'],
)

# Base random-forest estimator handed to parameter_tuning; seeded so runs are repeatable.
forest = RandomForestClassifier(random_state=237)

# Stratified 10-fold splitter (single repeat) with the same seed.
# NOTE(review): `cv` is not referenced by any tuning call visible in this
# notebook (those pass the literal 10) — confirm whether it is still needed.
cv = RepeatedStratifiedKFold(
    n_splits=10,
    n_repeats=1,
    random_state=237,
)

### Parameter tuning for dataset 1.

In [None]:
# Search param_grid for FD001; the result exposes `best_params_` (read in the next cell).
# The trailing 10 is presumably the number of CV folds — confirm in utils.parameter_tuning.
best_model_rfa1 = parameter_tuning(forest, param_grid, x_train1, y_train1, x_val1, y_val1, 10)

Accuracy: 0.714


In [None]:
# Best hyperparameter combination found for FD001.
best_model_rfa1.best_params_

{'max_depth': 20,
 'max_features': 'sqrt',
 'min_samples_split': 10,
 'n_estimators': 150}

### Parameter tuning for dataset 2.

In [None]:
# Same grid search on whole FD002.
best_model_rfa2 = parameter_tuning(forest, param_grid, x_train2, y_train2, x_val2, y_val2, 10)

Accuracy: 0.704


In [None]:
# Best hyperparameter combination found for FD002.
best_model_rfa2.best_params_

{'max_depth': 10,
 'max_features': 'sqrt',
 'min_samples_split': 5,
 'n_estimators': 150}

### Parameter tuning for dataset 3.

In [None]:
# Same grid search on whole FD003.
best_model_rfa3 = parameter_tuning(forest, param_grid, x_train3, y_train3, x_val3, y_val3, 10)

Accuracy: 0.636


In [None]:
# Best hyperparameter combination found for FD003.
best_model_rfa3.best_params_

{'max_depth': 10,
 'max_features': 'sqrt',
 'min_samples_split': 5,
 'n_estimators': 100}

### Parameter tuning for dataset 4.

In [None]:
# Same grid search on whole FD004.
best_model_rfa4 = parameter_tuning(forest, param_grid, x_train4, y_train4, x_val4, y_val4, 10)

Accuracy: 0.578


In [None]:
# Best hyperparameter combination found for FD004.
best_model_rfa4.best_params_

{'max_depth': 20,
 'max_features': 'sqrt',
 'min_samples_split': 10,
 'n_estimators': 150}

### Parameter tuning for dataset clustered by fault mode of dataset 3 with label 0.

In [None]:
# Same grid search on the FD003 subset with fault-mode label 0.
best_model_rfa_cluster0 = parameter_tuning(forest, param_grid, x_train_f0, y_train_f0, x_val_f0, y_val_f0, 10)

Accuracy: 0.608


In [None]:
# Best hyperparameter combination found for fault-mode label 0.
best_model_rfa_cluster0.best_params_

{'max_depth': 10,
 'max_features': 'sqrt',
 'min_samples_split': 2,
 'n_estimators': 150}

### Parameter tuning for dataset clustered by fault mode of dataset 3 with label 1.

In [None]:
# Same grid search on the FD003 subset with fault-mode label 1.
# NOTE(review): the reported accuracy (0.520) is lower than the untuned forest's 0.6147 for the
# same subset — worth checking how parameter_tuning scores the model.
best_model_rfa_cluster1 = parameter_tuning(forest, param_grid, x_train_f1, y_train_f1, x_val_f1, y_val_f1, 10)

Accuracy: 0.520


In [None]:
# Best hyperparameter combination found for fault-mode label 1.
best_model_rfa_cluster1.best_params_

{'max_depth': 10,
 'max_features': 'sqrt',
 'min_samples_split': 10,
 'n_estimators': 50}

In [None]:
# Bundle the random-forest result objects and persist them to Drive as one pickle.
# NOTE(review): mat1..matf0 and report1..reportf0 are not assigned anywhere in the
# visible part of this notebook — presumably confusion matrices / classification
# reports left over from an earlier session; confirm they are defined before this
# cell, otherwise Restart & Run All fails here with a NameError.
rfmat = [mat1, mat2, mat3, mat4, mats1, mats2, mats3, mats4, mats5, mats6, matf1, matf0 ]
rfreport = [report1, report2, report3, report4, reports1, reports2, reports3, reports4, reports5, reports6, reportf1, reportf0]

# Use a context manager so the file is flushed and closed even if pickling
# raises; the original left the handle open.
with open(b"/content/drive/MyDrive/classiff_results/rf_preds.pkl","wb") as filehandler:
    pickle.dump((rfmat, rfreport), filehandler)


# SVM classification

## Model

In [None]:
from utils import classif_svm

### Create an SVM classifier for each of the 4 main datasets.

In [None]:
# SVM with default hyperparameters on whole FD001; classif_svm prints an accuracy score,
# cross-validation scores and a classification report. svm1 is the fitted model; svmt1 / svmp1
# are presumably train / validation predictions — confirm in utils.classif_svm.
svm1, svmt1, svmp1 = classif_svm(x_train1, y_train1, x_val1, y_val1)

Model accuracy score with default hyperparameters: 0.7252

The scores for cross validation are:
 [0.77077364 0.83381089 0.81661891 0.68481375 0.7277937  0.65329513
 0.78223496 0.72492837 0.61318052 0.55587393]

The mean score is: 0.7163
              precision    recall  f1-score   support

         0.0       0.92      0.82      0.87      1020
         1.0       0.66      0.86      0.75      1479
         2.0       0.64      0.51      0.57       837
         3.0       0.00      0.00      0.00       154

    accuracy                           0.73      3490
   macro avg       0.56      0.55      0.55      3490
weighted avg       0.70      0.73      0.71      3490



In [None]:
# Test-set predictions of the FD001 SVM.
svmp1_t = svm1.predict(x_test11)

In [None]:
# SVM with default hyperparameters on whole FD002.
svm2, svmt2, svmp2 = classif_svm(x_train2, y_train2, x_val2, y_val2)

Model accuracy score with default hyperparameters: 0.6978

The scores for cross validation are:
 [0.64163823 0.69852105 0.74516496 0.6996587  0.73720137 0.7258248
 0.71786121 0.71640091 0.71298405 0.64236902]

The mean score is: 0.7038
              precision    recall  f1-score   support

         0.0       0.93      0.77      0.84      2652
         1.0       0.66      0.79      0.72      3820
         2.0       0.54      0.55      0.54      1944
         3.0       0.00      0.00      0.00       371

    accuracy                           0.70      8787
   macro avg       0.53      0.53      0.53      8787
weighted avg       0.68      0.70      0.69      8787



In [None]:
# Test-set predictions of the FD002 SVM.
svmp2_t = svm2.predict(x_test12)

In [None]:
# SVM with default hyperparameters on whole FD003.
svm3, svmt3, svmp3 = classif_svm(x_train3, y_train3, x_val3, y_val3)

Model accuracy score with default hyperparameters: 0.6747

The scores for cross validation are:
 [0.71912833 0.54237288 0.73365617 0.79661017 0.67070218 0.31234867
 0.74818402 0.77723971 0.83009709 0.50242718]

The mean score is: 0.6633
              precision    recall  f1-score   support

         0.0       0.95      0.84      0.89      1020
         1.0       0.65      0.77      0.71      1500
         2.0       0.51      0.33      0.40      1029
         3.0       0.56      0.75      0.64       579

    accuracy                           0.67      4128
   macro avg       0.67      0.67      0.66      4128
weighted avg       0.67      0.67      0.67      4128



In [None]:
# Test-set predictions of the FD003 SVM.
svmp3_t = svm3.predict(x_test13)

In [None]:
# SVM with default hyperparameters on whole FD004.
svm4, svmt4, svmp4 = classif_svm(x_train4, y_train4, x_val4, y_val4)

Model accuracy score with default hyperparameters: 0.5876

The scores for cross validation are:
 [0.53853473 0.5356803  0.44053283 0.5509039  0.56707897 0.59809524
 0.6952381  0.56952381 0.56571429 0.47333333]

The mean score is: 0.5535
              precision    recall  f1-score   support

         0.0       0.83      0.82      0.83      2550
         1.0       0.54      0.71      0.61      3680
         2.0       0.42      0.34      0.38      2509
         3.0       0.53      0.35      0.42      1766

    accuracy                           0.59     10505
   macro avg       0.58      0.56      0.56     10505
weighted avg       0.58      0.59      0.58     10505



In [None]:
# Test-set predictions of the FD004 SVM.
svmp4_t = svm4.predict(x_test14)

### Create an SVM classifier for each of the 6 sub-datasets (one per operating condition) of dataset 2.

In [None]:
# SVM with default hyperparameters on the FD002 subset for operating condition 1 ('35.0_0.84_100.0').
svms1, svmopct1, svmopcp1 = classif_svm(x_train_cl1, y_train_cl1, x_val_cl1, y_val_cl1)

Model accuracy score with default hyperparameters: 0.6446

The scores for cross validation are:
 [0.62       0.67333333 0.60666667 0.69333333 0.75838926 0.63758389
 0.72483221 0.60402685 0.69127517 0.59060403]

The mean score is: 0.6600
              precision    recall  f1-score   support

         0.0       0.91      0.78      0.84       375
         1.0       0.64      0.59      0.62       574
         2.0       0.51      0.75      0.61       436
         3.0       0.00      0.00      0.00       109

    accuracy                           0.64      1494
   macro avg       0.52      0.53      0.52      1494
weighted avg       0.62      0.64      0.63      1494



In [None]:
# Test-set predictions for operating condition 1.
svmsopcp1_t = svms1.predict(x_test1cl1)

In [None]:
# SVM with default hyperparameters on the FD002 subset for operating condition 2 ('42.0_0.84_100.0').
svms2, svmopct2, svmopcp2 = classif_svm(x_train_cl2, y_train_cl2, x_val_cl2, y_val_cl2)

Model accuracy score with default hyperparameters: 0.6432

The scores for cross validation are:
 [0.66666667 0.59615385 0.77307692 0.6        0.71538462 0.66153846
 0.63846154 0.69230769 0.61538462 0.55384615]

The mean score is: 0.6513
              precision    recall  f1-score   support

         0.0       0.93      0.78      0.85       684
         1.0       0.61      0.60      0.60       948
         2.0       0.53      0.75      0.62       768
         3.0       0.00      0.00      0.00       201

    accuracy                           0.64      2601
   macro avg       0.51      0.53      0.52      2601
weighted avg       0.62      0.64      0.62      2601



In [None]:
# Test-set predictions for operating condition 2.
svmsopcp2_t = svms2.predict(x_test1cl2)

In [None]:
# SVM with default hyperparameters on the FD002 subset for operating condition 3 ('25.0_0.62_60.0').
svms3, svmopct3, svmopcp3 = classif_svm(x_train_cl3, y_train_cl3, x_val_cl3, y_val_cl3)

Model accuracy score with default hyperparameters: 0.6534

The scores for cross validation are:
 [0.62820513 0.66025641 0.65806452 0.65806452 0.70322581 0.61935484
 0.69032258 0.58709677 0.70322581 0.58709677]

The mean score is: 0.6495
              precision    recall  f1-score   support

         0.0       0.92      0.76      0.83       406
         1.0       0.62      0.65      0.64       598
         2.0       0.54      0.72      0.62       435
         3.0       0.00      0.00      0.00       113

    accuracy                           0.65      1552
   macro avg       0.52      0.53      0.52      1552
weighted avg       0.63      0.65      0.64      1552



In [None]:
# Predict with the sub-dataset-3 SVM on `x_test1cl3`.
svmsopcp3_t = svms3.predict(x_test1cl3)

In [None]:
# Same SVM fit/evaluation, for sub-dataset 4 of dataset 2.
svms4, svmopct4, svmopcp4 = classif_svm(x_train_cl4, y_train_cl4, x_val_cl4, y_val_cl4)


Model accuracy score with default hyperparameters: 0.6730

The scores for cross validation are:
 [0.63522013 0.73584906 0.7672956  0.66037736 0.72955975 0.69811321
 0.70440252 0.63291139 0.6835443  0.63924051]

The mean score is: 0.6887
              precision    recall  f1-score   support

         0.0       0.93      0.81      0.87       420
         1.0       0.67      0.61      0.64       600
         2.0       0.53      0.79      0.64       454
         3.0       0.00      0.00      0.00       113

    accuracy                           0.67      1587
   macro avg       0.53      0.55      0.54      1587
weighted avg       0.65      0.67      0.65      1587



In [None]:
# Predict with the sub-dataset-4 SVM on `x_test1cl4`.
svmsopcp4_t = svms4.predict(x_test1cl4)

In [None]:
# Same SVM fit/evaluation, for sub-dataset 5 of dataset 2.
svms5, svmopct5, svmopcp5 = classif_svm(x_train_cl5, y_train_cl5, x_val_cl5, y_val_cl5)


Model accuracy score with default hyperparameters: 0.6396

The scores for cross validation are:
 [0.61688312 0.6038961  0.70779221 0.62987013 0.72077922 0.66883117
 0.64935065 0.62091503 0.64705882 0.66013072]

The mean score is: 0.6526
              precision    recall  f1-score   support

         0.0       0.91      0.77      0.84       393
         1.0       0.66      0.59      0.62       609
         2.0       0.49      0.79      0.60       408
         3.0       0.00      0.00      0.00       127

    accuracy                           0.64      1537
   macro avg       0.51      0.54      0.52      1537
weighted avg       0.62      0.64      0.62      1537



In [None]:
# Predict with the sub-dataset-5 SVM on `x_test1cl5`.
svmsopcp5_t = svms5.predict(x_test1cl5)

In [None]:
# Same SVM fit/evaluation, for sub-dataset 6 of dataset 2.
svms6, svmopct6, svmopcp6 = classif_svm(x_train_cl6, y_train_cl6, x_val_cl6, y_val_cl6)


Model accuracy score with default hyperparameters: 0.6555

The scores for cross validation are:
 [0.67320261 0.64705882 0.66013072 0.64052288 0.77631579 0.63157895
 0.73026316 0.70394737 0.63815789 0.53289474]

The mean score is: 0.6634
              precision    recall  f1-score   support

         0.0       0.93      0.75      0.83       374
         1.0       0.63      0.63      0.63       571
         2.0       0.55      0.77      0.64       462
         3.0       0.00      0.00      0.00       117

    accuracy                           0.66      1524
   macro avg       0.53      0.54      0.53      1524
weighted avg       0.63      0.66      0.64      1524



In [None]:
# Predict with the sub-dataset-6 SVM on `x_test1cl6`.
svmsopcp6_t = svms6.predict(x_test1cl6)

### Create SVM classifiers for the 2 sub-datasets of dataset 3, split by fault mode.

In [None]:
# Fit and evaluate an SVM on the fault-mode subset labelled 0 of dataset 3.
# The first returned value is the fitted model used for `.predict` in the next cell.
svm_cluster0, svmc0t, svmc0p = classif_svm(x_train_f0, y_train_f0, x_val_f0, y_val_f0)

Model accuracy score with default hyperparameters: 0.6110

The scores for cross validation are:
 [0.74568966 0.56896552 0.40086207 0.34051724 0.50862069 0.54741379
 0.63203463 0.71861472 0.84848485 0.65800866]

The mean score is: 0.5969
              precision    recall  f1-score   support

         0.0       0.80      0.89      0.85       510
         1.0       0.52      0.68      0.59       750
         2.0       0.54      0.47      0.50       686
         3.0       0.72      0.34      0.46       370

    accuracy                           0.61      2316
   macro avg       0.65      0.60      0.60      2316
weighted avg       0.62      0.61      0.60      2316



In [None]:
# Predict with the fault-mode-0 SVM on `x_test1f0`.
svmc0p_t = svm_cluster0.predict(x_test1f0)


In [None]:
# Fit and evaluate an SVM on the fault-mode subset labelled 1 of dataset 3.
svm_cluster1, svmc1t, svmc1p = classif_svm(x_train_f1, y_train_f1, x_val_f1, y_val_f1)


Model accuracy score with default hyperparameters: 0.6147

The scores for cross validation are:
 [0.7704918  0.52459016 0.27459016 0.67622951 0.73770492 0.61885246
 0.7295082  0.43209877 0.65432099 0.69135802]

The mean score is: 0.6110
              precision    recall  f1-score   support

         0.0       0.83      0.89      0.86       612
         1.0       0.53      0.76      0.63       896
         2.0       0.54      0.39      0.45       695
         3.0       0.00      0.00      0.00       234

    accuracy                           0.61      2437
   macro avg       0.48      0.51      0.49      2437
weighted avg       0.56      0.61      0.58      2437



In [None]:
# Predict with the fault-mode-1 SVM on `x_test1f1`.
svmc1p_t = svm_cluster1.predict(x_test1f1)

## Hyperparameter tuning

In [None]:
# Shared setup for the SVM grid search.

# Base estimator with a fixed random_state.
svm = SVC(random_state=237)

# Stratified 10-fold splitter, single repeat, fixed seed.
# NOTE(review): the SVM `parameter_tuning` calls below pass the integer 10 instead of
# this `cv` object — confirm which of the two is intended.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=1, random_state=237)

# Candidate hyperparameters explored by the search.
# NOTE(review): `param_grid` and `cv` are rebound in the LDA tuning section further down,
# so this cell has to be re-run before any SVM tuning cell is re-executed.
param_grid = dict(
    C=[0.1, 1.0, 10],
    gamma=['scale', 'auto'],  # disabled numeric alternative: [1.0, 0.1, 0.01]
    kernel=['poly', 'rbf'],
)

### Parameter tuning for dataset 1.

In [None]:
# Search `param_grid` for the SVC on dataset 1. The trailing 10 is presumably the number of
# cross-validation folds — TODO confirm against `parameter_tuning` in utils.
best_model_svm1 = parameter_tuning(svm, param_grid, x_train1, y_train1, x_val1, y_val1, 10)

Accuracy: 0.733


In [None]:
# Show the hyperparameter combination selected for dataset 1.
best_model_svm1.best_params_

{'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}

### Parameter tuning for dataset 2.

In [None]:
# Same SVC search on dataset 2 (last argument 10: presumably the fold count — TODO confirm).
best_model_svm2 = parameter_tuning(svm, param_grid, x_train2, y_train2, x_val2, y_val2, 10)

Accuracy: 0.702


In [None]:
# Show the hyperparameter combination selected for dataset 2.
best_model_svm2.best_params_

{'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}

### Parameter tuning for dataset 3.

In [None]:
# Same SVC search on dataset 3 (last argument 10: presumably the fold count — TODO confirm).
best_model_svm3 = parameter_tuning(svm, param_grid, x_train3, y_train3, x_val3, y_val3, 10)

Accuracy: 0.660


In [None]:
# Show the hyperparameter combination selected for dataset 3.
best_model_svm3.best_params_

{'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}

### Parameter tuning for dataset 4.

In [None]:
# Same SVC search on dataset 4 (last argument 10: presumably the fold count — TODO confirm).
best_model_svm4 = parameter_tuning(svm, param_grid, x_train4, y_train4, x_val4, y_val4, 10)

Accuracy: 0.588


In [None]:
# Show the hyperparameter combination selected for dataset 4.
best_model_svm4.best_params_

{'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}

### Parameter tuning for dataset clustered by fault mode of dataset 3 with label 0.

In [None]:
# SVC search on the fault-mode-0 subset of dataset 3 (last argument 10: presumably the fold count — TODO confirm).
best_model_svm_cluster0 = parameter_tuning(svm, param_grid, x_train_f0, y_train_f0, x_val_f0, y_val_f0, 10)

Accuracy: 0.611


In [None]:
# Show the hyperparameter combination selected for the fault-mode-0 subset.
best_model_svm_cluster0.best_params_

{'C': 1.0, 'gamma': 'auto', 'kernel': 'rbf'}

### Parameter tuning for dataset clustered by fault mode of dataset 3 with label 1.

In [None]:
# SVC search on the fault-mode-1 subset of dataset 3.
# NOTE(review): the saved output reports accuracy 0.499, which is lower than the 0.6147 printed
# for the untuned SVM on the same subset earlier in this notebook — worth investigating.
best_model_svm_cluster1 = parameter_tuning(svm, param_grid, x_train_f1, y_train_f1, x_val_f1, y_val_f1, 10)

Accuracy: 0.499


In [None]:
# Show the hyperparameter combination selected for the fault-mode-1 subset.
best_model_svm_cluster1.best_params_

{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}

# Gaussian Linear Discriminant Analysis

## Model

In [None]:
from utils import classif_lda

### Create an LDA classifier for each of the 4 main datasets.

In [None]:
# Fit and evaluate an LDA classifier on dataset 1. `classif_lda` prints the accuracy,
# the cross-validation scores and a classification report; the first returned value is the
# fitted model (it is used for `.predict` in the next cell).
lda1, lda1t, lda1p = classif_lda(x_train1, y_train1, x_val1, y_val1)

Model accuracy score with default parameters : 0.7272

The scores for cross validation are:
 [0.77077364 0.82234957 0.80515759 0.67908309 0.72492837 0.57593123
 0.77936963 0.67048711 0.61031519 0.59025788]

The mean score is: 0.7029

              precision    recall  f1-score   support

         0.0       0.91      0.83      0.87      1020
         1.0       0.67      0.85      0.75      1479
         2.0       0.64      0.52      0.57       837
         3.0       0.00      0.00      0.00       154

    accuracy                           0.73      3490
   macro avg       0.55      0.55      0.55      3490
weighted avg       0.70      0.73      0.71      3490



In [None]:
# Predict with the dataset-1 LDA model on `x_test11` (presumably the dataset-1 test split — TODO confirm).
lda1p_t = lda1.predict(x_test11)

In [None]:
# Same LDA fit/evaluation, for dataset 2.
lda2, lda2t, lda2p = classif_lda(x_train2, y_train2, x_val2, y_val2)

Model accuracy score with default parameters : 0.6986

The scores for cross validation are:
 [0.60409556 0.69169511 0.74744027 0.7076223  0.72127418 0.72241183
 0.69510808 0.72209567 0.68223235 0.64920273]

The mean score is: 0.6943

              precision    recall  f1-score   support

         0.0       0.92      0.76      0.83      2652
         1.0       0.65      0.81      0.72      3820
         2.0       0.56      0.53      0.54      1944
         3.0       0.00      0.00      0.00       371

    accuracy                           0.70      8787
   macro avg       0.53      0.53      0.52      8787
weighted avg       0.68      0.70      0.69      8787



In [None]:
# Predict with the dataset-2 LDA model on `x_test12`.
lda2p_t = lda2.predict(x_test12)

In [None]:
# Same LDA fit/evaluation, for dataset 3.
lda3, lda3t, lda3p = classif_lda(x_train3, y_train3, x_val3, y_val3)

Model accuracy score with default parameters : 0.6391

The scores for cross validation are:
 [0.66343826 0.37772397 0.6125908  0.65617433 0.64164649 0.38983051
 0.67312349 0.72639225 0.69902913 0.50242718]

The mean score is: 0.5942

              precision    recall  f1-score   support

         0.0       0.93      0.81      0.87      1020
         1.0       0.63      0.76      0.69      1500
         2.0       0.44      0.30      0.36      1029
         3.0       0.49      0.61      0.54       579

    accuracy                           0.64      4128
   macro avg       0.62      0.62      0.62      4128
weighted avg       0.64      0.64      0.63      4128



In [None]:
# Predict with the dataset-3 LDA model on `x_test13`.
lda3p_t = lda3.predict(x_test13)

In [None]:
# Same LDA fit/evaluation, for dataset 4.
lda4, lda4t, lda4p = classif_lda(x_train4, y_train4, x_val4, y_val4)

Model accuracy score with default parameters : 0.5648

The scores for cross validation are:
 [0.5432921  0.53092293 0.4490961  0.56137012 0.59086584 0.59904762
 0.62857143 0.55428571 0.55142857 0.46095238]

The mean score is: 0.5470

              precision    recall  f1-score   support

         0.0       0.85      0.78      0.81      2550
         1.0       0.52      0.71      0.60      3680
         2.0       0.38      0.29      0.33      2509
         3.0       0.47      0.34      0.39      1766

    accuracy                           0.56     10505
   macro avg       0.56      0.53      0.53     10505
weighted avg       0.56      0.56      0.55     10505



In [None]:
# Predict with the dataset-4 LDA model on `x_test14`.
lda4p_t = lda4.predict(x_test14)

### Create LDA classifiers for the 6 sub-datasets of dataset 2.

In [None]:
# Fit and evaluate an LDA classifier on sub-dataset 1 of dataset 2; the first returned value
# is the fitted model used for `.predict` in the next cell.
ldas1, ldaopc1t, ldaopc1p = classif_lda(x_train_cl1, y_train_cl1, x_val_cl1, y_val_cl1)

Model accuracy score with default parameters : 0.6546

The scores for cross validation are:
 [0.60666667 0.66       0.6        0.66666667 0.72483221 0.63758389
 0.7114094  0.59060403 0.67114094 0.58389262]

The mean score is: 0.6453

              precision    recall  f1-score   support

         0.0       0.92      0.78      0.84       375
         1.0       0.63      0.67      0.65       574
         2.0       0.53      0.70      0.60       436
         3.0       0.00      0.00      0.00       109

    accuracy                           0.65      1494
   macro avg       0.52      0.54      0.52      1494
weighted avg       0.63      0.65      0.64      1494



In [None]:
# Predict with the sub-dataset-1 LDA model on `x_test1cl1`.
ldaopc1p_t = ldas1.predict(x_test1cl1)

In [None]:
# Same LDA fit/evaluation, for sub-dataset 2 of dataset 2.
ldas2, ldaopc2t, ldaopc2p = classif_lda(x_train_cl2, y_train_cl2, x_val_cl2, y_val_cl2)

Model accuracy score with default parameters : 0.6417

The scores for cross validation are:
 [0.651341   0.60769231 0.74230769 0.62307692 0.69615385 0.67692308
 0.61153846 0.67307692 0.61153846 0.56538462]

The mean score is: 0.6459

              precision    recall  f1-score   support

         0.0       0.92      0.76      0.83       684
         1.0       0.59      0.65      0.62       948
         2.0       0.54      0.70      0.61       768
         3.0       0.00      0.00      0.00       201

    accuracy                           0.64      2601
   macro avg       0.51      0.53      0.51      2601
weighted avg       0.62      0.64      0.62      2601



In [None]:
# Predict with the sub-dataset-2 LDA model on `x_test1cl2`.
ldaopc2p_t = ldas2.predict(x_test1cl2)

In [None]:
# Same LDA fit/evaluation, for sub-dataset 3 of dataset 2.
ldas3, ldaopc3t, ldaopc3p = classif_lda(x_train_cl3, y_train_cl3, x_val_cl3, y_val_cl3)

Model accuracy score with default parameters : 0.6372

The scores for cross validation are:
 [0.62179487 0.62179487 0.65806452 0.64516129 0.65806452 0.59354839
 0.69032258 0.63870968 0.69032258 0.57419355]

The mean score is: 0.6392

              precision    recall  f1-score   support

         0.0       0.92      0.73      0.81       406
         1.0       0.59      0.68      0.63       598
         2.0       0.53      0.66      0.59       435
         3.0       0.50      0.01      0.02       113

    accuracy                           0.64      1552
   macro avg       0.63      0.52      0.51      1552
weighted avg       0.65      0.64      0.62      1552



In [None]:
# Predict with the sub-dataset-3 LDA model on `x_test1cl3`.
ldaopc3p_t = ldas3.predict(x_test1cl3)

In [None]:
# Same LDA fit/evaluation, for sub-dataset 4 of dataset 2.
ldas4, ldaopc4t, ldaopc4p = classif_lda(x_train_cl4, y_train_cl4, x_val_cl4, y_val_cl4)

Model accuracy score with default parameters : 0.6528

The scores for cross validation are:
 [0.62893082 0.70440252 0.73584906 0.63522013 0.70440252 0.66037736
 0.64150943 0.62658228 0.53797468 0.61392405]

The mean score is: 0.6489

              precision    recall  f1-score   support

         0.0       0.94      0.78      0.85       420
         1.0       0.61      0.67      0.64       600
         2.0       0.53      0.67      0.59       454
         3.0       1.00      0.01      0.02       113

    accuracy                           0.65      1587
   macro avg       0.77      0.53      0.52      1587
weighted avg       0.70      0.65      0.64      1587



In [None]:
# Predict with the sub-dataset-4 LDA model on `x_test1cl4`.
ldaopc4p_t = ldas4.predict(x_test1cl4)

In [None]:
# Same LDA fit/evaluation, for sub-dataset 5 of dataset 2.
ldas5, ldaopc5t, ldaopc5p = classif_lda(x_train_cl5, y_train_cl5, x_val_cl5, y_val_cl5)

Model accuracy score with default parameters : 0.6344

The scores for cross validation are:
 [0.55194805 0.57142857 0.69480519 0.64935065 0.63636364 0.64935065
 0.62337662 0.64052288 0.63398693 0.63398693]

The mean score is: 0.6285

              precision    recall  f1-score   support

         0.0       0.92      0.75      0.83       393
         1.0       0.63      0.66      0.65       609
         2.0       0.48      0.67      0.56       408
         3.0       0.33      0.02      0.03       127

    accuracy                           0.63      1537
   macro avg       0.59      0.53      0.52      1537
weighted avg       0.64      0.63      0.62      1537



In [None]:
# Predict with the sub-dataset-5 LDA model on `x_test1cl5`.
ldaopc5p_t = ldas5.predict(x_test1cl5)

In [None]:
# Same LDA fit/evaluation, for sub-dataset 6 of dataset 2.
ldas6, ldaopc6t, ldaopc6p = classif_lda(x_train_cl6, y_train_cl6, x_val_cl6, y_val_cl6)

Model accuracy score with default parameters : 0.6516

The scores for cross validation are:
 [0.58169935 0.60130719 0.71895425 0.69281046 0.74342105 0.63157895
 0.71052632 0.67105263 0.61842105 0.51973684]

The mean score is: 0.6490

              precision    recall  f1-score   support

         0.0       0.92      0.78      0.84       374
         1.0       0.62      0.66      0.64       571
         2.0       0.54      0.70      0.61       462
         3.0       0.60      0.03      0.05       117

    accuracy                           0.65      1524
   macro avg       0.67      0.54      0.54      1524
weighted avg       0.67      0.65      0.63      1524



In [None]:
# Predict with the sub-dataset-6 LDA model on `x_test1cl6`.
ldaopc6p_t = ldas6.predict(x_test1cl6)

### Create LDA classifiers for the 2 sub-datasets of dataset 3, split by fault mode.

In [None]:
# Fit and evaluate an LDA classifier on the fault-mode subset labelled 0 of dataset 3.
lda_cluster0, ldacl0t, ldacl0p = classif_lda(x_train_f0, y_train_f0, x_val_f0, y_val_f0)

Model accuracy score with default parameters : 0.6157

The scores for cross validation are:
 [0.7112069  0.62068966 0.46551724 0.36206897 0.51724138 0.56465517
 0.61471861 0.71861472 0.86580087 0.77489177]

The mean score is: 0.6215

              precision    recall  f1-score   support

         0.0       0.82      0.88      0.84       510
         1.0       0.52      0.75      0.61       750
         2.0       0.58      0.42      0.49       686
         3.0       0.72      0.34      0.46       370

    accuracy                           0.62      2316
   macro avg       0.66      0.60      0.60      2316
weighted avg       0.63      0.62      0.60      2316



In [None]:
# Predict with the fault-mode-0 LDA model on `x_test1f0`.
ldacl0p_t = lda_cluster0.predict(x_test1f0)

In [None]:
# Fit and evaluate an LDA classifier on the fault-mode subset labelled 1 of dataset 3.
lda_cluster1, ldacl1t, ldacl1p = classif_lda(x_train_f1, y_train_f1, x_val_f1, y_val_f1)

Model accuracy score with default parameters : 0.6192

The scores for cross validation are:
 [0.81967213 0.54508197 0.27868852 0.71721311 0.76229508 0.6147541
 0.79098361 0.43209877 0.71193416 0.67901235]

The mean score is: 0.6352

              precision    recall  f1-score   support

         0.0       0.84      0.87      0.86       612
         1.0       0.53      0.78      0.63       896
         2.0       0.56      0.40      0.47       695
         3.0       1.00      0.00      0.01       234

    accuracy                           0.62      2437
   macro avg       0.73      0.51      0.49      2437
weighted avg       0.66      0.62      0.58      2437



In [None]:
# Predict with the fault-mode-1 LDA model on `x_test1f1`.
ldacl1p_t = lda_cluster1.predict(x_test1f1)

## Hyperparameter tuning.

In [None]:
# Setup for the LDA solver search.

# Base estimator: Linear Discriminant Analysis with default settings.
linear = LinearDiscriminantAnalysis()

# Stratified 10-fold splitter repeated 3 times with a fixed seed.
# NOTE(review): `cv` and `param_grid` reuse the names bound in the SVM tuning section,
# so after this cell runs the SVM tuning cells would pick up the LDA grid.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=237)

# Only the solver is varied in this first search.
param_grid = dict(solver=['svd', 'lsqr', 'eigen'])

### Parameter tuning dataset 1.

In [None]:
# Search the solver grid for LDA on dataset 1, passing the `cv` splitter as the last argument.
best_model_lda1 = parameter_tuning(linear, param_grid, x_train1, y_train1, x_val1, y_val1, cv)

Accuracy: 0.727


In [None]:
# Show the solver selected for dataset 1.
best_model_lda1.best_params_

{'solver': 'lsqr'}

Grid search over the shrinkage parameter, using the `lsqr` solver.

In [None]:
# Setup for the LDA shrinkage search.

# Estimator fixed to the 'lsqr' solver for every shrinkage search below.
linear_shr = LinearDiscriminantAnalysis(solver='lsqr')

# Candidate shrinkage values: 0.00, 0.01, ... up to (but excluding) 1.
grid_lda = dict(shrinkage=np.arange(0, 1, 0.01))

In [None]:
# Search the shrinkage grid for the 'lsqr' LDA on dataset 1.
best_model_lda_shr1 = parameter_tuning(linear_shr, grid_lda, x_train1, y_train1, x_val1, y_val1, cv)

Accuracy: 0.728


In [None]:
# Show the shrinkage value selected for dataset 1.
best_model_lda_shr1.best_params_

{'shrinkage': 0.01}

### Parameter tuning dataset 2.

In [None]:
# Search the solver grid for LDA on dataset 2.
best_model_lda2 = parameter_tuning(linear, param_grid, x_train2, y_train2, x_val2, y_val2, cv)

Accuracy: 0.699


In [None]:
# Show the solver selected by the search for dataset 2.
best_model_lda2.best_params_

{'solver': 'lsqr'}

In [None]:
# Search the shrinkage grid for the 'lsqr' LDA on dataset 2.
best_model_lda_shr2 = parameter_tuning(linear_shr, grid_lda, x_train2, y_train2, x_val2, y_val2, cv)

Accuracy: 0.698


In [None]:
# Show the shrinkage value selected for dataset 2.
best_model_lda_shr2.best_params_

{'shrinkage': 0.06}

### Parameter tuning dataset 3.

In [None]:
# Search the solver grid for LDA on dataset 3.
best_model_lda3 = parameter_tuning(linear, param_grid, x_train3, y_train3, x_val3, y_val3, cv)

Accuracy: 0.639


In [None]:
# Show the solver selected by the search for dataset 3.
best_model_lda3.best_params_

{'solver': 'lsqr'}

In [None]:
# Search the shrinkage grid for the 'lsqr' LDA on dataset 3.
best_model_lda_shr3 = parameter_tuning(linear_shr, grid_lda, x_train3, y_train3, x_val3, y_val3, cv)

Accuracy: 0.635


In [None]:
# Show the shrinkage value selected for dataset 3.
best_model_lda_shr3.best_params_

{'shrinkage': 0.1}

### Parameter tuning dataset 4.

In [None]:
# Search the solver grid for LDA on dataset 4.
best_model_lda4 = parameter_tuning(linear, param_grid, x_train4, y_train4, x_val4, y_val4, cv)

Accuracy: 0.565


In [None]:
# Show the solver selected by the search for dataset 4.
best_model_lda4.best_params_

{'solver': 'svd'}

In [None]:
# Search the shrinkage grid for dataset 4. This uses `linear_shr` (solver fixed to 'lsqr'),
# independent of which solver the search above selected.
best_model_lda_shr4 = parameter_tuning(linear_shr, grid_lda, x_train4, y_train4, x_val4, y_val4, cv)

Accuracy: 0.565


In [None]:
# Show the shrinkage value selected for dataset 4.
best_model_lda_shr4.best_params_

{'shrinkage': 0.07}

### Parameter tuning for dataset clustered by fault mode of dataset 3 with label 0.

In [None]:
# Search the solver grid for LDA on the fault-mode-0 subset of dataset 3.
# NOTE(review): no output is saved for this cell — it may not have been run.
best_model_lda_cluster0 = parameter_tuning(linear, param_grid, x_train_f0, y_train_f0, x_val_f0, y_val_f0, cv)

In [None]:
# Show the solver selected by the search for the fault-mode-0 subset.
best_model_lda_cluster0.best_params_

In [None]:
# Search the shrinkage grid for the 'lsqr' LDA on the fault-mode-0 subset of dataset 3.
best_model_lda_shr_cluster0 = parameter_tuning(linear_shr, grid_lda, x_train_f0, y_train_f0, x_val_f0, y_val_f0, cv)

In [None]:
# Show the shrinkage value selected for the fault-mode-0 subset.
best_model_lda_shr_cluster0.best_params_

### Parameter tuning for dataset clustered by fault mode of dataset 3 with label 1.

In [None]:
# Search the solver grid for LDA on the fault-mode-1 subset of dataset 3.
# NOTE(review): no output is saved for this cell — it may not have been run.
best_model_lda_cluster1 = parameter_tuning(linear, param_grid, x_train_f1, y_train_f1, x_val_f1, y_val_f1, cv)

In [None]:
# Show the solver selected by the search for the fault-mode-1 subset.
best_model_lda_cluster1.best_params_

In [None]:
# Search the shrinkage grid for the 'lsqr' LDA on the fault-mode-1 subset of dataset 3.
best_model_lda_shr_cluster1 = parameter_tuning(linear_shr, grid_lda, x_train_f1, y_train_f1, x_val_f1, y_val_f1, cv)

In [None]:
# Show the shrinkage value selected for the fault-mode-1 subset.
best_model_lda_shr_cluster1.best_params_

# Elman network

In [None]:
from utils import train_elman

## Elman one line



#### Train an Elman network that takes a single observation (sequence length 1) on the 4 main datasets

### FD001

In [None]:
# Build the FD001 splits for the single-observation Elman models
# (classification mode, sequence_length=1, clust=-1).
(
    x_train1l, y_train1l,
    x_val1l, y_val1l,
    x_test1l, y_test1l,
    X_train_pre1l, X_test_pre1l,
) = process_data_standard2(
    'FD001',
    sensors,
    dir_path,
    condition='10.0_0.25_100.0',
    algo='clf',
    sequence_length=1,
    clust=-1,
)

In [None]:


# Train an Elman network on the FD001 single-observation data with the Adadelta optimizer.
# The positional `1, 256` presumably are the sequence length and the number of recurrent units
# (the printed summary shows input (None, 1, 15) and a 256-unit SimpleRNN) — TODO confirm.
# The first argument looks like a run/model name — TODO confirm how `train_elman` uses it.
elman1l, hist1l = train_elman("elman1_ds1", x_train1l, y_train1l, x_val1l, y_val1l, 1, 256, neurons2= None, optimizer='Adadelta')

Model: "model_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_17 (InputLayer)       [(None, 1, 15)]           0         
                                                                 
 simple_rnn_16 (SimpleRNN)   (None, 256)               69632     
                                                                 
 dense_16 (Dense)            (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
306.19588685035706


In [None]:
# Repeat run (v2) on FD001: same data and settings as the first run, only the name string differs.
elman1lv2, hist1lv2 = train_elman("elman1_v2_ds1", x_train1l, y_train1l, x_val1l, y_val1l, 1, 256, neurons2= None, optimizer='Adadelta')

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 1, 15)]           0         
                                                                 
 simple_rnn (SimpleRNN)      (None, 256)               69632     
                                                                 
 dense (Dense)               (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
295.04047417640686


In [None]:
# Repeat run (v3) on FD001 with identical settings.
elman1lv3, hist1lv3 = train_elman("elman1_v3_ds1", x_train1l, y_train1l, x_val1l, y_val1l, 1, 256, neurons2= None, optimizer='Adadelta')


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 1, 15)]           0         
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 256)               69632     
                                                                 
 dense_1 (Dense)             (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
267.45800018310547


In [None]:
# Repeat run (v4) on FD001 with identical settings.
elman1lv4, hist1lv4 = train_elman("elman1_v4_ds1", x_train1l, y_train1l, x_val1l, y_val1l, 1, 256, neurons2= None, optimizer='Adadelta')


Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 1, 15)]           0         
                                                                 
 simple_rnn_2 (SimpleRNN)    (None, 256)               69632     
                                                                 
 dense_2 (Dense)             (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
301.33780217170715


In [None]:
# Repeat run (v5) on FD001 with identical settings.
elman1lv5, hist1lv5 = train_elman("elman1_v5_ds1", x_train1l, y_train1l, x_val1l, y_val1l, 1, 256, neurons2= None, optimizer='Adadelta')


Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 1, 15)]           0         
                                                                 
 simple_rnn_3 (SimpleRNN)    (None, 256)               69632     
                                                                 
 dense_3 (Dense)             (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
329.6912455558777


### FD002

In [None]:
# Build the FD002 splits for the single-observation Elman models
# (classification mode, sequence_length=1, clust=-1).
(
    x_train2l, y_train2l,
    x_val2l, y_val2l,
    x_test2l, y_test2l,
    X_train_pre2l, X_test_pre2l,
) = process_data_standard2(
    'FD002',
    sensors,
    dir_path,
    condition='10.0_0.25_100.0',
    algo='clf',
    sequence_length=1,
    clust=-1,
)

In [None]:


# Train an Elman network on the FD002 single-observation data with the Adadelta optimizer.
# NOTE(review): the name "xfx" looks like a placeholder and breaks the "elman…_ds<N>" pattern
# used by every other run in this section; confirm how `train_elman` uses the name before renaming.
elman2l, hist2l = train_elman("xfx", x_train2l, y_train2l, x_val2l, y_val2l, 1, 256, neurons2= None, optimizer='Adadelta')

Model: "model_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_18 (InputLayer)       [(None, 1, 15)]           0         
                                                                 
 simple_rnn_17 (SimpleRNN)   (None, 256)               69632     
                                                                 
 dense_17 (Dense)            (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
420.19116473197937


In [None]:
# Repeat run (v2) on FD002: same data and settings as the first FD002 run, only the name string differs.
elman2lv2, hist2lv2 = train_elman("elman2_v2_ds2", x_train2l, y_train2l, x_val2l, y_val2l, 1, 256, neurons2= None, optimizer='Adadelta')


Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 1, 15)]           0         
                                                                 
 simple_rnn_4 (SimpleRNN)    (None, 256)               69632     
                                                                 
 dense_4 (Dense)             (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
668.0018801689148


In [None]:
# Repeat run (v3) on FD002 with identical settings.
elman2lv3, hist2lv3 = train_elman("elman2_v3_ds2", x_train2l, y_train2l, x_val2l, y_val2l, 1, 256, neurons2= None, optimizer='Adadelta')


Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 1, 15)]           0         
                                                                 
 simple_rnn_5 (SimpleRNN)    (None, 256)               69632     
                                                                 
 dense_5 (Dense)             (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
605.815661907196


In [None]:
# Repeat run (v4) on FD002 with identical settings.
elman2lv4, hist2lv4 = train_elman("elman2_v4_ds2", x_train2l, y_train2l, x_val2l, y_val2l, 1, 256, neurons2= None, optimizer='Adadelta')


Model: "model_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_7 (InputLayer)        [(None, 1, 15)]           0         
                                                                 
 simple_rnn_6 (SimpleRNN)    (None, 256)               69632     
                                                                 
 dense_6 (Dense)             (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
379.20612359046936


In [None]:
# Repeat run (v5) on FD002 with identical settings.
elman2lv5, hist2lv5 = train_elman("elman2_v5_ds2", x_train2l, y_train2l, x_val2l, y_val2l, 1, 256, neurons2= None, optimizer='Adadelta')


Model: "model_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_8 (InputLayer)        [(None, 1, 15)]           0         
                                                                 
 simple_rnn_7 (SimpleRNN)    (None, 256)               69632     
                                                                 
 dense_7 (Dense)             (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
795.0913350582123


### FD003

In [None]:
# Build the FD003 splits for the single-observation Elman models
# (classification mode, sequence_length=1, clust=-1).
(
    x_train3l, y_train3l,
    x_val3l, y_val3l,
    x_test3l, y_test3l,
    X_train_pre3l, X_test_pre3l,
) = process_data_standard2(
    'FD003',
    sensors,
    dir_path,
    condition='10.0_0.25_100.0',
    algo='clf',
    sequence_length=1,
    clust=-1,
)


In [None]:

# Train an Elman network on the FD003 single-observation data with the Adadelta optimizer
# (same positional settings `1, 256` as the FD001/FD002 runs).
elman3l, hist3l = train_elman("elman1_ds3", x_train3l, y_train3l, x_val3l, y_val3l, 1, 256, neurons2= None, optimizer='Adadelta')

Model: "model_18"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_19 (InputLayer)       [(None, 1, 15)]           0         
                                                                 
 simple_rnn_18 (SimpleRNN)   (None, 256)               69632     
                                                                 
 dense_18 (Dense)            (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
415.65037274360657


In [None]:
# Repeat run (v2) on FD003: same data and settings as the first FD003 run, only the name string differs.
elman3lv2, hist3lv2 = train_elman("elman3_v2_ds3", x_train3l, y_train3l, x_val3l, y_val3l, 1, 256, neurons2= None, optimizer='Adadelta')


Model: "model_20"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_21 (InputLayer)       [(None, 1, 15)]           0         
                                                                 
 simple_rnn_20 (SimpleRNN)   (None, 256)               69632     
                                                                 
 dense_20 (Dense)            (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
414.2959749698639


In [None]:
# Run 3 on FD003: same data and settings as the first run, different run name.
elman3lv3, hist3lv3 = train_elman(
    "elman3_v3_ds3",
    x_train3l, y_train3l,
    x_val3l, y_val3l,
    1, 256,
    neurons2=None,
    optimizer='Adadelta',
)


Model: "model_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_10 (InputLayer)       [(None, 1, 15)]           0         
                                                                 
 simple_rnn_9 (SimpleRNN)    (None, 256)               69632     
                                                                 
 dense_9 (Dense)             (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
405.25956177711487


In [None]:
# Run 4 on FD003: same data and settings as the first run, different run name.
elman3lv4, hist3lv4 = train_elman(
    "elman3_v4_ds3",
    x_train3l, y_train3l,
    x_val3l, y_val3l,
    1, 256,
    neurons2=None,
    optimizer='Adadelta',
)


Model: "model_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_11 (InputLayer)       [(None, 1, 15)]           0         
                                                                 
 simple_rnn_10 (SimpleRNN)   (None, 256)               69632     
                                                                 
 dense_10 (Dense)            (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
406.68492579460144


In [None]:
# Run 5 on FD003: same data and settings as the first run, different run name.
elman3lv5, hist3lv5 = train_elman(
    "elman3_v5_ds3",
    x_train3l, y_train3l,
    x_val3l, y_val3l,
    1, 256,
    neurons2=None,
    optimizer='Adadelta',
)


Model: "model_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_12 (InputLayer)       [(None, 1, 15)]           0         
                                                                 
 simple_rnn_11 (SimpleRNN)   (None, 256)               69632     
                                                                 
 dense_11 (Dense)            (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
415.64572286605835


### FD004

In [None]:
# Load FD004 with sequence_length=1 and algo='clf'; the call returns the
# train/validation/test splits plus two "pre" frames.
(x_train4l, y_train4l,
 x_val4l, y_val4l,
 x_test4l, y_test4l,
 X_train_pre4l, X_test_pre4l) = process_data_standard2(
    'FD004',
    sensors,
    dir_path,
    condition='10.0_0.25_100.0',
    algo='clf',
    sequence_length=1,
    clust=-1,
)

In [None]:


# First one-line Elman run on FD004; returns the model and its training history.
elman4l, hist4l = train_elman(
    "elman1_ds4",
    x_train4l, y_train4l,
    x_val4l, y_val4l,
    1, 256,
    neurons2=None,
    optimizer='Adadelta',
)

Model: "model_19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_20 (InputLayer)       [(None, 1, 15)]           0         
                                                                 
 simple_rnn_19 (SimpleRNN)   (None, 256)               69632     
                                                                 
 dense_19 (Dense)            (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
898.2899444103241


In [None]:
# Run 2 on FD004: same data and settings as the first run, different run name.
elman4lv2, hist4lv2 = train_elman(
    "elman4_v2_ds4",
    x_train4l, y_train4l,
    x_val4l, y_val4l,
    1, 256,
    neurons2=None,
    optimizer='Adadelta',
)


Model: "model_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_13 (InputLayer)       [(None, 1, 15)]           0         
                                                                 
 simple_rnn_12 (SimpleRNN)   (None, 256)               69632     
                                                                 
 dense_12 (Dense)            (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
627.1570990085602


In [None]:
# Run 3 on FD004: same data and settings as the first run, different run name.
elman4lv3, hist4lv3 = train_elman(
    "elman4_v3_ds4",
    x_train4l, y_train4l,
    x_val4l, y_val4l,
    1, 256,
    neurons2=None,
    optimizer='Adadelta',
)


Model: "model_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_14 (InputLayer)       [(None, 1, 15)]           0         
                                                                 
 simple_rnn_13 (SimpleRNN)   (None, 256)               69632     
                                                                 
 dense_13 (Dense)            (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
975.4867980480194


In [None]:
# Run 4 on FD004: same data and settings as the first run, different run name.
elman4lv4, hist4lv4 = train_elman(
    "elman4_v4_ds4",
    x_train4l, y_train4l,
    x_val4l, y_val4l,
    1, 256,
    neurons2=None,
    optimizer='Adadelta',
)


Model: "model_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_15 (InputLayer)       [(None, 1, 15)]           0         
                                                                 
 simple_rnn_14 (SimpleRNN)   (None, 256)               69632     
                                                                 
 dense_14 (Dense)            (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
596.7743253707886


In [None]:
# Run 5 on FD004: same data and settings as the first run, different run name.
elman4lv5, hist4lv5 = train_elman(
    "elman4_v5_ds4",
    x_train4l, y_train4l,
    x_val4l, y_val4l,
    1, 256,
    neurons2=None,
    optimizer='Adadelta',
)


Model: "model_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_16 (InputLayer)       [(None, 1, 15)]           0         
                                                                 
 simple_rnn_15 (SimpleRNN)   (None, 256)               69632     
                                                                 
 dense_15 (Dense)            (None, 4)                 1028      
                                                                 
Total params: 70660 (276.02 KB)
Trainable params: 70660 (276.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
343.8621401786804


### Save history

In [None]:
# Gather the `.history` records of the five one-line Elman runs per dataset
# (FD001..FD004) and persist them as one nested list.
hist1 = [hist1l.history, hist1lv2.history, hist1lv3.history, hist1lv4.history, hist1lv5.history]
hist2 = [hist2l.history, hist2lv2.history, hist2lv3.history, hist2lv4.history, hist2lv5.history]
hist3 = [hist3l.history, hist3lv2.history, hist3lv3.history, hist3lv4.history, hist3lv5.history]
hist4 = [hist4l.history, hist4lv2.history, hist4lv3.history, hist4lv4.history, hist4lv5.history]
hists = [hist1, hist2, hist3, hist4]

# The context manager flushes and closes the file even if pickle.dump raises;
# the bare open() used before left the handle open.
with open(b"/content/drive/MyDrive/classiff_results/history1l.pkl", "wb") as filehandler:
    pickle.dump(hists, filehandler)

### Predictions of Elman one line on the validation dataset

In [None]:
# Validation report for the one-line Elman model on FD001: pick the
# highest-scoring class per sample and compare with the flattened labels.
y_pred1l = np.argmax(elman1l.predict(x_val1l), axis=1)
print(
    classification_report(
        y_val1l.flatten(),
        y_pred1l,
        zero_division=0,
    )
)

              precision    recall  f1-score   support

         0.0       0.85      0.89      0.87      1020
         1.0       0.63      0.67      0.65      1500
         2.0       0.54      0.65      0.59      1157
         3.0       0.00      0.00      0.00       393

    accuracy                           0.65      4070
   macro avg       0.50      0.55      0.53      4070
weighted avg       0.60      0.65      0.62      4070



In [None]:
# Test-set class predictions of the one-line Elman model on FD001.
y_pred1l_t = np.argmax(elman1l.predict(x_test1l), axis=1)



In [None]:
# Validation report for the one-line Elman model on FD002.
y_pred2l = np.argmax(elman2l.predict(x_val2l), axis=1)
print(
    classification_report(
        y_val2l.flatten(),
        y_pred2l,
        zero_division=0,
    )
)

              precision    recall  f1-score   support

         0.0       0.89      0.80      0.84      2652
         1.0       0.63      0.64      0.64      3900
         2.0       0.54      0.73      0.62      2963
         3.0       0.00      0.00      0.00       780

    accuracy                           0.66     10295
   macro avg       0.52      0.54      0.53     10295
weighted avg       0.63      0.66      0.64     10295



In [None]:
# Test-set class predictions of the one-line Elman model on FD002.
y_pred2l_t = np.argmax(elman2l.predict(x_test2l), axis=1)



In [None]:
# Validation report for the one-line Elman model on FD003.
y_pred3l = np.argmax(elman3l.predict(x_val3l), axis=1)
print(
    classification_report(
        y_val3l.flatten(),
        y_pred3l,
        zero_division=0,
    )
)

              precision    recall  f1-score   support

         0.0       0.88      0.87      0.87      1020
         1.0       0.63      0.68      0.65      1500
         2.0       0.61      0.34      0.44      1372
         3.0       0.49      0.78      0.60       816

    accuracy                           0.64      4708
   macro avg       0.65      0.67      0.64      4708
weighted avg       0.65      0.64      0.63      4708



In [None]:
# Test-set class predictions of the one-line Elman model on FD003.
y_pred3l_t = np.argmax(elman3l.predict(x_test3l), axis=1)



In [None]:
# Validation report for the one-line Elman model on FD004.
# zero_division=0 is passed like in the other one-line reports, so a class that
# never gets predicted yields 0.0 scores without an undefined-metric warning.
y_pred4l = elman4l.predict(x_val4l).argmax(axis=1)
print(classification_report(y_val4l.flatten(), y_pred4l, zero_division=0))

              precision    recall  f1-score   support

         0.0       0.81      0.82      0.82      2550
         1.0       0.51      0.63      0.57      3750
         2.0       0.41      0.36      0.38      3104
         3.0       0.54      0.44      0.49      2551

    accuracy                           0.56     11955
   macro avg       0.57      0.56      0.56     11955
weighted avg       0.56      0.56      0.56     11955



In [None]:
# Test-set class predictions of the one-line Elman model on FD004.
y_pred4l_t = np.argmax(elman4l.predict(x_test4l), axis=1)




### Elman one line for the two fault-mode sub-datasets of dataset 3

In [None]:
# One-line Elman model for the first fault-mode subset (suffix _f0).
elman_f0l, hist_f0l = train_elman(
    "elman1_cluster0",
    x_train_f0, y_train_f0,
    x_val_f0, y_val_f0,
    1, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# One-line Elman model for the second fault-mode subset (suffix _f1).
elman_f1l, hist_f1l = train_elman(
    "elman1_cluster1",
    x_train_f1, y_train_f1,
    x_val_f1, y_val_f1,
    1, 256,
    neurons2=None,
    optimizer='Adadelta',
)


In [None]:
# Validation report for the _f0 subset model.
y_pred_f0l = np.argmax(elman_f0l.predict(x_val_f0), axis=1)
print(
    classification_report(
        y_val_f0.flatten(),
        y_pred_f0l,
        zero_division=0,
    )
)

              precision    recall  f1-score   support

         0.0       0.76      0.92      0.83       510
         1.0       0.50      0.69      0.58       750
         2.0       0.57      0.42      0.48       686
         3.0       0.72      0.34      0.46       370

    accuracy                           0.60      2316
   macro avg       0.64      0.59      0.59      2316
weighted avg       0.62      0.60      0.59      2316



In [None]:
# Test-set class predictions of the _f0 subset model.
y_pred_f0l_t = np.argmax(elman_f0l.predict(x_test_f0), axis=1)



In [None]:
# Validation report for the _f1 subset model.
y_pred_f1l = np.argmax(elman_f1l.predict(x_val_f1), axis=1)
print(
    classification_report(
        y_val_f1.flatten(),
        y_pred_f1l,
        zero_division=0,
    )
)


              precision    recall  f1-score   support

         0.0       0.75      0.92      0.83       612
         1.0       0.51      0.71      0.59       896
         2.0       0.55      0.35      0.43       695
         3.0       0.00      0.00      0.00       234

    accuracy                           0.59      2437
   macro avg       0.45      0.49      0.46      2437
weighted avg       0.53      0.59      0.55      2437



In [None]:
# Test-set class predictions of the _f1 subset model.
y_pred_f1l_t = np.argmax(elman_f1l.predict(x_test_f1), axis=1)



## Elman 30 lines

### FD001

In [None]:
# First 30-line Elman run on FD001.
elman1_30, hist1_30 = train_elman(
    "elman30_ds1",
    x_train1, y_train1,
    x_val1, y_val1,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 2 on FD001 (same settings, different run name).
elman1_30v2, hist1_30v2 = train_elman(
    "elman30_ds1v2",
    x_train1, y_train1,
    x_val1, y_val1,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 3 on FD001 (same settings, different run name).
elman1_30v3, hist1_30v3 = train_elman(
    "elman30_ds1v3",
    x_train1, y_train1,
    x_val1, y_val1,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 4 on FD001 (same settings, different run name).
elman1_30v4, hist1_30v4 = train_elman(
    "elman30_ds1v4",
    x_train1, y_train1,
    x_val1, y_val1,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 5 on FD001 (same settings, different run name).
elman1_30v5, hist1_30v5 = train_elman(
    "elman30_ds1v5",
    x_train1, y_train1,
    x_val1, y_val1,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# Histories of 30-line runs v2..v5 on FD001 (the first run, hist1_30, is not included).
hist130 = [hist1_30v2.history, hist1_30v3.history, hist1_30v4.history, hist1_30v5.history]
hists30 = [hist130]

# NOTE(review): this file stores hist130 itself, not the hists30 wrapper that the
# later history files store — kept as is; confirm which layout readers expect.
# The context manager closes the file that the bare open() used to leave open.
with open(b"/content/drive/MyDrive/classiff_results/history30l_1.pkl", "wb") as filehandler:
    pickle.dump(hist130, filehandler)

### FD002

In [None]:
# First 30-line Elman run on FD002.
elman2_30, hist2_30 = train_elman(
    "elman30_ds2",
    x_train2, y_train2,
    x_val2, y_val2,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 2 on FD002 (same settings, different run name).
elman2_30v2, hist2_30v2 = train_elman(
    "elman30_ds2v2",
    x_train2, y_train2,
    x_val2, y_val2,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 3 on FD002 (same settings, different run name).
elman2_30v3, hist2_30v3 = train_elman(
    "elman30_ds2v3",
    x_train2, y_train2,
    x_val2, y_val2,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 4 on FD002 (same settings, different run name).
elman2_30v4, hist2_30v4 = train_elman(
    "elman30_ds2v4",
    x_train2, y_train2,
    x_val2, y_val2,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 5 on FD002 (same settings, different run name).
elman2_30v5, hist2_30v5 = train_elman(
    "elman30_ds2v5",
    x_train2, y_train2,
    x_val2, y_val2,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# Histories of 30-line runs v2..v5 for FD001 and FD002, saved as [FD001, FD002].
hist130 = [hist1_30v2.history, hist1_30v3.history, hist1_30v4.history, hist1_30v5.history]
hist230 = [hist2_30v2.history, hist2_30v3.history, hist2_30v4.history, hist2_30v5.history]
hists30 = [hist130, hist230]

# The context manager flushes and closes the file even if pickle.dump raises;
# the bare open() used before left the handle open.
with open(b"/content/drive/MyDrive/classiff_results/history30l1to2.pkl", "wb") as filehandler:
    pickle.dump(hists30, filehandler)

### FD003

In [None]:
# First 30-line Elman run on FD003.
elman3_30, hist3_30 = train_elman(
    "elman30_ds3",
    x_train3, y_train3,
    x_val3, y_val3,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 2 on FD003 (same settings, different run name).
elman3_30v2, hist3_30v2 = train_elman(
    "elman30_ds3v2",
    x_train3, y_train3,
    x_val3, y_val3,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 3 on FD003 (same settings, different run name).
elman3_30v3, hist3_30v3 = train_elman(
    "elman30_ds3v3",
    x_train3, y_train3,
    x_val3, y_val3,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 4 on FD003 (same settings, different run name).
elman3_30v4, hist3_30v4 = train_elman(
    "elman30_ds3v4",
    x_train3, y_train3,
    x_val3, y_val3,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 5 on FD003 (same settings, different run name).
elman3_30v5, hist3_30v5 = train_elman(
    "elman30_ds3v5",
    x_train3, y_train3,
    x_val3, y_val3,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# Histories of 30-line runs v2..v5 for FD001..FD003, saved as [FD001, FD002, FD003].
hist130 = [hist1_30v2.history, hist1_30v3.history, hist1_30v4.history, hist1_30v5.history]
hist230 = [hist2_30v2.history, hist2_30v3.history, hist2_30v4.history, hist2_30v5.history]
hist330 = [hist3_30v2.history, hist3_30v3.history, hist3_30v4.history, hist3_30v5.history]
hists30 = [hist130, hist230, hist330]

# The context manager flushes and closes the file even if pickle.dump raises;
# the bare open() used before left the handle open.
with open(b"/content/drive/MyDrive/classiff_results/history30l1to3.pkl", "wb") as filehandler:
    pickle.dump(hists30, filehandler)

### FD004

In [None]:
# First 30-line Elman run on FD004.
elman4_30, hist4_30 = train_elman(
    "elman30_ds4",
    x_train4, y_train4,
    x_val4, y_val4,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 2 on FD004 (same settings, different run name).
elman4_30v2, hist4_30v2 = train_elman(
    "elman30_ds4v2",
    x_train4, y_train4,
    x_val4, y_val4,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 3 on FD004 (same settings, different run name).
elman4_30v3, hist4_30v3 = train_elman(
    "elman30_ds4v3",
    x_train4, y_train4,
    x_val4, y_val4,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 4 on FD004 (same settings, different run name).
elman4_30v4, hist4_30v4 = train_elman(
    "elman30_ds4v4",
    x_train4, y_train4,
    x_val4, y_val4,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# 30-line Elman run 5 on FD004 (same settings, different run name).
elman4_30v5, hist4_30v5 = train_elman(
    "elman30_ds4v5",
    x_train4, y_train4,
    x_val4, y_val4,
    30, 256,
    neurons2=None,
    optimizer='Adadelta',
)

In [None]:
# Histories of 30-line runs v2..v5 for all four datasets, saved as
# [FD001, FD002, FD003, FD004].
hist130 = [hist1_30v2.history, hist1_30v3.history, hist1_30v4.history, hist1_30v5.history]
hist230 = [hist2_30v2.history, hist2_30v3.history, hist2_30v4.history, hist2_30v5.history]
hist330 = [hist3_30v2.history, hist3_30v3.history, hist3_30v4.history, hist3_30v5.history]
hist430 = [hist4_30v2.history, hist4_30v3.history, hist4_30v4.history, hist4_30v5.history]
hists30 = [hist130, hist230, hist330, hist430]

# The context manager flushes and closes the file even if pickle.dump raises;
# the bare open() used before left the handle open.
with open(b"/content/drive/MyDrive/classiff_results/history30l1-4.pkl", "wb") as filehandler:
    pickle.dump(hists30, filehandler)

### Predictions of Elman 30 lines on validation dataset

In [None]:
# Validation report for the first 30-line Elman model on FD001.
# The predictions are bound to y_pred1, matching y_pred2..y_pred4 and the ds1
# result list that reads y_pred1; y_pred stays available as an alias.
y_pred1 = elman1_30.predict(x_val1).argmax(axis=1)
y_pred = y_pred1
print(classification_report(y_val1.flatten(), y_pred1))

              precision    recall  f1-score   support

         0.0       0.91      0.73      0.81      1020
         1.0       0.61      0.53      0.57      1479
         2.0       0.47      0.48      0.47       837
         3.0       0.06      0.19      0.09       154

    accuracy                           0.56      3490
   macro avg       0.51      0.48      0.48      3490
weighted avg       0.64      0.56      0.59      3490



In [None]:
# NOTE(review): same code as the previous cell, yet the stored report differs,
# so elman1_30 / x_val1 presumably held other values when this ran — confirm
# which model this report belongs to.
y_pred = np.argmax(elman1_30.predict(x_val1), axis=1)
print(
    classification_report(
        y_val1.flatten(),
        y_pred,
    )
)

              precision    recall  f1-score   support

         0.0       0.91      0.89      0.90      1020
         1.0       0.73      0.83      0.78      1479
         2.0       0.65      0.64      0.64       837
         3.0       0.00      0.00      0.00       154

    accuracy                           0.76      3490
   macro avg       0.57      0.59      0.58      3490
weighted avg       0.73      0.76      0.75      3490



In [None]:
# Validation report for the first 30-line Elman model on FD002.
y_pred2 = np.argmax(elman2_30.predict(x_val2), axis=1)
print(
    classification_report(
        y_val2,
        y_pred2,
    )
)

              precision    recall  f1-score   support

         0.0       0.85      0.75      0.80      2652
         1.0       0.63      0.45      0.53      3820
         2.0       0.48      0.54      0.51      1944
         3.0       0.07      0.29      0.11       371

    accuracy                           0.56      8787
   macro avg       0.51      0.51      0.49      8787
weighted avg       0.64      0.56      0.59      8787



In [None]:
# Validation report for the first 30-line Elman model on FD003.
y_pred3 = np.argmax(elman3_30.predict(x_val3), axis=1)
print(
    classification_report(
        y_val3,
        y_pred3,
    )
)

              precision    recall  f1-score   support

         0.0       0.91      0.90      0.91      1020
         1.0       0.73      0.71      0.72      1500
         2.0       0.49      0.54      0.51      1029
         3.0       0.50      0.46      0.48       579

    accuracy                           0.68      4128
   macro avg       0.66      0.65      0.65      4128
weighted avg       0.68      0.68      0.68      4128



In [None]:
# Validation report for the first 30-line Elman model on FD004.
y_pred4 = np.argmax(elman4_30.predict(x_val4), axis=1)
print(
    classification_report(
        y_val4,
        y_pred4,
    )
)

              precision    recall  f1-score   support

         0.0       0.84      0.87      0.85      2550
         1.0       0.60      0.66      0.63      3680
         2.0       0.42      0.43      0.43      2509
         3.0       0.52      0.38      0.44      1766

    accuracy                           0.61     10505
   macro avg       0.60      0.58      0.59     10505
weighted avg       0.60      0.61      0.60     10505



In [None]:
# Test-set class predictions of the 30-line Elman models, one per dataset.
# NOTE(review): in the visible part of the notebook model_elman_2/3/4 are only
# created in a later cell (weights loaded from checkpoints), while FD001 uses the
# in-memory elman1_30 — confirm the intended execution order and model choice.
y_pred1_t = np.argmax(elman1_30.predict(x_test1), axis=1)
y_pred2_t = np.argmax(model_elman_2.predict(x_test2), axis=1)
y_pred3_t = np.argmax(model_elman_3.predict(x_test3), axis=1)
y_pred4_t = np.argmax(model_elman_4.predict(x_test4), axis=1)



In [None]:

# Location of the CMAPSS data files on the mounted Drive.
dir_path = '/content/drive/MyDrive/CMAPSSData/'

# Names of all 21 sensor channels.
sensor_names = [
    "tfan_in_tot",
    "tLPC_out_tot",
    "tHPC_out_tot",
    "tLPT_out_tot",
    "pfan_in",
    "pbypass_tot",
    "pHPC_out_tot",
    "fan_speed_physical",
    "core_speed_physical",
    "P50_P2_ratio",
    "pHPC_out_stat",
    "fuel_flow_to_Ps30",
    "fan_speed_corrected",
    "core_speed_corrected",
    "bypass_ratio",
    "burnerfuel_air_ratio",
    "bleed_enthalpy",
    "fan_speed_demanded",
    "fan_speed_demanded_corrected",
    "HPT_coolant_bleed",
    "LPT_coolant_bleed",
]

# The 15 channels that are passed on to the preprocessing helper.
sensors = [
    'tLPC_out_tot',
    'tHPC_out_tot',
    'tLPT_out_tot',
    'pbypass_tot',
    'pHPC_out_tot',
    'fan_speed_physical',
    'core_speed_physical',
    'pHPC_out_stat',
    'fuel_flow_to_Ps30',
    'fan_speed_corrected',
    'core_speed_corrected',
    'bypass_ratio',
    'bleed_enthalpy',
    'HPT_coolant_bleed',
    'LPT_coolant_bleed',
]

# NOTE(review): this rebinds x_train1 / y_train1 / x_val1 / ... with
# sequence_length=1, although earlier cells pass the same names to train_elman
# with a sequence length of 30 — confirm the overwrite is intended.
(x_train1, y_train1,
 x_val1, y_val1,
 x_test1, y_test1,
 X_train_pre1, X_test_pre1) = process_data_standard2(
    'FD001',
    sensors,
    dir_path,
    condition='10.0_0.25_100.0',
    algo='clf',
    sequence_length=1,
    clust=-1,
)

In [None]:
# Validation results per dataset. Each entry is a pair of parallel lists:
# the first holds reference labels, the second the matching predictions.
ds1 = [
    [rft1, svmt1, lda1t, y_val1l, y_val1],
    [rfp1, svmp1, lda1p, y_pred1l, y_pred1],
]
ds2 = [
    [rft2, svmt2, lda2t, y_val2l, y_val2],
    [rfp2, svmp2, lda2p, y_pred2l, y_pred2],
]
ds3 = [
    [rft3, svmt3, lda3t, y_val3l, y_val3],
    [rfp3, svmp3, lda3p, y_pred3l, y_pred3],
]
ds4 = [
    [rft4, svmt4, lda4t, y_val4l, y_val4],
    [rfp4, svmp4, lda4p, y_pred4l, y_pred4],
]

# Results for the six "opc" subsets (not added to `datasets` below).
dsopc1 = [
    [rfopct1, svmopct1, ldaopc1t],
    [rfopcp1, svmopcp1, ldaopc1p],
]
dsopc2 = [
    [rfopct2, svmopct2, ldaopc2t],
    [rfopcp2, svmopcp2, ldaopc2p],
]
dsopc3 = [
    [rfopct3, svmopct3, ldaopc3t],
    [rfopcp3, svmopcp3, ldaopc3p],
]
dsopc4 = [
    [rfopct4, svmopct4, ldaopc4t],
    [rfopcp4, svmopcp4, ldaopc4p],
]
dsopc5 = [
    [rfopct5, svmopct5, ldaopc5t],
    [rfopcp5, svmopcp5, ldaopc5p],
]
dsopc6 = [
    [rfopct6, svmopct6, ldaopc6t],
    [rfopcp6, svmopcp6, ldaopc6p],
]

# Results for the two fault-mode subsets.
dslc0 = [
    [rfcl0t, svmc0t, ldacl0t, y_val_f0],
    [rfcl0p, svmc0p, ldacl0p, y_pred_f0l],
]
dslc1 = [
    [rfcl1t, svmc1t, ldacl1t, y_val_f1],
    [rfcl1p, svmc1p, ldacl1p, y_pred_f1l],
]

datasets = [ds1, ds2, ds3, ds4, dslc0, dslc1]

In [None]:
# Test-set results per dataset: the test labels repeated once per classifier,
# followed by the matching list of test predictions.
ds1_t = [
    [y_test1, y_test1, y_test1, y_test1, y_test1],
    [rfp1_t, svmp1_t, lda1p_t, y_pred1l_t, y_pred1_t],
]
ds2_t = [
    [y_test2, y_test2, y_test2, y_test2, y_test2],
    [rfp2_t, svmp2_t, lda2p_t, y_pred2l_t, y_pred2_t],
]
ds3_t = [
    [y_test3, y_test3, y_test3, y_test3, y_test3],
    [rfp3_t, svmp3_t, lda3p_t, y_pred3l_t, y_pred3_t],
]
ds4_t = [
    [y_test4, y_test4, y_test4, y_test4, y_test4],
    [rfp4_t, svmp4_t, lda4p_t, y_pred4l_t, y_pred4_t],
]

# NOTE(review): the lists below reuse the first-row names of the validation
# collections (rfopct*, y_val_f*) next to *_t predictions, and spell the SVM
# entry "svmsopcp*_t" rather than "svmopcp*" — verify both before using them.
# None of them is added to `datasets_t`.
dsopc1_t = [
    [rfopct1, svmopct1, ldaopc1t],
    [rfopcp1_t, svmsopcp1_t, ldaopc1p_t],
]
dsopc2_t = [
    [rfopct2, svmopct2, ldaopc2t],
    [rfopcp2_t, svmsopcp2_t, ldaopc2p_t],
]
dsopc3_t = [
    [rfopct3, svmopct3, ldaopc3t],
    [rfopcp3_t, svmsopcp3_t, ldaopc3p_t],
]
dsopc4_t = [
    [rfopct4, svmopct4, ldaopc4t],
    [rfopcp4_t, svmsopcp4_t, ldaopc4p_t],
]
dsopc5_t = [
    [rfopct5, svmopct5, ldaopc5t],
    [rfopcp5_t, svmsopcp5_t, ldaopc5p_t],
]
dsopc6_t = [
    [rfopct6, svmopct6, ldaopc6t],
    [rfopcp6_t, svmsopcp6_t, ldaopc6p_t],
]
dslc0_t = [
    [rfcl0t, svmc0t, ldacl0t, y_val_f0],
    [rfcl0p_t, svmc0p_t, ldacl0p_t, y_pred_f0l_t],
]
dslc1_t = [
    [rfcl1t, svmc1t, ldacl1t, y_val_f1],
    [rfcl1p_t, svmc1p_t, ldacl1p_t, y_pred_f1l_t],
]

datasets_t = [ds1_t, ds2_t, ds3_t, ds4_t]

In [None]:
# Accuracy of the rfp1_t predictions against the FD001 test labels.
accuracy_score(y_true=y_test1, y_pred=rfp1_t)

0.73

In [None]:
# Persist the collected test-set results. The context manager flushes and
# closes the file even if pickle.dump raises; the bare open() left it open.
with open(b"/content/drive/MyDrive/classiff_results/all_classifiers_t.pkl", "wb") as filehandler:
    pickle.dump(datasets_t, filehandler)

# STACKING

In [None]:
from utils import create_stacking_dataset

In [None]:
from utils import stacking

In [None]:
from utils import train_lda

In [None]:
from utils import train_svm

In [None]:
from utils import train_rf

In [None]:
# Fit one LDA classifier per dataset with the settings chosen during parameter
# tuning. Every fit receives the same seeded repeated stratified splitter
# (10 folds, 3 repeats).

cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=237)

# One row per dataset: the two tuned settings (presumably solver and
# shrinkage - confirm against utils.train_lda), then the training data.
lda_runs = [
    ('lsqr', 0.01, x_train1, y_train1),
    ('lsqr', None, x_train2, y_train2),
    ('lsqr', None, x_train3, y_train3),
    ('svd', None, x_train4, y_train4),
]

best_model_lda1, best_model_lda2, best_model_lda3, best_model_lda4 = [
    train_lda(*run, cv) for run in lda_runs
]

In [None]:
# Fit one SVM classifier per dataset with the settings chosen during parameter
# tuning.

# One row per dataset: the three tuned settings (presumably C, gamma and
# kernel - confirm against utils.train_svm), then the training data.
svm_runs = [
    (0.1, 'scale', 'rbf', x_train1, y_train1),
    (0.1, 'auto', 'rbf', x_train2, y_train2),
    (0.1, 'scale', 'rbf', x_train3, y_train3),
    (1.0, 'scale', 'rbf', x_train4, y_train4),
]

best_model_svm1, best_model_svm2, best_model_svm3, best_model_svm4 = [
    train_svm(*run) for run in svm_runs
]

In [None]:
# Fit one random forest classifier per dataset with the settings chosen during
# parameter tuning.

# One row per dataset: the four tuned settings (see utils.train_rf for their
# meaning and order), then the training data.
rf_runs = [
    (20, 'sqrt', 10, 150, x_train1, y_train1),
    (10, 'sqrt', 5, 150, x_train2, y_train2),
    (10, 'sqrt', 5, 100, x_train3, y_train3),
    (20, 'sqrt', 10, 150, x_train4, y_train4),
]

best_model_rfa1, best_model_rfa2, best_model_rfa3, best_model_rfa4 = [
    train_rf(*run) for run in rf_runs
]

In [None]:
# Build one Elman (SimpleRNN) classifier per dataset and restore the weights of
# the already trained model for that dataset.
#
# Each model must own its own Input/SimpleRNN/Dense graph. Wrapping a single
# shared graph in several `Model` objects makes them share the same layer
# instances (and therefore the same weight variables), so every `load_weights`
# call would overwrite the previous one and all four models would end up
# holding the dataset-4 weights.


def load_elman_model(weights_path):
    """Return a freshly built Elman network with weights restored from a checkpoint.

    The architecture is fixed: an input of shape (30, len(sensors)), a
    SimpleRNN layer with 256 units and a 4-way softmax output layer.

    Args:
        weights_path: path of the checkpoint passed to `Model.load_weights`.

    Returns:
        A Keras `Model` whose layers are not shared with any other model.
    """
    elman_inputs = Input(shape=(30, len(sensors)))
    hidden = SimpleRNN(256)(elman_inputs)
    outputs = Dense(4, activation='softmax')(hidden)
    model = Model(elman_inputs, outputs)
    model.load_weights(weights_path)
    return model


# dataset 1
model_elman_1 = load_elman_model("/content/drive/MyDrive/classiff_results/elman_models/elman30_ds1v2.ckpt")

# dataset 2
model_elman_2 = load_elman_model("/content/drive/MyDrive/classiff_results/elman_models/elman30_ds2.ckpt")

# dataset 3
model_elman_3 = load_elman_model("/content/drive/MyDrive/classiff_results/elman_models/elman30_ds3v5.ckpt")

# dataset 4
model_elman_4 = load_elman_model("/content/drive/MyDrive/classiff_results/elman_models/elman30_ds4.ckpt")


<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7c79b61f74c0>

## Dataset 1

In [None]:
# Build the inputs of the stacking model from the four base models of dataset 1:
# the validation split is used to train the stacking model, the test split to
# evaluate it.
base_models1 = (best_model_lda1, best_model_rfa1, best_model_svm1, model_elman_1)
stack_train1 = create_stacking_dataset(*base_models1, x_val1)
stack_test1 = create_stacking_dataset(*base_models1, x_test1)



In [None]:
# Fit the stacking model for dataset 1 on the validation-split features and
# report its accuracy on the test-split features.
stack_model1, y_test_stack1, y_pred_stack1 = stacking(
    stack_train1,
    y_val1,
    stack_test1,
    y_test1,
    n_estimators=20,
)

Model accuracy score : 0.7600

The scores for cross validation of gradient boosting are:
 [0.7 0.8 0.9 0.8 0.9 0.9 0.9 0.6 0.7 0.7]

The mean score of gradient boosting is: 0.79

              precision    recall  f1-score   support

           0       0.96      0.79      0.87        33
           1       0.76      0.84      0.80        56
           2       0.27      0.27      0.27        11

    accuracy                           0.76       100
   macro avg       0.66      0.63      0.65       100
weighted avg       0.77      0.76      0.76       100



## Dataset 2

In [None]:
# Build the inputs of the stacking model from the four base models of dataset 2:
# the validation split is used to train the stacking model, the test split to
# evaluate it.
base_models2 = (best_model_lda2, best_model_rfa2, best_model_svm2, model_elman_2)
stack_train2 = create_stacking_dataset(*base_models2, x_val2)
stack_test2 = create_stacking_dataset(*base_models2, x_test2)



In [None]:
# Fit the stacking model for dataset 2 on the validation-split features and
# report its accuracy on the test-split features.
stack_model2, y_test_stack2, y_pred_stack2 = stacking(
    stack_train2,
    y_val2,
    stack_test2,
    y_test2,
    n_estimators=20,
)

Model accuracy score : 0.7220

The scores for cross validation of gradient boosting are:
 [0.76923077 0.69230769 0.69230769 0.84615385 0.65384615 0.69230769
 0.73076923 0.69230769 0.57692308 0.72      ]

The mean score of gradient boosting is: 0.7066153846153845

              precision    recall  f1-score   support

           0       0.90      0.85      0.88        88
           1       0.65      0.82      0.72       114
           2       0.59      0.33      0.43        57

    accuracy                           0.72       259
   macro avg       0.71      0.67      0.68       259
weighted avg       0.72      0.72      0.71       259



## Dataset 3

In [None]:
# Build the inputs of the stacking model from the four base models of dataset 3:
# the validation split is used to train the stacking model, the test split to
# evaluate it.
base_models3 = (best_model_lda3, best_model_rfa3, best_model_svm3, model_elman_3)
stack_train3 = create_stacking_dataset(*base_models3, x_val3)
stack_test3 = create_stacking_dataset(*base_models3, x_test3)



In [None]:
# Fit the stacking model for dataset 3 on the validation-split features and
# report its accuracy on the test-split features.
stack_model3, y_test_stack3, y_pred_stack3 = stacking(
    stack_train3,
    y_val3,
    stack_test3,
    y_test3,
    n_estimators=20,
)

Model accuracy score : 0.6500

The scores for cross validation of gradient boosting are:
 [0.7 0.8 0.8 0.8 0.8 0.5 0.7 0.6 0.9 0.8]

The mean score of gradient boosting is: 0.74

              precision    recall  f1-score   support

         0.0       0.81      0.86      0.83        29
         1.0       0.79      0.59      0.67        56
         2.0       0.29      0.47      0.36        15
         3.0       0.00      0.00      0.00         0

    accuracy                           0.65       100
   macro avg       0.47      0.48      0.47       100
weighted avg       0.72      0.65      0.67       100



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Dataset 4

In [None]:
# Build the inputs of the stacking model from the four base models of dataset 4:
# the validation split is used to train the stacking model, the test split to
# evaluate it.
base_models4 = (best_model_lda4, best_model_rfa4, best_model_svm4, model_elman_4)
stack_train4 = create_stacking_dataset(*base_models4, x_val4)
stack_test4 = create_stacking_dataset(*base_models4, x_test4)



In [None]:
# Fit the stacking model for dataset 4 on the validation-split features and
# report its accuracy on the test-split features. This dataset uses 100
# estimators, whereas datasets 1-3 use 20.
stack_model4, y_test_stack4, y_pred_stack4 = stacking(
    stack_train4,
    y_val4,
    stack_test4,
    y_test4,
    n_estimators=100,
)

Model accuracy score : 0.6935

The scores for cross validation of gradient boosting are:
 [0.76  0.68  0.64  0.76  0.88  0.8   0.76  0.76  0.75  0.875]

The mean score of gradient boosting is: 0.7665

              precision    recall  f1-score   support

         0.0       0.94      0.91      0.92        80
         1.0       0.73      0.67      0.70       101
         2.0       0.57      0.46      0.51        67
         3.0       0.00      0.00      0.00         0

    accuracy                           0.69       248
   macro avg       0.56      0.51      0.53       248
weighted avg       0.75      0.69      0.72       248



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Gather the second and third return values of stacking() for the four datasets
# (presumably the test labels and the stacked predictions - confirm against
# utils.stacking) and persist them to Drive as one (stack_test, stack_pred) tuple.
stack_test = [y_test_stack1, y_test_stack2, y_test_stack3, y_test_stack4]
stack_pred = [y_pred_stack1, y_pred_stack2, y_pred_stack3, y_pred_stack4]

# The context manager closes (and therefore flushes) the file once the dump is
# done; leaving the handle open risks an incomplete pickle on the mounted drive.
with open(b"/content/drive/MyDrive/classiff_results/stacking_preds.pkl", "wb") as filehandler:
    pickle.dump((stack_test, stack_pred), filehandler)
