In [1]:
# === Configuration du chemin d'import pour modules personnalisés ===
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# === Bibliothèques standards ===
import math
import random
import time
import warnings

# === Manipulation de données ===
import numpy as np
import pandas as pd

# === Visualisation ===
import matplotlib.pyplot as plt
import seaborn as sns

# === Machine Learning & Deep Learning ===
import keras
from keras import backend as K

import tensorflow as tf
from tensorflow.keras import Sequential, optimizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout, LSTM, Activation, GRU
from tensorflow.keras.optimizers import Adam

import sklearn
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from lime.lime_tabular import RecurrentTabularExplainer
from scipy import optimize
from tqdm import tqdm
from factor_analyzer.rotator import Rotator

# === Modules internes (utils + methods externe) ===
from utils.model_function import *
from utils.methods import *
from utils.data_prep import *
from utils.evaluator import *
from utils.SHAP import *
from utils.L2X import *

# === Affichage inline (pour Jupyter Notebook uniquement) ===
# %matplotlib inline  # Décommenter si vous êtes dans un notebook

# === Suppression des avertissements ===
warnings.filterwarnings('ignore')

# === Fixation du seed pour la reproductibilité ===
SEED = 0

def set_seed(seed=SEED):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

set_seed(SEED)




In [2]:
# Load data
# Load data and preprocess
train, test, y_test = prepare_data('FD004.txt')
print(train.shape, test.shape, y_test.shape)
sensor_names = ['T20','T24','T30','T50','P20','P15','P30','Nf','Nc','epr','Ps30','phi',
                    'NRf','NRc','BPR','farB','htBleed','Nf_dmd','PCNfR_dmd','W31','W32']

remaining_sensors = ['T24','T30','T50', 'P15', 'P30','Nf','Nc', 'epr','Ps30','phi',
                    'NRf','NRc','BPR', 'farB','htBleed','W31','W32']
drop_sensors = [element for element in sensor_names if element not in remaining_sensors]

rul_piecewise = 120
train['RUL'].clip(upper=rul_piecewise, inplace=True)

(61249, 27) (41214, 26) (248, 1)


In [18]:
def model_lstm_1layer(input_shape, nodes_per_layer, dropout, activation):
    
    cb = keras.callbacks.EarlyStopping(monitor='loss', patience=4)
    model = Sequential()
    model.add(LSTM(units = nodes_per_layer, activation=activation, 
                  input_shape=input_shape))
    model.add(Dropout(dropout))
    model.add(Dense(256))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error',
                  optimizer=Adam(learning_rate=0.001))
    # model.save_weights(weights_file)

    return model

In [19]:
# X_cr_train, X_cr_test= StandardScaler().fit_transform(X_train_interim[remaining_sensors]), StandardScaler().fit_transform(X_test_interim[remaining_sensors])
# pca = PCA()

# component_train , component_test = pca.fit(X_cr_train).transform(X_cr_train), pca.transform(X_cr_test)
# # print(pca.explained_variance_, np_component) # choos component which lambda >1 # kaiser

# np_component = len(pca.explained_variance_[pca.explained_variance_>1])
# print(pca.explained_variance_,'\n', "Nb components: ", np_component) # choos component which lambda >1 # kaiser
# comp = ['comp' + str(i) for i in range(1,np_component+1)]
# X_train_interim[comp],  X_test_interim[comp]= component_train[:, :np_component], component_test[:, :np_component]


In [20]:
%%time
results_pca = pd.DataFrame()
for SEED in range(5):  
    tf.random.set_seed(SEED)
    mse = []
    R2_val = []
    RMSE = []
    score_val = []
    
    # 0.20	[64]	0.3	tanh	32	25
    
    # parameter's sample
    # weights_file = "weights_file_lstm_optimalmodel_all.h5"
    alpha = 0.3
    sequence_length = 40
    epochs = 20
    nodes_per_layer = [64]
    dropout = 0.2
    activation = 'tanh'
    batch_size = 32
    remaining_sensors = remaining_sensors
    # create model
    # input_shape = (sequence_length, len(remaining_sensors))
    # model = model_lstm_1layer(input_shape, nodes_per_layer[0], dropout, activation)
    
    # Data prepration
    X_train_interim, X_test_interim = prep_data(train, test, drop_sensors, remaining_sensors, alpha)
    
    #PCA data reduction 
    X_cr_train, X_cr_test= StandardScaler().fit_transform(X_train_interim[remaining_sensors]), StandardScaler().fit_transform(X_test_interim[remaining_sensors])
    pca = PCA()
    component_train , component_test = pca.fit(X_cr_train).transform(X_cr_train), pca.transform(X_cr_test)
    # print(pca.explained_variance_, np_component) # choos component which lambda >1 # kaiser

    np_component = len(pca.explained_variance_[pca.explained_variance_>1])
    # np_component = 4
    print(pca.explained_variance_,'\n', "Nb components: ", np_component) # choos component which lambda >1 # kaiser
    comp = ['comp' + str(i) for i in range(1,np_component+1)]
    X_train_interim[comp],  X_test_interim[comp]= component_train[:, :np_component], component_test[:, :np_component]
    
    #rotation 
    rotator = Rotator(method='varimax')
    X_train_interim[comp] = rotator.fit_transform(X_train_interim[comp])
    X_test_interim[comp] = rotator.fit_transform(X_test_interim[comp])
    print(X_train_interim[comp].head())
    print(X_test_interim[comp].head())
    # print(X_train_interim[comp].shape, X_test_interim[comp].shape)

    # create sequences train, test
    train_array = gen_data_wrapper(X_train_interim, sequence_length, comp)
    label_array = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'])

    test_gen = (list(gen_test_data(X_test_interim[X_test_interim['Unit']==unit_nr], sequence_length, comp, -99.))
               for unit_nr in X_test_interim['Unit'].unique())
    
    test_array = np.concatenate(list(test_gen)).astype(np.float32)
    test_rul = rul_piecewise_fct(y_test,rul_piecewise)
    
    input_shape = (sequence_length, len(comp))
    model = model_lstm_1layer(input_shape, nodes_per_layer[0], dropout, activation)
    print(train_array.shape, label_array.shape, test_array.shape)
            
    # Model fitting
    cb = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
    with tf.device('/device:GPU:0'):
        start_time = time.time()
        model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001))
        weights_file = model.get_weights()
        model.set_weights(weights_file)  # reset optimizer and node weights before every training iteration
        history = model.fit(train_array, label_array,
                                validation_data=(test_array, test_rul),
                                epochs=epochs,
                                batch_size=batch_size,
                                callbacks=[cb],
                                verbose=1)
        end_time = time.time()
        training_time = end_time - start_time
        mse.append(history.history['val_loss'][-1])

        y_hat_val_split = model.predict(test_array)
        R2_val.append(r2_score(test_rul, y_hat_val_split))
        RMSE.append(np.sqrt(mean_squared_error(test_rul, y_hat_val_split)))
        score_val.append(compute_s_score(test_rul, y_hat_val_split))
            
        
    #  append results
    d = {'RMSE' :np.mean(RMSE), 'std_RMSE' :np.std(RMSE),
         'S_score' :np.mean(score_val), 'std_S_score' :np.std(score_val),
         'MSE':np.mean(mse), 'std_MSE':np.std(mse),
         'nodes':str(nodes_per_layer), 'dropout':dropout, 
         'activation':activation, 'batch_size':batch_size, 'TW' : sequence_length,
         'time': training_time}

#     results = results.append(pd.DataFrame(d, index=[0]), ignore_index=True)
    results_pca = pd.concat([results_pca, pd.DataFrame(d, index=[0])], ignore_index=True)
    results_pca.to_csv('../results/pca/fd004_varimax.csv')

[7.95914066e+00 5.20347330e+00 1.05754225e+00 9.73443909e-01
 6.01841369e-01 3.89076541e-01 1.95285565e-01 1.42521636e-01
 1.18388892e-01 9.96185263e-02 9.54720058e-02 7.55025638e-02
 3.73317439e-02 2.33308417e-02 1.38792003e-02 9.96708843e-03
 4.46146340e-03] 
 Nb components:  3
      comp1     comp2     comp3
0 -3.804325  0.171658 -0.238337
1 -5.588574  0.488095  0.626100
2 -5.185448  0.229021  0.376971
3 -4.784509 -0.302863  0.436564
4 -4.834911 -0.288770  0.698135
      comp1     comp2     comp3
0 -7.033486 -0.269317  2.006290
1 -5.642710 -1.017398  2.032851
2 -5.716409 -1.393880  1.273379
3 -5.905808 -1.307582  0.688476
4 -5.688590 -1.080781  1.366046
(51538, 40, 3) (51538, 1) (248, 40, 3)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
[7.95914066e+00 5.20347330e+00 1.05754225e+00 9.73443909e-01
 6.01841369e-01 3.89076541e-01 1.95285565e-01 1.42521636e-01
 1.

In [21]:
results_pca

Unnamed: 0,RMSE,std_RMSE,S_score,std_S_score,MSE,std_MSE,nodes,dropout,activation,batch_size,TW,time
0,26.317962,0.0,6822.032115,0.0,758.647827,0.0,[64],0.2,tanh,32,40,416.924613
1,33.505556,0.0,15303.876841,0.0,1522.657227,0.0,[64],0.2,tanh,32,40,168.295775
2,34.532893,0.0,19392.498365,0.0,1242.661865,0.0,[64],0.2,tanh,32,40,189.506618
3,29.425937,0.0,7581.310541,0.0,865.885803,0.0,[64],0.2,tanh,32,40,336.332053
4,28.739241,0.0,7751.887639,0.0,1042.337036,0.0,[64],0.2,tanh,32,40,287.685031


### FD002

In [22]:
# Load data
# Load data and preprocess
train, test, y_test = prepare_data('FD002.txt')
print(train.shape, test.shape, y_test.shape)
sensor_names = ['T20','T24','T30','T50','P20','P15','P30','Nf','Nc','epr','Ps30','phi',
                    'NRf','NRc','BPR','farB','htBleed','Nf_dmd','PCNfR_dmd','W31','W32']

remaining_sensors = ['T24','T30','T50', 'P15', 'P30','Nf','Nc', 'epr','Ps30','phi',
                    'NRf','NRc','BPR','htBleed','W31','W32']
drop_sensors = [element for element in sensor_names if element not in remaining_sensors]

rul_piecewise = 125
train['RUL'].clip(upper=rul_piecewise, inplace=True)

(53759, 27) (33991, 26) (259, 1)


In [23]:
%%time
results_pca002 = pd.DataFrame()
for SEED in range(5):  
    tf.random.set_seed(SEED)
    mse = []
    R2_val = []
    RMSE = []
    score_val = []
    
    # 0.20	[64]	0.3	tanh	32	25
    
    # parameter's sample
    # weights_file = "weights_file_lstm_optimalmodel_all.h5"
    alpha = 0.2
    sequence_length = 40
    epochs = 20
    nodes_per_layer = [32]
    dropout = 0.1
    activation = 'tanh'
    batch_size = 128
    remaining_sensors = remaining_sensors
    # create model
    input_shape = (sequence_length, len(remaining_sensors))
    model = model_lstm_1layer(input_shape, nodes_per_layer[0], dropout, activation)
    
    # Data prepration
    X_train_interim, X_test_interim = prep_data(train, test, drop_sensors, remaining_sensors, alpha)
    
    #PCA data reduction 
    X_cr_train, X_cr_test= StandardScaler().fit_transform(X_train_interim[remaining_sensors]), StandardScaler().fit_transform(X_test_interim[remaining_sensors])
    pca = PCA()
    component_train , component_test = pca.fit(X_cr_train).transform(X_cr_train), pca.transform(X_cr_test)
    # print(pca.explained_variance_, np_component) # choos component which lambda >1 # kaiser

    np_component = len(pca.explained_variance_[pca.explained_variance_>1])
    print(pca.explained_variance_,'\n', "Nb components: ", np_component) # choos component which lambda >1 # kaiser
    comp = ['comp' + str(i) for i in range(1,np_component+1)]
    X_train_interim[comp],  X_test_interim[comp]= component_train[:, :np_component], component_test[:, :np_component]
    
    # Rotate
    rotator = Rotator(method='varimax')
    X_train_interim[comp] = rotator.fit_transform(X_train_interim[comp])
    X_test_interim[comp] = rotator.fit_transform(X_test_interim[comp])
    print(X_train_interim[comp].head())
    print(X_test_interim[comp].head())

    # create sequences train, test
    train_array = gen_data_wrapper(X_train_interim, sequence_length, comp)
    label_array = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'])

    test_gen = (list(gen_test_data(X_test_interim[X_test_interim['Unit']==unit_nr], sequence_length,comp, -99.))
               for unit_nr in X_test_interim['Unit'].unique())
    
    test_array = np.concatenate(list(test_gen)).astype(np.float32)
    test_rul = rul_piecewise_fct(y_test,rul_piecewise)
    
    input_shape = (sequence_length, len(comp))
    model = model_lstm_1layer(input_shape, nodes_per_layer[0], dropout, activation)
    print(train_array.shape, label_array.shape, test_array.shape)
            
    # Model fitting
    cb = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
    with tf.device('/device:GPU:0'):
        start_time = time.time()
        model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001))
        weights_file = model.get_weights()
        model.set_weights(weights_file)  # reset optimizer and node weights before every training iteration
        history = model.fit(train_array, label_array,
                                validation_data=(test_array, test_rul),
                                epochs=epochs,
                                batch_size=batch_size,
                                callbacks=[cb],
                                verbose=1)
        end_time = time.time()
        training_time = end_time - start_time
        mse.append(history.history['val_loss'][-1])

        y_hat_val_split = model.predict(test_array)
        R2_val.append(r2_score(test_rul, y_hat_val_split))
        RMSE.append(np.sqrt(mean_squared_error(test_rul, y_hat_val_split)))
        score_val.append(compute_s_score(test_rul, y_hat_val_split))
            
        
    #  append results
    d = {'RMSE' :np.mean(RMSE), 'std_RMSE' :np.std(RMSE),
         'S_score' :np.mean(score_val), 'std_S_score' :np.std(score_val),
         'MSE':np.mean(mse), 'std_MSE':np.std(mse),
         'nodes':str(nodes_per_layer), 'dropout':dropout, 
         'activation':activation, 'batch_size':batch_size, 'TW' : sequence_length,
         'time': training_time}

#     results = results.append(pd.DataFrame(d, index=[0]), ignore_index=True)
    results_pca002 = pd.concat([results_pca002, pd.DataFrame(d, index=[0])], ignore_index=True)
    results_pca002.to_csv('../results/pca/fd002_varimax.csv')

[1.04271508e+01 2.66149542e+00 1.05555422e+00 4.67459514e-01
 2.81050559e-01 2.11386654e-01 1.92544371e-01 1.64287263e-01
 1.32272442e-01 1.16578816e-01 1.04679947e-01 7.46965656e-02
 4.85539287e-02 3.45854321e-02 1.97697647e-02 8.23190831e-03] 
 Nb components:  3
      comp1     comp2     comp3
0  0.237497 -4.288898 -1.123881
1  0.866028 -3.819474 -1.219568
2  0.635171 -3.323800  0.780730
3 -0.275638 -3.436208 -0.208620
4  0.473778 -3.251752  0.864355
      comp1     comp2     comp3
0 -0.010903  0.632199  3.488154
1 -1.137766 -1.016830  2.856822
2 -1.142163 -2.675207  1.094389
3 -0.000756 -3.208743  0.308315
4 -0.741858 -3.012953  1.068638
(43619, 40, 3) (43619, 1) (259, 40, 3)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
[1.04271508e+01 2.66149542e+00 1.05555422e+00 4.67459514e-01
 2.81050559e-01 2.11386654e-01 1.92544371e-01 1.64287263e-01
 1.32272442e-01 1.16578816e-01 1.04679947e-01 7.46965656e-02
 4.85539287e-02 3.45854321e-02 1.97697647e-02 8.2319

In [24]:
results_pca002

Unnamed: 0,RMSE,std_RMSE,S_score,std_S_score,MSE,std_MSE,nodes,dropout,activation,batch_size,TW,time
0,27.995586,0.0,4261.021336,0.0,924.156982,0.0,[32],0.1,tanh,128,40,40.749763
1,26.467134,0.0,3526.288108,0.0,739.944763,0.0,[32],0.1,tanh,128,40,28.815168
2,26.665649,0.0,3993.771572,0.0,756.697449,0.0,[32],0.1,tanh,128,40,31.342091
3,26.162858,0.0,3398.607448,0.0,707.411072,0.0,[32],0.1,tanh,128,40,61.218324
4,26.329413,0.0,4707.650054,0.0,693.237976,0.0,[32],0.1,tanh,128,40,92.156073


### FD003

In [25]:
# Load data
# Load data and preprocess
train, test, y_test = prepare_data('FD003.txt')
print(train.shape, test.shape, y_test.shape)
sensor_names = ['T20','T24','T30','T50','P20','P15','P30','Nf','Nc','epr','Ps30','phi',
                    'NRf','NRc','BPR','farB','htBleed','Nf_dmd','PCNfR_dmd','W31','W32']

remaining_sensors = ['T24','T30','T50', 'P15', 'P30','Nf','Nc', 'epr','Ps30','phi',
                    'NRf','NRc','BPR','htBleed','W31','W32']
drop_sensors = [element for element in sensor_names if element not in remaining_sensors]

rul_piecewise = 125
train['RUL'].clip(upper=rul_piecewise, inplace=True)

(24720, 27) (16596, 26) (100, 1)


In [26]:
%%time
results_pca003 = pd.DataFrame()
for SEED in range(5):  
    tf.random.set_seed(SEED)
    mse = []
    R2_val = []
    RMSE = []
    score_val = []
    
    # 0.20	[64]	0.3	tanh	32	25
    
    # parameter's sample
    # weights_file = "weights_file_lstm_optimalmodel_all.h5"
    alpha = 0.1
    sequence_length = 35
    epochs = 20
    nodes_per_layer = [64]
    dropout = 0.2
    activation = 'tanh'
    batch_size = 32
    remaining_sensors = remaining_sensors
    
    # Data prepration
    X_train_interim, X_test_interim = prep_data(train, test, drop_sensors, remaining_sensors, alpha)
    
    #PCA data reduction 
    X_cr_train, X_cr_test= StandardScaler().fit_transform(X_train_interim[remaining_sensors]), StandardScaler().fit_transform(X_test_interim[remaining_sensors])
    pca = PCA()
    component_train , component_test = pca.fit(X_cr_train).transform(X_cr_train), pca.transform(X_cr_test)
    # print(pca.explained_variance_, np_component) # choos component which lambda >1 # kaiser

    np_component = len(pca.explained_variance_[pca.explained_variance_>1])
    print(pca.explained_variance_,'\n', "Nb components: ", np_component) # choos component which lambda >1 # kaiser
    comp = ['comp' + str(i) for i in range(1,np_component+1)]
    X_train_interim[comp],  X_test_interim[comp]= component_train[:, :np_component], component_test[:, :np_component]
    
    # Rotate
    rotator = Rotator(method='varimax')
    X_train_interim[comp] = rotator.fit_transform(X_train_interim[comp])
    X_test_interim[comp] = rotator.fit_transform(X_test_interim[comp])
    print(X_train_interim[comp].head())
    print(X_test_interim[comp].head())

    # create sequences train, test
    train_array = gen_data_wrapper(X_train_interim, sequence_length, comp)
    label_array = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'])

    test_gen = (list(gen_test_data(X_test_interim[X_test_interim['Unit']==unit_nr], sequence_length,comp, -99.))
               for unit_nr in X_test_interim['Unit'].unique())
    
    test_array = np.concatenate(list(test_gen)).astype(np.float32)
    test_rul = rul_piecewise_fct(y_test,rul_piecewise)
    
    input_shape = (sequence_length, len(comp))
    model = model_lstm_1layer(input_shape, nodes_per_layer[0], dropout, activation)
    print(train_array.shape, label_array.shape, test_array.shape)
            
    # Model fitting
    cb = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
    with tf.device('/device:GPU:0'):
        start_time = time.time()
        model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001))
        weights_file = model.get_weights()
        model.set_weights(weights_file)  # reset optimizer and node weights before every training iteration
        history = model.fit(train_array, label_array,
                                validation_data=(test_array, test_rul),
                                epochs=epochs,
                                batch_size=batch_size,
                                callbacks=[cb],
                                verbose=1)
        end_time = time.time()
        training_time = end_time - start_time
        mse.append(history.history['val_loss'][-1])

        y_hat_val_split = model.predict(test_array)
        R2_val.append(r2_score(test_rul, y_hat_val_split))
        RMSE.append(np.sqrt(mean_squared_error(test_rul, y_hat_val_split)))
        score_val.append(compute_s_score(test_rul, y_hat_val_split))
            
        
    #  append results
    d = {'RMSE' :np.mean(RMSE), 'std_RMSE' :np.std(RMSE),
         'S_score' :np.mean(score_val), 'std_S_score' :np.std(score_val),
         'MSE':np.mean(mse), 'std_MSE':np.std(mse),
         'nodes':str(nodes_per_layer), 'dropout':dropout, 
         'activation':activation, 'batch_size':batch_size, 'TW' : sequence_length,
         'time': training_time}

#     results = results.append(pd.DataFrame(d, index=[0]), ignore_index=True)
    results_pca003 = pd.concat([results_pca003, pd.DataFrame(d, index=[0])], ignore_index=True)
    results_pca003.to_csv('../results/pca/fd003_varimax.csv')

[8.19095667e+00 5.49399656e+00 8.81347307e-01 7.81564822e-01
 3.51238057e-01 1.43235420e-01 4.09913921e-02 3.53324081e-02
 2.44339253e-02 1.75337353e-02 1.33853528e-02 1.23424994e-02
 6.63828566e-03 3.52672765e-03 3.10180225e-03 1.02230366e-03] 
 Nb components:  2
      comp1     comp2
0 -1.538312 -0.544704
1 -0.930198 -0.333238
2 -0.870208 -0.240347
3 -0.788517 -0.178757
4 -0.909442 -0.304579
      comp1     comp2
0 -0.623506 -2.289400
1 -0.450795 -1.901779
2 -0.334288 -2.275177
3 -0.622983 -1.845581
4 -0.643156 -1.711918
(21320, 35, 2) (21320, 1) (100, 35, 2)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
[8.19095667e+00 5.49399656e+00 8.81347307e-01 7.81564822e-01
 3.51238057e-01 1.43235420e-01 4.09913921e-02 3.53324081e-02
 2.44339253e-02 1.75337353e-02 1.33853528e-02 1.23424994e-02
 6.63828566e-03 3.52672765e-03 3.10180225e-03 1.02230366e-03] 
 Nb components:  2
      comp1     comp2
0 -1.538312 -0.544704
1 -0.930198 -0.333238
2 -0.870208 -0.240347
3 

In [27]:
results_pca003

Unnamed: 0,RMSE,std_RMSE,S_score,std_S_score,MSE,std_MSE,nodes,dropout,activation,batch_size,TW,time
0,27.961714,0.0,11933.343361,0.0,1117.577759,0.0,[64],0.2,tanh,32,35,50.961694
1,24.26013,0.0,5653.835713,0.0,791.407654,0.0,[64],0.2,tanh,32,35,86.691065
2,24.916652,0.0,3737.031015,0.0,701.431824,0.0,[64],0.2,tanh,32,35,63.303207
3,22.380443,0.0,1912.213083,0.0,507.229248,0.0,[64],0.2,tanh,32,35,76.991445
4,21.932046,0.0,2638.054409,0.0,575.747681,0.0,[64],0.2,tanh,32,35,75.760082


### FD001

In [28]:
# Load data
# Load data and preprocess
train, test, y_test = prepare_data('FD001.txt')
print(train.shape, test.shape, y_test.shape)
sensor_names = ['T20','T24','T30','T50','P20','P15','P30','Nf','Nc','epr','Ps30','phi',
                    'NRf','NRc','BPR','farB','htBleed','Nf_dmd','PCNfR_dmd','W31','W32']
remaining_sensors = ['T24','T30','T50', 'P15', 'P30','Nf','Nc','Ps30','phi',
                    'NRf','NRc','BPR','htBleed','W31','W32']
drop_sensors = [element for element in sensor_names if element not in remaining_sensors]

rul_piecewise = 125
train['RUL'].clip(upper=rul_piecewise, inplace=True)

(20631, 27) (13096, 26) (100, 1)


In [29]:
%%time
results_pca001 = pd.DataFrame()
for SEED in range(5):  
    set_seed(SEED)
    mse = []
    R2_val = []
    RMSE = []
    score_val = []
    
    # 0.20	[64]	0.3	tanh	32	25
    
    # parameter's sample
    # weights_file = "weights_file_lstm_optimalmodel_all.h5"
    alpha = 0.1
    sequence_length = 30
    epochs = 20
    nodes_per_layer = [128]
    dropout = 0.2
    activation = 'tanh'
    batch_size = 64
    remaining_sensors = remaining_sensors    
    # Data prepration
    X_train_interim, X_test_interim = prep_data(train, test, drop_sensors, remaining_sensors, alpha)
    
    #PCA data reduction 
    X_cr_train, X_cr_test= StandardScaler().fit_transform(X_train_interim[remaining_sensors]), StandardScaler().fit_transform(X_test_interim[remaining_sensors])
    pca = PCA()
    component_train , component_test = pca.fit(X_cr_train).transform(X_cr_train), pca.transform(X_cr_test)
    # print(pca.explained_variance_, np_component) # choos component which lambda >1 # kaiser

    np_component = len(pca.explained_variance_[pca.explained_variance_>1])
    print(pca.explained_variance_,'\n', "Nb components: ", np_component) # choos component which lambda >1 # kaiser
    comp = ['comp' + str(i) for i in range(1,np_component+1)]
    X_train_interim[comp],  X_test_interim[comp]= component_train[:, :np_component], component_test[:, :np_component]
    
    # Rotate
    rotator = Rotator(method='varimax')
    X_train_interim[comp] = rotator.fit_transform(X_train_interim[comp])
    X_test_interim[comp] = rotator.fit_transform(X_test_interim[comp])
    print(X_train_interim[comp].head())
    print(X_test_interim[comp].head())

    # create sequences train, test
    train_array = gen_data_wrapper(X_train_interim, sequence_length, comp)
    label_array = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'])

    test_gen = (list(gen_test_data(X_test_interim[X_test_interim['Unit']==unit_nr], sequence_length,comp, -99.))
               for unit_nr in X_test_interim['Unit'].unique())
    
    test_array = np.concatenate(list(test_gen)).astype(np.float32)
    test_rul = rul_piecewise_fct(y_test,rul_piecewise)
    
    input_shape = (sequence_length, len(comp))
    model = model_lstm_1layer(input_shape, nodes_per_layer[0], dropout, activation)
    print(train_array.shape, label_array.shape, test_array.shape)
            
    # Model fitting
    cb = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
    with tf.device('/device:GPU:0'):
        start_time = time.time()
        model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001))
        weights_file = model.get_weights()
        model.set_weights(weights_file)  # reset optimizer and node weights before every training iteration
        history = model.fit(train_array, label_array,
                                validation_data=(test_array, test_rul),
                                epochs=epochs,
                                batch_size=batch_size,
                                callbacks=[cb],
                                verbose=1)
        end_time = time.time()
        training_time = end_time - start_time
        mse.append(history.history['val_loss'][-1])

        y_hat_val_split = model.predict(test_array)
        R2_val.append(r2_score(test_rul, y_hat_val_split))
        RMSE.append(np.sqrt(mean_squared_error(test_rul, y_hat_val_split)))
        score_val.append(compute_s_score(test_rul, y_hat_val_split))
            
        
    #  append results
    d = {'RMSE' :np.mean(RMSE), 'std_RMSE' :np.std(RMSE),
         'S_score' :np.mean(score_val), 'std_S_score' :np.std(score_val),
         'MSE':np.mean(mse), 'std_MSE':np.std(mse),
         'nodes':str(nodes_per_layer), 'dropout':dropout, 
         'activation':activation, 'batch_size':batch_size, 'TW' : sequence_length,
         'time': training_time}

#     results = results.append(pd.DataFrame(d, index=[0]), ignore_index=True)
    results_pca001 = pd.concat([results_pca001, pd.DataFrame(d, index=[0])], ignore_index=True)
    results_pca001.to_csv('../results/pca/fd001_varimax.csv')

[1.16218316e+01 2.22745151e+00 7.53919107e-01 7.60689125e-02
 5.80733626e-02 4.69453854e-02 4.15078334e-02 3.47383080e-02
 3.32981565e-02 2.89969632e-02 2.53785317e-02 1.84836056e-02
 1.69423744e-02 1.35008525e-02 3.59058504e-03] 
 Nb components:  2
      comp1     comp2
0 -3.350030 -0.498183
1 -2.765970 -0.864560
2 -2.981935 -0.884688
3 -3.234127 -0.924249
4 -3.089479 -0.789932
      comp1     comp2
0 -1.585337 -0.748141
1 -2.355410  0.391989
2 -1.888595  0.232093
3 -2.040020  0.005877
4 -2.349974 -0.057874
(17731, 30, 2) (17731, 1) (100, 30, 2)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
[1.16218316e+01 2.22745151e+00 7.53919107e-01 7.60689125e-02
 5.80733626e-02 4.69453854e-02 4.15078334e-02 3.47383080e-02
 3.32981565e-02 2.89969632e-02 2.53785317e-02 1.84836056e-02
 1.69423744e-02 1.35008525e-02 3.59058504e-03] 
 Nb components:  2
      comp1     comp2
0 -3.350030 -0.498183
1 -2.765970 -0.864560
2 -2.981935 -0.884688
3 -3.234127 -0.924249
4 -3.08947

In [30]:
results_pca001

Unnamed: 0,RMSE,std_RMSE,S_score,std_S_score,MSE,std_MSE,nodes,dropout,activation,batch_size,TW,time
0,23.156052,0.0,1257.22845,0.0,589.259583,0.0,[128],0.2,tanh,64,30,44.204294
1,21.387756,0.0,1317.097333,0.0,568.977295,0.0,[128],0.2,tanh,64,30,50.187534
2,21.904547,0.0,1207.393286,0.0,685.709229,0.0,[128],0.2,tanh,64,30,55.04504
3,25.340424,0.0,1613.314798,0.0,740.722168,0.0,[128],0.2,tanh,64,30,113.445598
4,24.464262,0.0,1312.354064,0.0,671.235413,0.0,[128],0.2,tanh,64,30,44.935172
