In [1]:
from __future__ import print_function
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras
import warnings
import tensorflow as tf
import seaborn as sns
import sklearn
import random
import math
import time


from lime.lime_tabular import RecurrentTabularExplainer
from tqdm import tqdm
from tqdm import tqdm
from sklearn.metrics import mean_squared_error, r2_score 
from sklearn.model_selection import GroupKFold
from sklearn import preprocessing
from keras import backend as K
from sklearn.preprocessing import MinMaxScaler , StandardScaler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Activation, GRU
from scipy import optimize
from tensorflow.keras import optimizers
from sklearn.decomposition import PCA
from tensorflow.keras.callbacks import EarlyStopping

from sp_modif.model_function import *
from sp_modif.methods import *
from sp_modif.data_prep import *
from sp_modif.evaluator import *
from sp_modif.SHAP import *
from sp_modif.L2X import *
from methods import *

%matplotlib inline
warnings.filterwarnings('ignore')

SEED = 0
def set_seed(seed=SEED):
    os.environ['PYTHONHASHSEED'] = str(SEED)
    random.seed(SEED)
    np.random.seed(SEED)
    tf.random.set_seed(SEED)

# Appeler la fonction pour fixer le seed
set_seed(SEED)




In [2]:
# Load data
# Load data and preprocess
train, test, y_test = prepare_data('FD004.txt')
print(train.shape, test.shape, y_test.shape)
sensor_names = ['T20','T24','T30','T50','P20','P15','P30','Nf','Nc','epr','Ps30','phi',
                    'NRf','NRc','BPR','farB','htBleed','Nf_dmd','PCNfR_dmd','W31','W32']

remaining_sensors = ['T24','T30','T50', 'P15', 'P30','Nf','Nc', 'epr','Ps30','phi',
                    'NRf','NRc','BPR', 'farB','htBleed','W31','W32']
drop_sensors = [element for element in sensor_names if element not in remaining_sensors]

rul_piecewise = 120
train['RUL'].clip(upper=rul_piecewise, inplace=True)

(61249, 27) (41214, 26) (248, 1)


In [3]:
np_component_fd001 = 3
np_component_fd002 = 3
np_component_fd003 = 3
np_component_fd004 = 3

In [4]:
def model_lstm_1layer(input_shape, nodes_per_layer, dropout, activation):
    
    cb = keras.callbacks.EarlyStopping(monitor='loss', patience=4)
    model = Sequential()
    model.add(LSTM(units = nodes_per_layer, activation=activation, 
                  input_shape=input_shape))
    model.add(Dropout(dropout))
    model.add(Dense(256))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error',
                  optimizer=Adam(learning_rate=0.001))
    # model.save_weights(weights_file)

    return model

In [22]:
%%time
results_pca = pd.DataFrame()
for SEED in range(5):  
    tf.random.set_seed(SEED)
    mse = []
    R2_val = []
    RMSE = []
    score_val = []
    
    # 0.20	[64]	0.3	tanh	32	25
    
    # parameter's sample
    # weights_file = "weights_file_lstm_optimalmodel_all.h5"
    alpha = 0.3
    sequence_length = 40
    epochs = 20
    nodes_per_layer = [64]
    dropout = 0.2
    activation = 'tanh'
    batch_size = 32
    remaining_sensors = remaining_sensors
    # create model
    # input_shape = (sequence_length, len(remaining_sensors))
    # model = model_lstm_1layer(input_shape, nodes_per_layer[0], dropout, activation)
    
    # Data prepration
    X_train_interim, X_test_interim = prep_data(train, test, drop_sensors, remaining_sensors, alpha)
    
    #PCA data reduction 
    X_cr_train, X_cr_test= StandardScaler().fit_transform(X_train_interim[remaining_sensors]), StandardScaler().fit_transform(X_test_interim[remaining_sensors])
    pca = PCA()
    component_train , component_test = pca.fit(X_cr_train).transform(X_cr_train), pca.transform(X_cr_test)
    # print(pca.explained_variance_, np_component) # choos component which lambda >1 # kaiser

    # np_component = len(pca.explained_variance_[pca.explained_variance_>1])
    np_component = np_component_fd001
    print(pca.explained_variance_,'\n', "Nb components: ", np_component) # choos component which lambda >1 # kaiser
    comp = ['comp' + str(i) for i in range(1,np_component+1)]
    X_train_interim[comp],  X_test_interim[comp]= component_train[:, :np_component], component_test[:, :np_component]
    

    # create sequences train, test
    train_array = gen_data_wrapper(X_train_interim, sequence_length, comp)
    label_array = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'])

    test_gen = (list(gen_test_data(X_test_interim[X_test_interim['Unit']==unit_nr], sequence_length,comp, -99.))
               for unit_nr in X_test_interim['Unit'].unique())
    
    test_array = np.concatenate(list(test_gen)).astype(np.float32)
    test_rul = rul_piecewise_fct(y_test,rul_piecewise)
    
    input_shape = (sequence_length, len(comp))
    model = model_lstm_1layer(input_shape, nodes_per_layer[0], dropout, activation)
    print(train_array.shape, label_array.shape, test_array.shape)
            
    # Model fitting
    cb = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
    with tf.device('/device:GPU:0'):
        start_time = time.time()
        model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001))
        weights_file = model.get_weights()
        model.set_weights(weights_file)  # reset optimizer and node weights before every training iteration
        history = model.fit(train_array, label_array,
                                validation_data=(test_array, test_rul),
                                epochs=epochs,
                                batch_size=batch_size,
                                callbacks=[cb],
                                verbose=1)
        end_time = time.time()
        training_time = end_time - start_time
        mse.append(history.history['val_loss'][-1])

        y_hat_val_split = model.predict(test_array)
        R2_val.append(r2_score(test_rul, y_hat_val_split))
        RMSE.append(np.sqrt(mean_squared_error(test_rul, y_hat_val_split)))
        score_val.append(compute_s_score(test_rul, y_hat_val_split))
            
        
    #  append results
    d = {'RMSE' :np.mean(RMSE), 'std_RMSE' :np.std(RMSE),
         'S_score' :np.mean(score_val), 'std_S_score' :np.std(score_val),
         'MSE':np.mean(mse), 'std_MSE':np.std(mse),
         'nodes':str(nodes_per_layer), 'dropout':dropout, 
         'activation':activation, 'batch_size':batch_size, 'TW' : sequence_length,
         'time': training_time}

#     results = results.append(pd.DataFrame(d, index=[0]), ignore_index=True)
    results_pca = pd.concat([results_pca, pd.DataFrame(d, index=[0])], ignore_index=True)
    results_pca.to_csv('results/pca/fd004_nbcomp.csv')

[7.95914066e+00 5.20347330e+00 1.05754225e+00 9.73443909e-01
 6.01841369e-01 3.89076541e-01 1.95285565e-01 1.42521636e-01
 1.18388892e-01 9.96185263e-02 9.54720058e-02 7.55025638e-02
 3.73317439e-02 2.33308417e-02 1.38792003e-02 9.96708843e-03
 4.46146340e-03] 
 Nb components:  3
(51538, 40, 3) (51538, 1) (248, 40, 3)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
[7.95914066e+00 5.20347330e+00 1.05754225e+00 9.73443909e-01
 6.01841369e-01 3.89076541e-01 1.95285565e-01 1.42521636e-01
 1.18388892e-01 9.96185263e-02 9.54720058e-02 7.55025638e-02
 3.73317439e-02 2.33308417e-02 1.38792003e-02 9.96708843e-03
 4.46146340e-03] 
 Nb components:  3
(51538, 40, 3) (51538, 1) (248, 40, 3)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
[7.95914066e+00 5.20347330e+00 1.05754225e+00 9.73443909e-01
 6.01841369e-01 3.89076541e-01 1.95285565e-01 1.42521636e-01
 1.18388892e-01 9.96185263e-02 9.54720058e-02 7.55025638e-02
 3.73317439e-02 2.33308417e-02 1.38792003e-02 9.96708843e-03
 4.461

In [12]:
explained_variance = np.array([
 7.9591, 5.2035, 1.0575, 0.9734, 0.6018, 0.3891, 0.1953, 0.1425,
 0.1184, 0.0996, 0.0955, 0.0755, 0.0373, 0.0233, 0.0139, 0.0100, 0.0045
])

# Pourcentage de variance expliquée par composante
explained_variance_ratio = explained_variance / explained_variance.sum() * 100

# Variance cumulée
cumulative_variance = np.cumsum(explained_variance_ratio)
cumulative_variance.round(2)

array([ 46.82,  77.43,  83.65,  89.37,  92.91,  95.2 ,  96.35,  97.19,
        97.88,  98.47,  99.03,  99.48,  99.7 ,  99.83,  99.91,  99.97,
       100.  ])

In [23]:
results_pca

Unnamed: 0,RMSE,std_RMSE,S_score,std_S_score,MSE,std_MSE,nodes,dropout,activation,batch_size,TW,time
0,34.67167,0.0,37744.626651,0.0,1313.072632,0.0,[64],0.2,tanh,32,40,55.423713
1,31.914653,0.0,10807.580388,0.0,1222.218018,0.0,[64],0.2,tanh,32,40,54.111316
2,29.960175,0.0,8044.491358,0.0,1059.063232,0.0,[64],0.2,tanh,32,40,115.157727
3,31.939575,0.0,12140.526244,0.0,1024.887085,0.0,[64],0.2,tanh,32,40,107.83211
4,31.529178,0.0,16230.094263,0.0,1176.055786,0.0,[64],0.2,tanh,32,40,88.713208


### FD002

In [24]:
# Load data
# Load data and preprocess
train, test, y_test = prepare_data('FD002.txt')
print(train.shape, test.shape, y_test.shape)
sensor_names = ['T20','T24','T30','T50','P20','P15','P30','Nf','Nc','epr','Ps30','phi',
                    'NRf','NRc','BPR','farB','htBleed','Nf_dmd','PCNfR_dmd','W31','W32']

remaining_sensors = ['T24','T30','T50', 'P15', 'P30','Nf','Nc', 'epr','Ps30','phi',
                    'NRf','NRc','BPR','htBleed','W31','W32']
drop_sensors = [element for element in sensor_names if element not in remaining_sensors]

rul_piecewise = 125
train['RUL'].clip(upper=rul_piecewise, inplace=True)

(53759, 27) (33991, 26) (259, 1)


In [25]:
%%time
results_pca002 = pd.DataFrame()
for SEED in range(5):  
    tf.random.set_seed(SEED)
    mse = []
    R2_val = []
    RMSE = []
    score_val = []
    
    # 0.20	[64]	0.3	tanh	32	25
    
    # parameter's sample
    # weights_file = "weights_file_lstm_optimalmodel_all.h5"
    alpha = 0.2
    sequence_length = 40
    epochs = 20
    nodes_per_layer = [32]
    dropout = 0.1
    activation = 'tanh'
    batch_size = 128
    remaining_sensors = remaining_sensors
    # create model
    # input_shape = (sequence_length, len(remaining_sensors))
    # model = model_lstm_1layer(input_shape, nodes_per_layer[0], dropout, activation)
    
    # Data prepration
    X_train_interim, X_test_interim = prep_data(train, test, drop_sensors, remaining_sensors, alpha)
    
    #PCA data reduction 
    X_cr_train, X_cr_test= StandardScaler().fit_transform(X_train_interim[remaining_sensors]), StandardScaler().fit_transform(X_test_interim[remaining_sensors])
    pca = PCA()
    component_train , component_test = pca.fit(X_cr_train).transform(X_cr_train), pca.transform(X_cr_test)
    # print(pca.explained_variance_, np_component) # choos component which lambda >1 # kaiser

    # np_component = len(pca.explained_variance_[pca.explained_variance_>1])
    np_component = np_component_fd002
    print(pca.explained_variance_,'\n', "Nb components: ", np_component) # choos component which lambda >1 # kaiser
    comp = ['comp' + str(i) for i in range(1,np_component+1)]
    X_train_interim[comp],  X_test_interim[comp]= component_train[:, :np_component], component_test[:, :np_component]
    

    # create sequences train, test
    train_array = gen_data_wrapper(X_train_interim, sequence_length, comp)
    label_array = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'])

    test_gen = (list(gen_test_data(X_test_interim[X_test_interim['Unit']==unit_nr], sequence_length,comp, -99.))
               for unit_nr in X_test_interim['Unit'].unique())
    
    test_array = np.concatenate(list(test_gen)).astype(np.float32)
    test_rul = rul_piecewise_fct(y_test,rul_piecewise)
    
    input_shape = (sequence_length, len(comp))
    model = model_lstm_1layer(input_shape, nodes_per_layer[0], dropout, activation)
    print(train_array.shape, label_array.shape, test_array.shape)
            
    # Model fitting
    cb = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
    with tf.device('/device:GPU:0'):
        start_time = time.time()
        model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001))
        weights_file = model.get_weights()
        model.set_weights(weights_file)  # reset optimizer and node weights before every training iteration
        history = model.fit(train_array, label_array,
                                validation_data=(test_array, test_rul),
                                epochs=epochs,
                                batch_size=batch_size,
                                callbacks=[cb],
                                verbose=1)
        end_time = time.time()
        training_time = end_time - start_time
        mse.append(history.history['val_loss'][-1])

        y_hat_val_split = model.predict(test_array)
        R2_val.append(r2_score(test_rul, y_hat_val_split))
        RMSE.append(np.sqrt(mean_squared_error(test_rul, y_hat_val_split)))
        score_val.append(compute_s_score(test_rul, y_hat_val_split))
            
        
    #  append results
    d = {'RMSE' :np.mean(RMSE), 'std_RMSE' :np.std(RMSE),
         'S_score' :np.mean(score_val), 'std_S_score' :np.std(score_val),
         'MSE':np.mean(mse), 'std_MSE':np.std(mse),
         'nodes':str(nodes_per_layer), 'dropout':dropout, 
         'activation':activation, 'batch_size':batch_size, 'TW' : sequence_length,
         'time': training_time}

#     results = results.append(pd.DataFrame(d, index=[0]), ignore_index=True)
    results_pca002 = pd.concat([results_pca002, pd.DataFrame(d, index=[0])], ignore_index=True)
    results_pca002.to_csv('results/pca/fd002_nbcomp.csv')

[1.04271508e+01 2.66149542e+00 1.05555422e+00 4.67459514e-01
 2.81050559e-01 2.11386654e-01 1.92544371e-01 1.64287263e-01
 1.32272442e-01 1.16578816e-01 1.04679947e-01 7.46965656e-02
 4.85539287e-02 3.45854321e-02 1.97697647e-02 8.23190831e-03] 
 Nb components:  3
(43619, 40, 3) (43619, 1) (259, 40, 3)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
[1.04271508e+01 2.66149542e+00 1.05555422e+00 4.67459514e-01
 2.81050559e-01 2.11386654e-01 1.92544371e-01 1.64287263e-01
 1.32272442e-01 1.16578816e-01 1.04679947e-01 7.46965656e-02
 4.85539287e-02 3.45854321e-02 1.97697647e-02 8.23190831e-03] 
 Nb components:  3
(43619, 40, 3) (43619, 1) (259, 40, 3)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
[1.04271508e+01 2.66149542e+00 1.05555422e+00 4.67459514e-01
 2.81050559e-01 2.11386654e-01 1.92544371e-01 1.64287263e-01
 1.32272442e-01 1.16578816e-01 1.04679947e-01 7.

In [18]:
from ace_tools_open import display_dataframe_to_user

# Valeurs de explained_variance_
explained_variance = np.array([
    1.04271508e+01, 2.66149542e+00, 1.05555422e+00, 4.67459514e-01,
    2.81050559e-01, 2.11386654e-01, 1.92544371e-01, 1.64287263e-01,
    1.32272442e-01, 1.16578816e-01, 1.04679947e-01, 7.46965656e-02,
    4.85539287e-02, 3.45854321e-02, 1.97697647e-02, 8.23190831e-03
])

# Pourcentage de variance expliquée par composante
explained_variance_ratio = explained_variance / explained_variance.sum()

# Variance cumulée
cumulative_variance = np.cumsum(explained_variance_ratio)

# Mise en tableau
df = pd.DataFrame({
    "Composante": [f"PC{i+1}" for i in range(len(explained_variance))],
    "Variance expliquée": explained_variance,
    "% Variance expliquée": np.round(explained_variance_ratio * 100, 2),
    "% Cumulé": np.round(cumulative_variance * 100, 2)
})

# Affichage du tableau
display_dataframe_to_user("Variance expliquée par composante principale", df)

Variance expliquée par composante principale


0
Loading ITables v2.4.4 from the internet...  (need help?)


In [26]:
results_pca002

Unnamed: 0,RMSE,std_RMSE,S_score,std_S_score,MSE,std_MSE,nodes,dropout,activation,batch_size,TW,time
0,24.394288,0.0,2779.157061,0.0,665.14447,0.0,[32],0.1,tanh,128,40,31.485861
1,25.87501,0.0,3887.86729,0.0,834.781616,0.0,[32],0.1,tanh,128,40,30.872162
2,24.515957,0.0,3621.939094,0.0,672.605347,0.0,[32],0.1,tanh,128,40,35.550115
3,26.280228,0.0,4181.798602,0.0,783.059814,0.0,[32],0.1,tanh,128,40,21.321056
4,24.550833,0.0,2894.56528,0.0,627.363159,0.0,[32],0.1,tanh,128,40,53.692363


### FD003

In [27]:
# Load data
# Load data and preprocess
train, test, y_test = prepare_data('FD003.txt')
print(train.shape, test.shape, y_test.shape)
sensor_names = ['T20','T24','T30','T50','P20','P15','P30','Nf','Nc','epr','Ps30','phi',
                    'NRf','NRc','BPR','farB','htBleed','Nf_dmd','PCNfR_dmd','W31','W32']

remaining_sensors = ['T24','T30','T50', 'P15', 'P30','Nf','Nc', 'epr','Ps30','phi',
                    'NRf','NRc','BPR','htBleed','W31','W32']
drop_sensors = [element for element in sensor_names if element not in remaining_sensors]

rul_piecewise = 125
train['RUL'].clip(upper=rul_piecewise, inplace=True)

(24720, 27) (16596, 26) (100, 1)


In [28]:
%%time
results_pca003 = pd.DataFrame()
for SEED in range(5):  
    tf.random.set_seed(SEED)
    mse = []
    R2_val = []
    RMSE = []
    score_val = []
    
    # 0.20	[64]	0.3	tanh	32	25
    
    # parameter's sample
    # weights_file = "weights_file_lstm_optimalmodel_all.h5"
    alpha = 0.1
    sequence_length = 35
    epochs = 20
    nodes_per_layer = [64]
    dropout = 0.2
    activation = 'tanh'
    batch_size = 32
    remaining_sensors = remaining_sensors
    
    # Data prepration
    X_train_interim, X_test_interim = prep_data(train, test, drop_sensors, remaining_sensors, alpha)
    
    #PCA data reduction 
    X_cr_train, X_cr_test= StandardScaler().fit_transform(X_train_interim[remaining_sensors]), StandardScaler().fit_transform(X_test_interim[remaining_sensors])
    pca = PCA()
    component_train , component_test = pca.fit(X_cr_train).transform(X_cr_train), pca.transform(X_cr_test)
    # print(pca.explained_variance_, np_component) # choos component which lambda >1 # kaiser

    # np_component = len(pca.explained_variance_[pca.explained_variance_>1])
    np_component = np_component_fd003
    print(pca.explained_variance_,'\n', "Nb components: ", np_component) # choos component which lambda >1 # kaiser
    comp = ['comp' + str(i) for i in range(1,np_component+1)]
    X_train_interim[comp],  X_test_interim[comp]= component_train[:, :np_component], component_test[:, :np_component]
    

    # create sequences train, test
    train_array = gen_data_wrapper(X_train_interim, sequence_length, comp)
    label_array = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'])

    test_gen = (list(gen_test_data(X_test_interim[X_test_interim['Unit']==unit_nr], sequence_length,comp, -99.))
               for unit_nr in X_test_interim['Unit'].unique())
    
    test_array = np.concatenate(list(test_gen)).astype(np.float32)
    test_rul = rul_piecewise_fct(y_test,rul_piecewise)
    
    input_shape = (sequence_length, len(comp))
    model = model_lstm_1layer(input_shape, nodes_per_layer[0], dropout, activation)
    print(train_array.shape, label_array.shape, test_array.shape)
            
    # Model fitting
    cb = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
    with tf.device('/device:GPU:0'):
        start_time = time.time()
        model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001))
        weights_file = model.get_weights()
        model.set_weights(weights_file)  # reset optimizer and node weights before every training iteration
        history = model.fit(train_array, label_array,
                                validation_data=(test_array, test_rul),
                                epochs=epochs,
                                batch_size=batch_size,
                                callbacks=[cb],
                                verbose=1)
        end_time = time.time()
        training_time = end_time - start_time
        mse.append(history.history['val_loss'][-1])

        y_hat_val_split = model.predict(test_array)
        R2_val.append(r2_score(test_rul, y_hat_val_split))
        RMSE.append(np.sqrt(mean_squared_error(test_rul, y_hat_val_split)))
        score_val.append(compute_s_score(test_rul, y_hat_val_split))
            
        
    #  append results
    d = {'RMSE' :np.mean(RMSE), 'std_RMSE' :np.std(RMSE),
         'S_score' :np.mean(score_val), 'std_S_score' :np.std(score_val),
         'MSE':np.mean(mse), 'std_MSE':np.std(mse),
         'nodes':str(nodes_per_layer), 'dropout':dropout, 
         'activation':activation, 'batch_size':batch_size, 'TW' : sequence_length,
         'time': training_time}

#     results = results.append(pd.DataFrame(d, index=[0]), ignore_index=True)
    results_pca003 = pd.concat([results_pca003, pd.DataFrame(d, index=[0])], ignore_index=True)
    results_pca003.to_csv('results/pca/fd003_nbcomp.csv')

[8.19095667e+00 5.49399656e+00 8.81347307e-01 7.81564822e-01
 3.51238057e-01 1.43235420e-01 4.09913921e-02 3.53324081e-02
 2.44339253e-02 1.75337353e-02 1.33853528e-02 1.23424994e-02
 6.63828566e-03 3.52672765e-03 3.10180225e-03 1.02230366e-03] 
 Nb components:  3
(21320, 35, 3) (21320, 1) (100, 35, 3)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
[8.19095667e+00 5.49399656e+00 8.81347307e-01 7.81564822e-01
 3.51238057e-01 1.43235420e-01 4.09913921e-02 3.53324081e-02
 2.44339253e-02 1.75337353e-02 1.33853528e-02 1.23424994e-02
 6.63828566e-03 3.52672765e-03 3.10180225e-03 1.02230366e-03] 
 Nb components:  3
(21320, 35, 3) (21320, 1) (100, 35, 3)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
[8.19095667e+00 5.49399656e+00 8.81347307e-01 7.81564822e-01
 3.51238057e-01 1.43235420e-01 4.09913921e-02 3.5

In [19]:
# Nouvelle série de valeurs de explained_variance_
explained_variance_2 = np.array([
    8.19095667e+00, 5.49399656e+00, 8.81347307e-01, 7.81564822e-01,
    3.51238057e-01, 1.43235420e-01, 4.09913921e-02, 3.53324081e-02,
    2.44339253e-02, 1.75337353e-02, 1.33853528e-02, 1.23424994e-02,
    6.63828566e-03, 3.52672765e-03, 3.10180225e-03, 1.02230366e-03
])

# Pourcentage de variance expliquée par composante
explained_variance_ratio_2 = explained_variance_2 / explained_variance_2.sum()

# Variance cumulée
cumulative_variance_2 = np.cumsum(explained_variance_ratio_2)

# Mise en tableau
df2 = pd.DataFrame({
    "Composante": [f"PC{i+1}" for i in range(len(explained_variance_2))],
    "Variance expliquée": explained_variance_2,
    "% Variance expliquée": np.round(explained_variance_ratio_2 * 100, 2),
    "% Cumulé": np.round(cumulative_variance_2 * 100, 2)
})

# Affichage du tableau
display_dataframe_to_user("Variance expliquée par composante principale (2e série)", df2)


Variance expliquée par composante principale (2e série)


0
Loading ITables v2.4.4 from the internet...  (need help?)


In [29]:
results_pca003

Unnamed: 0,RMSE,std_RMSE,S_score,std_S_score,MSE,std_MSE,nodes,dropout,activation,batch_size,TW,time
0,26.513756,0.0,1731.358953,0.0,765.590088,0.0,[64],0.2,tanh,32,35,64.187508
1,27.28363,0.0,1622.14157,0.0,777.788513,0.0,[64],0.2,tanh,32,35,65.350867
2,29.189492,0.0,2778.226881,0.0,874.092407,0.0,[64],0.2,tanh,32,35,40.475281
3,28.283972,0.0,1656.095051,0.0,925.732727,0.0,[64],0.2,tanh,32,35,71.344422
4,27.909782,0.0,1951.634191,0.0,833.726379,0.0,[64],0.2,tanh,32,35,60.459723


### FD001

In [30]:
# Load data
# Load data and preprocess
train, test, y_test = prepare_data('FD001.txt')
print(train.shape, test.shape, y_test.shape)
sensor_names = ['T20','T24','T30','T50','P20','P15','P30','Nf','Nc','epr','Ps30','phi',
                    'NRf','NRc','BPR','farB','htBleed','Nf_dmd','PCNfR_dmd','W31','W32']
remaining_sensors = ['T24','T30','T50', 'P15', 'P30','Nf','Nc','Ps30','phi',
                    'NRf','NRc','BPR','htBleed','W31','W32']
drop_sensors = [element for element in sensor_names if element not in remaining_sensors]

rul_piecewise = 125
train['RUL'].clip(upper=rul_piecewise, inplace=True)

(20631, 27) (13096, 26) (100, 1)


In [31]:
%%time
results_pca001 = pd.DataFrame()
for SEED in range(5):  
    set_seed(SEED)
    mse = []
    R2_val = []
    RMSE = []
    score_val = []
    
    # 0.20	[64]	0.3	tanh	32	25
    
    # parameter's sample
    # weights_file = "weights_file_lstm_optimalmodel_all.h5"
    alpha = 0.1
    sequence_length = 30
    epochs = 20
    nodes_per_layer = [128]
    dropout = 0.2
    activation = 'tanh'
    batch_size = 64
    remaining_sensors = remaining_sensors    
    # Data prepration
    X_train_interim, X_test_interim = prep_data(train, test, drop_sensors, remaining_sensors, alpha)
    
    #PCA data reduction 
    X_cr_train, X_cr_test= StandardScaler().fit_transform(X_train_interim[remaining_sensors]), StandardScaler().fit_transform(X_test_interim[remaining_sensors])
    pca = PCA()
    component_train , component_test = pca.fit(X_cr_train).transform(X_cr_train), pca.transform(X_cr_test)
    # print(pca.explained_variance_, np_component) # choos component which lambda >1 # kaiser

    # np_component = len(pca.explained_variance_[pca.explained_variance_>1])
    np_component = np_component_fd001
    print(pca.explained_variance_,'\n', "Nb components: ", np_component) # choos component which lambda >1 # kaiser
    comp = ['comp' + str(i) for i in range(1,np_component+1)]
    X_train_interim[comp],  X_test_interim[comp]= component_train[:, :np_component], component_test[:, :np_component]
    

    # create sequences train, test
    train_array = gen_data_wrapper(X_train_interim, sequence_length, comp)
    label_array = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'])

    test_gen = (list(gen_test_data(X_test_interim[X_test_interim['Unit']==unit_nr], sequence_length,comp, -99.))
               for unit_nr in X_test_interim['Unit'].unique())
    
    test_array = np.concatenate(list(test_gen)).astype(np.float32)
    test_rul = rul_piecewise_fct(y_test,rul_piecewise)
    
    input_shape = (sequence_length, len(comp))
    model = model_lstm_1layer(input_shape, nodes_per_layer[0], dropout, activation)
    print(train_array.shape, label_array.shape, test_array.shape)
            
    # Model fitting
    cb = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
    with tf.device('/device:GPU:0'):
        start_time = time.time()
        model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001))
        weights_file = model.get_weights()
        model.set_weights(weights_file)  # reset optimizer and node weights before every training iteration
        history = model.fit(train_array, label_array,
                                validation_data=(test_array, test_rul),
                                epochs=epochs,
                                batch_size=batch_size,
                                callbacks=[cb],
                                verbose=1)
        end_time = time.time()
        training_time = end_time - start_time
        mse.append(history.history['val_loss'][-1])

        y_hat_val_split = model.predict(test_array)
        R2_val.append(r2_score(test_rul, y_hat_val_split))
        RMSE.append(np.sqrt(mean_squared_error(test_rul, y_hat_val_split)))
        score_val.append(compute_s_score(test_rul, y_hat_val_split))
            
        
    #  append results
    d = {'RMSE' :np.mean(RMSE), 'std_RMSE' :np.std(RMSE),
         'S_score' :np.mean(score_val), 'std_S_score' :np.std(score_val),
         'MSE':np.mean(mse), 'std_MSE':np.std(mse),
         'nodes':str(nodes_per_layer), 'dropout':dropout, 
         'activation':activation, 'batch_size':batch_size, 'TW' : sequence_length,
         'time': training_time}

#     results = results.append(pd.DataFrame(d, index=[0]), ignore_index=True)
    results_pca001 = pd.concat([results_pca001, pd.DataFrame(d, index=[0])], ignore_index=True)
    results_pca001.to_csv('results/pca/fd001_nbcomp.csv')

[1.16218316e+01 2.22745151e+00 7.53919107e-01 7.60689125e-02
 5.80733626e-02 4.69453854e-02 4.15078334e-02 3.47383080e-02
 3.32981565e-02 2.89969632e-02 2.53785317e-02 1.84836056e-02
 1.69423744e-02 1.35008525e-02 3.59058504e-03] 
 Nb components:  3
(17731, 30, 3) (17731, 1) (100, 30, 3)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
[1.16218316e+01 2.22745151e+00 7.53919107e-01 7.60689125e-02
 5.80733626e-02 4.69453854e-02 4.15078334e-02 3.47383080e-02
 3.32981565e-02 2.89969632e-02 2.53785317e-02 1.84836056e-02
 1.69423744e-02 1.35008525e-02 3.59058504e-03] 
 Nb components:  3
(17731, 30, 3) (17731, 1) (100, 30, 3)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
[1.16218316e+01 2.22745151e+00 7.53919107e-01 7.60689125e-02
 5.80733626e-02 4.69453854e-02 4.15078334e-02 3.47383080e-02
 3.32981565e-02 2.89969632e-02 2.53785317e-02 1.84836056e-02
 1.69423744e-02 1.35008525e-02 3.59058504e-03] 
 Nb

In [20]:
# Nouvelle série de valeurs de explained_variance_
explained_variance_3 = np.array([
    1.16218316e+01, 2.22745151e+00, 7.53919107e-01, 7.60689125e-02,
    5.80733626e-02, 4.69453854e-02, 4.15078334e-02, 3.47383080e-02,
    3.32981565e-02, 2.89969632e-02, 2.53785317e-02, 1.84836056e-02,
    1.69423744e-02, 1.35008525e-02, 3.59058504e-03
])

# Pourcentage de variance expliquée par composante
explained_variance_ratio_3 = explained_variance_3 / explained_variance_3.sum()

# Variance cumulée
cumulative_variance_3 = np.cumsum(explained_variance_ratio_3)

# Mise en tableau
df3 = pd.DataFrame({
    "Composante": [f"PC{i+1}" for i in range(len(explained_variance_3))],
    "Variance expliquée": explained_variance_3,
    "% Variance expliquée": np.round(explained_variance_ratio_3 * 100, 2),
    "% Cumulé": np.round(cumulative_variance_3 * 100, 2)
})

# Affichage du tableau
display_dataframe_to_user("Variance expliquée par composante principale (3e série)", df3)


Variance expliquée par composante principale (3e série)


0
Loading ITables v2.4.4 from the internet...  (need help?)


In [32]:
results_pca001

Unnamed: 0,RMSE,std_RMSE,S_score,std_S_score,MSE,std_MSE,nodes,dropout,activation,batch_size,TW,time
0,27.127203,0.0,2121.673982,0.0,961.813843,0.0,[128],0.2,tanh,64,30,53.355826
1,24.321346,0.0,1482.404001,0.0,844.831177,0.0,[128],0.2,tanh,64,30,32.034118
2,24.853285,0.0,1500.476944,0.0,744.628174,0.0,[128],0.2,tanh,64,30,29.953367
3,26.703668,0.0,1629.109454,0.0,852.641418,0.0,[128],0.2,tanh,64,30,45.899949
4,26.864737,0.0,1772.700158,0.0,903.630127,0.0,[128],0.2,tanh,64,30,35.293452
