In [1]:
from __future__ import print_function
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import matplotlib.pyplot as plt

from lime.lime_tabular import RecurrentTabularExplainer
from tqdm import tqdm
import keras
from sp_modif.model_function import *
from sp_modif.methods import *
from sp_modif.data_prep import *
from sp_modif.evaluator import *
from sp_modif.SHAP import *
from sp_modif.L2X import *

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import random
# import keras
import math

from sklearn.metrics import mean_squared_error, r2_score 
from sklearn.model_selection import GroupKFold
from sklearn import preprocessing
from keras import backend as K
from sklearn.preprocessing import MinMaxScaler , StandardScaler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Activation, GRU
from scipy import optimize
from methods import *
from tensorflow.keras import optimizers
%matplotlib inline




In [2]:
SEED = 0
def set_seed(seed=SEED):
    """Seed every random number generator used by the notebook.

    Sets ``PYTHONHASHSEED`` and seeds Python's ``random`` module, NumPy's
    global generator and TensorFlow.

    Parameters
    ----------
    seed : int, optional
        Seed value to apply. Defaults to the module-level ``SEED``.
    """
    # Function-scope import: the notebook never imports ``os`` explicitly.
    import os

    # Use the argument, not the global constant, so set_seed(123) really
    # seeds with 123.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

# Call the function to fix the seed
set_seed(SEED)

In [3]:
# Functions that generate explanations
# Explanation function for LIME

def get_lime_explanation(data, e, L2X=False) :
    """Explain every sample of ``data`` with LIME and tabulate the weights.

    This function reads the module-level ``lime_explainer``; it must be
    created before the first call.

    Parameters
    ----------
    data : array with three axes
        One explanation is produced for each entry along the first axis.
        ``shape[1] * shape[2]`` gives the number of features requested
        from LIME and the number of output columns.
    e : callable
        Prediction function handed to ``lime_explainer.explain_instance``.
    L2X : bool, optional
        Unused here; accepted so the signature matches ``get_explainations``.

    Returns
    -------
    pandas.DataFrame
        One row per sample, columns named ``'0'`` .. ``str(n_features - 1)``.
        A feature for which LIME returned no weight is left as NaN.
    """
    n_features = data.shape[1] * data.shape[2]
    column_names = [str(i) for i in range(n_features)]

    # Collect one dict per sample and build the frame once at the end,
    # instead of growing a DataFrame row by row.
    records = []
    for row in range(data.shape[0]) :
        explanation = lime_explainer.explain_instance(data[row],
                                                      e,
                                                      num_features=n_features)
        # Entry 1 of local_exp is read as (feature_index, weight) pairs.
        lime_values = explanation.local_exp[1]
        records.append({str(tup[0]): tup[1] for tup in lime_values})

    return pd.DataFrame(records, columns=column_names)

# Explanation function for the other explainers (SHAP, L2X)
def get_explainations(data, e, L2X = False) :
    """Explain every sample of ``data`` with ``e`` and tabulate the result.

    Each sample is passed to ``e.explain`` as a batch of one
    (``data[row:row+1]``). The returned array is flattened into one row.

    Parameters
    ----------
    data : array
        Samples to explain, one per entry along the first axis.
    e : object
        Explainer exposing ``explain(batch)`` whose result has ``flatten()``.
    L2X : bool, optional
        When true, each one-sample batch is reshaped to
        ``(1, -1)`` before it is handed to ``e.explain``.

    Returns
    -------
    pandas.DataFrame
        One row per sample, columns named ``'0'`` .. ``str(n - 1)`` where
        ``n`` is the flattened length of the first explanation. An empty
        frame is returned when ``data`` holds no sample.
    """
    records = []
    num_columns = None

    for row in range(data.shape[0]) :
        X_row = data[row:row+1]
        if L2X :
            X_row = X_row.reshape((X_row.shape[0], -1))
        explanation = e.explain(X_row).flatten()

        # The column count comes from the first real explanation; no extra
        # probe call is made just to measure it.
        if num_columns is None :
            num_columns = explanation.shape[0]

        records.append({str(i): explanation[i] for i in range(num_columns)})

    column_names = [str(i) for i in range(num_columns or 0)]
    # Build the frame once rather than appending row by row.
    return pd.DataFrame(records, columns=column_names)

### FD001

In [4]:
# Read the FD001 dataset and report the shape of each part
train, test, y_test = prepare_data('FD001.txt')
print(train.shape, test.shape, y_test.shape)

# Every sensor channel present in the raw files
sensor_names = [
    'T20', 'T24', 'T30', 'T50', 'P20', 'P15', 'P30',
    'Nf', 'Nc', 'epr', 'Ps30', 'phi', 'NRf', 'NRc',
    'BPR', 'farB', 'htBleed', 'Nf_dmd', 'PCNfR_dmd', 'W31', 'W32',
]

# Sensors kept as model inputs (selection based on main_notebook)
remaining_sensors = [
    'T24', 'T30', 'T50', 'P30', 'Nf', 'Nc', 'Ps30',
    'phi', 'NRf', 'NRc', 'BPR', 'htBleed', 'W31', 'W32',
]

# Whatever is not kept gets dropped, in the order of sensor_names
drop_sensors = [name for name in sensor_names if name not in remaining_sensors]

(20631, 27) (13096, 26) (100, 1)


In [5]:
def model001(input_shape, weights_file, nodes_per_layer=256, activation_value='tanh',
             dropout=0.3, learning_rate=0.001):
    '''
    Build and compile the single-layer LSTM regressor used for FD001.

    The network is LSTM -> Dropout -> Dense(1), compiled with an MSE loss
    and the legacy Adam optimizer. The freshly initialised weights are
    written to ``weights_file`` before the model is returned.

    Parameters
    ----------
    input_shape : tuple
        Passed to the LSTM layer as ``input_shape``.
    weights_file : str
        Path handed to ``model.save_weights``.
    nodes_per_layer : int, optional
        Number of LSTM units (default 256).
    activation_value : str, optional
        LSTM activation (default 'tanh').
    dropout : float, optional
        Rate of the Dropout layer (default 0.3).
    learning_rate : float, optional
        Adam learning rate (default 0.001).

    Returns
    -------
    The compiled, untrained model. Training options such as the batch size
    are chosen by the caller in ``model.fit``.
    '''
    model = Sequential()
    model.add(LSTM(nodes_per_layer, activation=activation_value, input_shape=input_shape))
    model.add(Dropout(dropout))
    model.add(Dense(1))
    model.compile(loss='mse',
                  optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate))
    model.save_weights(weights_file)

    return model

In [6]:
from tqdm import tqdm

# ---------------------------------------------------------------------------
# Hyper-parameter grid
# ---------------------------------------------------------------------------
sequence_lengths = [30, 35, 38, 39, 40]
list_alpha = [0.2, 0.3, 0.5]
list_upper = [120, 125, 130]
metrics_names = ['identity', 'separability', 'stability', 'coherence', 'completness', 'congruence',
                 'selectivity', 'accumen', 'Verm_stability', 'fidelity', 'sparsity', 'instability']

# Every (sequence_length, alpha, upper) combination; upper varies fastest.
all_list = [(sq, alpha, upper)
            for sq in sequence_lengths
            for alpha in list_alpha
            for upper in list_upper]

# Only this slice of the grid is processed by this run of the cell.
GRID_START, GRID_STOP = 37, 40
# Number of random sub-samples explained per grid point, and their size.
N_SAMPLING_ROUNDS = 5
N_EXPLAINED_SAMPLES = 5


def _explainer_quality_row(explain_fn, explainer_arg, explanations, X_dist,
                           samples, targets, predict_fn, is_l2x=False):
    """Compute every quality metric for one explainer on one sample set.

    ``explain_fn`` is the notebook function that produces explanations
    (``get_lime_explanation`` or ``get_explainations``) and ``explainer_arg``
    is the object it receives as ``e``. ``explanations`` are the explanations
    already computed for ``samples``. ``L2X=True`` is forwarded to the metric
    functions only when ``is_l2x`` is true.
    """
    extra = {'L2X': True} if is_l2x else {}
    # Pairwise distances between the explanations of the samples
    expl_dist = pd.DataFrame(squareform(pdist(explanations)))

    row = {}
    row['identity'] = identity(X_dist, expl_dist)
    row['separability'] = separability(X_dist, expl_dist)
    row['stability'] = stability(X_dist, expl_dist)
    row['coherence'], row['completness'], row['congruence'] = coherence(
        model=predict_fn, explainer=explain_fn,
        samples=samples, targets=targets, e=explainer_arg, **extra)
    row['selectivity'] = selectivity(model=predict_fn, explainer=explain_fn,
                                     samples=samples, e_x=explainer_arg, **extra)
    row['accumen'] = acumen(explain_fn, samples, e=explainer_arg, **extra)
    row['Verm_stability'] = stability_Velmurugan(explain_fn, samples,
                                                 e=explainer_arg, top_features=200, **extra)
    row['fidelity'], row['sparsity'] = fidelity(model=predict_fn, explainer=explain_fn,
                                                samples=samples, e=explainer_arg, **extra)
    row['instability'] = instability(model=predict_fn, explainer=explain_fn,
                                     samples=samples, e=explainer_arg, **extra)
    return row


# Rows are collected in lists and turned into DataFrames when saved.
performance_records = []
quality_records = []
results = pd.DataFrame()
df_metrics = pd.DataFrame()

for elm in tqdm(all_list[GRID_START:GRID_STOP]):

    # Modeling
    print(elm)
    print('...')
    sequence_length, alpha, upper = elm

    dropout = 0.3
    activation = 'tanh'
    epochs = 20
    batch_size = 64

    # Clip copies so the raw frames are never rebound or modified: otherwise
    # the cap applied for one grid point could carry over to the next one.
    train_capped = rul_piecewise_fct(train.copy(), upper)
    test_rul = rul_piecewise_fct(y_test.copy(), upper)
    # Clipped test targets as an (n, 1) array, used for sampling and for LIME
    test_labels = np.asarray(test_rul).reshape(-1, 1)

    # Model creation
    weights_file = "fd001_result/" + str(alpha) + '1lstm_hyper_parameter_weights.h5'
    input_shape = (sequence_length, len(remaining_sensors))
    model = model001(input_shape, weights_file)

    # Data preparation
    X_train_interim, X_test_interim = prep_data(train_capped, test, drop_sensors, remaining_sensors, alpha)

    # Create the train and test sequences
    train_array = gen_data_wrapper(X_train_interim, sequence_length, remaining_sensors)
    label_array = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'])

    test_gen = (list(gen_test_data(X_test_interim[X_test_interim['Unit'] == unit_nr],
                                   sequence_length, remaining_sensors, -99.))
                for unit_nr in X_test_interim['Unit'].unique())
    test_array = np.concatenate(list(test_gen)).astype(np.float32)
    print(train_array.shape, label_array.shape, test_array.shape)

    # Kept as lists so that mean/std below have the same form as the saved columns
    R2_val = []
    RMSE = []
    score_val = []
    mse = []

    # Model fitting
    with tf.device('/device:GPU:0'):
        history = model.fit(train_array, label_array,
                            validation_data=(test_array, test_rul),
                            epochs=epochs,
                            batch_size=batch_size,
                            verbose=0)
        mse.append(history.history['val_loss'][-1])

        y_hat_val_split = model.predict(test_array)
        R2_val.append(r2_score(test_rul, y_hat_val_split))
        RMSE.append(np.sqrt(mean_squared_error(test_rul, y_hat_val_split)))
        score_val.append(compute_s_score(test_rul, y_hat_val_split))

    # Append the performance results and checkpoint them to disk
    d = {'RMSE': np.mean(RMSE), 'std_RMSE': np.std(RMSE),
         'S_score': np.mean(score_val), 'std_S_score': np.std(score_val),
         'MSE': np.mean(mse), 'std_MSE': np.std(mse),
         'dropout': dropout,
         'activation': activation, 'batch_size': batch_size}
    performance_records.append(d)
    results = pd.DataFrame(performance_records)
    results.to_csv('fd001_result/performance_by_layer37.csv')

    # Get the explanations and compute the quality scores
    with tf.device('/device:GPU:0'):
        for rd in range(N_SAMPLING_ROUNDS):

            # Sampling: randomly pick the indices of the samples to explain
            n_individus = test_array.shape[0]
            np.random.seed(rd)
            indices_choisis = np.random.choice(n_individus, size=N_EXPLAINED_SAMPLES, replace=False)

            # Select the data and the (clipped) targets for the chosen indices
            test_array_sampling = test_array[indices_choisis, :, :]
            label_array_sampling = test_labels[indices_choisis, :]

            # Show the dimensions of the selected data
            print(test_array_sampling.shape, label_array_sampling.shape)

            # Pairwise distance matrix between the flattened samples
            X_dist = pd.DataFrame(squareform(pdist(
                test_array_sampling.reshape((test_array_sampling.shape[0], -1)))))

            context = {'alpha': alpha, 'upper': upper, 'sequence_length': sequence_length}

            # ---- LIME ----
            # Must stay a module-level name: get_lime_explanation reads it.
            # The labels passed here belong to the same rows as test_array.
            lime_explainer = RecurrentTabularExplainer(test_array, training_labels=test_labels,
                                                       feature_names=remaining_sensors,
                                                       mode='regression',
                                                       )
            lime_values = get_lime_explanation(test_array_sampling, e=model.predict)
            list_metrics_lime = _explainer_quality_row(
                get_lime_explanation, model.predict, lime_values, X_dist,
                test_array_sampling, label_array_sampling, model.predict)
            list_metrics_lime.update(context)
            list_metrics_lime['explainer'] = 'lime'
            quality_records.append(list_metrics_lime)

            # ---- SHAP ----
            shap_explainer = KernelSHAP(model)
            shapvalues = get_explainations(test_array_sampling, shap_explainer)
            list_metrics_shap = _explainer_quality_row(
                get_explainations, shap_explainer, shapvalues, X_dist,
                test_array_sampling, label_array_sampling, model.predict)
            list_metrics_shap.update(context)
            list_metrics_shap['explainer'] = 'shap'
            quality_records.append(list_metrics_shap)

            # ---- L2X ----
            l2x_explainer = L2X(model.predict, test_array_sampling)
            l2xvalues = get_explainations(test_array_sampling, l2x_explainer, L2X=True)
            list_metrics_l2x = _explainer_quality_row(
                get_explainations, l2x_explainer, l2xvalues, X_dist,
                test_array_sampling, label_array_sampling, model.predict,
                is_l2x=True)
            list_metrics_l2x.update(context)
            list_metrics_l2x['explainer'] = 'l2x'
            quality_records.append(list_metrics_l2x)

    # Checkpoint the quality metrics after every grid point
    df_metrics = pd.DataFrame(quality_records)
    df_metrics.to_csv('fd001_result/quality_by_layer37.csv')

  0%|          | 0/3 [00:00<?, ?it/s]

(40, 0.2, 125)
...

(16731, 40, 14) (16731, 1) (100, 40, 14)

(5, 40, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
13
13
13
13
13
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10


15
15
15
15
15
17
17
17
17
17
14
14
14
14
14
12
12
12
12
12
14
14
14
14
14
(5, 40, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
14
14
14
14
14
18
18
18
18
18
11
11
11
11
11
11
11
11
11
11
12
12
12
12
12
(5, 40, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
19
19
19
19
19
21
21
21
21
21
20
20
20
20
20
16
16
16
16
16
15
15
15
15
15
(5, 40, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
13
13
13
13
13
10
10
10
10
10
10
10
10
10
10
14
14
14
14
14
13
13
13
13
13
15
15

 33%|███▎      | 1/3 [2:42:15<5:24:30, 9735.35s/it]

(40, 0.2, 130)
...
(16731, 40, 14) (16731, 1) (100, 40, 14)
(5, 40, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
12
12
12
12
12
11
11
11
11
11
10
10
10
10
10
13
13
13
13
13
(5, 40, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
12
12
12
12
12
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
17
17
17
17
17
13
13
13
13
13
18
18
18
18
18
18
18
18
18
18
15
15
15
15
15
(5, 40, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
18
18
18
18
18
10
10
10
10
10
10
10
10
10
10
11
11
11
11
11
(5, 40, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
14
14
14
14
14
10
10
10
10
10
10
10
10
10
10
12
12
12
12
12
21
21
21


 67%|██████▋   | 2/3 [5:34:27<2:48:06, 10086.44s/it]

(40, 0.3, 120)
...
(16731, 40, 14) (16731, 1) (100, 40, 14)
(5, 40, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
15
15
15
15
15
17
17
17
17
17
19
19
19
19
19
16
16
16
16
16
18
18
18
18
18
(5, 40, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
17
17
17
17
17
16
16
16
16
16
18
18
18
18
18
14
14
14
14
14
11
11
11
11
11
(5, 40, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
17
17
17
17
17
14
14
14
14
14
18
18
18
18
18
18
18
18
18
18
11
11
11
11
11
(5, 40, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
12
12
12
12
12
10
10
10
10
10
13
13
13
13
13
10
10
10
10
10
11
11
11


100%|██████████| 3/3 [8:23:27<00:00, 10069.03s/it]  


In [7]:
# (40, 0.2, 120)