In [1]:
from __future__ import print_function
import pandas as pd
import matplotlib.pyplot as plt

from lime.lime_tabular import RecurrentTabularExplainer
from tqdm import tqdm
import keras
# NOTE(review): later cells use names such as os, random, np, tf, Sequential, LSTM,
# Dropout, Dense, Adam, pdist, squareform, r2_score and mean_squared_error that are
# never imported explicitly in this notebook; presumably they leak in through the
# star imports below -- confirm, and prefer explicit imports.
# NOTE(review): a later cell calls `tqdm.tqdm(...)`, which cannot work with the
# `from tqdm import tqdm` binding above; presumably one of the star imports rebinds
# `tqdm` to the module -- verify before reordering these lines.
from sp_modif.model_function import *
from sp_modif.methods import *
from sp_modif.data_prep import *
from sp_modif.evaluator import *
from sp_modif.SHAP import *
from sp_modif.L2X import *
import warnings

# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')
# Sanity marker showing that the import cell ran to completion.
print("okay")


okay


In [2]:
SEED = 0


def set_seed(seed=SEED):
    """Seed every random number generator used by the notebook.

    Parameters
    ----------
    seed : int, optional
        Value forwarded to ``PYTHONHASHSEED``, ``random``, ``numpy.random`` and
        ``tf.random``. Defaults to the module-level ``SEED``.

    Notes
    -----
    The previous implementation ignored ``seed`` and always used the global
    ``SEED``, so ``set_seed(42)`` silently seeded with 0.
    ``PYTHONHASHSEED`` is only read by the interpreter at start-up; setting it
    here affects child processes, not the hash randomisation of this kernel.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

# Call the function to fix the seed
set_seed(SEED)

In [3]:
# Functions that generate explanations
# LIME explanations

def get_lime_explanation(data, e, L2X=False):
    """Explain every sample of ``data`` with LIME and tabulate the weights.

    Parameters
    ----------
    data : array-like with three axes
        One sample per entry along axis 0; ``data.shape[1] * data.shape[2]``
        gives the number of flattened features requested from LIME.
    e : callable
        Prediction function handed to ``explain_instance`` (the notebook passes
        ``model.predict``).
    L2X : bool, optional
        Unused. Kept so that this function has the same call signature as
        ``get_explainations``.

    Returns
    -------
    pandas.DataFrame
        One row per sample and one column per flattened feature, named
        ``"0"`` ... ``"n-1"``. A feature LIME did not report is left as NaN.

    Notes
    -----
    Relies on a module-level ``lime_explainer`` that must have been created
    before this function is called. Weights are read from
    ``explanation.local_exp[1]``.
    """
    num_features = data.shape[1] * data.shape[2]
    columns = [str(i) for i in range(num_features)]

    # Collect one {feature index -> weight} record per sample and build the frame
    # once, instead of growing it row by row through ``.loc``.
    records = []
    for row in range(data.shape[0]):
        explanation = lime_explainer.explain_instance(data[row],
                                                      e,
                                                      num_features=num_features)
        records.append({str(item[0]): item[1] for item in explanation.local_exp[1]})

    return pd.DataFrame(records, columns=columns)

# Explanations for the other explainers (objects exposing ``.explain``)
def get_explainations(data, e, L2X=False):
    """Explain every sample of ``data`` with ``e`` and tabulate the result.

    Parameters
    ----------
    data : array-like
        Samples along axis 0. Each sample is passed to ``e.explain`` as a
        one-element batch (``data[i:i + 1]``).
    e : object
        Explainer exposing ``explain(batch)``; the returned value must support
        ``.flatten()``.
    L2X : bool, optional
        When truthy, each one-element batch is reshaped to ``(1, -1)`` before
        being explained. Note that this parameter shadows the ``L2X`` name used
        elsewhere in the notebook to build the L2X explainer.

    Returns
    -------
    pandas.DataFrame
        One row per sample; columns are named ``"0"`` ... ``"k-1"`` where ``k``
        is the length of the first flattened explanation. Longer explanations
        are truncated to ``k`` values.

    Raises
    ------
    ValueError
        If a later explanation is shorter than the first one.

    Notes
    -----
    The previous version explained the first sample twice (once only to count
    the columns). Each sample is now explained exactly once, which matters for
    slow explainers; stochastic explainers will therefore consume their random
    stream differently than before.
    """
    def _explain(batch):
        # Single place for the L2X-specific flattening of the input batch.
        if L2X:
            batch = batch.reshape((batch.shape[0], -1))
        return e.explain(batch).flatten()

    n_rows = data.shape[0]
    if n_rows == 0:
        # Same behaviour as before for empty input: probe the (empty) first slice
        # to infer the column count and return an empty frame.
        num_columns = _explain(data[0:1]).shape[0]
        return pd.DataFrame(columns=[str(i) for i in range(num_columns)])

    explanations = [_explain(data[row:row + 1]) for row in range(n_rows)]
    num_columns = explanations[0].shape[0]
    for row, explanation in enumerate(explanations):
        if explanation.shape[0] < num_columns:
            raise ValueError(
                "explanation for row %d has %d values, expected at least %d"
                % (row, explanation.shape[0], num_columns))

    columns = [str(i) for i in range(num_columns)]
    return pd.DataFrame([explanation[:num_columns] for explanation in explanations],
                        columns=columns)

In [4]:
# --- Data loading ---
train, test, y_test = prepare_data('FD003.txt')
print(train.shape, test.shape, y_test.shape)

# Full list of the 21 sensor channels.
sensor_names = [
    'T20', 'T24', 'T30', 'T50', 'P20', 'P15', 'P30', 'Nf', 'Nc', 'epr', 'Ps30', 'phi',
    'NRf', 'NRc', 'BPR', 'farB', 'htBleed', 'Nf_dmd', 'PCNfR_dmd', 'W31', 'W32',
]

# The 14 sensors kept as model inputs (selection based on main_notebook).
remaining_sensors = [
    'T24', 'T30', 'T50', 'P30', 'Nf', 'Nc', 'Ps30', 'phi',
    'NRf', 'NRc', 'BPR', 'htBleed', 'W31', 'W32',
]

# Everything that is not kept gets dropped, in the original sensor order.
drop_sensors = list(filter(lambda name: name not in remaining_sensors, sensor_names))

(24720, 27) (16596, 26) (100, 1)


In [5]:
def model003(input_shape, weights_file, nb_layers=None):
    """Build and compile an LSTM regressor with 1 to 4 recurrent layers.

    Parameters
    ----------
    input_shape : tuple
        Passed as ``input_shape`` to the first LSTM layer.
    weights_file : str
        Path handed to ``model.save_weights``. The weights written are the
        freshly initialised ones, since nothing is trained in this function.
    nb_layers : int
        Number of stacked LSTM layers; must be 1, 2, 3 or 4.

    Returns
    -------
    The compiled ``Sequential`` model (MSE loss, Adam with learning rate 0.001).

    Raises
    ------
    ValueError
        If ``nb_layers`` is not 1, 2, 3 or 4 (previously this surfaced as an
        ``UnboundLocalError`` on the ``model.compile`` line).

    Notes
    -----
    In the 3-layer variant the dropout between the second and third LSTM was
    instantiated but never added to the model, so it had no effect. It is now
    added, mirroring the placement used by the 4-layer variant.
    """
    if nb_layers not in (1, 2, 3, 4):
        raise ValueError("nb_layers must be 1, 2, 3 or 4, got %r" % (nb_layers,))

    nodes_per_layer = 32
    activation_value = 'tanh'
    dropout = 0.4

    model = Sequential()

    if nb_layers == 1:
        model.add(LSTM(nodes_per_layer, activation=activation_value, input_shape=input_shape))
        model.add(Dropout(dropout))

    elif nb_layers == 2:
        model.add(LSTM(32, activation='tanh', input_shape=input_shape, return_sequences=True))
        model.add(LSTM(64, activation='tanh'))
        model.add(Dropout(dropout))
        model.add(Dense(256, activation='relu'))
        model.add(Dropout(dropout))

    elif nb_layers == 3:
        model.add(LSTM(32, activation='tanh', input_shape=input_shape, return_sequences=True))
        model.add(LSTM(64, activation='tanh', return_sequences=True))
        # Bug fix: this layer used to be created without being added to the model.
        model.add(Dropout(dropout))
        model.add(LSTM(32, activation='tanh'))
        model.add(Dense(256, activation='relu'))
        model.add(Dropout(dropout))

    else:  # nb_layers == 4
        model.add(LSTM(32, activation='tanh', input_shape=input_shape, return_sequences=True))
        model.add(LSTM(64, activation='tanh', return_sequences=True))
        model.add(Dropout(0.2))
        model.add(LSTM(64, activation='tanh', return_sequences=True))
        model.add(LSTM(32, activation='tanh'))
        model.add(Dense(256, activation='relu'))
        model.add(Dropout(dropout))

    # Single regression output shared by every variant.
    model.add(Dense(1))

    model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001))
    # Snapshot of the initial weights.
    model.save_weights(weights_file)

    return model

In [6]:
# --- Experiment configuration ---
sequence_length = 30   # window length used by the sequence generators and the model input
alpha = 0.3            # forwarded to prep_data
upper = 125            # forwarded to rul_piecewise_fct

# NOTE: model003 hard-codes its own learning rate and dropout; within this cell the
# values below are only defined here, and dropout/activation/batch_size are also
# logged in the results table by the training loop.
learning_rate_ = 0.001
dropout = 0.4
activation = 'tanh'
epochs = 20
batch_size = 64

# Accumulators filled by the training / evaluation loop.
results = pd.DataFrame()
df_metrics = pd.DataFrame()

# --- Data preparation ---
train = rul_piecewise_fct(train, upper)

X_train_interim, X_test_interim = prep_data(
    train, test, drop_sensors, remaining_sensors, alpha
)

# Training sequences and their labels.
train_array = gen_data_wrapper(X_train_interim, sequence_length, remaining_sensors)
label_array = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'])

# Test sequences: one list of sequences per unit, concatenated afterwards.
test_gen = (
    list(
        gen_test_data(
            X_test_interim[X_test_interim['Unit'] == unit_nr],
            sequence_length,
            remaining_sensors,
            -99.,
        )
    )
    for unit_nr in X_test_interim['Unit'].unique()
)

test_array = np.concatenate(list(test_gen)).astype(np.float32)
test_rul = rul_piecewise_fct(y_test, upper)
print(train_array.shape, label_array.shape, test_array.shape)



# The notebook imports `tqdm` as a callable, so `tqdm.tqdm(...)` only resolves if a
# star import happens to rebind the name. Import the progress bar under its own name.
from tqdm import tqdm as progress_bar


def _score_explainer(explain_fn, e, X_dist, explanation_dist, samples, targets,
                     predict_fn, run_settings, explainer_name, model_id,
                     explain_kwargs, perturbation_kwargs):
    """Compute the explanation-quality metrics for one explainer.

    Parameters
    ----------
    explain_fn : callable
        ``get_lime_explanation`` or ``get_explainations``.
    e : object
        Explainer handle forwarded to ``explain_fn`` by the metric functions
        (a prediction function for LIME, an explainer object otherwise).
    X_dist, explanation_dist : pandas.DataFrame
        Pairwise distance matrices of the samples and of their explanations.
    samples, targets : array-like
        Sampled windows and their labels.
    predict_fn : callable
        The model's prediction function.
    run_settings : dict
        Extra key/value pairs copied into the record (alpha, upper, ...).
    explainer_name, model_id
        Stored under the ``'explainer'`` and ``'model'`` keys.
    explain_kwargs : dict
        Extra keyword arguments for coherence / selectivity / acumen /
        stability_Velmurugan (``{}`` or ``{'L2X': True}``).
    perturbation_kwargs : dict
        Extra keyword arguments for fidelity / instability.

    Returns
    -------
    dict
        One metrics record; key order matches the original per-explainer code.
    """
    scores = {}
    scores['identity'] = identity(X_dist, explanation_dist)
    scores['separability'] = separability(X_dist, explanation_dist)
    scores['stability'] = stability(X_dist, explanation_dist)
    scores['coherence'], scores['completness'], scores['congruence'] = coherence(
        model=predict_fn, explainer=explain_fn, samples=samples,
        targets=targets, e=e, **explain_kwargs)
    scores['selectivity'] = selectivity(model=predict_fn, explainer=explain_fn,
                                        samples=samples, e_x=e, **explain_kwargs)
    scores['accumen'] = acumen(explain_fn, samples, e=e, **explain_kwargs)
    scores['Verm_stability'] = stability_Velmurugan(explain_fn, samples, e=e,
                                                    top_features=200, **explain_kwargs)
    scores['fidelity'], scores['sparsity'] = fidelity(
        model=predict_fn, explainer=explain_fn, samples=samples,
        e=e, **perturbation_kwargs)
    scores['instability'] = instability(
        model=predict_fn, explainer=explain_fn, samples=samples,
        e=e, **perturbation_kwargs)

    scores.update(run_settings)
    scores['explainer'] = explainer_name
    scores['model'] = model_id
    return scores


for md in progress_bar(range(4, 5)):
    # Model creation
    weights_file = "fd003_result/" + str(md) + 'lstm_hyper_parameter_weights.h5'
    input_shape = (sequence_length, len(remaining_sensors))
    model = model003(input_shape=input_shape, weights_file=weights_file, nb_layers=md)

    R2_val = []
    RMSE = []
    score_val = []
    mse = []
    # Model fitting
    # NOTE(review): the test split doubles as validation data, so `val_loss` is a
    # test-set loss.
    with tf.device('/device:GPU:0'):
        history = model.fit(train_array, label_array,
                            validation_data=(test_array, test_rul),
                            epochs=epochs,
                            batch_size=batch_size,
                            verbose=1)
        mse.append(history.history['val_loss'][-1])

        y_hat_val_split = model.predict(test_array)
        R2_val.append(r2_score(test_rul, y_hat_val_split))
        RMSE.append(np.sqrt(mean_squared_error(test_rul, y_hat_val_split)))
        score_val.append(compute_s_score(test_rul, y_hat_val_split))

    # Append the performance of this model to the results table.
    # Each list holds a single run, so every std_* column is 0.
    d = {'RMSE': np.mean(RMSE), 'std_RMSE': np.std(RMSE),
         'S_score': np.mean(score_val), 'std_S_score': np.std(score_val),
         'MSE': np.mean(mse), 'std_MSE': np.std(mse),
         'dropout': dropout, 'activation': activation, 'batch_size': batch_size,
         'TW': sequence_length, 'alpha': alpha, 'upper': upper}
    results = pd.concat([results, pd.DataFrame(d, index=[0])], ignore_index=True)
    results.to_csv('fd003_result/nblayers/performance4.csv')

    run_settings = {'alpha': alpha, 'upper': upper, 'sequence_length': sequence_length}
    metric_frames = []

    with tf.device('/device:GPU:0'):
        for rd in range(5):
            # Sampling: draw 5 test windows at random, reproducibly per repetition.
            n_individus = test_array.shape[0]
            np.random.seed(rd)
            indices_choisis = np.random.choice(n_individus, size=5, replace=False)

            # Select the data matching the drawn indices.
            test_array_sampling = test_array[indices_choisis, :, :]
            label_array_sampling = y_test.values[indices_choisis, :]

            # Show the shapes of the selected data.
            print(test_array_sampling.shape, label_array_sampling.shape)

            # Pairwise distance matrix of the flattened samples.
            X_dist = pd.DataFrame(squareform(pdist(
                test_array_sampling.reshape((test_array_sampling.shape[0], -1)))))

            # ---- LIME ----
            # `lime_explainer` must stay a module-level name: get_lime_explanation
            # reads it as a global.
            # NOTE(review): training_labels comes from the training split while the
            # training data passed here is the test array -- confirm this is intended.
            lime_explainer = RecurrentTabularExplainer(test_array, training_labels=label_array,
                                                       feature_names=remaining_sensors,
                                                       mode='regression',
                                                       )
            lime_values = get_lime_explanation(test_array_sampling, e=model.predict)
            Lime_dist = pd.DataFrame(squareform(pdist(lime_values)))  # LIME explanation distances

            # Fix: coherence used to receive the full training label array as
            # `targets` for the 5 sampled test windows; it now receives the labels
            # of those windows, as the L2X branch already did.
            list_metrics_lime = _score_explainer(
                get_lime_explanation, model.predict, X_dist, Lime_dist,
                test_array_sampling, label_array_sampling, model.predict,
                run_settings, 'lime', md,
                explain_kwargs={}, perturbation_kwargs={'L2X': True})
            metric_frames.append(pd.DataFrame([list_metrics_lime]))

            # ---- SHAP ----
            e = KernelSHAP(model)
            shapvalues = get_explainations(test_array_sampling, e)
            shap_dist = pd.DataFrame(squareform(pdist(shapvalues)))  # SHAP explanation distances

            # Same `targets` fix as for LIME.
            list_metrics_shap = _score_explainer(
                get_explainations, e, X_dist, shap_dist,
                test_array_sampling, label_array_sampling, model.predict,
                run_settings, 'shap', md,
                explain_kwargs={}, perturbation_kwargs={})
            metric_frames.append(pd.DataFrame([list_metrics_shap]))

            # ---- L2X ----
            e = L2X(model.predict, test_array_sampling)
            l2xvalues = get_explainations(test_array_sampling, e, L2X=True)
            l2x_dist = pd.DataFrame(squareform(pdist(l2xvalues)))  # L2X explanation distances

            list_metrics_l2x = _score_explainer(
                get_explainations, e, X_dist, l2x_dist,
                test_array_sampling, label_array_sampling, model.predict,
                run_settings, 'l2x', md,
                explain_kwargs={'L2X': True}, perturbation_kwargs={'L2X': True})
            metric_frames.append(pd.DataFrame([list_metrics_l2x]))

        # One concatenation per model instead of one per explainer; each record keeps
        # its own index 0, exactly as with the incremental concatenation.
        df_metrics = pd.concat([df_metrics] + metric_frames)
        df_metrics.to_csv('fd003_result/nblayers/quality_by_layer4.csv')

(21820, 30, 14) (21820, 1) (100, 30, 14)


  0%|          | 0/1 [00:00<?, ?it/s]


Epoch 1/20

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
(5, 30, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10


16
16
16
16
16
11
11
11
11
11
10
10
10
10
10
12
12
12
12
12
13
13
13
13
13
(5, 30, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
16
16
16
16
16
14
14
14
14
14
16
16
16
16
16
21
21
21
21
21
15
15
15
15
15
(5, 30, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
13
13
13
13
13
10
10
10
10
10
15
15
15
15
15
15
15
15
15
15
12
12
12
12
12
(5, 30, 14) (5, 1)
10
10
10
10
10

100%|██████████| 1/1 [3:05:48<00:00, 11148.15s/it]


In [7]:
# Display the performance table accumulated by the training loop.
results

Unnamed: 0,RMSE,std_RMSE,S_score,std_S_score,MSE,std_MSE,dropout,activation,batch_size,TW,alpha,upper
0,14.165535,0.0,530.435644,0.0,200.662369,0.0,0.4,tanh,64,30,0.3,125
