In [1]:
from __future__ import print_function
import pandas as pd
import matplotlib.pyplot as plt

from lime.lime_tabular import RecurrentTabularExplainer
from tqdm import tqdm
import keras
from sp_modif.model_function import *
from sp_modif.methods import *
from sp_modif.data_prep import *
from sp_modif.evaluator import *
from sp_modif.SHAP import *
from sp_modif.L2X import *
import warnings

# Suppress every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation / FutureWarning
# messages raised by the libraries used below - consider narrowing it.
warnings.filterwarnings('ignore')
# Visual confirmation that the import cell ran to completion.
print("okay")


okay


In [2]:
# Helpers that generate explanations
# LIME explanations

def get_lime_explanation(data, e, L2X=False) :
    """Explain every sample in ``data`` with LIME and collect the weights.

    The function reads the module-level name ``lime_explainer``; it must be
    bound (to an object exposing ``explain_instance``) before the call.

    Parameters
    ----------
    data : 3-D array-like
        Indexed as ``data[row]``; each sample contributes
        ``data.shape[1] * data.shape[2]`` features.
    e : callable
        Prediction function handed to ``lime_explainer.explain_instance``
        (e.g. ``model.predict``).
    L2X : bool, optional
        Unused here; presumably kept so this function can be called with the
        same signature as ``get_explainations``.

    Returns
    -------
    pandas.DataFrame
        One row per sample, columns ``"0" .. "n_features - 1"``. A feature
        that LIME did not report for a sample is left as NaN.
    """
    n_features = data.shape[1] * data.shape[2]
    columns = [str(i) for i in range(n_features)]

    # Collect one {column name: weight} record per sample and build the frame
    # once at the end: appending with df.loc[len(df)] re-allocates the frame
    # on every row and leaves the columns with object dtype.
    records = []
    for row in range(data.shape[0]):
        explanation = lime_explainer.explain_instance(data[row],
                                                      e,
                                                      num_features=n_features)
        # local_exp[1] is read as a sequence of (feature index, weight) pairs.
        records.append({str(tup[0]): tup[1] for tup in explanation.local_exp[1]})

    return pd.DataFrame(records, columns=columns)

# Explanations for the other explainers (KernelSHAP, L2X)
def get_explainations(data, e, L2X = False) :
    """Explain every sample in ``data`` with ``e`` and tabulate the results.

    Parameters
    ----------
    data : array-like
        Sliced one sample at a time as ``data[row:row + 1]`` so the leading
        (batch) axis is kept.
    e : object
        Explainer exposing ``explain(batch)``; the returned value must
        provide ``flatten()``.
    L2X : bool, optional
        When true, each one-sample batch is reshaped to ``(1, -1)`` before it
        is handed to ``e.explain``.

    Returns
    -------
    pandas.DataFrame
        One row per sample, columns ``"0" .. "k - 1"`` where ``k`` is the
        length of the first flattened explanation. Longer explanations are
        truncated to ``k`` values.

    Raises
    ------
    IndexError
        If a later explanation is shorter than the first one.
    """
    rows = []
    num_columns = None

    for row in range(data.shape[0]):
        sample = data[row:row + 1]
        if L2X:
            sample = sample.reshape((sample.shape[0], -1))
        flat = e.explain(sample).flatten()

        # The first explanation fixes the number of columns; this replaces a
        # separate probe call that explained row 0 twice.
        if num_columns is None:
            num_columns = len(flat)
        elif len(flat) < num_columns:
            raise IndexError(
                "explanation for row %d has %d values, expected at least %d"
                % (row, len(flat), num_columns)
            )
        rows.append(list(flat[:num_columns]))

    columns = [str(i) for i in range(num_columns or 0)]
    # Build the frame once instead of appending row by row.
    return pd.DataFrame(rows, columns=columns)

In [7]:
# Data preparation
# Data loading: prepare_data returns the train frame, the test frame and the
# test targets for the given file.
train, test, y_test = prepare_data('FD004.txt')
print(train.shape, test.shape, y_test.shape)

# Cap the training RUL at 125. The result is assigned back explicitly:
# train['RUL'].clip(..., inplace=True) is a chained in-place call, which
# pandas warns about and which leaves `train` unchanged under Copy-on-Write.
train['RUL'] = train['RUL'].clip(upper=125)

# Every sensor column name.
sensor_names = ['T20','T24','T30','T50','P20','P15','P30','Nf','Nc','epr','Ps30','phi',
                'NRf','NRc','BPR','farB','htBleed','Nf_dmd','PCNfR_dmd','W31','W32']

# Sensors kept as model inputs (selection based on main_notebook).
remaining_sensors = ['T24','T30','T50','P30','Nf','Nc','Ps30','phi',
                'NRf','NRc','BPR','htBleed','W31','W32']

# Sensors to drop: everything in sensor_names that is not kept.
drop_sensors = [element for element in sensor_names if element not in remaining_sensors]

(61249, 27) (41214, 26) (248, 1)


In [8]:
# Registry of candidate networks keyed by a short name. 'model_1l' stores the
# `model004` object as-is (it is not called here); the other entries store the
# return values of the create_model*C004() factories.
models = dict(
    model_1l=model004,
    model_2l=create_model2C004(),
    model_3l=create_model3C004(),
    model_4l=create_model4C004(),
)

In [9]:
from tqdm import tqdm
# Smoke test: walk the model registry behind a progress bar and echo only the
# 'model_1l' key.
for elm in tqdm(models):
    if elm != 'model_1l':
        continue
    print(elm)

100%|██████████| 4/4 [00:00<?, ?it/s]

model_1l





In [10]:
# Modeling
# Fixed hyperparameters
upper = 125
sequence_length = 39
alpha = 0.2
epochs = 20
batch_size = 64

# Cap the training RUL at `upper`. Assigned back explicitly because
# train['RUL'].clip(..., inplace=True) is a chained in-place call that leaves
# `train` unchanged under pandas Copy-on-Write.
train['RUL'] = train['RUL'].clip(upper=upper)

mse = []
list_d = []
results = pd.DataFrame()
df_metrics = pd.DataFrame()

# Data preparation
X_train_interim, X_test_interim = prep_data(train, test, drop_sensors, remaining_sensors, alpha)

# Create the training sequences and their labels
train_array = gen_data_wrapper(X_train_interim, sequence_length,remaining_sensors)
label_array = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'])

# Build the test sequences unit by unit (-99. is presumably the padding/mask
# value expected by gen_test_data - TODO confirm), then stack them once.
# Nothing below modifies X_test_interim or test_array, so they are not rebuilt
# inside the loops.
test_gen = (list(gen_test_data(X_test_interim[X_test_interim['Unit']==unit_nr], sequence_length,remaining_sensors, -99.))
                for unit_nr in X_test_interim['Unit'].unique())

test_array = np.concatenate(list(test_gen)).astype(np.float32)

# NOTE(review): both to_csv calls below write into 'results/result004/';
# that directory must already exist or the write fails after training.

for model_name in tqdm(models.keys()):

    weights_file = 'lstm003_1C.h5'
    input_shape = (sequence_length, len(remaining_sensors))
    if model_name == 'model_1l':
        # NOTE(review): this notebook loads FD004 but builds the first model
        # with model003 / 'lstm003_1C.h5', while models['model_1l'] (model004)
        # is never used - confirm this is intended.
        model = model003(input_shape, weights_file)
    else:
        model = models[model_name]

    test_rul = rul_piecewise_fct(y_test,upper)
    print(train_array.shape, label_array.shape, test_array.shape)

    mse_val = []
    R2_val = []
    RMSE = []
    score_val = []
    gss = GroupShuffleSplit(n_splits=3, train_size=0.80, random_state=42)
    if model_name == 'model_1l':
        # Grouped 3-split validation: whole units go to either train or validation.
        with tf.device('/device:GPU:0'):
            for train_unit, val_unit in gss.split(X_train_interim['Unit'].unique(), groups=X_train_interim['Unit'].unique()):
                # gss yields positions; map them back to the actual unit ids
                train_unit = X_train_interim['Unit'].unique()[train_unit]
                train_split_array = gen_data_wrapper(X_train_interim, sequence_length, remaining_sensors, train_unit)
                train_split_label = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'], train_unit)

                val_unit = X_train_interim['Unit'].unique()[val_unit]
                val_split_array = gen_data_wrapper(X_train_interim, sequence_length, remaining_sensors, val_unit)
                val_split_label = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'], val_unit)

                # Restore the saved weights so every split starts from the same state
                model.load_weights(weights_file)

                history = model.fit(train_split_array, train_split_label,
                                    validation_data=(val_split_array, val_split_label),
                                    epochs=epochs,
                                    batch_size=batch_size,
                                    verbose=0)
                mse_val.append(history.history['val_loss'][-1])
                # Test-set metrics for this split
                y_hat_val_split = model.predict(test_array)
                R2_val.append(r2_score(test_rul, y_hat_val_split))
                RMSE.append(np.sqrt(mean_squared_error(test_rul, y_hat_val_split)))
                score_val.append(compute_s_score(test_rul, y_hat_val_split))

    else:
        with tf.device('/device:GPU:0'):
            # NOTE(review): the test set is passed as validation data here, so
            # 'RMSE_val' for these models is computed on the test set.
            history = model.fit(train_array, label_array,
                                validation_data=(test_array, test_rul),
                                epochs=epochs,
                                batch_size=batch_size,
                                verbose=0)
            mse_val.append(history.history['val_loss'][-1])
            y_hat_val_split = model.predict(test_array)
            R2_val.append(r2_score(test_rul, y_hat_val_split))
            RMSE.append(np.sqrt(mean_squared_error(test_rul, y_hat_val_split)))
            score_val.append(compute_s_score(test_rul, y_hat_val_split))

    # Performance summary for this model. 'RMSE_val' takes the square root of
    # the final validation loss (assumes the model was compiled with an MSE
    # loss - TODO confirm).
    d = {'RMSE_val':np.sqrt(mse_val),'RMSE_test': RMSE,'R2_test':np.mean(R2_val), 'Score_test':np.mean(score_val),
         'alpha':alpha, 'rul_piecewise':upper, 'sequence_length':sequence_length, 'model' : model_name}
    list_d.append(d)
    results = pd.concat([results, pd.DataFrame(d)], ignore_index=True)
    results.to_csv('results/result004/performance_by_layerl.csv')

    # Explanation quality: 5 repetitions, each on 5 randomly drawn test sequences
    for rd in range(5):

        # Sampling
        n_individus = test_array.shape[0]

        # Randomly choose 5 sample indices (seeded with the repetition number)
        np.random.seed(rd)
        indices_choisis = np.random.choice(n_individus, size=5, replace=False)

        # Select the data matching the chosen indices
        # NOTE(review): the sampled targets come from the raw y_test, not from
        # the capped test_rul used for the metrics above - confirm.
        test_array_sampling = test_array[indices_choisis, :, :]
        label_array_sampling = y_test.values[indices_choisis, :]

        # Show the shapes of the selected data
        print(test_array_sampling.shape, label_array_sampling.shape)

        # Pairwise distance matrix between the flattened sampled inputs
        X_dist = pd.DataFrame(squareform(pdist(test_array_sampling.reshape((test_array_sampling.shape[0], -1)))))

        # LIME
        # get_lime_explanation reads the global name `lime_explainer`, so the
        # explainer must be bound to exactly this name.
        # NOTE(review): training_labels=label_array are the training labels
        # while the explainer is fitted on test_array - confirm.
        lime_explainer = RecurrentTabularExplainer(test_array, training_labels=label_array,
                                                   feature_names=remaining_sensors,
                                                   mode = 'regression',
                                                   )
        lime_values = get_lime_explanation(test_array_sampling, e = model.predict)
        Lime_dist = pd.DataFrame(squareform(pdist(lime_values))) # pairwise distances between LIME explanations

        # LIME's metrics
        # NOTE(review): LIME and SHAP pass targets=label_array (all training
        # labels) to coherence, whereas L2X passes label_array_sampling; LIME
        # also passes L2X=True to fidelity/instability only. Left unchanged -
        # verify against the sp_modif metric implementations.
        list_metrics_lime = {}
        list_metrics_lime['identity'] = identity(X_dist, Lime_dist)
        list_metrics_lime['separability'] = separability(X_dist, Lime_dist)
        list_metrics_lime['stability'] = stability(X_dist, Lime_dist)
        list_metrics_lime['coherence'], list_metrics_lime['completness'], list_metrics_lime['congruence'] = coherence(
            model=model.predict,
            explainer = get_lime_explanation,
            samples=test_array_sampling,
            targets=label_array, e = model.predict)
        list_metrics_lime['selectivity'] = selectivity(model=model.predict, explainer = get_lime_explanation,
                                                       samples=test_array_sampling, e_x = model.predict)
        list_metrics_lime['accumen'] = acumen(get_lime_explanation, test_array_sampling, e=model.predict)
        list_metrics_lime['Verm_stability'] = stability_Velmurugan(get_lime_explanation, test_array_sampling,
                                                                   e=model.predict, top_features=200)
        list_metrics_lime['fidelity'], list_metrics_lime['sparsity'] = fidelity(
            model=model.predict,
            explainer = get_lime_explanation,
            samples=test_array_sampling,
            e = model.predict, L2X=True)
        list_metrics_lime['instability'] = instability(
            model=model.predict,
            explainer = get_lime_explanation,
            samples=test_array_sampling,
            e = model.predict, L2X=True)

        list_metrics_lime['alpha'] = alpha
        list_metrics_lime['upper'] = upper
        list_metrics_lime['sequence_length'] = sequence_length
        list_metrics_lime['explainer'] = 'lime'
        list_metrics_lime['model'] = model_name

        # ignore_index keeps the row labels unique (as done for `results`);
        # without it every appended row carries index 0.
        df_metrics = pd.concat([df_metrics, pd.DataFrame([list_metrics_lime])], ignore_index=True)

        # SHAP
        e = KernelSHAP(model)
        shapvalues = get_explainations(test_array_sampling, e)

        list_metrics_shap = {}
        shap_dist = pd.DataFrame(squareform(pdist(shapvalues))) # pairwise distances between SHAP explanations

        list_metrics_shap['identity'] = identity(X_dist, shap_dist)
        list_metrics_shap['separability'] = separability(X_dist, shap_dist)
        list_metrics_shap['stability'] = stability(X_dist, shap_dist)
        list_metrics_shap['coherence'], list_metrics_shap['completness'], list_metrics_shap['congruence'] = coherence(
            model=model.predict,
            explainer = get_explainations,
            samples=test_array_sampling,
            targets=label_array, e = e)
        list_metrics_shap['selectivity'] = selectivity(model=model.predict, explainer = get_explainations,
                                                       samples=test_array_sampling, e_x=e)
        list_metrics_shap['accumen'] = acumen(get_explainations, test_array_sampling, e=e)
        list_metrics_shap['Verm_stability'] = stability_Velmurugan(get_explainations, test_array_sampling,
                                                                   e=e, top_features=200)
        list_metrics_shap['fidelity'], list_metrics_shap['sparsity']= fidelity(
            model=model.predict,
            explainer = get_explainations,
            samples=test_array_sampling,
            e = e)
        list_metrics_shap['instability']= instability(
            model=model.predict,
            explainer = get_explainations,
            samples=test_array_sampling,
            e = e)

        list_metrics_shap['alpha'] = alpha
        list_metrics_shap['upper'] = upper
        list_metrics_shap['sequence_length'] = sequence_length
        list_metrics_shap['explainer'] = 'shap'
        list_metrics_shap['model'] = model_name

        df_metrics = pd.concat([df_metrics, pd.DataFrame([list_metrics_shap])], ignore_index=True)

        # L2X
        e = L2X(model.predict, test_array_sampling)
        l2xvalues = get_explainations(test_array_sampling, e, L2X=True)

        # L2X's metrics
        list_metrics_l2x = {}
        l2x_dist = pd.DataFrame(squareform(pdist(l2xvalues))) # pairwise distances between L2X explanations

        list_metrics_l2x['identity'] = identity(X_dist, l2x_dist)
        list_metrics_l2x['separability'] = separability(X_dist, l2x_dist)
        list_metrics_l2x['stability'] = stability(X_dist, l2x_dist)
        list_metrics_l2x['coherence'], list_metrics_l2x['completness'], list_metrics_l2x['congruence'] = coherence(
            model=model.predict, explainer = get_explainations,
            samples=test_array_sampling, targets=label_array_sampling, e = e, L2X=True)
        list_metrics_l2x['selectivity'] = selectivity(model=model.predict, explainer = get_explainations,
                                                      samples=test_array_sampling, e_x=e, L2X=True)
        list_metrics_l2x['accumen'] = acumen(get_explainations, test_array_sampling, e=e, L2X=True)
        list_metrics_l2x['Verm_stability'] = stability_Velmurugan(get_explainations, test_array_sampling,
                                                                  e=e, top_features=200, L2X=True)
        list_metrics_l2x['fidelity'], list_metrics_l2x['sparsity']= fidelity(
            model=model.predict,
            explainer = get_explainations,
            samples=test_array_sampling,
            e = e, L2X=True)
        list_metrics_l2x['instability'] = instability(
            model=model.predict,
            explainer = get_explainations,
            samples=test_array_sampling,
            e = e, L2X=True)

        list_metrics_l2x['alpha'] = alpha
        list_metrics_l2x['upper'] = upper
        list_metrics_l2x['sequence_length'] = sequence_length
        list_metrics_l2x['explainer'] = 'l2x'
        list_metrics_l2x['model'] = model_name

        df_metrics = pd.concat([df_metrics, pd.DataFrame([list_metrics_l2x])], ignore_index=True)

    # Persist the metrics after every model so a partial run still leaves results on disk.
    df_metrics.to_csv('results/result004/quality_by_layerl.csv')

  0%|          | 0/4 [00:00<?, ?it/s]

(51787, 39, 14) (51787, 1) (248, 39, 14)

(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
13
13
13
13
13
13
13
13
13
13
12
12
12
12
12
10
10
10
10
10
10
10
10
10
10


12
12
12
12
12
15
15
15
15
15
19
19
19
19
19
15
15
15
15
15
11
11
11
11
11
(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
13
13
13
13
13
13
13
13
13
13
10
10
10
10
10
10
10
10
10
10
13
13
13
13
13
17
17
17
17
17
12
12
12
12
12
16
16
16
16
16
14
14
14
14
14
10
10
10
10
10
(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
13
13
13
13
13
13
13
13
13
13
12
12
12
12
12
10
10
10
10
10
13
13
13
13
13
19
19
19
19
19
19
19
19
19
19
15
15
15
15
15
10
10
10
10
10
10
10
10
10
10
(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
10
10
10
10
10
13
13
13
13
13
11
11
11
11
11
18
18
18
18
18
21
21
21
21
21
10
10
10
1

 25%|██▌       | 1/4 [2:10:29<6:31:27, 7829.03s/it]

(51787, 39, 14) (51787, 1) (248, 39, 14)
(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
19
19
19
19
19
15
15
15
15
15
19
19
19
19
19
18
18
18
18
18
16
16
16
16
16
(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
21
21
21
21
21
12
12
12
12
12
23
23
23
23
23
17
17
17
17
17
10
10
10
10
10
(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
12
12
12
12
12
15
15
15
15
15
14
14
14
14
14
13
13
13
13
13
10
10
10
10
10
(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
17
17
17
17
17
20
20
20
20
20
22
22
22
22
22
16
16
16
16
1

 50%|█████     | 2/4 [4:31:10<4:32:58, 8189.27s/it]

(51787, 39, 14) (51787, 1) (248, 39, 14)
(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
13
13
13
13
13
10
10
10
10
10
13
13
13
13
13
10
10
10
10
10
21
21
21
21
21
13
13
13
13
13
19
19
19
19
19
11
11
11
11
11
(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
12
12
12
12
12
13
13
13
13
13
10
10
10
10
10
11
11
11
11
11
14
14
14
14
14
17
17
17
17
17
15
15
15
15
15
16
16
16
16
16
(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
12
12
12
12
12
14
14
14
14
14
10
10
10
10
10
13
13
13
13
13
11
11
11
11
11
15
15
15
15
15
22
22
22
22
22
17
17
17
17
17
(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
12
12
12
12
12
13
13
13
13
13
12
12
12
12
12
14
14
14
14
1

 75%|███████▌  | 3/4 [7:02:32<2:23:17, 8597.07s/it]

(51787, 39, 14) (51787, 1) (248, 39, 14)
(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
13
13
13
13
13
10
10
10
10
10
13
13
13
13
13
10
10
10
10
10
13
13
13
13
13
17
17
17
17
17
13
13
13
13
13
18
18
18
18
18
12
12
12
12
12
10
10
10
10
10
(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
13
13
13
13
13
13
13
13
13
13
12
12
12
12
12
13
13
13
13
13
13
13
13
13
13
11
11
11
11
11
16
16
16
16
16
11
11
11
11
11
10
10
10
10
10
13
13
13
13
13
(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
15
15
15
15
15
14
14
14
14
14
11
11
11
11
11
31
31
31
31
31
12
12
12
12
12
(5, 39, 14) (5, 1)
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
13
13
13
13
13
14
14
14
14
14
13
13
13
13
13
14
14
14
14
14
13
13
13
13
13
12
12
12
12
12
10
10
10
10
10
10
10
10
10
10
19
19
19
19
1

100%|██████████| 4/4 [9:48:01<00:00, 8820.34s/it]  


In [12]:
# Display the explanation-quality metrics accumulated by the modeling cell
# (one row per explainer, per sampling repetition, per model).
df_metrics

Unnamed: 0,identity,separability,stability,coherence,completness,congruence,selectivity,accumen,Verm_stability,fidelity,sparsity,instability,alpha,upper,sequence_length,explainer,model
0,1.0,1.0,1.0,0.396575,0.859858,0.231712,0.600059,0.010349,0.745336,0.038609,0.018315,0.496626,0.2,125,39,lime,model_1l
0,1.0,1.0,1.0,0.079117,1.384176,0.152816,0.602597,0.48444,1.0,0.691115,0.018315,0.52545,0.2,125,39,shap,model_1l
0,1.0,1.0,1.0,0.317976,4.816301,0.157744,0.620165,0.019596,1.0,0.026011,0.018315,0.65641,0.2,125,39,l2x,model_1l
0,1.0,1.0,1.0,0.232725,2.564634,0.215667,0.663364,0.079706,0.604761,0.084158,0.018315,0.331964,0.2,125,39,lime,model_1l
0,1.0,1.0,1.0,0.004815,0.676175,0.005961,0.653576,0.490018,1.0,0.719074,0.0,0.5015,0.2,125,39,shap,model_1l
0,1.0,1.0,1.0,0.185049,17.869755,0.162726,0.628326,0.041394,1.0,0.207279,0.018315,0.560001,0.2,125,39,l2x,model_1l
0,1.0,1.0,1.0,0.170843,0.572378,0.199016,0.679362,0.057615,0.564015,0.103174,0.018315,0.399297,0.2,125,39,lime,model_1l
0,1.0,1.0,1.0,0.128458,0.720116,0.2084,0.657258,0.450789,1.0,0.405441,0.007326,0.338765,0.2,125,39,shap,model_1l
0,1.0,1.0,1.0,0.202384,29.547411,0.226149,0.637773,0.043963,1.0,0.265515,0.018315,0.466667,0.2,125,39,l2x,model_1l
0,1.0,1.0,1.0,0.38133,0.11599,0.286515,0.590704,0.178128,0.596612,0.060922,0.018315,0.349753,0.2,125,39,lime,model_1l
