In [2]:
from numpy import loadtxt
from keras.models import load_model
from sklearn.metrics import confusion_matrix
from lime.lime_tabular import LimeTabularExplainer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

In [3]:
df_lstm = pd.read_csv(r'D:\Uni Docs\DSC4996\Dynamic_fraud_detection_system\Data\Dataset03.csv')

In [4]:
df_lstm = df_lstm.drop('Unnamed: 0', axis=1)

In [5]:
df_lstm.shape

(284910, 31)

In [6]:
X = df_lstm.drop(['Class'], axis = 'columns')
Label_Data = df_lstm['Class']

In [7]:
X_r2 = StandardScaler().fit_transform(X)

In [8]:
X_train,X_test,y_train,y_test = train_test_split(X_r2, Label_Data, test_size=0.3)

In [9]:
X_val, X_test1, y_val, y_test1 = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

In [11]:
# design network
np.random.seed(7)

train_LSTM_X=X_train
val_LSTM_X=X_val

## Reshape input to be 3D [samples, timesteps, features] (format requis par LSTM)
train_LSTM_X = train_LSTM_X.reshape((train_LSTM_X.shape[0], 1, train_LSTM_X.shape[1]))
val_LSTM_X = val_LSTM_X.reshape((val_LSTM_X.shape[0], 1, val_LSTM_X.shape[1]))

train_LSTM_y=y_train
val_LSTM_y=y_val

test_LSTM_X = X_test1.reshape((X_test1.shape[0], 1, X_test1.shape[1]))

In [37]:
model = load_model(r'D:\Uni Docs\DSC4996\Dynamic_fraud_detection_system\LSTM\lstm_Model_gan_with_es.h5')

In [13]:
# predict probabilities for test set
yhat_probs = model.predict(test_LSTM_X, verbose=0)
# reduce to 1d array
yhat_probs = yhat_probs[:, 0]

In [14]:
yhat_classes = (yhat_probs > 0.5).astype(int)

In [15]:
cm_lstm = confusion_matrix(y_test1, yhat_classes)

In [16]:
cm_lstm

array([[29893,    11],
       [   17, 12816]], dtype=int64)

In [17]:
TN, FP, FN, TP = cm_lstm.ravel()

In [25]:
# Convert the Series to numpy arrays
y_test1_array = y_test1.values

In [26]:
# Get the indices of FP and FN instances
FP_indices = [i for i in range(len(y_test1_array)) if y_test1_array[i]==0 and yhat_classes[i]==1]
FN_indices = [i for i in range(len(y_test1_array)) if y_test1_array[i]==1 and yhat_classes[i]==0]

In [40]:
def predict_proba(data, model):
    # Reshape data for LSTM
    data_reshaped = data.reshape(-1, 1, 30)
    proba = model.predict(data_reshaped)
    return np.hstack((1 - proba, proba))

In [36]:
explainer = LimeTabularExplainer(train_LSTM_X.reshape(-1, train_LSTM_X.shape[2]), 
                                 feature_names=X.columns, 
                                 class_names=['Genuine', 'Fraud'], 
                                 verbose=True, 
                                 mode='classification')

In [39]:
test_LSTM_X.shape

(42737, 1, 30)

In [41]:
FP_explanations = []
for i in FP_indices:
    exp = explainer.explain_instance(test_LSTM_X[i].reshape(-1), 
                                     lambda x: predict_proba(x.reshape(-1, 1, 30), model),  # change this line
                                     num_features=5)
    FP_explanations.append(exp)

Intercept -0.0045569395878472625
Prediction_local [0.03221367]
Right: 0.88083994
Intercept -0.007167063180634538
Prediction_local [0.0449061]
Right: 0.81007934
Intercept -0.005492763771930336
Prediction_local [0.03307488]
Right: 0.8663007
Intercept -0.003826163508928569
Prediction_local [0.02919273]
Right: 0.8969698
Intercept -0.004776385516475357
Prediction_local [0.03595193]
Right: 0.81148285
Intercept -0.0036042528400174493
Prediction_local [0.02794299]
Right: 0.77302366
Intercept -0.006485123041376651
Prediction_local [0.03911175]
Right: 0.8804319
Intercept -0.00502477212040824
Prediction_local [0.03336452]
Right: 0.8111771
Intercept -0.004887402175378981
Prediction_local [0.03253867]
Right: 0.8238771
Intercept -0.003024272145031614
Prediction_local [0.02718903]
Right: 0.80185777
Intercept -0.0044988927024948045
Prediction_local [0.03349013]
Right: 0.7738451


In [42]:
FN_explanations = []
for i in FN_indices:
    exp = explainer.explain_instance(test_LSTM_X[i].reshape(-1), 
                                     lambda x: predict_proba(x.reshape(-1, 1, 30), model), 
                                     num_features=5)
    FN_explanations.append(exp)

Intercept -0.0023681396311234755
Prediction_local [0.02488844]
Right: 0.119267754
Intercept 0.0052595700307148385
Prediction_local [0.00193985]
Right: 0.0023365393
Intercept -0.0007885738989767428
Prediction_local [0.02463849]
Right: 1.3565402e-05
Intercept 0.005917766447199621
Prediction_local [0.00640813]
Right: 1.750426e-05
Intercept -0.004664264660673033
Prediction_local [0.03041305]
Right: 0.20574531
Intercept 0.0020245763591793427
Prediction_local [0.00933411]
Right: 0.00020953258
Intercept -0.0034292256668449032
Prediction_local [0.03134237]
Right: 0.17813475
Intercept 0.000518120041661325
Prediction_local [0.02226116]
Right: 0.075207174
Intercept -0.005895144112469423
Prediction_local [0.0361075]
Right: 0.029389292
Intercept 0.0020045771065393915
Prediction_local [0.01408146]
Right: 0.00041585873
Intercept 0.0022989176821811355
Prediction_local [0.01351265]
Right: 0.0009771427
Intercept 0.0018744668225387658
Prediction_local [0.01394741]
Right: 0.0005972565
Intercept -0.0029647

In [44]:
FP_feature_importances = np.zeros(len(X.columns))
FN_feature_importances = np.zeros(len(X.columns))

In [45]:
feature_name_to_index = {name: index for index, name in enumerate(X.columns)}

In [46]:
for exp in FP_explanations:
    for feature_name, importance in exp.as_list():
        original_feature_name = feature_name.split(' ')[0]
        if original_feature_name in feature_name_to_index:
            feature_index = feature_name_to_index[original_feature_name]
            FP_feature_importances[feature_index] += np.abs(importance)

In [47]:
for exp in FN_explanations:
    for feature_name, importance in exp.as_list():
        original_feature_name = feature_name.split(' ')[0]
        if original_feature_name in feature_name_to_index:
            feature_index = feature_name_to_index[original_feature_name]
            FN_feature_importances[feature_index] += np.abs(importance)

In [48]:
FP_feature_importances /= np.sum(FP_feature_importances)
FN_feature_importances /= np.sum(FN_feature_importances)

### Flase positive

In [53]:
for feature_name, fp_importance in zip(X.columns, FP_feature_importances):
    print(f"{feature_name}:{fp_importance}")

Time:0.0
V1:0.0
V2:0.029211479660968275
V3:0.06848481565206174
V4:0.1423810830722951
V5:0.0
V6:0.0
V7:0.14319009527182192
V8:0.0
V9:0.02652916050451221
V10:0.06111919068387853
V11:0.029221451056743033
V12:0.24998364600294748
V13:0.0
V14:0.1630565167802727
V15:0.0
V16:0.05649008742384575
V17:0.030332473890653116
V18:0.0
V19:0.0
V20:0.0
V21:0.0
V22:0.0
V23:0.0
V24:0.0
V25:0.0
V26:0.0
V27:0.0
V28:0.0
Amount:0.0


### False Negetive

In [54]:
for feature_name, fn_importance in zip(X.columns, FN_feature_importances):
    print(f"{feature_name}:{fn_importance}")

Time:0.0
V1:0.012364153954567208
V2:0.06921226575903483
V3:0.031173738965857126
V4:0.15948416725044692
V5:0.0
V6:0.0061312434076639276
V7:0.13532407299909352
V8:0.021951046565225372
V9:0.04312936816125782
V10:0.03548262076907327
V11:0.030912260146140135
V12:0.25941482351604483
V13:0.0
V14:0.119931645073924
V15:0.0
V16:0.036818708430059924
V17:0.014435520657692841
V18:0.0
V19:0.0
V20:0.0069334844821457116
V21:0.0
V22:0.009762798031784564
V23:0.0
V24:0.0
V25:0.0
V26:0.0
V27:0.0
V28:0.0075380818299881115
Amount:0.0


### Correct Predictions

In [58]:
TP_indices = [i for i in range(len(y_test1_array)) if y_test1_array[i]==1 and yhat_classes[i]==1]
TN_indices = [i for i in range(len(y_test1_array)) if y_test1_array[i]==0 and yhat_classes[i]==0]

In [59]:
True_predictions_indices = TP_indices + TN_indices

In [61]:
len(True_predictions_indices)

42709

In [64]:
N = 1000
True_predictions_explanations = []
for i in range(N):
    exp = explainer.explain_instance(test_LSTM_X[i].reshape(-1), 
                                     lambda x: predict_proba(x.reshape(-1, 1, 30), model), 
                                     num_features=5)
    True_predictions_explanations.append(exp)

Intercept 0.006160781799326106
Prediction_local [-0.00407159]
Right: 8.917685e-06
Intercept -0.001671471338490279
Prediction_local [0.02731981]
Right: 0.9999967
Intercept -0.0005966253736280356
Prediction_local [0.02187562]
Right: 0.9999963
Intercept 0.001959113859053936
Prediction_local [0.01132568]
Right: 2.9014076e-05
Intercept -0.0007054297388660435
Prediction_local [0.01900828]
Right: 0.9999949
Intercept 0.0030924545843034456
Prediction_local [0.00995878]
Right: 0.00010557292
Intercept 0.00473810971564268
Prediction_local [0.00308081]
Right: 3.28879e-06
Intercept 1.1048521274844084e-05
Prediction_local [0.01970253]
Right: 0.9999958
Intercept 0.0035811085511953562
Prediction_local [0.0122536]
Right: 7.552032e-07
Intercept 0.0011037343639209883
Prediction_local [0.01389223]
Right: 4.4960775e-06
Intercept -0.00035001249457922504
Prediction_local [0.01795816]
Right: 4.390428e-05
Intercept 0.00834148492532921
Prediction_local [-0.00389235]
Right: 3.2651174e-06
Intercept -0.001425093590

In [65]:
True_predictions_feature_importances = np.zeros(len(X.columns))

In [66]:
for exp in True_predictions_explanations:
    for feature_name, importance in exp.as_list():
        original_feature_name = feature_name.split(' ')[0]
        if original_feature_name in feature_name_to_index:
            feature_index = feature_name_to_index[original_feature_name]
            True_predictions_feature_importances[feature_index] += np.abs(importance)

In [67]:
True_predictions_feature_importances /= np.sum(True_predictions_feature_importances)

In [68]:
for feature_name, ftr_importance in zip(X.columns, True_predictions_feature_importances):
    print(f"{feature_name}:{ftr_importance}")

Time:0.011738436776357084
V1:0.013649508041667995
V2:0.04602109653849483
V3:0.0722110077499402
V4:0.08948683619333293
V5:0.0023551878015175783
V6:0.0013566829672934507
V7:0.11620908885353741
V8:0.017399649103165323
V9:0.04333959071382983
V10:0.0344193439450215
V11:0.05063945737513297
V12:0.17844113281774673
V13:0.0029473176856581137
V14:0.08969242322875627
V15:0.0019896314428440413
V16:0.0566248852168488
V17:0.020453287005509455
V18:0.0014690373653910502
V19:0.0023479698295990723
V20:0.0029510166672268687
V21:0.002694311444425906
V22:0.0016720729491973448
V23:0.005669204660818172
V24:0.002155078585385897
V25:0.0045397531616341935
V26:0.002043161763390583
V27:0.004628110519624921
V28:0.11945472424864563
Amount:0.0014009953480058391
