In [1]:
import numpy as np
import pandas as pd
import lime
import shap
import lime.lime_tabular
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)


In [2]:
df = pd.read_csv("xai.csv", index_col=False)
df.columns = [i.zfill(2) for i in df.columns]
df.drop(axis=1, labels='Unnamed: 0', inplace=True)

X = df.iloc[:, 0:256]
y = df.Class

original_feature_names = X.columns

# Manually create a list of feature names with the original hexadecimal values
feature_names = [f"{original_feature_names[i]}" for i in range(256)]

num_classes = len(np.unique(y))

encoder = LabelEncoder()
encoder.fit(y)

y = encoder.transform(y)
y = np_utils.to_categorical(y, num_classes=num_classes)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = Sequential([
    Dense(64, activation='relu', input_shape=(256,)),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=50, verbose=0)

<keras.callbacks.History at 0x1d226857cd0>

In [3]:
y_pred = model.predict(X_test)
print(classification_report(y_test.argmax(axis=1), y_pred.argmax(axis=1)))

              precision    recall  f1-score   support

           0       0.87      0.86      0.86       114
           1       0.70      0.65      0.67       208
           2       0.66      0.58      0.62       219
           3       0.29      0.30      0.29       105
           4       0.89      0.91      0.90       785
           5       0.52      0.39      0.44       197
           6       0.81      0.83      0.82        66
           7       0.71      0.78      0.75       767
           8       0.61      0.25      0.36        55
           9       0.38      0.45      0.41       180
          10       0.92      0.85      0.88       121
          11       0.85      0.88      0.86       602
          12       0.79      0.85      0.82       131
          13       0.81      0.68      0.74        92

    accuracy                           0.75      3642
   macro avg       0.70      0.66      0.67      3642
weighted avg       0.75      0.75      0.75      3642



In [4]:
explainer = shap.Explainer(model, X_train.values, algorithm="permutation", max_evals=1000)

shap_values = explainer(X_test.values)

Permutation explainer: 3643it [07:55,  7.55it/s]                                                                       


In [11]:
print(shap_values.shape)
print(shap_values)

(3642, 256, 14)
.values =
array([[[ 3.07042680e-03, -2.46339288e-02, -3.36864769e-04, ...,
          4.36079032e-02, -5.29184022e-02,  7.21352487e-03],
        [ 2.76534586e-03, -1.89432299e-03,  3.91620931e-06, ...,
          1.22788309e-03, -4.35198904e-03,  1.70459522e-03],
        [ 6.73480166e-06, -9.52963163e-04,  1.78183824e-04, ...,
          1.31741242e-03, -2.70802603e-03, -1.79764116e-03],
        ...,
        [-1.33856065e-05,  6.27320249e-04,  1.01828382e-04, ...,
          4.82343648e-03, -1.68936165e-03, -2.43344273e-03],
        [-3.86140239e-10, -1.50454908e-02,  7.06977435e-04, ...,
         -3.39024473e-03,  3.38510092e-05, -5.51206646e-05],
        [-1.64033285e-08, -1.51734702e-03, -1.68446335e-02, ...,
         -1.66251494e-03, -9.73037109e-04, -2.68864550e-05]],

       [[ 1.13881501e-05, -1.38372018e-02, -2.01564149e-04, ...,
          5.42591627e-02, -3.20906901e-02,  1.25640764e-04],
        [-1.51583141e-08,  1.39565525e-03,  3.16943957e-05, ...,
          2.

In [5]:
mean_abs_shap_values = np.mean(np.abs(shap_values[0].values), axis=0)  

sorted_feature_importances = sorted(enumerate(mean_abs_shap_values), key=lambda x: x[1], reverse=True)
top_10_negative_features = sorted_feature_importances[:10]

print("Top 10 Features with Negative Impact on the Model:")
for feature_idx, importance in top_10_negative_features:
    print(f"{feature_names[feature_idx]} - {importance:.4f}")

Top 10 Features with Negative Impact on the Model:
07 - 0.0082
04 - 0.0064
0b - 0.0044
09 - 0.0041
01 - 0.0025
02 - 0.0023
0d - 0.0020
0c - 0.0018
0a - 0.0016
05 - 0.0013


In [8]:
df = df.drop(top_10_negative_features, axis=1)
df

KeyError: '[(7, 0.008185287996704722), (4, 0.006375459232798194), (11, 0.004358678967834433), (9, 0.00406225611035695), (1, 0.002526873180391293), (2, 0.0023429387633662987), (13, 0.0019683888557206425), (12, 0.0018065659542205316), (10, 0.001590841297265665), (5, 0.0013093065526814367)] not found in axis'

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = Sequential([
    Dense(64, activation='relu', input_shape=(256,)),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=50, verbose=0)

<keras.callbacks.History at 0x2113d4a1ed0>

In [7]:
y_pred = model.predict(X_test)
print(classification_report(y_test.argmax(axis=1), y_pred.argmax(axis=1)))

              precision    recall  f1-score   support

           0       0.82      0.95      0.88       114
           1       0.62      0.73      0.67       208
           2       0.57      0.74      0.64       219
           3       0.33      0.30      0.32       105
           4       0.89      0.90      0.90       785
           5       0.57      0.53      0.55       197
           6       0.84      0.82      0.83        66
           7       0.77      0.72      0.74       767
           8       0.38      0.27      0.32        55
           9       0.47      0.43      0.45       180
          10       0.79      0.89      0.84       121
          11       0.88      0.81      0.84       602
          12       0.77      0.80      0.79       131
          13       0.72      0.75      0.73        92

    accuracy                           0.75      3642
   macro avg       0.67      0.69      0.68      3642
weighted avg       0.75      0.75      0.75      3642



In [30]:
all_class_feature_importances = []

for class_index in range(num_classes):
    shap_values_class = shap_values[class_index].values
    mean_abs_shap_values = np.mean(np.abs(shap_values_class), axis=0)
    all_class_feature_importances.append(mean_abs_shap_values)

all_class_feature_importances = np.array(all_class_feature_importances)

overall_feature_importances = np.mean(all_class_feature_importances, axis=0)

# Sort and print the top 10 features that negatively impact the model
sorted_feature_importances = sorted(enumerate(overall_feature_importances), key=lambda x: x[1], reverse=True)
top_10_negative_features = sorted_feature_importances[:10]

print("Top 10 Features with Negative Impact on the Model (Overall):")
for feature_idx, importance in top_10_negative_features:
    print(f"{feature_names[feature_idx]} - {importance:.4f}")



Top 10 Features with Negative Impact on the Model (Overall):
07 - 0.0069
0b - 0.0053
04 - 0.0044
02 - 0.0036
09 - 0.0032
01 - 0.0028
0a - 0.0024
0d - 0.0023
05 - 0.0018
06 - 0.0016


In [33]:
print(len(sorted_feature_importances))

14


In [None]:
shap_values_classwise = []

# Calculate SHAP values for each class separately
for i in range(num_classes):
    shap_values = explainer(X_test.values, max_evals=1000, outputs=i)  # Calculate SHAP values for class i
    shap_values_classwise.append(shap_values)

# Calculate the mean SHAP values across all classes
mean_shap_values = np.mean(shap_values_classwise, axis=0)

mean_abs_shap_values = np.mean(np.abs(mean_shap_values.values), axis=0)

sorted_feature_importances = sorted(enumerate(mean_abs_shap_values), key=lambda x: x[1], reverse=True)
top_20_negative_features = sorted_feature_importances[:20]

print("Top 20 Features with Negative Impact on the Model:")
for feature_idx, importance in top_20_negative_features:
    print(f"{feature_names[feature_idx]} - {importance:.4f}")

Permutation explainer: 3643it [10:21,  5.77it/s]                                                                       
Permutation explainer: 3643it [08:31,  6.99it/s]                                                                       
Permutation explainer: 3643it [08:55,  6.66it/s]                                                                       
Permutation explainer: 3643it [08:20,  7.13it/s]                                                                       
Permutation explainer: 3643it [08:25,  7.05it/s]                                                                       
Permutation explainer: 3643it [07:42,  7.70it/s]                                                                       
Permutation explainer: 3643it [07:37,  7.78it/s]                                                                       
Permutation explainer: 3643it [07:59,  7.45it/s]                                                                       
Permutation explainer: 3643it [07:45,  7