In [3]:
import numpy as np
import pandas as pd
import lime
import shap
import lime.lime_tabular
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder

In [5]:
df = pd.read_csv('xai.csv', index_col=False)
df.columns = [i.zfill(2) for i in df.columns]
df.drop(axis=1, labels = 'Unnamed: 0', inplace=True)
df.head(5)

Unnamed: 0,00,01,02,03,04,05,06,07,08,09,...,f7,f8,f9,fa,fb,fc,fd,fe,ff,Class
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [7]:
X = df.iloc[:, 0:256]
y = df.Class

num_classes = len(np.unique(y))

encoder = LabelEncoder()
encoder.fit(y)

y = encoder.transform(y)
y = np_utils.to_categorical(y, num_classes=14)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(256,)),
    Dense(64, activation='relu'),
    Dense(14, activation='softmax')  ])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=50, verbose=0)

<keras.callbacks.History at 0x11a7fed29e0>

In [9]:
explainer = shap.Explainer(model, X_train.values, algorithm="permutation", max_evals=1000)

shap_values = explainer.shap_values(X_test.values)

mean_abs_shap_values = np.mean(np.abs(shap_values[0]), axis=0)  

Permutation explainer: 3643it [28:09,  2.15it/s]                                                                       


Top 10 Features with Negative Impact on the Model:
Feature 4 - 0.0059
Feature 7 - 0.0052
Feature 9 - 0.0046
Feature 11 - 0.0041
Feature 13 - 0.0022
Feature 2 - 0.0019
Feature 1 - 0.0015
Feature 8 - 0.0014
Feature 10 - 0.0012
Feature 12 - 0.0011


In [10]:
sorted_feature_importances = sorted(enumerate(mean_abs_shap_values), key=lambda x: x[1], reverse=True)
top_10_negative_features = sorted_feature_importances[:60]

print("Top 10 Features with Negative Impact on the Model:")
for feature_idx, importance in top_10_negative_features:
    print(f"Feature {feature_idx} - {importance:.4f}")

Top 10 Features with Negative Impact on the Model:
Feature 4 - 0.0059
Feature 7 - 0.0052
Feature 9 - 0.0046
Feature 11 - 0.0041
Feature 13 - 0.0022
Feature 2 - 0.0019
Feature 1 - 0.0015
Feature 8 - 0.0014
Feature 10 - 0.0012
Feature 12 - 0.0011
Feature 5 - 0.0010
Feature 3 - 0.0009
Feature 6 - 0.0005
Feature 0 - 0.0004


In [None]:
import shap
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load a sample dataset (Iris dataset)
data = load_iris()
X = data.data
y = data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train a neural network model (you can replace this with your own model)
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(3, activation='softmax')  # 3 classes for Iris dataset
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=50, verbose=0)

# Initialize the SHAP explainer with a higher max_evals value
explainer = shap.Explainer(model, X_train, algorithm="permutation", max_evals=1000)

# Get SHAP values for all samples in the test set
shap_values = explainer.shap_values(X_test)

# Calculate the mean absolute SHAP value for each feature
mean_abs_shap_values = np.mean(np.abs(shap_values[0]), axis=0)  # Use shap_values[0] for the first class

# Sort and print the top 10 features that negatively impact the model
sorted_feature_importances = sorted(enumerate(mean_abs_shap_values), key=lambda x: x[1], reverse=True)
top_10_negative_features = sorted_feature_importances[:10]

print("Top 10 Features with Negative Impact on the Model:")
for feature_idx, importance in top_10_negative_features:
    print(f"Feature {feature_idx} - {importance:.4f}")



Top 10 Features with Negative Impact on the Model:
Feature 1 - 0.1329
Feature 0 - 0.0861
Feature 2 - 0.0676


In [None]:
sample_idx = 1
sample = X_test.values[sample_idx]

In [None]:
explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values, mode="classification")
explanation = explainer.explain_instance(sample, model.predict)



In [None]:
aggregate_importances = np.zeros(X_train.shape[1])

for class_index in range(num_classes):
    try:
        # Get feature importances for the class (if available)
        class_importances = explanation.as_list(label=class_index)
        # Aggregate feature importances for the class
        if class_importances:
            for _, importance in class_importances:
                # Aggregate importance values directly
                aggregate_importances += np.abs(importance)
    except KeyError:
        continue

# Sort and print the top 10 features that negatively impact the model
sorted_feature_importances = sorted(enumerate(aggregate_importances), key=lambda x: x[1], reverse=True)
top_10_negative_features = sorted_feature_importances[:300]

print("Top 10 Features with Negative Impact on the Model:")
for feature_idx, importance in top_10_negative_features:
    print(f"Feature {feature_idx} - {importance:.4f}")

Top 10 Features with Negative Impact on the Model:
Feature 0 - 0.5835
Feature 1 - 0.5835
Feature 2 - 0.5835
Feature 3 - 0.5835
Feature 4 - 0.5835
Feature 5 - 0.5835
Feature 6 - 0.5835
Feature 7 - 0.5835
Feature 8 - 0.5835
Feature 9 - 0.5835
Feature 10 - 0.5835
Feature 11 - 0.5835
Feature 12 - 0.5835
Feature 13 - 0.5835
Feature 14 - 0.5835
Feature 15 - 0.5835
Feature 16 - 0.5835
Feature 17 - 0.5835
Feature 18 - 0.5835
Feature 19 - 0.5835
Feature 20 - 0.5835
Feature 21 - 0.5835
Feature 22 - 0.5835
Feature 23 - 0.5835
Feature 24 - 0.5835
Feature 25 - 0.5835
Feature 26 - 0.5835
Feature 27 - 0.5835
Feature 28 - 0.5835
Feature 29 - 0.5835
Feature 30 - 0.5835
Feature 31 - 0.5835
Feature 32 - 0.5835
Feature 33 - 0.5835
Feature 34 - 0.5835
Feature 35 - 0.5835
Feature 36 - 0.5835
Feature 37 - 0.5835
Feature 38 - 0.5835
Feature 39 - 0.5835
Feature 40 - 0.5835
Feature 41 - 0.5835
Feature 42 - 0.5835
Feature 43 - 0.5835
Feature 44 - 0.5835
Feature 45 - 0.5835
Feature 46 - 0.5835
Feature 47 - 0.5835

In [None]:
lime_feature_importances = explanation.as_map()[1]
sorted_feature_importances = sorted(lime_feature_importances, key=lambda x: x[1])

In [None]:
aggregate_importances = np.zeros(X_train.shape[1])
sorted_feature_importances = sorted(enumerate(aggregate_importances), key=lambda x: x[1])

In [None]:
negative_features = sorted_feature_importances[:10]

In [None]:
print("Top 10 Features with Negative Impact on the Model:")
for feature_idx, importance in negative_features:
    print(f"Feature {feature_idx} - {importance:.4f}")

Top 10 Features with Negative Impact on the Model:
Feature 0 - 0.0000
Feature 1 - 0.0000
Feature 2 - 0.0000
Feature 3 - 0.0000
Feature 4 - 0.0000
Feature 5 - 0.0000
Feature 6 - 0.0000
Feature 7 - 0.0000
Feature 8 - 0.0000
Feature 9 - 0.0000


In [None]:
for feature, importance in negative_features:
  print(f"{df.feature_names[feature]}-{importance: .4f}")

AttributeError: ignored

In [None]:
import numpy as np
import lime
import lime.lime_tabular
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load a sample dataset (Iris dataset)
data = load_iris()
X = data.data
y = data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train a neural network model (you can replace this with your own model)
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(3, activation='softmax')  # 3 classes for Iris dataset
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=50, verbose=0)

# Initialize the LIME explainer
explainer = lime.lime_tabular.LimeTabularExplainer(X_train, mode="classification")

# Select a sample from the test data for explanation
sample_idx = 0
sample = X_test[sample_idx]

# Initialize an array to store aggregate importances
aggregate_importances = np.zeros(X_train.shape[1])

# Loop through class indices and aggregate feature importances
for class_index in range(len(data.target_names)):
    try:
        # Explain the model's prediction for the selected sample and class
        explanation = explainer.explain_instance(sample, model.predict, labels=[class_index])
        # Get feature importances for the class (if available)
        class_importances = explanation.as_list(label=class_index)
        # Aggregate feature importances for the class
        if class_importances:
            for feature, importance in class_importances:
                feature_index = int(feature.split()[0])  # Extract the feature index provided by LIME
                aggregate_importances[feature_index] += importance
    except KeyError:
        continue

# Sort and print the top 10 features that negatively impact the model
sorted_feature_importances = sorted(enumerate(aggregate_importances), key=lambda x: x[1])
top_10_negative_features = sorted_feature_importances[:10]

print("Top 10 Features with Negative Impact on the Model:")
for feature_idx, importance in top_10_negative_features:
    print(f"Feature {feature_idx} - {importance:.4f}")





ValueError: ignored