# SHAP Interpretation

In [None]:
import shap
import pandas as pd
import keras
from sklearn.preprocessing import *
import numpy as np
# Two independent instances of every scikit-learn scaler type. In the code
# visible in this file only Stdsc (inputs) and Stdsc2 (output) get fitted.
Minmaxsc, Minmaxsc2 = (MinMaxScaler(feature_range=(0, 1)) for _ in range(2))
Stdsc, Stdsc2 = (StandardScaler() for _ in range(2))
MAsc, MAsc2 = (MaxAbsScaler() for _ in range(2))
Rsc, Rsc2 = (RobustScaler() for _ in range(2))

In [None]:
# Read the processed table. Columns are selected by position: column 1 is
# taken as the output and every column from index 2 onwards as an input.
database = pd.read_csv('processed_database.csv')
data_input_full = database.iloc[:, 2:]
data_output_full = database.iloc[:, 1]

# Standardize inputs and output with separate scalers; the output is
# reshaped to a single column because the scaler is given 2-D data.
data_input_full_ANN = Stdsc.fit_transform(data_input_full)
data_output_full_ANN = Stdsc2.fit_transform(
    data_output_full.to_numpy().reshape(-1, 1)
)

In [None]:
# Standardized inputs as a DataFrame that reuses the original column labels.
data_input_full_ANN_for_shap = pd.DataFrame(
    data=data_input_full_ANN,
    columns=data_input_full.columns,
)

In [None]:
# Load the three saved Keras models (suffixes l / m / h) from disk.
model_l, model_m, model_h = (
    keras.models.load_model(model_path)
    for model_path in (
        "./Neutral_HER/ANN_model_l.h5",
        "./Neutral_HER/ANN_model_m.h5",
        "./Neutral_HER/ANN_model_h.h5",
    )
)

In [None]:
from keras.models import load_model
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.inspection import plot_partial_dependence
from sklearn.utils.validation import check_is_fitted

# Adapter that presents an already-trained model through scikit-learn's
# estimator base classes so it can be handed to scikit-learn utilities.
class KerasPDPWrapper(BaseEstimator, RegressorMixin):
    """Scikit-learn style regressor facade over a pre-trained model.

    Parameters
    ----------
    model : object
        Trained model; only its ``predict`` method is used by this class.
    """

    def __init__(self, model):
        # The trailing-underscore attribute is set at construction time so
        # the wrapper counts as fitted without ``fit`` ever being called.
        self.fitted_ = True
        self.model = model

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the wrapped model is already trained."""
        return self

    def predict(self, X, y=None):
        """Return the wrapped model's predictions for ``X``; ``y`` is ignored."""
        predictions = self.model.predict(X)
        return predictions

# One wrapper per loaded Keras model, in the same l / m / h order.
PDP_wrapped_model_l, PDP_wrapped_model_m, PDP_wrapped_model_h = (
    KerasPDPWrapper(keras_model)
    for keras_model in (model_l, model_m, model_h)
)

In [None]:
from sklearn.inspection import partial_dependence
import matplotlib.pyplot as plt
def _split_pdp_result(result):
    """Return ``(averaged_predictions, grid_axes)`` from a ``partial_dependence`` result."""
    # Older scikit-learn releases return a plain (averages, axes) tuple.
    if isinstance(result, tuple):
        averages, grid_axes = result
        return averages, grid_axes
    # Newer releases return a dict-like Bunch. The grid lives under
    # "grid_values" in recent versions and under "values" in earlier
    # Bunch-based ones; prefer the new key so the deprecated one is not touched.
    grid_key = "grid_values" if "grid_values" in result else "values"
    return result["average"], result[grid_key]


def pdp_average_plot_2d(features, data=None):
    """Plot the two-feature partial dependence averaged over the three models.

    Partial dependence is computed separately for ``PDP_wrapped_model_l``,
    ``PDP_wrapped_model_m`` and ``PDP_wrapped_model_h``, averaged
    element-wise, and drawn as a filled contour plot.

    Parameters
    ----------
    features : sequence of length 2
        The two features (as accepted by ``partial_dependence``) to vary;
        ``features[0]`` goes on the x axis and ``features[1]`` on the y axis.
    data : array-like or DataFrame, optional
        Samples over which the partial dependence is evaluated. Defaults to
        the module-level ``data_input_full``, as before.

    Raises
    ------
    ValueError
        If ``features`` does not contain exactly two entries.
    """
    if len(features) != 2:
        raise ValueError(
            "pdp_average_plot_2d needs exactly two features, got %d" % len(features)
        )

    # NOTE(review): the SHAP analysis in this file feeds the standardized
    # inputs (data_input_full_ANN) to these same Keras models, while the
    # default here is the unscaled data_input_full. Confirm which the models
    # expect; pass data=data_input_full_ANN_for_shap to use scaled inputs.
    if data is None:
        data = data_input_full

    # Compute the partial dependence for each model
    results = [
        _split_pdp_result(partial_dependence(wrapped_model, data, features))
        for wrapped_model in (
            PDP_wrapped_model_l,
            PDP_wrapped_model_m,
            PDP_wrapped_model_h,
        )
    ]

    # Average the partial dependence results
    average_pdp = sum(averages for averages, _ in results) / len(results)

    # All models are evaluated on the same data, so the grid of the first
    # one is used for the plot axes.
    grid_axes = results[0][1]

    # Plot the averaged results. Index 0 selects the first model output;
    # the transpose puts the second feature along the rows, as contourf
    # expects Z to be indexed (y, x).
    fig, ax = plt.subplots(figsize=(10, 8))
    contour = ax.contourf(grid_axes[0], grid_axes[1], average_pdp[0].T, alpha=0.75)
    ax.set_xlabel(features[0])
    ax.set_ylabel(features[1])
    fig.colorbar(contour)
    plt.show()


In [None]:
# Build a SHAP explainer for each model (standardized input matrix as the
# second argument) and evaluate it on the DataFrame version of those inputs.
def _explain(model):
    """Return ``(explainer, explanation)`` for one model."""
    explainer = shap.Explainer(model, data_input_full_ANN)
    return explainer, explainer(data_input_full_ANN_for_shap)


explainer_l, shap_values_l = _explain(model_l)
explainer_m, shap_values_m = _explain(model_m)
explainer_h, shap_values_h = _explain(model_h)

# Keep only the columns at positional index 64 and beyond.
_tail = slice(64, None)
sliced_values_l, sliced_values_m, sliced_values_h = (
    explanation.values[:, _tail]
    for explanation in (shap_values_l, shap_values_m, shap_values_h)
)

# Three-model mean, once over the complete Explanation objects and once over
# the sliced raw value arrays.
average_sliced_values_full = (shap_values_l + shap_values_m + shap_values_h) / 3
average_sliced_values = (sliced_values_l + sliced_values_m + sliced_values_h) / 3

# Wrap the averaged slice in a new Explanation; data and feature names are
# taken from the first model's explanation with the same column slice.
average_shap_values = shap.Explanation(
    values=average_sliced_values,
    data=shap_values_l.data[:, _tail],
    feature_names=shap_values_l.feature_names[_tail],
)

# Mean of the absolute averaged SHAP values along axis 0, computed on the
# result of splitting the explanation with ``cohorts(2)``.
cohorts = average_shap_values.cohorts(2).abs.mean(0)

# Compact summary plot limited to 13 features.
shap.summary_plot(
    average_shap_values,
    feature_names=list(data_input_full.columns)[_tail],
    max_display=13,
    alpha=0.5,
    plot_size=[6, 6],
)

# Summary plot showing up to 100 features at the default size.
shap.summary_plot(
    average_shap_values,
    feature_names=list(data_input_full.columns)[_tail],
    max_display=100,
    alpha=0.5,
)