In [None]:
import logging
import os
import pandas as pd
import numpy as np
import dalex as dx
import shap
import autosklearn.classification
import joblib
import matplotlib.pyplot as plt
import seaborn as sns

from datetime import datetime
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, confusion_matrix

# Notebook-wide settings: display up to 999 columns when rendering frames, and
# keep a single seed that the train/test split reuses.
pd.set_option('display.max_columns', 999)
RANDOM_STATE = 42

In [None]:
# Path of the training table; the file's first column becomes the row index.
DATA_PATH = 'data/final_train.csv'
df = pd.read_csv(
    DATA_PATH,
    index_col=0,
)

In [None]:
# Separate the target column ('Activity') from the remaining feature columns.
y = df['Activity']
x = df.drop(columns=['Activity'])

In [None]:
# Integer category codes for the target labels.
# NOTE(review): no cell visible here reads `y_`; the split below uses `y` itself.
y_ = (
    y.astype('category')
     .cat.codes
)

In [None]:
# Partition features and labels into train/test parts using the shared seed.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=RANDOM_STATE,
)

In [None]:
# Load the previously saved model artifact.
# NOTE(review): joblib.load unpickles its input, so only load files you trust.
model = joblib.load(
    'results/automl-single-model-2021-28-09-21:28:03/model.joblib'
)

In [None]:
def generate_shap_values(model, x, i):
    """Explain the first ``i`` rows of ``x`` with a SHAP kernel explainer.

    The explainer's background data is a single row holding the column-wise
    median of ``x``.

    Parameters
    ----------
    model
        Object whose ``predict_proba`` is the function being explained.
    x
        Feature frame; ``median``, ``shape`` and ``iloc`` are used on it.
    i
        Number of leading rows of ``x`` to compute SHAP values for.

    Returns
    -------
    tuple
        ``(explainer, values)``: the ``shap.KernelExplainer`` instance and
        the result of its ``shap_values`` call on ``x.iloc[:i, :]``.
    """
    n_features = x.shape[1]
    # One-row background: the median of every column.
    background = x.median().values.reshape((1, n_features))
    explainer = shap.KernelExplainer(model.predict_proba, background)
    rows_to_explain = x.iloc[:i, :]
    values = explainer.shap_values(rows_to_explain)
    return explainer, values

In [None]:
# Number of leading rows of `x` that get SHAP explanations: passed as `i` to
# generate_shap_values and reused to slice `x` for the dependence plots.
explain_observation_no = 100

In [None]:
# Build the explainer and compute SHAP values for the first
# `explain_observation_no` rows of `x`.
shap_model, shap_values = generate_shap_values(
    model=model,
    x=x,
    i=explain_observation_no,
)

In [None]:
# Dot summary plots: one panel per class, stacked vertically, saved as one PNG.
n_classes = len(model.classes_)  # derive the panel count instead of hard-coding 6

# The rows that were explained. Passing their feature values (rather than only
# the column names) gives summary_plot the data it needs to colour each dot by
# feature value; a 1-D sequence of names alone is treated as feature names only.
explained_rows = x.iloc[:explain_observation_no, :]

# squeeze=False keeps a 2-D array even for a single class, so ravel() always
# yields an indexable 1-D array of axes.
fig, axs = plt.subplots(n_classes, 1, squeeze=False)
axs = axs.ravel()
for class_idx, ax in enumerate(axs):
    plt.sca(ax)  # make this panel current so summary_plot draws into it
    shap.summary_plot(shap_values[class_idx], explained_rows, plot_type='dot', show=False)
    # Title the panel directly rather than whatever axes is current afterwards.
    ax.set_title(f"Activity type: {model.classes_[class_idx]}")
fig.set_size_inches(9, 30)
fig.tight_layout()

# savefig does not create missing directories.
os.makedirs('explain-reports', exist_ok=True)
fig.savefig('explain-reports/SHAP-dot.png')

In [None]:
# Bar summary plots (mean |SHAP| per feature): one panel per class, saved as one PNG.
n_classes = len(model.classes_)  # derive the panel count instead of hard-coding 6

# squeeze=False keeps a 2-D array even for a single class, so ravel() always
# yields an indexable 1-D array of axes.
fig, axs = plt.subplots(n_classes, 1, squeeze=False)
axs = axs.ravel()
for class_idx, ax in enumerate(axs):
    plt.sca(ax)  # make this panel current so summary_plot draws into it
    # Only the column names are supplied here, passed explicitly as
    # feature_names; no feature values are given to this plot.
    shap.summary_plot(shap_values[class_idx], feature_names=x.columns, plot_type='bar', show=False)
    ax.set_title(f"Activity type: {model.classes_[class_idx]}")
fig.set_size_inches(9, 30)
fig.tight_layout()

# savefig does not create missing directories.
os.makedirs('explain-reports', exist_ok=True)
fig.savefig('explain-reports/SHAP-FI.png')

In [None]:
# Single bar chart of SHAP feature importance over all classes, with
# class_names taken from the model's classes.
plt.figure()
shap.summary_plot(
    shap_values,
    x.columns,
    plot_type='bar',
    class_names=list(model.classes_),
    show=False,
)
plt.title('SHAP Feature Importance', size=24)
plt.tight_layout()
plt.savefig('explain-reports/SHAP-FI-all.png')
plt.show()

In [None]:
# Per-class SHAP dependence plots for the ten features with the largest mean
# absolute SHAP value; each plot is saved under a directory named after the class.
explained_rows = x.iloc[:explain_observation_no, :]  # loop-invariant slice
for class_idx, class_name in enumerate(model.classes_):
    out_dir = f'explain-reports/dependence/png/{class_name}'
    # makedirs also creates missing parent directories and tolerates an
    # existing one, unlike the exists()/mkdir() pair.
    os.makedirs(out_dir, exist_ok=True)

    # Rank features by mean |SHAP| for this class, largest first, and keep ten.
    mean_abs_shap = np.abs(shap_values[class_idx]).mean(0)
    important_features = np.argsort(mean_abs_shap)[-10:][::-1]

    for feature in important_features:
        feature_name = x.columns[feature]
        shap.dependence_plot(feature, shap_values[class_idx], explained_rows, show=False)
        plt.title(f'SHAP dependence {feature_name}', size=18)
        plt.tight_layout()
        plt.savefig(f'{out_dir}/SHAP-dependence{feature_name}.png', dpi=300)
        plt.show()
        # Release the figure so the loop does not accumulate open figures.
        plt.close()