In [None]:
import os
# Switch the working directory to the parent of the notebook's start directory
# (presumably so the project-local `ghostSHAP` package can be imported -- confirm).
# The directories are computed only once per kernel: without the guard, every
# re-run of this cell would climb one more level up the tree.
if "nwd" not in globals():
    cwd = os.getcwd()           # directory the kernel was started in
    nwd = os.path.dirname(cwd)  # its parent, used as the new working directory
os.chdir(nwd)

In [None]:
%load_ext autoreload
%autoreload 2
import numpy as np
from sklearn.linear_model import LinearRegression
from ghostSHAP.ghost_shap import GhostShap
import shap

In [None]:
# Synthetic training sample for the linear model.
# Seeded so the draws are reproducible; the order of the random draws below
# (x_1, x_2, noise_3, noise_y) must stay fixed to keep the same values.
np.random.seed(5)
n = 10000

# Coefficients of the response y on (x_1, x_2, x_3).
intercept, beta_1, beta_2, beta_3 = 0, 2, 1, 0
# Coefficients used to build x_3 from x_1 and x_2.
beta_13, beta_23 = 1, 0

x_1 = np.random.normal(0, 1, n)
x_2 = np.random.normal(0, 1, n)
noise_3 = np.random.normal(0, 0.1, n)
# Is it needed to add noise_3 to x_3?
x_3 = beta_13 * x_1 + beta_23 * x_2 + noise_3

noise_y = np.random.normal(0, 1, n)
y = intercept + beta_1 * x_1 + beta_2 * x_2 + beta_3 * x_3 + noise_y

# Design matrix with one column per feature, in the order x_1, x_2, x_3.
X = np.column_stack((x_1, x_2, x_3))
print(X[:5])

In [None]:
# Second synthetic sample, drawn with a different seed, used as the data set
# handed to the explainers. Reuses n, beta_13 and beta_23 from the training cell.
np.random.seed(9)
x_1_test = np.random.normal(0, 1, n)
x_2_test = np.random.normal(0, 1, n)
noise_3_test = np.random.normal(0, 0.1, n)
# Is it needed to add noise to x_3_test?
x_3_test = beta_13 * x_1_test + beta_23 * x_2_test + noise_3_test

# Columns in the same order as the training matrix: x_1, x_2, x_3.
X_test = np.column_stack((x_1_test, x_2_test, x_3_test))

# Per-feature means, then the mean of the first feature as the cell's output.
print(X_test.mean(axis=0))
x_1_test.mean()

In [None]:
# Fit an ordinary least-squares regression (with intercept) on the training sample.
linear_model = LinearRegression(fit_intercept=True)
linear_model = linear_model.fit(X, y)

In [None]:
# Show the fitted intercept and coefficients of the regression.
fitted_intercept = linear_model.intercept_
fitted_coefficients = linear_model.coef_
print('Intercept:', fitted_intercept)
print('Parameters:', fitted_coefficients)

In [None]:
# Single observation to explain, stored as one row with three integer features.
x_new = np.array([1, 10, 1]).reshape(1, 3)

In [None]:
# Explain x_new with GhostShap: it is given the fitted model's predict
# function, X_test as its data and x_new as the point under test.
ghost_shap = GhostShap(predict_fn=linear_model.predict, data=X_test, x_test=x_new)
importance = ghost_shap.get_importance()

In [None]:
# Report the GhostShap importances next to the model's prediction for x_new
# and that prediction's offset from the mean prediction over X_test.
pred_new = linear_model.predict(x_new)
pred_offset = pred_new - np.mean(linear_model.predict(X_test))
print('importance:', importance)
print('sum(importance):', np.sum(importance))
print('prediction:', pred_new)
print('prediction - mean_predicted_value:', pred_offset)

In [None]:
# Reference attribution from shap's KernelExplainer, built from the same
# predict function and the same X_test data that GhostShap received.
explainer = shap.KernelExplainer(linear_model.predict, X_test)
shap_values = explainer.explain(x_new)

# Same report as for GhostShap: attributions, their sum, the prediction for
# x_new and its offset from the mean prediction over X_test.
pred_new = linear_model.predict(x_new)
pred_offset = pred_new - np.mean(linear_model.predict(X_test))
print('shap_values:', shap_values)
print('sum(shap):', np.sum(shap_values))
print('prediction:', pred_new)
print('prediction - mean_predicted_value:', pred_offset)

In [None]:
# Squared distance between the data-generating coefficients and the first
# three SHAP values.
# NOTE(review): this compares raw coefficients with attributions; for a linear
# model the attribution presumably scales with the feature's offset from the
# background mean, so confirm that this is the intended reference.
true_betas = (beta_1, beta_2, beta_3)
diff_shap = [beta - shap_values[idx] for idx, beta in enumerate(true_betas)]
print(diff_shap)
np.sum(np.square(diff_shap))

In [None]:
import xgboost
import shap

In [None]:

# train an XGBoost model on a tabular housing-price regression dataset.
# `shap.datasets.boston` is missing from recent shap releases (AttributeError),
# and older shap versions that still wrap scikit-learn's removed `load_boston`
# fail with ImportError; in both cases fall back to the California housing data.
# Note: this rebinds the notebook-level names X and y.
try:
    X, y = shap.datasets.boston()
except (AttributeError, ImportError):
    X, y = shap.datasets.california()
model = xgboost.XGBRegressor().fit(X, y)

# explain the model's predictions using SHAP
# (same syntax works for LightGBM, CatBoost, scikit-learn, transformers, Spark, etc.)
explainer = shap.Explainer(model)
shap_values = explainer(X)

# visualize the first prediction's explanation
#shap.plots.waterfall(shap_values[0])

In [None]:
# Put the summed SHAP values of the first row side by side with that row's
# prediction and its difference from the mean prediction over X.
shap_val = shap_values[0].values
print('np.sum(shap_val):', np.sum(shap_val))

pred_all = model.predict(X)
pred_element, mean_pred_val = pred_all[0], np.mean(pred_all)
print('pred_element:', pred_element)
print('mean_pred_val:', mean_pred_val)
print(pred_element - mean_pred_val)
