In [None]:
!pip install shap

In [2]:
#This post introduces how to explain the interaction values behind a model's prediction with SHAP. In this post, we will use the
#NHANES I (1971-1974) data from the National Health and Nutrition Examination Survey.

#Source (website): This post aims to introduce how to explain the interaction values for the model's prediction by SHAP. In this post, we will use data NHANES I (1971-1974) from National Health and Nutrition Examination Survey

In [3]:
import shap
import xgboost
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Settings shared by the cells below.

random_state = 1  # seed passed to train_test_split
test_size = 0.2   # test_size argument passed to train_test_split

In [5]:
# Load the NHANES I dataset from shap.datasets: X is the feature table,
# y is the label later passed to xgboost. Then preview the first five rows.
X, y = shap.datasets.nhanesi()
X.head(n=5)

Unnamed: 0,sex_isFemale,age,physical_activity,serum_albumin,alkaline_phosphatase,alkaline_phosphatase_isUnacceptable,alkaline_phosphatase_isTestnotdone,SGOT,SGOT_isUnacceptable,SGOT_isTestnotdone,...,urine_hematest_isLarge,urine_hematest_isBlankbutapplicable,sedimentation_rate,sedimentation_rate_isBlankbutapplicable,uric_acid,uric_acid_isUnacceptable,uric_acid_isTestnotdone,systolic_blood_pressure,pulse_pressure,bmi
20751,False,51,3,,85.0,False,False,,True,False,...,False,False,,False,6.2,False,False,110.0,40.0,25.406803
20753,False,41,2,,68.0,False,False,2.27,False,False,...,False,False,,False,7.7,False,False,136.0,54.0,24.588331
20754,True,31,2,,61.0,False,False,1.86,False,False,...,False,False,,False,4.9,False,False,110.0,24.0,23.756502
20755,False,48,2,,58.0,False,False,,True,False,...,False,False,,False,8.4,False,False,126.0,36.0,28.610011
20756,True,29,3,,37.0,False,False,1.69,False,False,...,False,False,,False,4.3,False,False,98.0,36.0,19.216489


In [6]:
# Split features and labels into train/test partitions, using the
# test_size and random_state values from the configuration cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

# Wrap each partition in a DMatrix, the container xgboost.train consumes.
xgb_train = xgboost.DMatrix(data=X_train, label=y_train)
xgb_test = xgboost.DMatrix(data=X_test, label=y_test)

In [7]:
# Booster parameters handed to xgboost.train.
params_train = dict(
    eta=0.002,
    max_depth=3,
    objective="survival:cox",
    subsample=0.5,
)

In [None]:
# Train the booster on the training DMatrix, evaluating on the test DMatrix.
# The evaluation metric is logged every 500 rounds: with 10000 boosting rounds,
# logging every 10 rounds would print 1000 lines and bury the rest of the notebook.
# Only the log frequency differs; the fitted model is unaffected.
model_train = xgboost.train(
    params_train,
    xgb_train,
    num_boost_round=10000,
    evals=[(xgb_test, "test")],
    verbose_eval=500,
)



In [9]:
# Build a tree explainer around the trained booster, then compute
# SHAP values for every row of the test features.
explainer = shap.TreeExplainer(model=model_train)
shap_values = explainer.shap_values(X_test)

In [None]:
# SHAP interaction values, computed on the first 1000 test rows only.
shap_interaction_values = explainer.shap_interaction_values(X_test.iloc[:1000])

In [None]:
# Interaction dependence plot for the age / sex feature pair.
# The pair must use the DataFrame's real column names ("age", "sex_isFemale");
# "Age" and "Sex" are not columns of X, so dependence_plot cannot resolve them.
X_interaction = X_test.iloc[:1000, :]  # same rows the interaction values were computed on
shap.dependence_plot(
    ("age", "sex_isFemale"),
    shap_interaction_values,
    X_interaction,
    display_features=X_interaction,
)