In [1]:
import sklearn.ensemble
import shap
import numpy as np
import numba
import time
import xgboost
import lightgbm
from acv_explainers import ACVTree
from shap import TreeExplainer

# Test 1: regressor - xgboost - data: Boston  

In [2]:
X, y = shap.datasets.boston()
X = X.values
model = xgboost.XGBRegressor()
model.fit(X, y)

acvtree = ACVTree(model, X)
ex = TreeExplainer(model)

100%|██████████| 100/100 [00:01<00:00, 93.54it/s]


In [4]:
X, y = shap.datasets.adult()
X.shape

(32561, 12)

In [3]:
%%timeit
sv = acvtree.shap_values(X)

8.82 s ± 204 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [4]:
%%timeit
sv_norm = acvtree.shap_values_normalized(X)

20.4 s ± 421 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
acvtree = ACVTree(model, X, cache=True, cache_normalized=True)

In [6]:
%%timeit
sv_cache = acvtree.shap_values_cache(X)

10.6 s ± 199 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
%%timeit
sv_norm_cache = acvtree.shap_values_normalized_cache(X)

14.3 s ± 302 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
%%timeit
ex.shap_values(X)

ntree_limit is deprecated, use `iteration_range` or model slicing instead.


129 ms ± 6.91 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


# Test 2: multiclass - lightgbm - data: Iris

In [9]:
import sklearn.ensemble
import shap
import numpy as np
import numba
import time
import xgboost
import lightgbm
from acv_explainers import ACVTree
from shap import TreeExplainer

X, y = shap.datasets.iris()
X = X.values

model = lightgbm.sklearn.LGBMClassifier(num_classes=3, objective="multiclass")
model.fit(X, y)


acvtree = ACVTree(model, X)
ex = TreeExplainer(model)

In [10]:
%%timeit
sv = acvtree.shap_values(X)

11.3 s ± 350 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
%%timeit
sv_norm = acvtree.shap_values_normalized(X)

12.3 s ± 890 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
acvtree = ACVTree(model, X, cache=True, cache_normalized=True)

In [13]:
%%timeit
sv_cache = acvtree.shap_values_cache(X)

11.2 s ± 258 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
%%timeit
sv_norm_cache = acvtree.shap_values_normalized_cache(X)

11.3 s ± 330 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
%%timeit
ex.shap_values(X)

53.6 ms ± 12.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


# Test 3: regressor -xgboost - data: Boston  

In [8]:
import sklearn.ensemble
import shap
import numpy as np
import numba
import time
import xgboost
import lightgbm
from acv_explainers import ACVTree
from shap import TreeExplainer

X, y = shap.datasets.adult()
X = X.values
model = xgboost.XGBClassifier()
model.fit(X, y)

acvtree = ACVTree(model, X)
ex = TreeExplainer(model)

In [2]:
%%timeit
sv = acvtree.shap_values(X[:1000])

1min 4s ± 1.6 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
%%timeit
sv_norm = acvtree.shap_values_normalized(X[:1000])

In [3]:
acvtree = ACVTree(model, X, cache=True)

In [4]:
%%timeit
sv_cache = acvtree.shap_values_cache(X[:1000])

1min 5s ± 1.73 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
%%timeit
sv_norm_cache = acvtree.shap_values_normalized_cache(X[:1000])

1min 41s ± 8.38 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
%%timeit
ex.shap_values(X)

ntree_limit is deprecated, use `iteration_range` or model slicing instead.


3.33 s ± 39.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Test 4 - model: Xgboost - Toy model

In [1]:
from experiments.exp_syn import *
from xgboost import XGBRFClassifier, XGBClassifier, XGBRegressor
from acv_explainers import ACVTree
p = 0.00
n = 50000
d = 500
C = [[]]

# mean 
mean = np.zeros(d)

# Determinitist Covariance
cov = p*np.ones(shape=(d, d)) + 50*np.eye(d)


model_type = 'syn4'

coefs = 4*np.random.randn(d)
exp = ExperimentsLinear(mean=mean, cov=cov, n=n, C=C, data_type=model_type)
exp.y_train = np.argmax(exp.y_train, axis=1)
exp.y_test = np.argmax(exp.y_test, axis=1)
model = XGBClassifier()
model.fit(exp.data, exp.y_train)
print('ROC on Test = {}'.format(roc_auc_score(model.predict(exp.data_test), exp.y_test)))

pandas.core.index is deprecated and will be removed in a future version.  The public classes are available in the top-level namespace.


ROC on Test = 0.6676995651414267


In [2]:
X = exp.data[:1000]
acvtree = ACVTree(model, X)
# ex = TreeExplainer(model)

100%|██████████| 100/100 [00:04<00:00, 21.06it/s]


In [None]:
t0 = time.perf_counter()
sv = acvtree.shap_values(X[:500])
t1 = time.perf_counter()
print(t1 - t0)

In [3]:
acvtree = ACVTree(model, X, cache=True)

In [4]:
%%timeit
sv_cache = acvtree.shap_values_cache(X[:1000])

1min 5s ± 1.73 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
%%timeit
ex.shap_values(X)

ntree_limit is deprecated, use `iteration_range` or model slicing instead.


101 ms ± 4.54 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
