In [1]:
import fasttreeshap
import pandas as pd
import lightgbm as lgb
from sklearn.datasets import fetch_california_housing

# Output LightGBM version.

In [2]:
lgb.__version__

'3.3.2'

# Output FastTreeShap version.

In [3]:
fasttreeshap.__version__

'0.1.1'

# Retrieve and prepare data.

In [4]:
dataset = fetch_california_housing()
dataset.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'feature_names', 'DESCR'])

In [5]:
X = pd.DataFrame(dataset["data"], columns=dataset["feature_names"])
X

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25
...,...,...,...,...,...,...,...,...
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21
20637,1.7000,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22
20638,1.8672,18.0,5.329513,1.171920,741.0,2.123209,39.43,-121.32


In [6]:
y = pd.Series(dataset["target"], name=dataset["target_names"][0])
y

0        4.526
1        3.585
2        3.521
3        3.413
4        3.422
         ...  
20635    0.781
20636    0.771
20637    0.923
20638    0.847
20639    0.894
Name: MedHouseVal, Length: 20640, dtype: float64

# Train LightGBM booster using the Training API instead of the Scikit-learn API.  
### LightGBM's default for the parameter `"objective"` is `"regression"`.

In [7]:
booster = lgb.train(params={}, train_set=lgb.Dataset(data=X, label=y))
booster

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1838
[LightGBM] [Info] Number of data points in the train set: 20640, number of used features: 8
[LightGBM] [Info] Start training from score 2.068558


<lightgbm.basic.Booster at 0x7ff0fafab730>

# Predict.

In [8]:
predictions = pd.Series(booster.predict(X), name="Predictions")
predictions

0        4.275814
1        3.962843
2        4.290174
3        3.330437
4        2.511359
           ...   
20635    0.740225
20636    0.927183
20637    0.871949
20638    0.938161
20639    0.920557
Name: Predictions, Length: 20640, dtype: float64

# Instantiate `TreeExplainer`.

In [9]:
tree_explainer = fasttreeshap.TreeExplainer(model=booster, shortcut=True)
tree_explainer

<fasttreeshap.explainers._tree.Tree at 0x7ff0fafabcd0>

# Attempt explanation.

In [10]:
tree_explainer(X)

KeyError: 'objective'