In [8]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score, r2_score, accuracy_score, f1_score, precision_score, recall_score, log_loss

In [3]:
from explainerdashboard.explainers import *
from explainerdashboard.dashboards import *
from explainerdashboard.datasets import *

In [4]:
# Human-readable description for each feature, keyed by column name.
# Passed to the classifier explainer through its `descriptions` argument.
feature_descriptions = dict(
    Sex="Gender of passenger",
    Deck="The deck the passenger had their cabin on",
    PassengerClass="The class of the ticket: 1st, 2nd or 3rd class",
    Fare="The amount of money people paid",
    No_of_relatives_on_board="number of siblings, spouses, parents plus children on board",
    Embarked="the port where the passenger boarded the Titanic. Either Southampton, Cherbourg or Queenstown",
    Age="Age of the passenger",
    No_of_siblings_plus_spouses_on_board="The sum of the number of siblings plus the number of spouses on board",
    No_of_parents_plus_children_on_board="The sum of the number of parents plus the number of children on board",
)

# RandomForestClassifier dashboard

In [5]:
# Load the Titanic survival train/test split and the matching passenger names.
X_train, y_train, X_test, y_test = titanic_survive()
train_names, test_names = titanic_names()

# Pin the seed: a random forest is stochastic, so without random_state the
# model (and every number derived from it in the cells below) changes on each
# Restart & Run All.
model = RandomForestClassifier(n_estimators=50, max_depth=5, random_state=42)
model.fit(X_train, y_train)

# Wrap the fitted model and the held-out split in an explainer object that the
# inspection cells and the dashboard below operate on.
explainer = RandomForestClassifierBunch(model, X_test, y_test, metric=roc_auc_score,
                               cats=['Sex', 'Deck', 'Embarked'],
                               idxs=test_names,  # names of passengers
                               descriptions=feature_descriptions,
                               labels=['Not survived', 'Survived'])

In [10]:
# Keep the confusion-matrix figure in `fig`; a later cell checks its type
# against plotly's go.Figure.
fig = explainer.plot_confusion_matrix()

In [70]:
# Interaction plot for the "Sex" feature.
# cats=True presumably treats the one-hot columns as a single categorical
# feature -- confirm against the explainerdashboard API.
# NOTE(review): `explainer` is rebound by the regressor and XGB cells further
# down, and this cell's execution count is out of order, so its result depends
# on which model cell ran last.
explainer.plot_interactions("Sex", cats=True)

In [26]:
# Fetch the "Deck" column through the explainer; the recorded output is an
# object-dtype Series of deck labels.
explainer.get_col("Deck")

0           C
1           C
2      Unkown
3      Unkown
4      Unkown
        ...  
195    Unkown
196    Unkown
197         B
198    Unkown
199    Unkown
Length: 200, dtype: object

In [27]:
# Fetch the "Fare" column through the explainer; the recorded output is a
# float64 Series named "Fare".
explainer.get_col("Fare")

0      71.2833
1      53.1000
2      21.0750
3      11.1333
4      30.0708
        ...   
195     7.2292
196    11.1333
197     5.0000
198     9.8458
199    13.0000
Name: Fare, Length: 200, dtype: float64

In [41]:
# Check whether `pred_percentiles` is a plain string (the recorded run returned False).
isinstance(explainer.pred_percentiles, str)

False

In [53]:
# Contribution table for index 0; the recorded output has one row per feature
# with columns col, contribution, value, cumulative and base.
explainer.contrib_df(0)

Unnamed: 0,col,contribution,value,cumulative,base
0,base_value,0.388191,-,0.388191,0.0
1,Sex,0.276826,female,0.665017,0.388191
2,PassengerClass,0.068943,1,0.73396,0.665017
3,Fare,0.060766,71.2833,0.794726,0.73396
4,Deck,0.058431,C,0.853157,0.794726
5,Embarked,0.028445,Cherbourg,0.881602,0.853157
6,No_of_relatives_on_board,0.010288,1,0.89189,0.881602
7,No_of_siblings_plus_spouses_on_board,0.009293,1,0.901183,0.89189
8,Age,-0.002969,38,0.898214,0.901183
9,No_of_parents_plus_children_on_board,0.002655,0,0.900869,0.898214


In [51]:
# Check that the partial-dependence result for "Fare" is a pdpbox PDPIsolate
# object (the recorded run returned True).
# NOTE(review): `pdpbox` is never imported explicitly in this notebook; it
# presumably arrives via one of the star imports or leftover kernel state --
# confirm, and add an explicit import if this must survive a fresh kernel.
isinstance(explainer.get_pdp_result("Fare"), pdpbox.pdp.PDPIsolate)

True

In [45]:
# Check that the contribution summary for index 0 is a pandas DataFrame
# (the recorded run returned True).
isinstance(explainer.contrib_summary_df(0), pd.DataFrame)

True

In [15]:
import plotly.graph_objs as go

In [18]:
# Check that the confusion-matrix figure stored in `fig` is a plotly
# go.Figure (the recorded run returned True).
isinstance(fig, go.Figure)

True

In [9]:
# Dashboard sections requested for this explainer, all switched on.
dashboard_tabs = {
    "model_summary": True,
    "contributions": True,
    "shap_dependence": True,
    "shap_interaction": True,
    "decision_trees": True,
}
db = ExplainerDashboard(explainer, **dashboard_tabs)

# Start serving the dashboard on local port 8052.
db.run(8052)

Running Model Explainer on http://localhost:8052
 * Serving Flask app "explainerdashboard.dashboards" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8052/ (Press CTRL+C to quit)
127.0.0.1 - - [15/May/2020 12:51:08] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [15/May/2020 12:51:09] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [15/May/2020 12:51:09] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [15/May/2020 12:51:09] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [15/May/2020 12:51:09] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [15/May/2020 12:51:09] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [15/May/2020 12:51:09] "POST /_dash-update-component HTTP/1.1" 204 -
127.0.0.1 - - [15/May/2020 12:51:09] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [15/May/2020 12:51:09] "POST /_dash-update-component HTTP/1.1" 204 -
127.0.0.1 - - [15/May/2020 12:51:09] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [15/May/2020 12:51:09] "POST /_dash-update-component HTTP/1.1" 204 -
127.0.0.1 - - [15/May/2020 12:51:09] "POST /_dash-update-component

# RandomForestRegressor example

In [54]:
# Load the Titanic fare-prediction train/test split and the passenger names.
X_train, y_train, X_test, y_test = titanic_fare()
train_names, test_names = titanic_names()

# Pin the seed: a random forest is stochastic, so without random_state the
# fitted model (and every explanation derived from it) changes on each
# Restart & Run All.
model = RandomForestRegressor(n_estimators=50, max_depth=5, random_state=42)
model.fit(X_train, y_train)

# Wrap the fitted regressor and the held-out split in an explainer; r2_score is
# passed positionally as in the original call, and the target unit is "$".
explainer = RandomForestRegressionBunch(model, X_test, y_test, r2_score,
                               cats=['Sex', 'Deck', 'Embarked'],
                               idxs=test_names, units="$")

In [68]:
# Interaction plot for the one-hot column "Sex_female" (a column name that
# appears in the X_train.columns output below), without the cats=True flag.
explainer.plot_interactions("Sex_female")

In [57]:
# List the training feature columns; in the recorded output the categorical
# features appear as one-hot columns such as Sex_female, Deck_A and
# Embarked_Cherbourg.
X_train.columns

Index(['Survived', 'No_of_relatives_on_board', 'Age', 'PassengerClass',
       'No_of_siblings_plus_spouses_on_board',
       'No_of_parents_plus_children_on_board', 'Sex_female', 'Sex_male',
       'Sex_nan', 'Deck_A', 'Deck_B', 'Deck_C', 'Deck_D', 'Deck_E', 'Deck_F',
       'Deck_G', 'Deck_T', 'Deck_Unkown', 'Embarked_Cherbourg',
       'Embarked_Queenstown', 'Embarked_Southamption', 'Embarked_Unknown'],
      dtype='object')

In [62]:
# Draw a random index from the explainer and convert it to int.
# NOTE(review): because of the int() wrapper the displayed type is always
# `int`; this only shows the value is convertible, not what type
# random_index() itself returns.
type(int(explainer.random_index()))

int

In [None]:
# Dashboard sections requested for this explainer, all switched on.
dashboard_tabs = {
    "model_summary": True,
    "contributions": True,
    "shap_dependence": True,
    "shap_interaction": True,
    "decision_trees": True,
}
db = ExplainerDashboard(explainer, **dashboard_tabs)

# Start serving the dashboard on local port 8052.
db.run(8052)

# XGBClassifier example

In [None]:
# Titanic survival split plus the passenger names used as row identifiers.
X_train, y_train, X_test, y_test = titanic_survive()
train_names, test_names = titanic_names()

# XGBoost classifier with library-default hyperparameters.
xgb_model = XGBClassifier()
xgb_model.fit(X_train, y_train)

# Keyword options for the generic classifier explainer.
explainer_options = dict(
    shap='tree',
    cats=['Sex', 'Deck', 'Embarked'],
    idxs=test_names,  # names of passengers
    labels=['Not survived', 'Survived'],
)
explainer = ClassifierBunch(xgb_model, X_test, y_test, roc_auc_score, **explainer_options)

# Dashboard sections for this model; the SHAP interaction section is off here.
dashboard_options = dict(
    model_summary=True,
    contributions=True,
    shap_dependence=True,
    shap_interaction=False,
)
db = ExplainerDashboard(explainer, **dashboard_options)

# Start serving the dashboard on local port 8052.
db.run(8052)