# Imports

In [127]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

In [None]:
from explainerbunch.explainers import *
from explainerbunch.dashboards import *

# load data

In [7]:
d = pd.read_csv("titanic_data.csv")

In [8]:
d.shape
d.head().T

(891, 24)

Unnamed: 0,0,1,2,3,4
Survived,0,1,1,1,0
Fare,7.25,71.2833,7.925,53.1,8.05
Familysize,1,1,0,1,0
Age,22,38,26,35,35
Pclass,3,1,3,1,3
SibSp,1,1,0,1,0
Parch,0,0,0,0,0
Sex_female,0,1,1,1,0
Sex_male,1,0,0,0,1
Sex_nan,0,0,0,0,0


# Generate train and test set:

In [9]:
test_idxs = d.sample(200).index

X_train = d.drop(['Survived', 'Name'], axis=1)[~d.index.isin(test_idxs)]
y_train = d['Survived'][~d.index.isin(test_idxs)]
X_test = d.drop(['Survived', 'Name'], axis=1)[d.index.isin(test_idxs)]
y_test = d['Survived'][d.index.isin(test_idxs)]

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((691, 22), (691,), (200, 22), (200,))

# Save onehotencoded vars en names:

In [10]:

test_names = d['Name'][d.index.isin(test_idxs)]

# Fit Random Forest model:

In [90]:
model = RandomForestClassifier(n_estimators=100)
model.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

# Build explainer object:

In [137]:
len(X_test)

200

In [141]:
explainer.precision_df(quantiles=1)

Unnamed: 0,p_min,p_max,p_avg,bin_width,precision,count
0,0.0,1.0,0.5,1.0,0.345,200


In [143]:
explainer = RandomForestClassifierBunch(model, X_test, y_test, roc_auc_score, 
                                                       idxs=d['Name'][d.index.isin(test_idxs)], #names of passengers 
                                                        cats=['Sex', 'Cabin', 'Embarked'],
                                                       labels=['Not survived', 'Survived'])

# Build dashboard object:

In [160]:
isinstance(explainer.y[0], int) or isinstance(explainer.y[0], np.int64) 

True

In [167]:
db = ClassifierDashboard(explainer,
                        contributions=True,
                        shap_dependence=True,
                        shap_interaction=True,
                        classifier_summary=True)

In [None]:
db.run(8052)

Running Model Explainer on http://localhost:8052
 * Serving Flask app "explainerbunch.dashboards" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8052/ (Press CTRL+C to quit)


In [14]:
db2 = RandomForestDashboard(explainer,
                        contributions=True,
                        shap_dependence=True,
                        shap_interaction=True,
                        shadow_trees=True)

In [18]:
db3 = RandomForestClassifierDashboard(explainer,
                        contributions=True,
                        shap_dependence=True,
                        shap_interaction=True,
                        shadow_trees=True,
                        classifier_summary=True)

Calculating shap values...
Generating shap TreeExplainer...


# Run dashboard:

In [None]:
db.run(8052)