# Dalexの練習

In [6]:
import dalex as dx

import pandas as pd
import numpy as np

from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

import warnings
warnings.filterwarnings('ignore')

# データセット準備

In [7]:
data = dx.datasets.load_titanic()

X = data.drop(columns='survived')
y = data.survived

In [8]:
data.head(10)

Unnamed: 0,gender,age,class,embarked,fare,sibsp,parch,survived
0,male,42.0,3rd,Southampton,7.11,0,0,0
1,male,13.0,3rd,Southampton,20.05,0,2,0
2,male,16.0,3rd,Southampton,20.05,1,1,0
3,female,39.0,3rd,Southampton,20.05,1,1,1
4,female,16.0,3rd,Southampton,7.13,0,0,1
5,male,25.0,3rd,Southampton,7.13,0,0,1
6,male,30.0,2nd,Cherbourg,24.0,1,0,0
7,female,28.0,2nd,Cherbourg,24.0,1,0,1
8,male,27.0,3rd,Cherbourg,18.1509,0,0,1
9,male,20.0,3rd,Southampton,7.1806,0,0,1


# 前処理、学習

In [13]:
numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)

categorical_features = ['gender', 'class',  'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)

preprocessor = ColumnTransformer(
    transformers=[
      ('num', numerical_transformer, numerical_features), 
        ('cat', categorical_transformer, categorical_features)
    ]
)

classifier = MLPClassifier(hidden_layer_sizes=(150,100,50), max_iter=500, random_state=0)

clf = Pipeline(steps=[('preprocessor', preprocessor),
                      ('classifier', classifier)])

In [14]:
clf.fit(X, y)

Pipeline(memory=None,
         steps=[('preprocessor',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('num',
                                                  Pipeline(memory=None,
                                                           steps=[('imputer',
                                                                   SimpleImputer(add_indicator=False,
                                                                                 copy=True,
                                                                                 fill_value=None,
                                                                                 missing_values=nan,
                                                                                 strategy='median',
                                                             

# DalexのExplainerに学習済みモデルを入れる

In [16]:
exp = dx.Explainer(clf, X, y, label = 'Titanic MLP Pipline')

Preparation of a new explainer is initiated

  -> data              : 2207 rows 7 cols
  -> target variable   : Argument 'y' was a pandas.Series. Converted to a numpy.ndarray.
  -> target variable   : 2207 values
  -> model_class       : sklearn.pipeline.Pipeline (default)
  -> label             : Titanic MLP Pipline
  -> predict function  : <function yhat_proba_default at 0x1a27ccc9e0> will be used (default)
  -> predicted values  : min = 2.7205375671318314e-06, mean = 0.3367353380775521, max = 0.9999999997383016
  -> residual function : difference between y and yhat (default)
  -> residuals         : min = -0.921242269825129, mean = -0.01457856417632874, max = 0.9751663054089054
  -> model_info        : package sklearn

A new explainer has been created!


# Dalexで可視化

In [23]:
# johnデータ作成
john = pd.DataFrame({'gender':['male'],
                    'age':[25],
                    'class':['1st'],
                    'embarked':['Southanmton'],
                    'fare':[72],
                    'sibsp':[0],
                    'parch':[0]},
                   index=['John'])

In [24]:
# maryデータ作成
mary = pd.DataFrame({'gender': ['female'],
                     'age': [35],
                     'class': ['3st'],
                     'embarked': ['Cherbourg'],
                     'fare': [25],
                     'sibsp': [0],
                     'parch': [0]},
                     index = ['Mary'])

In [25]:
# johnデータで予測
exp.predict(john)

array([0.09901118])

In [26]:
# maryデータで予測
exp.predict(mary)

array([0.97830209])

# SHAPを確認

In [27]:
sh_john = exp.predict_parts(john, type='shap', B=10)
sh_mary = exp.predict_parts(mary, type='shap', B=10)

In [28]:
# johnの場合
sh_john.result.label = 'John'
sh_john.result.loc[sh_john.result.B == 0, ]

Unnamed: 0,variable,contribution,variable_name,variable_value,sign,label,B
3,age = 25.0,-0.010024,age,25,-1.0,John,0
6,class = 1st,-0.103544,class,1st,-1.0,John,0
1,embarked = Southanmton,-0.003777,embarked,Southanmton,-1.0,John,0
5,fare = 72.0,-0.064419,fare,72,-1.0,John,0
4,gender = male,-0.041953,gender,male,-1.0,John,0
0,parch = 0.0,-0.006476,parch,0,-1.0,John,0
2,sibsp = 0.0,-0.007531,sibsp,0,-1.0,John,0


In [32]:
#john可視化
sh_john.plot(bar_width = 20)

In [30]:
# maryの場合
sh_mary.result.label = 'Mary'
sh_mary.result.loc[sh_mary.result.B == 0, ]

Unnamed: 0,variable,contribution,variable_name,variable_value,sign,label,B
1,age = 35.0,0.069448,age,35,1.0,Mary,0
4,class = 3st,0.174596,class,3st,1.0,Mary,0
2,embarked = Cherbourg,0.078625,embarked,Cherbourg,1.0,Mary,0
3,fare = 25.0,0.051631,fare,25,1.0,Mary,0
6,gender = female,0.121247,gender,female,1.0,Mary,0
5,parch = 0.0,0.035425,parch,0,1.0,Mary,0
0,sibsp = 0.0,0.110595,sibsp,0,1.0,Mary,0


In [33]:
sh_mary.plot(bar_width = 20)