In [5]:
# Sklearn imports
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

# Tensorflow import
import tensorflow as tf
import os

In [6]:
from model_interfaces import base_model
import data, dice
import model as dice_model

In [7]:
# Read the adult dataset from its CSV file (path is relative to the working directory).
dataset = pd.read_csv(
    'csv/adultdataset.csv',
)

In [8]:
# Preview the first five rows of the loaded frame.
dataset.head(n=5)

Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,28,Private,Bachelors,Single,White-Collar,White,Female,60,0
1,30,Self-Employed,Assoc,Married,Professional,White,Male,65,1
2,32,Private,Some-college,Married,White-Collar,White,Male,50,0
3,20,Private,Some-college,Single,Service,White,Female,35,0
4,41,Self-Employed,Some-college,Married,White-Collar,White,Male,50,0


In [9]:
# Wrap the dataframe in the project's Data helper, declaring which columns are
# continuous and which column holds the outcome.
d = data.Data(
    dataframe=dataset,
    continuous_features=['age', 'hours_per_week'],
    outcome_name='income',
)

# Separate the label from the features, then hold out 20% of the rows for
# testing. The split is stratified on the label and uses a fixed seed.
target = dataset["income"]
datasetX = dataset.drop(columns="income")
x_train, x_test, y_train, y_test = train_test_split(
    datasetX,
    target,
    test_size=0.2,
    random_state=0,
    stratify=target,
)

numerical = ["age", "hours_per_week"]
categorical = x_train.columns.difference(numerical)

# Data preprocessing: one-hot encode the categorical columns. With
# handle_unknown='ignore', categories not seen during fit are encoded as all
# zeros instead of raising an error at prediction time.
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

# ColumnTransformer drops every column that is not listed (remainder='drop' is
# the default), so the numerical columns must be listed explicitly or the
# classifier never sees age / hours_per_week. They are passed through
# unchanged; no scaler is applied because tree ensembles do not need one.
transformations = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical),
        ('cat', categorical_transformer, categorical)])

# Append classifier to preprocessing pipeline.
# Now we have a full prediction pipeline.
# random_state is fixed so the fitted forest is reproducible across runs,
# matching the seeded train/test split.
clf = Pipeline(steps=[('preprocessor', transformations),
                      ('classifier', RandomForestClassifier(random_state=0))])
# Pipeline.fit returns the pipeline itself, so `model` and `clf` are the same object.
model = clf.fit(x_train, y_train)

# Wrap the fitted pipeline for the sklearn backend and build an explainer
# that uses the "random" method.
m = dice_model.Model(backend="sklearn", model=model)
exp = dice.Dice(d, m, method="random")

# Request two counterfactuals of the opposite class for the first training
# row, then display them with only the changed values shown.
e1 = exp.generate_counterfactuals(
    x_train[0:1],
    total_CFs=2,
    desired_class="opposite",
)
e1.visualize_as_dataframe(show_only_changes=True)

### Explaining a TensorFlow model

In [10]:
# Suppress deprecation warnings from TF
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Backend name is "TF" followed by the first character of the installed
# TensorFlow version string, e.g. "TF2".
backend = 'TF{}'.format(tf.__version__[0])

# The model path is assembled by hand here rather than through
# helpers.get_adult_income_modelpath(backend=backend).
# TF backends use a '.h5' file; any other backend would use '.pth'.
if 'TF' in backend:
    model_ext = '.h5'
else:
    model_ext = '.pth'
ML_modelpath = os.path.join('utils', 'sample_trained_models', 'adult' + model_ext)

# Step 2: point the Model wrapper at the saved model file
m = dice_model.Model(
    model_path=ML_modelpath,
    backend=backend,
    model_type='regressor',
)
# Step 3: initiate DiCE
exp = dice.Dice(d, m)

In [11]:
# Query instance given as a dictionary (a dataframe is the other accepted form).
# Keys are feature names; values are the corresponding feature values.
query_instance = {
    'age': 29,
    'workclass': 'Private',
    'education': 'HS-grad',
    'marital_status': 'Married',
    'occupation': 'Blue-Collar',
    'race': 'White',
    'gender': 'Female',
    'hours_per_week': 38,
}

In [16]:
# Generate ten counterfactuals for the query instance; verbose=True prints the
# loss at regular steps while searching.
# NOTE(review): the lower bound of desired_range (0.29) is below the query's
# own predicted value in the recorded output (0.293) -- confirm this range is
# the intended target.
dice_exp = exp.generate_counterfactuals(
    query_instance,
    total_CFs=10,
    desired_class="opposite",
    desired_range=[0.29, 1.0],
    verbose=True,
)

step 1,  loss=0.460994
step 51,  loss=-0.590493
step 101,  loss=-0.678048
step 151,  loss=-0.616213
step 201,  loss=-0.692601
step 251,  loss=-0.623632
step 301,  loss=-0.623322
step 351,  loss=-0.695145
step 401,  loss=-0.695262
step 451,  loss=-0.69751
step 501,  loss=-0.626631
step 551,  loss=-0.697537
step 601,  loss=-0.698782
step 651,  loss=-0.698854
step 701,  loss=-0.627163
step 751,  loss=-0.626643
step 801,  loss=-0.698008
step 851,  loss=-0.698464
step 901,  loss=-0.698441
step 951,  loss=-0.629816
step 1001,  loss=-0.699571
step 1051,  loss=-0.701942
step 1101,  loss=-0.701285
step 1151,  loss=-0.63192
step 1201,  loss=-0.703
step 1251,  loss=-0.702648
step 1301,  loss=-0.702421
step 1351,  loss=-0.631179
step 1401,  loss=-0.700395
step 1451,  loss=-0.702634
step 1501,  loss=-0.701768
step 1551,  loss=-0.701198
step 1601,  loss=-0.631175
step 1651,  loss=-0.70319
step 1701,  loss=-0.700754
step 1751,  loss=-0.702376
step 1801,  loss=-0.631388
step 1851,  loss=-0.702084
step

In [17]:
# Display the query and its counterfactuals as tables, showing only the values that changed
dice_exp.visualize_as_dataframe(
    show_only_changes=True,
)

Query instance (original outcome: 0)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,29.0,Private,HS-grad,Married,Blue-Collar,White,Female,38.0,0.293



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,32.0,Self-Employed,-,-,Professional,-,-,8.0,0.2906118333339691
1,27.0,-,-,-,Sales,Other,-,56.0,0.4982501864433288
2,17.0,Self-Employed,Some-college,-,White-Collar,-,-,40.0,0.4852104783058166
3,60.0,-,-,-,Other/Unknown,Other,Male,34.0,0.4238490164279938
4,90.0,-,School,-,-,-,-,43.0,0.3169827461242676
5,23.0,-,Masters,-,-,-,-,26.0,0.3779252767562866
6,48.0,-,Prof-school,Widowed,-,-,Male,16.0,0.2922870218753814
7,36.0,Government,Doctorate,Widowed,Service,-,-,39.0,0.6122257113456726
8,37.0,Other/Unknown,-,-,-,-,Male,49.0,0.4306431710720062
9,26.0,-,-,-,-,-,-,88.0,0.4538082778453827
