In [1]:
import tensorflow as tf
# Level 40 is logging.ERROR, so only errors and above are emitted by the TF logger.
tf.get_logger().setLevel(40) # suppress deprecation messages
# Switch TensorFlow to its TF1-style behaviour for the rest of the notebook.
# NOTE(review): presumably required by the CounterfactualProto explainer used below — confirm against the alibi docs.
tf.compat.v1.disable_v2_behavior() 
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.preprocessing import OneHotEncoder
from time import time
from alibi.datasets import fetch_adult
from alibi.explainers import CounterfactualProto
from alibi.utils import ohe_to_ord, ord_to_ohe

In [20]:
# Fetch the Adult dataset and unpack the pieces used by the rest of the notebook.
adult = fetch_adult()
data, target = adult.data, adult.target
print(data[:2])  # peek at the first two rows
feature_names, target_names = adult.feature_names, adult.target_names
# Maps a column index of `data` to the list of category labels for that column.
category_map_tmp = adult.category_map
print(category_map_tmp)

[[  39    7    1    1    1    1    4    1 2174    0   40    9]
 [  50    6    1    0    8    0    4    1    0    0   13    9]]
{1: ['?', 'Federal-gov', 'Local-gov', 'Never-worked', 'Private', 'Self-emp-inc', 'Self-emp-not-inc', 'State-gov', 'Without-pay'], 2: ['Associates', 'Bachelors', 'Doctorate', 'Dropout', 'High School grad', 'Masters', 'Prof-School'], 3: ['Married', 'Never-Married', 'Separated', 'Widowed'], 4: ['?', 'Admin', 'Blue-Collar', 'Military', 'Other', 'Professional', 'Sales', 'Service', 'White-Collar'], 5: ['Husband', 'Not-in-family', 'Other-relative', 'Own-child', 'Unmarried', 'Wife'], 6: ['Amer-Indian-Eskimo', 'Asian-Pac-Islander', 'Black', 'Other', 'White'], 7: ['Female', 'Male'], 11: ['?', 'British-Commonwealth', 'China', 'Euro_1', 'Euro_2', 'Latin-America', 'Other', 'SE-Asia', 'South-America', 'United-States', 'Yugoslavia']}


In [3]:
def set_seed(s=0):
    """Seed the NumPy and TensorFlow global random number generators with ``s``."""
    for seed_fn in (np.random.seed, tf.random.set_seed):
        seed_fn(s)

In [4]:
# Seed first so the shuffle below is reproducible.
set_seed()
# Append the labels as a last column so features and labels are permuted together.
data_perm = np.random.permutation(np.c_[data, target])
# Split the shuffled matrix back into features (all but last column) and labels (last column).
X, y = data_perm[:, :-1], data_perm[:, -1]

In [11]:
# The first `idx` shuffled rows form the training set; the remainder is the test set.
idx = 30000
y_train = y[:idx]
y_test = y[idx:]

In [12]:
# Put the categorical columns (1-7 and 11) first and the numerical columns (0 and 8-10) last.
# NOTE: X is rebuilt from itself, so running this cell a second time permutes the columns again.
X = np.concatenate(
    [X[:, 1:8], X[:, 11:12], X[:, 0:1], X[:, 8:11]],
    axis=1,
)

Adjust feature_names and category_map as well:

In [13]:
# Apply the same column order used for X: original positions 1-7, 11, 0, 8-10.
# NOTE: like the reordering of X, this rebinds the name from itself and is not idempotent.
feature_names = [feature_names[j] for j in (*range(1, 8), 11, 0, *range(8, 11))]
print(feature_names)

['Workclass', 'Education', 'Marital Status', 'Occupation', 'Relationship', 'Race', 'Sex', 'Country', 'Age', 'Capital Gain', 'Capital Loss', 'Hours per week']


In [22]:
# Re-key the category lists by their position (0, 1, 2, ...) instead of the
# original column index, keeping the iteration order of category_map_tmp.
category_map = dict(enumerate(category_map_tmp.values()))
print(category_map)

{0: ['?', 'Federal-gov', 'Local-gov', 'Never-worked', 'Private', 'Self-emp-inc', 'Self-emp-not-inc', 'State-gov', 'Without-pay'], 1: ['Associates', 'Bachelors', 'Doctorate', 'Dropout', 'High School grad', 'Masters', 'Prof-School'], 2: ['Married', 'Never-Married', 'Separated', 'Widowed'], 3: ['?', 'Admin', 'Blue-Collar', 'Military', 'Other', 'Professional', 'Sales', 'Service', 'White-Collar'], 4: ['Husband', 'Not-in-family', 'Other-relative', 'Own-child', 'Unmarried', 'Wife'], 5: ['Amer-Indian-Eskimo', 'Asian-Pac-Islander', 'Black', 'Other', 'White'], 6: ['Female', 'Male'], 7: ['?', 'British-Commonwealth', 'China', 'Euro_1', 'Euro_2', 'Latin-America', 'Other', 'SE-Asia', 'South-America', 'United-States', 'Yugoslavia']}


In [15]:
n_categories = len(category_map)
# Column index -> number of distinct values observed in that column of X,
# for the first `n_categories` (categorical) columns.
cat_vars_ord = {col: len(np.unique(X[:, col])) for col in range(n_categories)}
print(cat_vars_ord)


{0: 9, 1: 7, 2: 4, 3: 9, 4: 6, 5: 5, 6: 2, 7: 11}


In [17]:
# Encode once and reuse the result: element 0 is the one-hot encoded data,
# element 1 is the category map re-indexed for the one-hot layout.
ohe_result = ord_to_ohe(X, cat_vars_ord)
print(ohe_result[0][:2])
# First one-hot column of each categorical variable -> number of categories.
cat_vars_ohe = ohe_result[1]
print(cat_vars_ohe)

[[ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.
   1.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.
   0.  0.  0.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0. 27.
   0.  0. 44.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  1.
   0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.
   0.  0.  0.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0. 27.
   0.  0. 40.]]
{0: 9, 9: 7, 16: 4, 20: 9, 29: 6, 35: 5, 40: 2, 42: 11}


In [23]:
# The last four columns hold the numerical features.
X_num = X[:, -4:].astype(np.float32, copy=False)
# Per-column extrema, computed over every row of X.
xmin = X_num.min(axis=0)
xmax = X_num.max(axis=0)
rng = (-1., 1.)
# Min-max scale each column to [0, 1], then stretch/shift to [rng[0], rng[1]].
X_num_unit = (X_num - xmin) / (xmax - xmin)
X_num_scaled = X_num_unit * (rng[1] - rng[0]) + rng[0]

Apply OHE to categorical variables:

In [24]:
# Everything except the last four (numerical) columns is categorical.
X_cat = X[:, :-4].copy()
# The dense-output switch is called `sparse_output` in newer scikit-learn releases
# and `sparse` in older ones; try the new name first and fall back so the cell
# runs on both.
try:
    ohe = OneHotEncoder(categories='auto', sparse_output=False)
except TypeError:  # older scikit-learn without `sparse_output`
    ohe = OneHotEncoder(categories='auto', sparse=False)
ohe.fit(X_cat)
X_cat_ohe = ohe.transform(X_cat)
print(X_cat_ohe[:2])

[[0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.
  0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 1. 0.]]


Combine numerical and categorical data:

In [25]:
# One-hot categorical columns first, scaled numerical columns last, stored as float32.
X = np.concatenate((X_cat_ohe, X_num_scaled), axis=1).astype(np.float32, copy=False)
# Same split point as used for the labels.
X_train = X[:idx, :]
X_test = X[idx:, :]
print(X_train.shape, X_test.shape)

(30000, 57) (2561, 57)


In [52]:
def fwd(n_features=57, n_classes=2):
    """Build and compile the feed-forward classifier.

    The network consists of three hidden blocks, each a ``Dense(60, relu)``
    layer followed by ``Dropout(0.2)``, and a softmax output layer.

    Parameters
    ----------
    n_features
        Width of the input layer. Defaults to 57, the value that used to be
        hard-coded, so ``fwd()`` builds the same network as before.
    n_classes
        Number of units in the softmax output layer. Defaults to 2.

    Returns
    -------
    The compiled Keras ``Model`` (categorical cross-entropy loss, Adam
    optimizer, accuracy metric).
    """
    x_in = Input(shape=(n_features,))
    x = x_in
    # Layers are created in the same Dense -> Dropout order as an unrolled
    # version, so layer naming and seeded initialisation are unaffected.
    for _ in range(3):
        x = Dense(60, activation='relu')(x)
        x = Dropout(.2)(x)
    x_out = Dense(n_classes, activation='softmax')(x)
    nn = Model(inputs=x_in, outputs=x_out)
    nn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return nn

In [53]:
# Re-seed so that weight initialisation and training are reproducible.
set_seed()
nn = fwd()
nn.summary()
# Labels are one-hot encoded to match the two-unit softmax output.
nn.fit(
    x=X_train,
    y=to_categorical(y_train),
    batch_size=256,
    epochs=30,
    verbose=0,
)

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 57)]              0         
                                                                 
 dense_4 (Dense)             (None, 60)                3480      
                                                                 
 dropout_3 (Dropout)         (None, 60)                0         
                                                                 
 dense_5 (Dense)             (None, 60)                3660      
                                                                 
 dropout_4 (Dropout)         (None, 60)                0         
                                                                 
 dense_6 (Dense)             (None, 60)                3660      
                                                                 
 dropout_5 (Dropout)         (None, 60)                0   

<keras.callbacks.History at 0x1120a250400>

In [40]:
# Pick the first test row and add a leading batch axis, giving shape (1, n_features).
# NOTE: this rebinds X, which previously held the full feature matrix.
X = X_test[0][np.newaxis, :]

In [55]:
# Settings passed to CounterfactualProto in the cells below.
shape = X.shape
beta = .01
c_init, c_steps = 1., 5
max_iterations = 500
rng = (-1., 1.) # scale features between -1 and 1
# NOTE(review): the range arrays take their width from the raw `data` array rather
# than from the one-hot encoded X — confirm this is the layout the explainer expects
# when ohe=True.
rng_shape = (1,) + data.shape[1:]
# (lower bounds, upper bounds): one float32 array per end of `rng`.
feature_range = tuple((np.ones(rng_shape) * bound).astype(np.float32) for bound in rng)


In [42]:
def set_seed(s=0):
    """Reset the TensorFlow and NumPy global seeds to ``s``.

    Note: this repeats the ``set_seed`` definition from earlier in the
    notebook; the name is simply rebound to an equivalent function.
    """
    tf.random.set_seed(s)
    np.random.seed(s)

In [43]:
set_seed()
# Explainer built directly on the Keras model, with one-hot encoded categorical variables.
cf = CounterfactualProto(
    nn,
    shape,
    beta=beta,
    cat_vars=cat_vars_ohe,
    ohe=True,
    max_iterations=max_iterations,
    feature_range=feature_range,
    c_init=c_init,
    c_steps=c_steps,
)

In [44]:
# Fit the explainer on the training data using the 'abdm' distance type;
# the trailing semicolon suppresses the cell's output.
cf.fit(
    X_train,
    d_type='abdm',
    disc_perc=[25, 50, 75],
);

In [45]:
# Printing the explainer shows its metadata, including the parameters it was built with.
print(cf)

CounterfactualProto(meta={
  'name': 'CounterfactualProto',
  'type': ['blackbox', 'tensorflow', 'keras'],
  'explanations': ['local'],
  'params': {
              'kappa': 0.0,
              'beta': 0.01,
              'gamma': 0.0,
              'theta': 0.0,
              'cat_vars': {
                            0: 9,
                            9: 7,
                            16: 4,
                            20: 9,
                            29: 6,
                            35: 5,
                            40: 2,
                            42: 11}
                          ,
              'ohe': True,
              'use_kdtree': False,
              'learning_rate_init': 0.01,
              'max_iterations': 500,
              'c_init': 1.0,
              'c_steps': 5,
              'eps': (0.001, 0.001),
              'clip': (-1000.0, 1000.0),
              'update_num_grad': 1,
              'write_dir': None,
              'feature_range': (array([[-1., -1., -1., -1.

In [46]:
# Search for a counterfactual for the selected instance X.
explanation = cf.explain(X)
# List the top-level fields available in the explanation's data dictionary.
print(explanation.data.keys())

dict_keys(['cf', 'all', 'orig_class', 'orig_proba', 'id_proto'])


In [51]:
# List the fields stored for the counterfactual itself (the "cf" entry).
print(explanation.data["cf"].keys())

dict_keys(['X', 'class', 'proba', 'grads_graph', 'grads_num'])


In [26]:
def describe_instance(X, explanation, eps=1e-2):
    """Print the original and counterfactual predictions and the features that differ.

    Relies on the notebook-level names ``target_names``, ``cat_vars_ohe``,
    ``cat_vars_ord``, ``category_map`` and ``feature_names``.

    Parameters
    ----------
    X
        One-hot encoded instance that was explained; only its first row is inspected.
    explanation
        Result of ``cf.explain``; its ``orig_class``, ``orig_proba`` and ``cf``
        attributes are read.
    eps
        A numerical feature is reported only if its absolute change exceeds ``eps``.

    Returns
    -------
    None. Everything is written to stdout.
    """
    print('Original instance: {} -- proba: {}'.format(target_names[explanation.orig_class],
           explanation.orig_proba[0]))
    cf_data = explanation.cf
    if cf_data is None:
        # Without this guard the subscripting below raises TypeError when the
        # explanation holds no counterfactual.
        print('No counterfactual instance found.')
        return
    print('Counterfactual instance: {} -- proba: {}'.format(target_names[cf_data['class']],
    cf_data['proba'][0]))
    print('\nCounterfactual perturbations...')
    print('\nCategorical:')
    # Convert both instances from one-hot back to ordinal encoding so that each
    # categorical variable occupies a single column.
    X_orig_ord = ohe_to_ord(X, cat_vars_ohe)[0]
    X_cf_ord = ohe_to_ord(cf_data['X'], cat_vars_ohe)[0]
    for i, categories in enumerate(category_map.values()):
        cat_orig = categories[int(X_orig_ord[0, i])]
        cat_cf = categories[int(X_cf_ord[0, i])]
        if cat_orig != cat_cf:
            print('{}: {} --> {}'.format(feature_names[i], cat_orig, cat_cf))
    print('\nNumerical:')
    # Numerical features are every column after the categorical ones; derive the
    # range from the number of categorical variables instead of assuming exactly
    # four trailing columns, so the slice and the column index always agree.
    n_keys = len(cat_vars_ord)
    for col in range(n_keys, X_orig_ord.shape[1]):
        if np.abs(X_cf_ord[0, col] - X_orig_ord[0, col]) > eps:
            print('{}: {:.2f} --> {:.2f}'.format(feature_names[col],
                                   X_orig_ord[0, col],
                                   X_cf_ord[0, col]))

In [27]:
# Summarise which features changed between the instance and its counterfactual.
describe_instance(X, explanation)

Original instance: <=50K -- proba: [0.75982 0.24018]
Counterfactual instance: >50K -- proba: [0.42440462 0.5755954 ]

Counterfactual perturbations...

Categorical:
Education: Associates --> Bachelors

Numerical:


In [28]:
# Re-seed, then refit the existing explainer with the 'mvdm' distance type
# (instead of 'abdm') and explain the same instance again.
set_seed()
cf.fit(X_train, d_type='mvdm')
explanation = cf.explain(X)
describe_instance(X, explanation)

Original instance: <=50K -- proba: [0.75982 0.24018]
Counterfactual instance: >50K -- proba: [0.42440462 0.5755954 ]

Counterfactual perturbations...

Categorical:
Education: Associates --> Bachelors

Numerical:


In [29]:
# Settings for the next explainer, which is built with use_kdtree=True.
use_kdtree = True
theta = 10. # weight of prototype loss term

In [30]:
set_seed()
# Explain test row 7 (with a leading batch axis added).
X = X_test[7][np.newaxis, :]
# Same setup as before plus the prototype term (theta) and use_kdtree.
cf = CounterfactualProto(
    nn,
    shape,
    beta=beta,
    theta=theta,
    cat_vars=cat_vars_ohe,
    ohe=True,
    use_kdtree=use_kdtree,
    max_iterations=max_iterations,
    feature_range=feature_range,
    c_init=c_init,
    c_steps=c_steps,
)
cf.fit(X_train, d_type='abdm')
explanation = cf.explain(X)
describe_instance(X, explanation)

No encoder specified. Using k-d trees to represent class prototypes.


Original instance: <=50K -- proba: [0.54038644 0.4596135 ]
Counterfactual instance: >50K -- proba: [0.49809897 0.50190103]

Counterfactual perturbations...

Categorical:

Numerical:
Age: -0.53 --> -0.50


In [31]:
def ae_model(n_features=57, latent_dim=10):
    """Build an autoencoder together with its standalone encoder and decoder.

    The encoder maps ``n_features`` inputs through Dense layers of 60, 30 and
    15 ReLU units to a linear ``latent_dim``-dimensional code; the decoder
    mirrors it (15, 30, 60 ReLU units) back to a linear ``n_features`` output.

    Parameters
    ----------
    n_features
        Width of the autoencoder input and output. Defaults to 57, the value
        that used to be hard-coded, so ``ae_model()`` is unchanged.
    latent_dim
        Size of the encoded representation. Defaults to 10.

    Returns
    -------
    Tuple ``(autoencoder, encoder, decoder)``; only the autoencoder is
    compiled (Adam optimizer, MSE loss).
    """
    # encoder
    x_in = Input(shape=(n_features,))
    x = x_in
    for units in (60, 30, 15):
        x = Dense(units, activation='relu')(x)
    encoded = Dense(latent_dim, activation=None)(x)
    encoder = Model(x_in, encoded)
    # decoder
    dec_in = Input(shape=(latent_dim,))
    x = dec_in
    for units in (15, 30, 60):
        x = Dense(units, activation='relu')(x)
    decoded = Dense(n_features, activation=None)(x)
    decoder = Model(dec_in, decoded)
    # autoencoder = encoder + decoder
    x_out = decoder(encoder(x_in))
    autoencoder = Model(x_in, x_out)
    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder, encoder, decoder

In [32]:
set_seed()
ae, enc, dec = ae_model()
ae.summary()
# Train the autoencoder to reconstruct its own input; the test split is used as validation data.
ae.fit(
    X_train,
    X_train,
    batch_size=128,
    epochs=100,
    validation_data=(X_test, X_test),
    verbose=0,
)

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 57)]              0         
                                                                 
 model_1 (Functional)        (None, 10)                5935      
                                                                 
 model_2 (Functional)        (None, 57)                5982      
                                                                 
Total params: 11,917
Trainable params: 11,917
Non-trainable params: 0
_________________________________________________________________


`Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.


<keras.callbacks.History at 0x20fc38a15b0>

In [33]:
# Values passed as beta/gamma/theta to the CounterfactualProto built in the next cell.
beta = .1 # L1
gamma = 10. # autoencoder
theta = .1 # prototype

In [34]:
 set_seed()
X = X_test[19].reshape((1,) + X_test[0].shape)
cf = CounterfactualProto(nn,shape,beta=beta,enc_model=enc,ae_model=ae,gamma=gamma,theta=theta,
                 cat_vars=cat_vars_ohe,ohe=True,max_iterations=max_iterations,feature_range=feature_range,
                 c_init=c_init,c_steps=c_steps)
cf.fit(X_train, d_type='abdm')
explanation = cf.explain(X)
describe_instance(X, explanation)

`Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.


Original instance: >50K -- proba: [0.46606535 0.53393465]
Counterfactual instance: <=50K -- proba: [0.7388538  0.26114613]

Counterfactual perturbations...

Categorical:
Education: High School grad --> Dropout

Numerical:


In [35]:
# Same values as the earlier k-d tree cell; theta is set back to 10 after the
# autoencoder example changed it to 0.1.
use_kdtree = True
theta = 10. # weight of prototype loss term

In [37]:
set_seed()
# Explain test row 24 (with a leading batch axis added).
X = X_test[24][np.newaxis, :]


# define predict function
def predict_fn(x):
    """Return the classifier's predictions for ``x``; passed to the explainer in place of the model."""
    return nn.predict(x)


cf = CounterfactualProto(
    predict_fn,
    shape,
    beta=beta,
    theta=theta,
    cat_vars=cat_vars_ohe,
    ohe=True,
    use_kdtree=use_kdtree,
    max_iterations=max_iterations,
    feature_range=feature_range,
    c_init=c_init,
    c_steps=c_steps,
)
cf.fit(X_train, d_type='abdm')
explanation = cf.explain(X)
describe_instance(X, explanation)

No encoder specified. Using k-d trees to represent class prototypes.


Original instance: >50K -- proba: [0.21370363 0.7862964 ]
Counterfactual instance: <=50K -- proba: [0.50010055 0.49989948]

Counterfactual perturbations...

Categorical:

Numerical:
Hours per week: -0.20 --> -0.54
