# Setting Up Model and Dataset

Based on [this DiCE example notebook](https://github.com/interpretml/DiCE/blob/78ca5391467ba12e38730e71577fbe421d9f0ba2/docs/source/notebooks/DiCE_with_advanced_options.ipynb).

In [1]:
# import DiCE
import dice_ml
from dice_ml.utils import helpers # helper functions

# Tensorflow libraries
import tensorflow as tf
from tensorflow import keras

In [2]:
# Load the Adult income dataset through the helper shipped with dice_ml.
dataset = helpers.load_adult_income_dataset()

In [3]:
# Preview the first six rows to check the feature columns and the 'income' outcome.
dataset.head(6)

Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,39,Government,Bachelors,Single,White-Collar,White,Male,40,0
1,50,Self-Employed,Bachelors,Married,White-Collar,White,Male,13,0
2,38,Private,HS-grad,Divorced,Blue-Collar,White,Male,40,0
3,53,Private,School,Married,Blue-Collar,Other,Male,40,0
4,28,Private,Bachelors,Married,Professional,Other,Female,40,0
5,37,Private,Masters,Married,White-Collar,White,Female,40,0


In [4]:
# Wrap the DataFrame in a dice_ml.Data object, declaring the continuous features
# and the outcome column.
# NOTE(review): the remaining columns are presumably treated as categorical — confirm
# against the DiCE documentation.
d = dice_ml.Data(dataframe=dataset, continuous_features=['age', 'hours_per_week'], outcome_name='income')

In [5]:
# Seed the NumPy and TensorFlow random number generators for reproducibility.
from numpy.random import seed
seed(1)
tf.random.set_seed(2)

In [6]:
# Prepare the training frame: take DiCE's one-hot encoded data, normalize it,
# and keep only the first part of the split (the second part is discarded).
encoded = d.normalize_data(d.one_hot_encoded_data)
train, _ = d.split_data(encoded)

# Separate the predictors from the 'income' outcome column; both remain DataFrames.
is_outcome = train.columns == 'income'
X_train = train.loc[:, ~is_outcome]
y_train = train.loc[:, is_outcome]

# Binary classifier: one hidden layer of 20 ReLU units with L1 kernel
# regularization, followed by a single sigmoid output unit.
ann_model = keras.Sequential([
    keras.layers.Dense(
        20,
        input_shape=(X_train.shape[1],),
        kernel_regularizer=keras.regularizers.l1(0.001),
        activation=tf.nn.relu,
    ),
    keras.layers.Dense(1, activation=tf.nn.sigmoid),
])

ann_model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(0.01),
    metrics=['accuracy'],
)

# Training runs for 100 epochs with no progress output, so it takes a while;
# set verbose=1 to watch progress. class_weight gives class 1 twice the weight of class 0.
ann_model.fit(
    X_train,
    y_train,
    validation_split=0.20,
    epochs=100,
    verbose=0,
    class_weight={0: 1, 1: 2},
)

<tensorflow.python.keras.callbacks.History at 0x7fa9360fe748>

# Counterfactual Explorer

In [7]:
# Inputs for the counterfactual explorer: cfe.explore() below is called with the
# dataset, the trained model, and the two values defined here.
# cont_feat repeats the continuous_features list given to dice_ml.Data earlier;
# outcome_name repeats its outcome_name.
cont_feat=['age','hours_per_week']
outcome_name='income'

In [21]:
# Launch the counterfactual explorer; the cell output shows ipywidgets controls
# (query-input dropdown, proximity/diversity tuning checkbox and slider).
# NOTE(review): counterfactual_explorer is not one of the libraries imported at the top
# of the notebook — presumably a local module that must be importable from the
# working directory; confirm.
import counterfactual_explorer as cfe
cfe.explore(dataset,ann_model,cont_feat,outcome_name)

HBox(children=(Output(), VBox(children=(Dropdown(description='3. Query Input:', options=('From Dataset', 'Manu…

HBox(children=(Checkbox(value=False, description='4. Tune proximity/diversity?', indent=False), IntSlider(valu…

Output()

Output()

In [22]:
# Print the query instance and its counterfactuals as plain lists.
# NOTE(review): the boolean flag presumably toggles "show only changed features"
# (False here prints every feature value) — confirm against counterfactual_explorer.
# The result presumably depends on the selections made in the explorer widgets above.
cfe.visualize_as_list(False)

Query instance (original outcome : 1)
[50.0, 'Self-Employed', 'Bachelors', 'Married', 'White-Collar', 'White', 'Male', 13.0, 0.7292361259460449]

Diverse Counterfactual set (new outcome : 0)
[17.0, 'Government', 'Bachelors', 'Married', 'White-Collar', 'White', 'Male', 13.0, 0.209]
[50.0, 'Self-Employed', 'Bachelors', 'Separated', 'Other/Unknown', 'Other', 'Male', 5.0, 0.189]
[50.0, 'Self-Employed', 'Assoc', 'Single', 'White-Collar', 'White', 'Male', 26.0, 0.303]
[50.0, 'Other/Unknown', 'Bachelors', 'Widowed', 'Blue-Collar', 'White', 'Male', 13.0, 0.168]
[84.0, 'Self-Employed', 'School', 'Married', 'White-Collar', 'White', 'Male', 13.0, 0.288]


In [23]:
# Show the query instance and its counterfactuals as DataFrames.
# NOTE(review): the boolean flag presumably toggles "show only changed features"
# (with True, unchanged values are rendered as '-' in the output) — confirm against
# counterfactual_explorer.
cfe.visualize_as_df(True)

Query instance (original outcome : 1)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,50.0,Self-Employed,Bachelors,Married,White-Collar,White,Male,13.0,0.729236



Diverse Counterfactual set (new outcome : 0)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,17.0,Government,-,-,-,-,-,-,0.209
1,-,-,-,Separated,Other/Unknown,Other,-,5.0,0.189
2,-,-,Assoc,Single,-,-,-,26.0,0.303
3,-,Other/Unknown,-,Widowed,Blue-Collar,-,-,-,0.168
4,84.0,-,School,-,-,-,-,-,0.288


In [24]:
# Plot the counterfactuals with the explorer's "pcp" view.
# NOTE(review): "pcp" presumably stands for parallel coordinates plot — confirm.
cfe.visualize_as_pcp()

In [25]:
# Plot the counterfactuals with the explorer's radar view.
# NOTE(review): presumably a radar (spider) chart over the features — confirm.
cfe.visualize_as_radar()