# DiCE: diverse counterfactual explanations for the FICO HELOC XGBoost model

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd

# Load the raw HELOC table; the path is relative to this notebook's directory.
df = pd.read_csv(
    filepath_or_buffer="../../datasets/FICO/heloc_dataset_v1.csv",
)

In [3]:
# Name of the label column; used below to encode the target, to exclude it
# from the feature list, and as DiCE's outcome_name.
TARGET = "RiskPerformance"

In [4]:
from sklearn.model_selection import train_test_split

# Replace the label column with integer codes. factorize() numbers the labels
# in order of first appearance, so which label becomes 0 depends on the row
# order of the CSV.
# NOTE(review): confirm this encoding matches the one the saved model was
# trained with.
df[TARGET] = pd.factorize(df[TARGET])[0]

# Hold out 25% of the rows; the fixed seed keeps the split reproducible.
train, test = train_test_split(df, random_state=42, test_size=0.25)

In [5]:
import dice_ml
from dice_ml.utils import helpers # helper functions

In [6]:
# Every column except the target is handed to DiCE as a continuous feature.
# Built directly from the column index: the previous form indexed df.columns
# with a mask computed on train.columns and selected a sub-frame of `train`
# only to read its column names back.
# NOTE(review): some columns (e.g. MaxDelqEver) look ordinal rather than
# continuous — confirm that treating all of them as continuous is intended.
continuous_features = [column for column in train.columns if column != TARGET]

In [7]:
# Describe the training data to DiCE: which columns are continuous features
# and which column holds the outcome.
d = dice_ml.Data(
    dataframe=train,
    continuous_features=continuous_features,
    outcome_name=TARGET,
)

In [8]:
from xgboost.sklearn import XGBClassifier

# Create an unfitted classifier with default parameters; it is populated by
# load_model() in the next cell.
model = XGBClassifier()

In [9]:
# Restore the previously saved model into `model` (path is relative to this
# notebook's directory). No training happens in this notebook.
model.load_model("../../models/xgboost.json")

In [10]:
import utils

# Give every feature name reported under the 'min' entry of
# utils.data_bounds(df) the same placeholder range [-20, 1000].
# NOTE(review): `bounds` is only displayed in the next cell and is not passed
# to DiCE anywhere in the visible cells — confirm whether it is still needed.
bounds = {name: [-20, 1000] for name in utils.data_bounds(df)['min'].keys()}

In [11]:
# Display the per-feature bounds built above.
bounds

{'ExternalRiskEstimate': [-20, 1000],
 'MSinceOldestTradeOpen': [-20, 1000],
 'MSinceMostRecentTradeOpen': [-20, 1000],
 'AverageMInFile': [-20, 1000],
 'NumSatisfactoryTrades': [-20, 1000],
 'NumTrades60Ever2DerogPubRec': [-20, 1000],
 'NumTrades90Ever2DerogPubRec': [-20, 1000],
 'PercentTradesNeverDelq': [-20, 1000],
 'MSinceMostRecentDelq': [-20, 1000],
 'MaxDelq2PublicRecLast12M': [-20, 1000],
 'MaxDelqEver': [-20, 1000],
 'NumTotalTrades': [-20, 1000],
 'NumTradesOpeninLast12M': [-20, 1000],
 'PercentInstallTrades': [-20, 1000],
 'MSinceMostRecentInqexcl7days': [-20, 1000],
 'NumInqLast6M': [-20, 1000],
 'NumInqLast6Mexcl7days': [-20, 1000],
 'NetFractionRevolvingBurden': [-20, 1000],
 'NetFractionInstallBurden': [-20, 1000],
 'NumRevolvingTradesWBalance': [-20, 1000],
 'NumInstallTradesWBalance': [-20, 1000],
 'NumBank2NatlTradesWHighUtilization': [-20, 1000],
 'PercentTradesWBalance': [-20, 1000]}

In [17]:
# Select the instance to explain via the project helper.
# NOTE(review): 0.75 is presumably a probability threshold/target used to pick
# the instance — confirm against utils.get_negative_closest.
X = utils.get_negative_closest(model, 0.75)

In [27]:
# Reshape the selected instance into a DataFrame and transpose it (a single
# row in the recorded output), the form passed to generate_counterfactuals().
# Caution: X is rebound from itself, so re-running this cell without first
# re-running the previous one transposes X a second time.
X = pd.DataFrame(X).transpose()

In [28]:
# Wrap the loaded classifier for DiCE using its scikit-learn backend.
m = dice_ml.Model(
    model=model,
    backend="sklearn",
)

In [29]:
# Inspect the decimal precisions reported by the DiCE data object (all 0 in
# the recorded output; presumably one entry per continuous feature).
d.get_decimal_precisions()

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [84]:
# Build the counterfactual explainer from the data and model wrappers, using
# the "kdtree" generation method.
explainer = dice_ml.Dice(
    d,
    m,
    method="kdtree",
)

In [85]:
# Number of counterfactuals requested per query instance; also used by the
# metric cells at the end of the notebook.
N = 100

In [86]:
# Ask for N counterfactuals of the query instance whose predicted class is 1.
explanation = explainer.generate_counterfactuals(
    X,
    total_CFs=N,
    desired_class=1,
)

100%|██████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.76s/it]


In [87]:
# Render the query instance and its counterfactuals as tables.
# The call is made for its display side effect only; its result is no longer
# bound to `exp_df`, which the following cell assigns to the counterfactual
# array before any use.
explanation.visualize_as_dataframe()

Query instance (original outcome : 0)


Unnamed: 0,ExternalRiskEstimate,MSinceOldestTradeOpen,MSinceMostRecentTradeOpen,AverageMInFile,NumSatisfactoryTrades,NumTrades60Ever2DerogPubRec,NumTrades90Ever2DerogPubRec,PercentTradesNeverDelq,MSinceMostRecentDelq,MaxDelq2PublicRecLast12M,...,MSinceMostRecentInqexcl7days,NumInqLast6M,NumInqLast6Mexcl7days,NetFractionRevolvingBurden,NetFractionInstallBurden,NumRevolvingTradesWBalance,NumInstallTradesWBalance,NumBank2NatlTradesWHighUtilization,PercentTradesWBalance,RiskPerformance
0,65,43,2,26,15,1,1,100,-7,7,...,0,4,4,31,83,5,1,0,75,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,ExternalRiskEstimate,MSinceOldestTradeOpen,MSinceMostRecentTradeOpen,AverageMInFile,NumSatisfactoryTrades,NumTrades60Ever2DerogPubRec,NumTrades90Ever2DerogPubRec,PercentTradesNeverDelq,MSinceMostRecentDelq,MaxDelq2PublicRecLast12M,...,MSinceMostRecentInqexcl7days,NumInqLast6M,NumInqLast6Mexcl7days,NetFractionRevolvingBurden,NetFractionInstallBurden,NumRevolvingTradesWBalance,NumInstallTradesWBalance,NumBank2NatlTradesWHighUtilization,PercentTradesWBalance,RiskPerformance
1679,82.0,78.0,2.0,26.0,9.0,1.0,1.0,100.0,-7.0,7.0,...,0.0,1.0,1.0,5.0,83.0,2.0,1.0,0.0,60.0,1
4713,76.0,73.0,2.0,26.0,15.0,1.0,1.0,100.0,-7.0,7.0,...,4.0,1.0,1.0,21.0,83.0,5.0,1.0,0.0,54.0,1
3040,75.0,43.0,2.0,75.0,26.0,1.0,1.0,100.0,-7.0,6.0,...,0.0,4.0,4.0,19.0,83.0,5.0,5.0,0.0,64.0,1
4624,79.0,75.0,2.0,26.0,15.0,1.0,1.0,100.0,-7.0,7.0,...,0.0,1.0,1.0,20.0,83.0,2.0,3.0,0.0,56.0,1
1664,79.0,43.0,2.0,26.0,6.0,1.0,1.0,100.0,-7.0,7.0,...,0.0,2.0,2.0,1.0,64.0,1.0,2.0,0.0,38.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7967,79.0,116.0,2.0,38.0,15.0,1.0,1.0,100.0,-7.0,7.0,...,2.0,1.0,1.0,7.0,100.0,2.0,1.0,0.0,44.0,1
9984,80.0,72.0,2.0,43.0,15.0,1.0,1.0,100.0,-7.0,7.0,...,0.0,1.0,1.0,42.0,-8.0,5.0,1.0,0.0,57.0,1
3590,82.0,85.0,7.0,75.0,27.0,1.0,1.0,100.0,-7.0,7.0,...,-8.0,0.0,0.0,12.0,83.0,5.0,3.0,0.0,50.0,1
9457,86.0,119.0,9.0,66.0,15.0,1.0,1.0,100.0,-7.0,7.0,...,0.0,0.0,0.0,9.0,83.0,1.0,3.0,0.0,57.0,1


In [88]:
# Counterfactuals generated for the first query instance, converted to a
# NumPy array (one row per counterfactual). Despite the `_df` suffix this is
# an ndarray, not a DataFrame.
exp_df = (
    explanation
    .cf_examples_list[0]
    .final_cfs_df
    .to_numpy()
)

In [89]:
# Display the counterfactual array (NumPy abbreviates it in the output).
exp_df
    

array([[ 82.,  78.,   2., ...,   1.,   0.,  60.],
       [ 76.,  73.,   4., ...,   2.,   0.,  54.],
       [ 75.,  63.,   4., ...,   5.,   0.,  64.],
       ...,
       [ 82.,  85.,   7., ...,   3.,   1.,  50.],
       [ 86., 119.,   9., ...,   3.,   0.,  57.],
       [ 76.,  24.,   8., ...,   2.,   0.,  50.]])

In [90]:
# Show the query instance as an array, for comparison with the counterfactuals.
X.to_numpy()

array([[ 65,  43,   2,  26,  15,   1,   1, 100,  -7,   7,   8,  17,   2,
         25,   0,   4,   4,  31,  83,   5,   1,   0,  75]])

In [91]:
# Show the second counterfactual (row index 1) reshaped to a single-row 2-D
# array, the same layout as X.to_numpy() above.
exp_df[1,:].reshape(1, -1)

array([[ 76.,  73.,   4.,  32.,  15.,   0.,   0., 100.,  -7.,   7.,   8.,
         15.,   2.,  20.,   4.,   1.,   1.,  21.,  86.,   5.,   2.,   0.,
         54.]])

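## Proximity

Absolute per-feature difference between the query instance and each counterfactual, summarised over all counterfactuals and features (lower means closer to the original instance).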

In [107]:
import numpy as np

# Proximity: absolute per-feature distance between the query instance and
# every counterfactual, summarised over all (counterfactual, feature) pairs.
query_row = X.to_numpy()  # computed once; it does not change per counterfactual

# Slice with [:N] instead of indexing range(N): if the explainer returns fewer
# than the N requested counterfactuals, exp_df[i] would raise IndexError.
# Broadcasting subtracts the single query row from every counterfactual row.
abs_diffs = np.abs(query_row - exp_df[:N])

print(f"Proximity mean: {np.mean(abs_diffs)}, std: {np.std(abs_diffs)}")

Proximity mean: 7.7617391304347825, std: 13.628828668982116


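## Sparsity

Number of features whose value differs between the query instance and each counterfactual (lower means fewer features had to change).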

In [106]:
# Sparsity: for each counterfactual, count the features whose value differs
# from the query instance, then summarise those counts.
# Slice with [:N] instead of indexing range(N): if the explainer returns fewer
# than the N requested counterfactuals, exp_df[i] would raise IndexError.
feature_deltas = X.to_numpy() - exp_df[:N]

# axis=1 gives one count per counterfactual row.
changed_counts = np.count_nonzero(feature_deltas, axis=1)

print(f"Sparsity mean: {np.mean(changed_counts)}, std: {np.std(changed_counts)}")

Sparsity mean: 16.51, std: 1.2922461065911555
