In [1]:
# pip install --user dice-ml

In [2]:
import shap
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import dice_ml

  from .autonotebook import tqdm as notebook_tqdm


#### 1 - load your data

In [3]:
x_test=pd.read_csv("X_test.csv")
x_train=pd.read_csv("X_train.csv")
y_test=pd.read_csv("y_test.csv")
y_train=pd.read_csv("y_train.csv")
oos=pd.read_csv("OOS.csv")

with open('model.pkl', 'rb') as file:
    model = pickle.load(file)

#### 2 - Data Preprocess (to make things work with Dice)

In [4]:
#########2.A Convert categorical columns to Category
############################################

# These features can either be a 1 or a 0 which indicates if its True or False
# Without casting there may be values between the two which isn't possible
categorical_features = ['location_mismatch', 'previous_fraud_flag']
for col in categorical_features:
    x_train[col] = x_train[col].astype('category')
    x_test[col] = x_test[col].astype('category')
    oos[col] = oos[col].astype('category')

In [5]:
#########2.B Convert Numerical columns to Float
############################################

# Dice doesn't want to accept integers so you have to cast as a float
continuous_int_features = ['num_transactions', 'customer_age', 'days_since_last_transaction']
for col in continuous_int_features:
    x_train[col] = x_train[col].astype(float)
    x_test[col] = x_test[col].astype(float)
    oos[col] = oos[col].astype(float)

In [6]:
oos

Unnamed: 0,trans_amount,account_balance,num_transactions,merchant_risk_score,customer_age,days_since_last_transaction,card_decline_rate,device_trust_score,location_mismatch,previous_fraud_flag
0,4502.171939,20048.574074,16.0,0.352433,57.0,5.0,0.169705,0.735935,0.0,0.0
1,2619.67776,50686.336791,46.0,0.55491,75.0,20.0,0.71846,0.138276,0.0,0.0
2,581.38592,67222.012362,12.0,0.188041,30.0,18.0,0.770089,0.966178,0.0,0.0
3,2010.217726,91170.65873,49.0,0.859666,67.0,3.0,0.462328,0.821032,0.0,0.0
4,1041.735015,50265.984875,46.0,0.257755,54.0,6.0,0.714087,0.204483,0.0,0.0
5,3623.555294,145.790272,41.0,0.897814,26.0,0.0,0.731945,0.23054,1.0,1.0
6,4275.644359,261.587024,45.0,0.804748,24.0,1.0,0.847557,0.066494,1.0,1.0
7,4269.49005,4959.197959,48.0,1.0,21.0,0.0,1.0,0.0,1.0,1.0
8,4242.714987,1067.278295,44.0,1.0,24.0,0.0,1.0,0.0,1.0,1.0


In [7]:
model.predict_proba(oos)

array([[0.82219797, 0.17780203],
       [0.78469998, 0.21530002],
       [0.81317294, 0.18682706],
       [0.64402309, 0.35597691],
       [0.79631334, 0.20368666],
       [0.49599171, 0.50400829],
       [0.56977766, 0.43022234],
       [0.4599254 , 0.5400746 ],
       [0.46610334, 0.53389666]])

#### 3 - Let's choose a clear fraud example in our OOS

In [8]:
#########3 select out a fraud example
############################################
x0 = oos.iloc[2:3] 
print(model.predict_proba(x0))
x0

[[0.81317294 0.18682706]]


Unnamed: 0,trans_amount,account_balance,num_transactions,merchant_risk_score,customer_age,days_since_last_transaction,card_decline_rate,device_trust_score,location_mismatch,previous_fraud_flag
2,581.38592,67222.012362,12.0,0.188041,30.0,18.0,0.770089,0.966178,0.0,0.0


#### 4 - We need to tell Dice which are our coninous (float) columns

In [9]:
#########4 Get floating point columns
############################################
##all I'm doing here is filtering on the categorical features
###Uncomment
cont_cols = [x for x in x_train.columns if x not in ['previous_fraud_flag', 'location_mismatch']]
cont_cols

['trans_amount',
 'account_balance',
 'num_transactions',
 'merchant_risk_score',
 'customer_age',
 'days_since_last_transaction',
 'card_decline_rate',
 'device_trust_score']

#### 5 - Build and Generate the Dice CF

In [None]:
#########5.A Dice needs the full dataset: columns + labels
############################################
#we can create that with pandas concat

full_data = pd.concat([x_train, y_train], axis=1)

#########5.B Data prep for Dice
############################################
d = dice_ml.Data(dataframe=full_data,
                continuous_features=cont_cols,
                outcome_name='is_fraud')

#########5.C build the backend for Dice
############################################

# Pre-trained ML model
m = dice_ml.Model(model=model,backend='sklearn')

#########5.D Instantiate the dice explanation instance
############################################

# # DiCE explanation instance
exp = dice_ml.Dice(d,m)
exp

<dice_ml.explainer_interfaces.dice_random.DiceRandom at 0x125cf1290>

In [22]:
x0['location_mismatch'] = x0['location_mismatch'].astype(int)
x0['previous_fraud_flag'] = x0['previous_fraud_flag'].astype(int)
x0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x0['location_mismatch'] = x0['location_mismatch'].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x0['previous_fraud_flag'] = x0['previous_fraud_flag'].astype(int)


Unnamed: 0,trans_amount,account_balance,num_transactions,merchant_risk_score,customer_age,days_since_last_transaction,card_decline_rate,device_trust_score,location_mismatch,previous_fraud_flag
2,581.38592,67222.012362,12.0,0.188041,30.0,18.0,0.770089,0.966178,0,0


In [23]:
#########5.E Generate the CF
############################################

counterfactuals = exp.generate_counterfactuals(
    x0,
    total_CFs=2,  # Generate more counterfactuals
    desired_class="opposite")

######5.F Visualize it
#########################################

# Visualize the counterfactual explanations
counterfactuals.visualize_as_dataframe(show_only_changes=False)

100%|██████████| 1/1 [00:01<00:00,  1.96s/it]

Query instance (original outcome : 0)





Unnamed: 0,trans_amount,account_balance,num_transactions,merchant_risk_score,customer_age,days_since_last_transaction,card_decline_rate,device_trust_score,location_mismatch,previous_fraud_flag,is_fraud
0,581.385925,67222.015625,12.0,0.188041,30.0,18.0,0.770089,0.966178,0,0,0



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,trans_amount,account_balance,num_transactions,merchant_risk_score,customer_age,days_since_last_transaction,card_decline_rate,device_trust_score,location_mismatch,previous_fraud_flag,is_fraud
0,581.38592,67222.014632,12.0,0.188041,30.0,18.0,0.770089,0.002304,0,0,1
1,581.38592,67222.014632,15.6,0.188041,30.0,18.0,0.770089,0.002304,0,0,1
