# ALIBI
## Load data

In [1]:
import warnings
# Suppress every warning for the rest of the kernel session. This keeps the
# cell outputs short, but it also hides warnings that may be relevant.
# NOTE(review): consider restricting the filter to specific categories.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd

# Load the FICO HELOC dataset from a path relative to the notebook.
# `df` is not referenced again in the cells shown in this notebook; the
# instance that gets explained is obtained from utils.get_negative_closest.
df = pd.read_csv("../../datasets/FICO/heloc_dataset_v1.csv")

## XGBoost

### Load model

In [3]:
from xgboost.sklearn import XGBClassifier

# Create an empty classifier object; its state is populated from disk by
# model.load_model(...) in the next cell.
model = XGBClassifier()

In [4]:
# Restore the XGBoost model from its JSON file (relative path); presumably a
# model trained elsewhere in the project -- no training happens in this notebook.
model.load_model("../../models/xgboost.json")

### Standard counterfactuals

In [5]:
from alibi.explainers import Counterfactual

In [6]:
# Input shape passed to both Counterfactual explainers below: a single row
# with 23 values. The 23 is also hard-coded in create_result_df, so the two
# places must be kept in sync if the feature set changes.
shape = (1, 23)

In [7]:
def predict_fn(x):
    """Return the XGBoost model's class probabilities for the batch ``x``.

    Thin black-box wrapper that is handed to the ``Counterfactual`` explainer.
    ``model`` is looked up when the function is called, so the wrapper follows
    any later rebinding of the notebook-level ``model`` variable, exactly like
    the lambda it replaces.
    """
    return model.predict_proba(x)

In [8]:
import tensorflow as tf

# Reset Keras' global state, then turn off eager execution and the other
# TF2 behaviours before any explainer is built. Presumably required by
# alibi's Counterfactual implementation -- confirm against the alibi docs.
# Running this cell prints the "non-resource variables are not supported in
# the long term" notice shown in its output.
tf.keras.backend.clear_session()
tf.compat.v1.disable_eager_execution()
tf.compat.v1.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


In [25]:
# Counterfactual explainer for the XGBoost model, which is accessed only
# through predict_fn. Points worth knowing when reading the results
# (parameter semantics are defined by alibi -- verify against its docs):
#   * target_proba=0.75 is the same value passed to
#     utils.get_negative_closest when the instance X is selected.
#   * feature_range=(-20, 1000) is a single pair of bounds, not one pair per
#     feature; the MLP results further down contain negative values for
#     columns such as CfNumTrades90Ever2DerogPubRec.
#   * The identical settings are repeated for the MLP explainer (mlp_cf), so
#     any change here should be mirrored there.
cf = Counterfactual(
    predict_fn,
    shape,
    distance_fn="l1",
    target_proba=0.75,
    target_class="other",
    max_iter=100,
    early_stop=100,
    lam_init=1e-2,
    max_lam_steps=100,
    tol=0.25,
    learning_rate_init=0.1,
    feature_range=(-20, 1000),
    eps=0.5,
    init="identity",
    decay=True,
    write_dir=None,
    debug=False,
)

In [26]:
import utils

# Instance to explain, converted to a single-row 2-D NumPy array via
# to_numpy().reshape(1, -1). get_negative_closest lives in the project-local
# utils module; presumably it picks a negatively classified sample relative
# to the 0.75 threshold -- confirm in utils. The same X is later explained
# with the MLP model and is read by create_result_df.
X = utils.get_negative_closest(model, 0.75).to_numpy().reshape(1, -1)

In [27]:
%%time

# Run the counterfactual search for the single instance X with the XGBoost
# explainer (the recorded cell output reports roughly 2.5 minutes wall time).
# The name `explanation` is rebound later by the MLP run.
explanation = cf.explain(X)

CPU times: user 18min 57s, sys: 2min 40s, total: 21min 37s
Wall time: 2min 30s


### Store results

In [29]:
def create_result_df(_explanation, model: str, original=None):
    """Tabulate the counterfactuals stored in an alibi explanation.

    Parameters
    ----------
    _explanation
        Result of ``Counterfactual.explain``. Only
        ``_explanation['data']['all'][0]`` is read: a sequence of dicts, each
        providing ``'X'`` (the counterfactual instance), ``'class'`` and
        ``'proba'``.
    model : str
        Label of the explained model (e.g. ``"XGBoost"``); forwarded
        unchanged to ``utils.save_result``.
    original : optional
        The explained instance as a single-row 2-D sequence or array. When
        omitted, the notebook-level ``X`` is used, which keeps existing
        two-argument calls working.

    Returns
    -------
    Whatever ``utils.save_result`` returns; the calling cells treat it as a
    pandas DataFrame.

    Notes
    -----
    The number of features is taken from ``original`` instead of being
    hard-coded. The result schema is assumed to list the original feature
    columns first, then the counterfactual columns, then four trailing
    non-feature columns (hence the ``-4`` slice bound) -- confirm against
    ``utils.get_result_schema``.
    """
    if original is None:
        # Backward-compatible default: the instance selected earlier in the
        # notebook. Passing it explicitly avoids depending on kernel state.
        original = X

    counterfactuals = _explanation['data']['all'][0]
    n_cf = len(counterfactuals)
    original_row = original[0]
    n_features = len(original_row)
    schema = utils.get_result_schema()

    # One list per feature, filled column-wise across all counterfactuals.
    cf_values = [[] for _ in range(n_features)]
    goal_value = []
    goal_score = []
    for ccf in counterfactuals:
        row = ccf['X'].tolist()[0]
        goal_value.append(ccf['class'])
        # Second entry of the probability row, i.e. predict_proba column 1.
        goal_score.append(ccf['proba'].tolist()[0][1])
        for j in range(n_features):
            cf_values[j].append(row[j])

    # The original instance is repeated once per counterfactual so that both
    # halves of the table have the same number of rows.
    original_values = [[original_row[j]] * n_cf for j in range(n_features)]

    cf_by_column = dict(zip(schema[n_features:-4], cf_values))
    original_by_column = dict(zip(schema[:n_features], original_values))
    goal = {
        "GoalValue": goal_value,
        "GoalScore": goal_score,
        "GoalName": ["RiskPerformance"] * n_cf,
    }
    method = ["Alibi-Standard"] * n_cf

    return utils.save_result(
        original=original_by_column,
        cf=cf_by_column,
        score=goal,
        method=method,
        model=model,
    )

# Tabulate the XGBoost counterfactuals, then summarise every column from
# position 23 onward (the "Cf..." columns visible in the output), with one
# row per column thanks to the transpose.
result = create_result_df(explanation, "XGBoost")
result.iloc[:,23:].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
CfExternalRiskEstimate,98.0,68.109037,0.617085,65.060661,68.001093,68.074905,68.260771,69.361565
CfMSinceOldestTradeOpen,98.0,309.0,0.0,309.0,309.0,309.0,309.0,309.0
CfMSinceMostRecentTradeOpen,98.0,36.0,0.0,36.0,36.0,36.0,36.0,36.0
CfAverageMInFile,98.0,112.0,0.0,112.0,112.0,112.0,112.0,112.0
CfNumSatisfactoryTrades,98.0,19.027384,0.362098,18.279926,18.925346,18.997072,19.06307,20.28714
CfNumTrades60Ever2DerogPubRec,98.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
CfNumTrades90Ever2DerogPubRec,98.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
CfPercentTradesNeverDelq,98.0,68.0,0.0,68.0,68.0,68.0,68.0,68.0
CfMSinceMostRecentDelq,98.0,7.0,0.0,7.0,7.0,7.0,7.0,7.0
CfMaxDelq2PublicRecLast12M,98.0,2.065407,0.154862,1.869835,1.992775,2.025935,2.088528,2.974168


## MLP

### Load model

In [30]:
from joblib import load

# Load the serialized MLP model with joblib (relative path).
# NOTE(review): joblib.load unpickles the file and can therefore execute
# arbitrary code -- only load model files from a trusted source.
mlp_model = load('../../models/mlp.joblib') 

In [31]:
def mlp_predict_fn(x):
    """Return the MLP model's class probabilities for the batch ``x``.

    Thin black-box wrapper that is handed to the ``Counterfactual`` explainer.
    ``mlp_model`` is looked up when the function is called, so the wrapper
    follows any later rebinding of the notebook-level ``mlp_model`` variable,
    exactly like the lambda it replaces.
    """
    return mlp_model.predict_proba(x)

In [32]:
# Same TensorFlow setup as in the XGBoost section, repeated before the second
# explainer is built: reset Keras' global state and keep eager execution and
# the other TF2 behaviours disabled. `tf` comes from the import in that
# earlier cell.
tf.keras.backend.clear_session()
tf.compat.v1.disable_eager_execution()
tf.compat.v1.disable_v2_behavior()

In [33]:
# Counterfactual explainer for the MLP model. Every setting is identical to
# the XGBoost explainer `cf` defined earlier; only the prediction function
# differs. Keep the two argument lists in sync so the results stay
# comparable.
# NOTE(review): feature_range=(-20, 1000) is one pair of bounds for all
# features; the summary table below shows negative values for columns such
# as CfNumTrades90Ever2DerogPubRec -- confirm this is acceptable.
mlp_cf = Counterfactual(
    mlp_predict_fn,
    shape,
    distance_fn="l1",
    target_proba=0.75,
    target_class="other",
    max_iter=100,
    early_stop=100,
    lam_init=1e-2,
    max_lam_steps=100,
    tol=0.25,
    learning_rate_init=0.1,
    feature_range=(-20, 1000),
    eps=0.5,
    init="identity",
    decay=True,
    write_dir=None,
    debug=False,
)

In [34]:
%%time

# Explain the same instance X with the MLP explainer. This rebinds
# `explanation`, so the XGBoost explanation object is no longer reachable
# under that name; its table was already captured in `result`.
explanation = mlp_cf.explain(X)

CPU times: user 2min 39s, sys: 38.7 s, total: 3min 17s
Wall time: 34.1 s


In [35]:
# Tabulate the MLP counterfactuals (create_result_df reads the original
# instance from the notebook-level X), then summarise the columns from
# position 23 onward, one row per column.
result_mlp = create_result_df(explanation, "MLP")
result_mlp.iloc[:,23:].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
CfExternalRiskEstimate,81.0,59.828022,0.438707,59.485989,59.510609,59.6185,59.994049,61.124836
CfMSinceOldestTradeOpen,81.0,308.172031,0.792486,306.993317,307.320129,308.32486,308.989075,309.079132
CfMSinceMostRecentTradeOpen,81.0,38.753151,0.290365,37.810825,38.686321,38.908264,38.939129,38.940769
CfAverageMInFile,81.0,114.792928,0.280843,113.833603,114.778931,114.931061,114.950882,114.962128
CfNumSatisfactoryTrades,81.0,19.897052,0.312077,18.861425,19.84955,20.067314,20.08399,20.086859
CfNumTrades60Ever2DerogPubRec,81.0,-0.348901,0.165058,-0.436771,-0.434987,-0.432744,-0.354834,0.270643
CfNumTrades90Ever2DerogPubRec,81.0,-2.006397,0.398475,-2.338277,-2.30363,-2.182227,-1.843013,-0.836393
CfPercentTradesNeverDelq,81.0,70.453324,0.169062,69.859474,70.351311,70.462112,70.597717,70.661133
CfMSinceMostRecentDelq,81.0,9.19927,0.140818,8.81393,9.076354,9.193195,9.329661,9.396985
CfMaxDelq2PublicRecLast12M,81.0,4.856819,0.350967,3.808527,4.718983,5.011594,5.116343,5.146358


In [36]:
# Stack the XGBoost and MLP result tables and preview the combination
# transposed (one column per counterfactual).
# NOTE(review): concat is called without ignore_index, so each frame keeps
# its own row labels. The preview's last label is 80, which matches the MLP
# row count rather than the combined length, suggesting labels repeat across
# the two models -- confirm whether that matters to consumers of the CSV.
final_df = pd.concat([result, result_mlp])
final_df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,71,72,73,74,75,76,77,78,79,80
ExternalRiskEstimate,63,63,63,63,63,63,63,63,63,63,...,63,63,63,63,63,63,63,63,63,63
MSinceOldestTradeOpen,309,309,309,309,309,309,309,309,309,309,...,309,309,309,309,309,309,309,309,309,309
MSinceMostRecentTradeOpen,36,36,36,36,36,36,36,36,36,36,...,36,36,36,36,36,36,36,36,36,36
AverageMInFile,112,112,112,112,112,112,112,112,112,112,...,112,112,112,112,112,112,112,112,112,112
NumSatisfactoryTrades,17,17,17,17,17,17,17,17,17,17,...,17,17,17,17,17,17,17,17,17,17
NumTrades60Ever2DerogPubRec,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
NumTrades90Ever2DerogPubRec,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
PercentTradesNeverDelq,68,68,68,68,68,68,68,68,68,68,...,68,68,68,68,68,68,68,68,68,68
MSinceMostRecentDelq,7,7,7,7,7,7,7,7,7,7,...,7,7,7,7,7,7,7,7,7,7
MaxDelq2PublicRecLast12M,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2


In [37]:
# Persist the combined counterfactual table (relative path). With the
# default settings to_csv also writes the row index as the first column.
final_df.to_csv("../../results/cf-alibi.csv")