# ALIBI-proto
## Load data

In [1]:
import warnings
# Silence every warning for the rest of the session; this also hides
# deprecation notices emitted by the libraries imported below.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd

# Load the FICO HELOC dataset; the relative path is resolved against the
# kernel's working directory.
df = pd.read_csv("../../datasets/FICO/heloc_dataset_v1.csv")

In [3]:
# Name of the label column; it is integer-encoded and excluded from the
# feature matrix in the cells below.
TARGET = "RiskPerformance"

In [4]:
from sklearn.model_selection import train_test_split

# Replace the label column by integer codes. `factorize` numbers the labels in
# order of first appearance, so the label -> code mapping depends on row order.
# NOTE(review): presumably this must match the encoding used when the saved
# models were trained — confirm.
df[TARGET] = df[TARGET].factorize()[0]

# Fixed seed keeps the 75/25 split reproducible. Only `train` is used later in
# this notebook.
train, test = train_test_split(df, test_size=0.25, random_state=42)

In [5]:
# Feature matrix for fitting the explainer: every training column except the
# label. `drop` takes the columns from `train` itself instead of masking
# `df.columns` with a mask built from `train.columns`, and it raises a KeyError
# if the label column is unexpectedly missing.
train_x = train.drop(columns=[TARGET])

## XGBoost

### Load model

In [6]:
from xgboost.sklearn import XGBClassifier

# Empty classifier shell; its parameters are filled in by `load_model` in the
# next cell.
model = XGBClassifier()

In [7]:
# Restore the saved XGBoost model from its JSON dump.
model.load_model("../../models/xgboost.json")

### Standard counterfactuals

In [8]:
import tensorflow as tf

# Reset Keras' global state, then switch TensorFlow to 1.x-style behaviour.
# NOTE(review): presumably required because CounterfactualProto runs on the
# TF1 graph API — confirm against the installed Alibi version.
tf.keras.backend.clear_session()
tf.compat.v1.disable_eager_execution()
tf.compat.v1.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


In [9]:
from alibi.explainers import CounterfactualProto

In [10]:
# Shape of a single instance handed to the explainer: a batch of one row with
# 23 feature columns. `build_cf` reads this module-level value.
shape = (1, 23)

In [11]:
def predict_fn(x):
    """Return the XGBoost model's class probabilities for the rows in ``x``."""
    return model.predict_proba(x)

In [18]:
def build_cf(pfn):
    """Create an unfitted ``CounterfactualProto`` explainer around ``pfn``.

    Parameters
    ----------
    pfn
        Prediction callable handed to the explainer as its black-box model.

    Returns
    -------
    CounterfactualProto
        Explainer configured to use k-d trees, for inputs of the module-level
        ``shape``. Call ``fit`` on it before ``explain``.
    """
    # Search settings shared by every model explained in this notebook.
    search_settings = dict(
        use_kdtree=True,
        theta=100.0,
        max_iterations=200,
        feature_range=(-20, 1000),
        c_init=2.0,
        c_steps=80,
    )
    return CounterfactualProto(pfn, shape, **search_settings)

In [19]:
# Explainer wrapping the XGBoost prediction function (still unfitted here).
cf = build_cf(predict_fn)

In [20]:
import utils
import pandas as pd

# Instances to explain, one per row; the loops below use the first ten rows.
# NOTE(review): assumes the columns are the 23 features in the same order as
# `train_x` — confirm against how datapoints.csv was produced.
datapoints = pd.read_csv("../../datasets/datapoints.csv")

In [21]:
# Fit the explainer on the training features. With `use_kdtree=True` and no
# encoder, Alibi represents the class prototypes with k-d trees (see the log
# line in the output).
cf.fit(train_x.to_numpy())

No encoder specified. Using k-d trees to represent class prototypes.


CounterfactualProto(meta={
  'name': 'CounterfactualProto',
  'type': ['blackbox', 'tensorflow', 'keras'],
  'explanations': ['local'],
  'params': {
              'kappa': 0.0,
              'beta': 0.1,
              'feature_range': (-20, 1000),
              'gamma': 0.0,
              'theta': 100.0,
              'cat_vars': None,
              'ohe': False,
              'use_kdtree': True,
              'learning_rate_init': 0.01,
              'max_iterations': 200,
              'c_init': 2.0,
              'c_steps': 80,
              'eps': (0.001, 0.001),
              'clip': (-1000.0, 1000.0),
              'update_num_grad': 1,
              'write_dir': None,
              'shape': (1, 23),
              'is_model': False,
              'is_ae': False,
              'is_enc': False,
              'enc_or_kdtree': True,
              'is_cat': False,
              'trustscore_kwargs': None,
              'd_type': 'abdm',
              'w': None,
              'disc_per

### Store results

In [22]:
def create_result_df(_explanation, X, model: str):
    """Turn one Alibi explanation into a single-row result via ``utils.save_result``.

    Parameters
    ----------
    _explanation
        Object returned by ``CounterfactualProto.explain``. It is read through
        ``_explanation['data']['cf']``, which must provide ``'X'``, ``'class'``
        and ``'proba'``.
    X
        The explained instance; feature ``j`` is read as ``X[0][j]``.
    model
        Label of the explained classifier, forwarded to ``utils.save_result``.

    Returns
    -------
    Whatever ``utils.save_result`` returns. The notebook combines these with
    ``pd.concat``, so presumably a one-row DataFrame.

    Raises
    ------
    ValueError
        If the explanation contains no counterfactual (``cf`` is ``None``).
    """
    n_features = 23  # number of input features; also fixes the schema slices below

    ccf = _explanation['data']['cf']
    if ccf is None:
        # Without this guard the lookups below fail with an opaque
        # "'NoneType' object is not subscriptable" TypeError.
        raise ValueError(
            f"No counterfactual found for this instance (model={model!r}); "
            "nothing to store."
        )

    schema = utils.get_result_schema()
    values = ccf['X'].tolist()[0]

    # Each explanation contributes exactly one row, so every column is a
    # one-element list.
    cf_columns = [[values[j]] for j in range(n_features)]
    orig_columns = [[X[0][j]] for j in range(n_features)]

    # The first `n_features` schema names are the original feature columns and
    # the names after them are the counterfactual columns; zip stops after
    # `n_features` entries.
    d = dict(zip(schema[n_features:-4], cf_columns))
    od = dict(zip(schema[:n_features], orig_columns))

    goal = {
        "GoalValue": [ccf['class']],
        # Index 1 picks the second class probability of the counterfactual.
        "GoalScore": [ccf['proba'].tolist()[0][1]],
        "GoalName": ["RiskPerformance"],
    }
    method = ["Alibi-Proto"]

    return utils.save_result(original=od, cf=d, score=goal, method=method, model=model)

In [23]:
# Search a counterfactual for each of the first ten datapoints with the
# XGBoost-backed explainer and collect the one-row results.
counterfactuals = []
for i in range(10):
    print(f"Running counterfactual search #{i}")
    # Add a leading batch axis so the instance has shape (1, n_features).
    X = datapoints.iloc[i].to_numpy()[None, :]
    explanation = cf.explain(X)
    counterfactuals.append(create_result_df(explanation, X, "XGBoost"))

Running counterfactual search #0
Running counterfactual search #1
Running counterfactual search #2
Running counterfactual search #3
Running counterfactual search #4
Running counterfactual search #5
Running counterfactual search #6
Running counterfactual search #7
Running counterfactual search #8
Running counterfactual search #9


In [24]:
# Stack the one-row results. Every partial frame carries index label 0, so
# renumber the rows to get a unique 0..n-1 index.
xgboost_final = pd.concat(counterfactuals, ignore_index=True)

In [25]:
# Display the XGBoost counterfactual table.
xgboost_final

Unnamed: 0,ExternalRiskEstimate,MSinceOldestTradeOpen,MSinceMostRecentTradeOpen,AverageMInFile,NumSatisfactoryTrades,NumTrades60Ever2DerogPubRec,NumTrades90Ever2DerogPubRec,PercentTradesNeverDelq,MSinceMostRecentDelq,MaxDelq2PublicRecLast12M,...,CfNetFractionInstallBurden,CfNumRevolvingTradesWBalance,CfNumInstallTradesWBalance,CfNumBank2NatlTradesWHighUtilization,CfPercentTradesWBalance,GoalValue,GoalScore,GoalName,method,model
0,70,142,9,47,16,0,0,100,-7,7,...,61.06805,3.871625,2.0,1.0,55.128376,1,0.525244,RiskPerformance,Alibi-Proto,XGBoost
0,61,113,3,29,32,0,0,100,-7,7,...,64.437485,4.397884,2.397884,1.368181,38.516689,1,0.519465,RiskPerformance,Alibi-Proto,XGBoost
0,71,103,15,94,6,1,0,86,13,6,...,-8.0,2.582994,0.503786,0.582994,67.0,1,0.633392,RiskPerformance,Alibi-Proto,XGBoost
0,79,66,19,35,5,0,0,100,-7,7,...,26.1,5.9,1.1,0.0,90.099998,1,0.523881,RiskPerformance,Alibi-Proto,XGBoost
0,66,258,7,94,17,2,2,95,19,6,...,-8.0,5.0,1.0,1.573093,55.0,1,0.702892,RiskPerformance,Alibi-Proto,XGBoost
0,60,236,3,83,41,1,0,86,17,6,...,-8.0,5.417006,1.417006,2.417006,55.605118,1,0.564518,RiskPerformance,Alibi-Proto,XGBoost
0,61,264,11,110,12,4,3,69,4,2,...,-8.0,1.0,-7.503787,0.0,33.0,1,0.65211,RiskPerformance,Alibi-Proto,XGBoost
0,61,425,34,103,13,4,4,79,37,0,...,75.900002,13.9,8.9,8.9,90.099998,1,0.571569,RiskPerformance,Alibi-Proto,XGBoost
0,68,161,2,58,1,0,0,67,53,6,...,96.900002,6.9,1.0,1.9,100.0,1,0.771096,RiskPerformance,Alibi-Proto,XGBoost
0,65,49,9,23,11,0,0,100,-7,7,...,72.099998,4.0,3.9,-2.9,76.900002,1,0.539578,RiskPerformance,Alibi-Proto,XGBoost


## MLP

### Load model

In [36]:
from joblib import load

# Restore the saved MLP model. joblib files are pickle-based, so only load
# files from a trusted source.
mlp_model = load('../../models/mlp.joblib') 

In [37]:
def mlp_predict_fn(x):
    """Return the MLP model's class probabilities for the rows in ``x``."""
    return mlp_model.predict_proba(x)

In [38]:
# Explainer wrapping the MLP prediction function, built with the same settings
# as the XGBoost one (still unfitted here).
mlp_cf = build_cf(mlp_predict_fn)

In [39]:
# Fit the MLP explainer on the same training features as the XGBoost one.
mlp_cf.fit(train_x.to_numpy())

No encoder specified. Using k-d trees to represent class prototypes.


CounterfactualProto(meta={
  'name': 'CounterfactualProto',
  'type': ['blackbox', 'tensorflow', 'keras'],
  'explanations': ['local'],
  'params': {
              'kappa': 0.0,
              'beta': 0.1,
              'feature_range': (-20, 1000),
              'gamma': 0.0,
              'theta': 100.0,
              'cat_vars': None,
              'ohe': False,
              'use_kdtree': True,
              'learning_rate_init': 0.01,
              'max_iterations': 200,
              'c_init': 2.0,
              'c_steps': 80,
              'eps': (0.001, 0.001),
              'clip': (-1000.0, 1000.0),
              'update_num_grad': 1,
              'write_dir': None,
              'shape': (1, 23),
              'is_model': False,
              'is_ae': False,
              'is_enc': False,
              'enc_or_kdtree': True,
              'is_cat': False,
              'trustscore_kwargs': None,
              'd_type': 'abdm',
              'w': None,
              'disc_per

In [40]:
# Search a counterfactual for each of the first ten datapoints with the
# MLP-backed explainer. `counterfactuals` is reset here, so it no longer holds
# the XGBoost results.
counterfactuals = []
for i in range(10):
    print(f"Running counterfactual search #{i}")
    # Add a leading batch axis so the instance has shape (1, n_features).
    X = datapoints.iloc[i].to_numpy()[None, :]
    explanation = mlp_cf.explain(X)
    counterfactuals.append(create_result_df(explanation, X, "MLP"))

Running counterfactual search #0
Running counterfactual search #1
Running counterfactual search #2
Running counterfactual search #3
Running counterfactual search #4
Running counterfactual search #5
Running counterfactual search #6
Running counterfactual search #7
Running counterfactual search #8
Running counterfactual search #9


In [41]:
# Stack the one-row MLP results. Every partial frame carries index label 0, so
# renumber the rows to get a unique 0..n-1 index.
mlp_final = pd.concat(counterfactuals, ignore_index=True)

In [42]:
# Combine both models' results. Renumber the rows so the index written to the
# CSV is unique instead of repeating the labels of the partial frames.
final_df = pd.concat([xgboost_final, mlp_final], ignore_index=True)

In [43]:
# Display the combined XGBoost + MLP counterfactual table.
final_df

Unnamed: 0,ExternalRiskEstimate,MSinceOldestTradeOpen,MSinceMostRecentTradeOpen,AverageMInFile,NumSatisfactoryTrades,NumTrades60Ever2DerogPubRec,NumTrades90Ever2DerogPubRec,PercentTradesNeverDelq,MSinceMostRecentDelq,MaxDelq2PublicRecLast12M,...,CfNetFractionInstallBurden,CfNumRevolvingTradesWBalance,CfNumInstallTradesWBalance,CfNumBank2NatlTradesWHighUtilization,CfPercentTradesWBalance,GoalValue,GoalScore,GoalName,method,model
0,70,142,9,47,16,0,0,100,-7,7,...,61.06805,3.871625,2.0,1.0,55.128376,1,0.525244,RiskPerformance,Alibi-Proto,XGBoost
0,61,113,3,29,32,0,0,100,-7,7,...,64.437485,4.397884,2.397884,1.368181,38.516689,1,0.519465,RiskPerformance,Alibi-Proto,XGBoost
0,71,103,15,94,6,1,0,86,13,6,...,-8.0,2.582994,0.503786,0.582994,67.0,1,0.633392,RiskPerformance,Alibi-Proto,XGBoost
0,79,66,19,35,5,0,0,100,-7,7,...,26.1,5.9,1.1,0.0,90.099998,1,0.523881,RiskPerformance,Alibi-Proto,XGBoost
0,66,258,7,94,17,2,2,95,19,6,...,-8.0,5.0,1.0,1.573093,55.0,1,0.702892,RiskPerformance,Alibi-Proto,XGBoost
0,60,236,3,83,41,1,0,86,17,6,...,-8.0,5.417006,1.417006,2.417006,55.605118,1,0.564518,RiskPerformance,Alibi-Proto,XGBoost
0,61,264,11,110,12,4,3,69,4,2,...,-8.0,1.0,-7.503787,0.0,33.0,1,0.65211,RiskPerformance,Alibi-Proto,XGBoost
0,61,425,34,103,13,4,4,79,37,0,...,75.900002,13.9,8.9,8.9,90.099998,1,0.571569,RiskPerformance,Alibi-Proto,XGBoost
0,68,161,2,58,1,0,0,67,53,6,...,96.900002,6.9,1.0,1.9,100.0,1,0.771096,RiskPerformance,Alibi-Proto,XGBoost
0,65,49,9,23,11,0,0,100,-7,7,...,72.099998,4.0,3.9,-2.9,76.900002,1,0.539578,RiskPerformance,Alibi-Proto,XGBoost


In [44]:
# Persist the combined results. With the default `index=True`, the DataFrame
# index is written as the first, unnamed CSV column.
final_df.to_csv("../../results/cf-alibi-proto.csv")