# ALIBI
## Load data

In [1]:
import warnings
# Blanket filter: every warning category is silenced for the rest of the session,
# so deprecation notices raised by the libraries used below are hidden as well.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd

# HELOC dataset CSV; the path is relative to this notebook's directory.
# NOTE(review): `df` is not referenced again in the visible cells — confirm it is still needed.
df = pd.read_csv("../../datasets/FICO/heloc_dataset_v1.csv")

## XGBoost

### Load model

In [3]:
from xgboost.sklearn import XGBClassifier

# Empty classifier shell; its parameters are loaded from disk in the following cell.
model = XGBClassifier()

In [4]:
# Restore the saved XGBoost model from its JSON file (path relative to the notebook).
model.load_model("../../models/xgboost.json")

### Standard counterfactuals

In [5]:
from alibi.explainers import Counterfactual

In [6]:
# Input shape handed to the Counterfactual explainer in build_cf: one row of 23 features.
# The 23 must agree with the feature count used in create_result_df.
shape = (1, 23)

In [7]:
def predict_fn(x):
    """Return the XGBoost model's ``predict_proba`` output for ``x``."""
    return model.predict_proba(x)

In [8]:
import tensorflow as tf

# Reset Keras global state, then turn off eager execution and TF2 behaviour
# before any explainer is built.
# NOTE(review): presumably needed because alibi's Counterfactual relies on TF1-style graphs — confirm against the alibi docs.
tf.keras.backend.clear_session()
tf.compat.v1.disable_eager_execution()
tf.compat.v1.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


In [9]:
def build_cf(pfn):
    """Create an alibi ``Counterfactual`` explainer around a prediction function.

    Parameters
    ----------
    pfn
        Callable handed to ``Counterfactual`` as its prediction function; the
        notebook passes thin wrappers around ``predict_proba``.

    Returns
    -------
    Counterfactual
        Explainer built with the notebook-level ``shape`` and the fixed search
        settings listed below.
    """
    # Search settings shared by every model explained in this notebook.
    search_options = dict(
        distance_fn="l1",
        target_proba=0.5,
        target_class="other",
        max_iter=100,
        early_stop=100,
        lam_init=1e-3,
        max_lam_steps=100,
        tol=0.25,
        learning_rate_init=0.1,
        feature_range=(-20, 1000),
        eps=0.5,
        init="identity",
        decay=True,
        write_dir=None,
        debug=False,
    )
    return Counterfactual(pfn, shape, **search_options)

In [10]:
# Explainer wrapped around the XGBoost prediction function.
cf = build_cf(predict_fn)




2022-04-04 10:11:33.506309: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [11]:
import utils

# Instances to explain; the search loops below use the first 10 rows.
datapoints = pd.read_csv("../../datasets/datapoints.csv")

### Store results

In [12]:
def create_result_df(_explanation, model: str, original=None, n_features: int = 23):
    """Convert one alibi explanation into the project's result format.

    Collects every counterfactual stored under
    ``_explanation['data']['all'][0]``, pairs each one with the original
    instance, and hands the resulting columns to ``utils.save_result``.

    Parameters
    ----------
    _explanation
        Explanation returned by ``Counterfactual.explain``. It is indexed as
        ``_explanation['data']['all'][0]``, a sequence of entries that each
        carry ``'X'``, ``'class'`` and ``'proba'``.
    model : str
        Model label forwarded unchanged to ``utils.save_result``.
    original : array-like, optional
        The explained instance, read as ``original[0][j]``. When omitted, the
        notebook-level variable ``X`` is used. That keeps existing two-argument
        calls working, but it is only correct while ``X`` still holds the
        instance that produced ``_explanation``; prefer passing it explicitly.
    n_features : int, default 23
        Number of feature columns per instance. It is also the split point
        applied to the schema from ``utils.get_result_schema``: the first
        ``n_features`` names label the original values and
        ``schema[n_features:-4]`` label the counterfactual values.

    Returns
    -------
    Whatever ``utils.save_result`` returns.

    Raises
    ------
    NameError
        If ``original`` is omitted and no notebook-level ``X`` exists.
    """
    if original is None:
        # Backward-compatible fallback to the global set by the search loop.
        original = X
    original_row = original[0]

    found = _explanation['data']['all'][0]
    n_found = len(found)
    schema = utils.get_result_schema()

    cf_columns = [[] for _ in range(n_features)]
    goal_values = []
    goal_scores = []
    for entry in found:
        cf_row = entry['X'].tolist()[0]
        goal_values.append(entry['class'])
        # Score recorded is the probability at class index 1.
        goal_scores.append(entry['proba'].tolist()[0][1])
        for j in range(n_features):
            cf_columns[j].append(cf_row[j])

    # The original instance is repeated once per counterfactual so that both
    # column sets have the same length.
    original_columns = [[original_row[j]] * n_found for j in range(n_features)]

    cf_by_name = dict(zip(schema[n_features:-4], cf_columns))
    original_by_name = dict(zip(schema[:n_features], original_columns))
    goal = {
        "GoalValue": goal_values,
        "GoalScore": goal_scores,
        "GoalName": ["RiskPerformance"] * n_found,
    }
    method = ["Alibi-Standard"] * n_found

    return utils.save_result(
        original=original_by_name, cf=cf_by_name, score=goal, method=method, model=model
    )

In [None]:
# Run the counterfactual search for the first 10 datapoints with the XGBoost explainer.
counterfactuals = []
for i in range(10):
    print(f"Running counterfactual search #{i}")
    # Keep the name `X`: create_result_df reads this notebook-level variable
    # to fill in the original feature values.
    X = datapoints.iloc[i].to_numpy().reshape(1, -1)
    explanation = cf.explain(X)
    result = create_result_df(explanation, "XGBoost")
    counterfactuals.append(result)

In [None]:
# Combine the per-instance XGBoost results into a single object.
xgboost_final = pd.concat(counterfactuals)

In [None]:
# Display the combined XGBoost results.
xgboost_final

## MLP

### Load model

In [None]:
from joblib import load

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code — only load model files from a trusted source.
mlp_model = load('../../models/mlp.joblib') 

In [None]:
def mlp_predict_fn(x):
    """Return the MLP model's ``predict_proba`` output for ``x``."""
    return mlp_model.predict_proba(x)

In [None]:
# Reset Keras global state and re-apply the non-eager / non-v2 settings
# before the MLP explainer is built.
tf.keras.backend.clear_session()
tf.compat.v1.disable_eager_execution()
tf.compat.v1.disable_v2_behavior()

In [None]:
# Explainer wrapped around the MLP prediction function.
mlp_cf = build_cf(mlp_predict_fn)

In [None]:
# Run the counterfactual search for the first 10 datapoints with the MLP explainer.
# This rebinds `counterfactuals`, so the XGBoost results must already have been
# combined into `xgboost_final` before this cell runs.
counterfactuals = []
for i in range(10):
    print(f"Running counterfactual search #{i}")
    # Keep the name `X`: create_result_df reads this notebook-level variable
    # to fill in the original feature values.
    X = datapoints.iloc[i].to_numpy().reshape(1, -1)
    explanation = mlp_cf.explain(X)
    result = create_result_df(explanation, "MLP")
    counterfactuals.append(result)

In [None]:
# Combine the per-instance MLP results into a single object.
mlp_final = pd.concat(counterfactuals)

In [None]:
# Stack the XGBoost results on top of the MLP results.
final_df = pd.concat([xgboost_final, mlp_final])

In [None]:
# Write the combined results to disk. `index` is not set, so pandas also writes
# the index as the first CSV column.
# NOTE(review): after the concatenations the index values may repeat — confirm the index column is wanted in the file.
final_df.to_csv("../../results/cf-alibi.csv")