# TrustyAI

## Load data

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd

df = pd.read_csv("../../datasets/FICO/heloc_dataset_v1.csv")

In [3]:
import utils

# Run the local `utils.data_bounds` helper on the loaded HELOC dataframe.
# NOTE(review): `bounds` is not referenced again in the visible notebook; the
# input features built later use a fixed (-20, 1000) domain instead. Confirm
# whether this value is still needed.
bounds = utils.data_bounds(df)

## Initialise TrustyAI

In [4]:
import trustyai
import os
import site

# Root of the jar tree under the first site-packages entry
# (presumably where the installed trustyai package ships its dependencies).
DEFAULT_DEP_PATH = os.path.join(site.getsitepackages()[0], "trustyai", "dep")

# Jar locations relative to DEFAULT_DEP_PATH, in classpath order.
_BUNDLED_JARS = (
    "org/slf4j/slf4j-api/1.7.30/slf4j-api-1.7.30.jar",
    "org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar",
    "org/optaplanner/optaplanner-core-impl/8.18.0.Final/optaplanner-core-impl-8.18.0.Final.jar",
    "org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1.jar",
    "org/kie/kie-api/8.18.0.Beta/kie-api-8.18.0.Beta.jar",
    "io/micrometer/micrometer-core/1.8.2/micrometer-core-1.8.2.jar",
)

# The repository-local wildcard entry comes first, followed by the bundled jars.
CORE_DEPS = ["../../deps/*"]
CORE_DEPS.extend(f"{DEFAULT_DEP_PATH}/{jar}" for jar in _BUNDLED_JARS)

trustyai.init(path=CORE_DEPS)

## XGBoost

### Load model

In [5]:
from xgboost.sklearn import XGBClassifier

model = XGBClassifier()

In [6]:
model.load_model("../../models/xgboost.json")

### Selecting inputs

Select the input $X$ with a probability of negative outcome closest to $75\%$.

In [9]:
datapoints = pd.read_csv("../../datasets/datapoints.csv")

In [10]:
datapoints

Unnamed: 0,ExternalRiskEstimate,MSinceOldestTradeOpen,MSinceMostRecentTradeOpen,AverageMInFile,NumSatisfactoryTrades,NumTrades60Ever2DerogPubRec,NumTrades90Ever2DerogPubRec,PercentTradesNeverDelq,MSinceMostRecentDelq,MaxDelq2PublicRecLast12M,...,PercentInstallTrades,MSinceMostRecentInqexcl7days,NumInqLast6M,NumInqLast6Mexcl7days,NetFractionRevolvingBurden,NetFractionInstallBurden,NumRevolvingTradesWBalance,NumInstallTradesWBalance,NumBank2NatlTradesWHighUtilization,PercentTradesWBalance
0,63,309,36,112,17,2,1,68,7,2,...,16,0,1,1,54,-8,5,1,2,67
1,68,161,2,58,1,0,0,67,53,6,...,33,0,3,3,37,91,1,1,0,100
2,63,130,3,67,25,0,0,86,2,4,...,34,0,0,0,57,75,4,2,2,67
3,65,49,9,23,11,0,0,100,-7,7,...,25,-7,1,1,76,74,4,2,3,67
4,63,134,5,81,17,1,0,70,29,6,...,45,7,0,0,80,34,4,2,3,100
5,61,212,2,61,37,1,1,85,12,5,...,15,-7,2,2,33,-8,12,3,1,70
6,57,147,4,73,27,1,1,97,1,0,...,48,0,4,4,44,87,3,2,3,56
7,67,54,1,20,12,0,0,100,-7,7,...,38,0,1,1,53,98,5,2,1,70
8,70,100,2,50,13,0,0,93,24,6,...,50,0,4,3,54,70,2,5,1,88
9,70,210,20,27,4,0,0,100,-7,7,...,50,-7,1,1,102,97,2,2,1,100


Check the model's output for $X$

In [15]:
# Use the first row of `datapoints` as the input X.
X = datapoints.iloc[0]
# Reshape the row into a single-sample 2-D array before asking the model for
# class probabilities.
model.predict_proba(X.to_numpy().reshape(1, -1))

array([[0.75024784, 0.24975218]], dtype=float32)

TrustyAI wrapper for the XGBoost model

In [16]:
from org.kie.kogito.explainability.model import PredictionInput, PredictionOutput
from trustyai.model import output
import numpy as np

TARGET = "RiskPerformance"

def predict(_model):
    """Wrap a classifier exposing ``predict_proba`` as a TrustyAI prediction function.

    Args:
        _model: object whose ``predict_proba`` returns one
            ``(bad_prob, good_prob)`` row per input row.

    Returns:
        A function that takes a sequence of ``PredictionInput`` and returns a
        list with one ``PredictionOutput`` per input. Each holds a single
        output named ``TARGET`` whose value is the more probable class
        (0 = bad, 1 = good) and whose score is that class's probability.
        An empty sequence yields an empty list.
    """
    def fun(inputs):
        # One row of numeric feature values per input, so that a batch of
        # inputs gets a batch of outputs instead of only the first one.
        rows = [
            [_feature.value.as_number() for _feature in _input.features]
            for _input in inputs
        ]
        if not rows:
            return []
        predictions = []
        for bad_prob, good_prob in _model.predict_proba(np.array(rows)):
            # Ties go to the "good" class, as in a strict bad > good comparison.
            if bad_prob > good_prob:
                _prediction = (0, bad_prob)
            else:
                _prediction = (1, good_prob)
            _output = output(name=TARGET, dtype="number", value=_prediction[0], score=_prediction[1])
            predictions.append(PredictionOutput([_output]))
        return predictions
    return fun

In [17]:
from trustyai.model import Model

provider = Model(predict(model))

Build input features with a fixed search bound of $[-20, 1000]$

In [18]:
from trustyai.model import feature

def build_input_feature(_X, domain=(-20, 1000)):
    """Turn one data row into a list of searchable TrustyAI features.

    Args:
        _X: object with a ``to_dict()`` method mapping feature name to value
            (e.g. one row of ``datapoints``).
        domain: ``(lower, upper)`` search bound applied to every feature.
            Defaults to the fixed bound ``(-20, 1000)``.

    Returns:
        A list with one ``feature(...)`` per entry of ``_X.to_dict()``, in the
        same order, each declared with dtype ``"number"`` and the given domain.
    """
    return [
        feature(name=name, value=value, dtype="number", domain=domain)
        for name, value in _X.to_dict().items()
    ]

In [19]:
input_feature = build_input_feature(X)
predict(model)([PredictionInput(input_feature)])[0].outputs.get(0).toString()

'Output{value=0, type=number, score=0.7502478361129761, name='RiskPerformance'}'

Define the counterfactual goal as the label `1` ("Good" assessment)

In [20]:
goal = [output(name=TARGET, dtype="number", value=1)]

In [21]:
goal[0].toString()

'Output{value=1, type=number, score=1.0, name='RiskPerformance'}'

In [22]:
from trustyai.model import counterfactual_prediction

prediction = counterfactual_prediction(
    input_features=input_feature,
    outputs=goal)

Create the explainer with a maximum of $200,000$ iterations.

In [23]:
from trustyai.explainers import CounterfactualExplainer

explainer = CounterfactualExplainer(steps=200_000)

SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.


Run the explainer

In [24]:
explanation = explainer.explain(prediction, provider)

CPU times: user 19min 44s, sys: 19min 9s, total: 38min 53s
Wall time: 3min 48s


Extract the features from the counterfactual explanation

In [25]:
expl_features = [e.asFeature() for e in explanation.entities]

In [26]:
# Score the counterfactual features with the wrapped XGBoost model and keep
# the first output of the first prediction.
cf_input = PredictionInput(expl_features)
cf_output = predict(model)([cf_input])[0].outputs.get(0)

# Summarise the output as single-element lists keyed by column name.
cf_p = dict(
    GoalValue=[cf_output.getValue().asNumber()],
    GoalName=[str(cf_output.getName())],
    GoalScore=[cf_output.getScore()],
)
print(cf_p)

{'GoalValue': [1.0], 'GoalName': ['RiskPerformance'], 'GoalScore': [0.5726234912872314]}


In [27]:
def show_changes(explanation, original):
    """Print each feature whose value differs between input and counterfactual.

    Args:
        explanation: object whose ``entities`` can be indexed by position and
            whose items expose ``as_feature()``.
        original: sequence of the original input features; entry ``i`` is
            compared with ``explanation.entities[i]``.
    """
    counterfactual_entities = explanation.entities
    for position, source_feature in enumerate(original):
        before = source_feature.value.as_number()
        after = counterfactual_entities[position].as_feature().value.as_number()
        if before == after:
            continue
        print(f"Feature '{source_feature.name}': {before} -> {after}")


show_changes(explanation, input_feature)

Feature 'MaxDelq2PublicRecLast12M': 2.0 -> 3.0
Feature 'NumTotalTrades': 19.0 -> 18.0
Feature 'PercentInstallTrades': 16.0 -> 15.0
Feature 'MSinceMostRecentInqexcl7days': 0.0 -> -8.0
Feature 'NetFractionRevolvingBurden': 54.0 -> 55.0
Feature 'NetFractionInstallBurden': -8.0 -> -9.0
Feature 'NumRevolvingTradesWBalance': 5.0 -> 6.0
Feature 'NumInstallTradesWBalance': 1.0 -> 2.0


### Run for all datapoints

In [28]:
import utils

# Wrap the XGBoost model once; the wrapper is identical for every datapoint.
xgb_predict = predict(model)

counterfactuals = []
# Iterate over every row of `datapoints` rather than a hard-coded count, so the
# loop neither overruns a shorter table nor silently skips rows of a longer one.
# NOTE: `X` and `input_feature` are deliberately left as notebook-level names;
# later cells read the values left behind by the last iteration.
for i in range(len(datapoints)):
    print(f"Running counterfactual search #{i}")
    X = datapoints.iloc[i]
    input_feature = build_input_feature(X)
    prediction = counterfactual_prediction(input_features=input_feature, outputs=goal)
    explanation = explainer.explain(prediction, provider)
    expl_features = [e.asFeature() for e in explanation.entities]
    # Re-score the counterfactual with the same model that was explained.
    cf_output = xgb_predict([PredictionInput(expl_features)])[0].outputs.get(0)
    cf_p = {
        "GoalValue": [cf_output.getValue().asNumber()],
        "GoalName": [str(cf_output.getName())],
        "GoalScore": [cf_output.getScore()],
    }
    # Counterfactual feature values, prefixed with "Cf" to sit next to the originals.
    d_cf = {f"Cf{f.name}": [f.value.as_number()] for f in expl_features}
    cf_df = utils.save_result(
        original=X, cf=d_cf, score=cf_p, method=["TrustyAI"], model=["XGBoost"]
    )
    counterfactuals.append(cf_df)

Running counterfactual search #0
Running counterfactual search #1
Running counterfactual search #2
Running counterfactual search #3
Running counterfactual search #4
Running counterfactual search #5
Running counterfactual search #6
Running counterfactual search #7
Running counterfactual search #8
Running counterfactual search #9
CPU times: user 3h 6min 49s, sys: 4h 7min 29s, total: 7h 14min 18s
Wall time: 48min 18s


In [31]:
xgboost_final = pd.concat(counterfactuals)

## MLP/Adam

### Load model

In [32]:
from joblib import load

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load model files that come from a trusted source.
mlp_model = load('../../models/mlp.joblib') 

In [33]:
mlp_predict = predict(mlp_model)

In [34]:
# Sanity-check the wrapped MLP on `input_feature`. When cells are run in order,
# this is the value left behind by the last iteration of the XGBoost loop above,
# not the first datapoint used in the XGBoost walkthrough.
mlp_predict([PredictionInput(input_feature)])[0].outputs.get(0).toString()

'Output{value=0, type=number, score=0.865216873557407, name='RiskPerformance'}'

In [35]:
# Raw MLP probabilities for `X`. When cells are run in order, `X` is the row
# left behind by the last iteration of the XGBoost loop above.
mlp_model.predict_proba(X.to_numpy().reshape(1, -1))

array([[0.86521687, 0.13478313]])

In [36]:
mlp_provider = Model(mlp_predict)

In [38]:
import utils

counterfactuals = []
# Iterate over every row of `datapoints` rather than a hard-coded count.
for i in range(len(datapoints)):
    print(f"Running counterfactual search #{i}")
    X = datapoints.iloc[i]
    input_feature = build_input_feature(X)
    prediction = counterfactual_prediction(input_features=input_feature, outputs=goal)
    explanation = explainer.explain(prediction, mlp_provider)
    expl_features = [e.asFeature() for e in explanation.entities]
    # Re-score the counterfactual with the MLP wrapper, i.e. the same model the
    # search was run against. Scoring with the XGBoost model here would record
    # another model's GoalValue/GoalScore under the "MLP" label.
    cf_output = mlp_predict([PredictionInput(expl_features)])[0].outputs.get(0)
    cf_p = {
        "GoalValue": [cf_output.getValue().asNumber()],
        "GoalName": [str(cf_output.getName())],
        "GoalScore": [cf_output.getScore()],
    }
    # Counterfactual feature values, prefixed with "Cf" to sit next to the originals.
    d_cf = {f"Cf{f.name}": [f.value.as_number()] for f in expl_features}
    cf_df = utils.save_result(
        original=X, cf=d_cf, score=cf_p, method=["TrustyAI"], model=["MLP"]
    )
    counterfactuals.append(cf_df)

Running counterfactual search #0
Running counterfactual search #1
Running counterfactual search #2
Running counterfactual search #3
Running counterfactual search #4
Running counterfactual search #5
Running counterfactual search #6
Running counterfactual search #7
Running counterfactual search #8
Running counterfactual search #9
CPU times: user 1h 1min 57s, sys: 17min 31s, total: 1h 19min 29s
Wall time: 14min 31s


In [39]:
mlp_final = pd.concat(counterfactuals)

In [41]:
final_df = pd.concat([xgboost_final, mlp_final])

In [43]:
final_df.T

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.10,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19
ExternalRiskEstimate,63,68,63,65,63,61,57,67,70,70,63,68,63,65,63,61,57,67,70,70
MSinceOldestTradeOpen,309,161,130,49,134,212,147,54,100,210,309,161,130,49,134,212,147,54,100,210
MSinceMostRecentTradeOpen,36,2,3,9,5,2,4,1,2,20,36,2,3,9,5,2,4,1,2,20
AverageMInFile,112,58,67,23,81,61,73,20,50,27,112,58,67,23,81,61,73,20,50,27
NumSatisfactoryTrades,17,1,25,11,17,37,27,12,13,4,17,1,25,11,17,37,27,12,13,4
NumTrades60Ever2DerogPubRec,2,0,0,0,1,1,1,0,0,0,2,0,0,0,1,1,1,0,0,0
NumTrades90Ever2DerogPubRec,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0,0,0
PercentTradesNeverDelq,68,67,86,100,70,85,97,100,93,100,68,67,86,100,70,85,97,100,93,100
MSinceMostRecentDelq,7,53,2,-7,29,12,1,-7,24,-7,7,53,2,-7,29,12,1,-7,24,-7
MaxDelq2PublicRecLast12M,2,6,4,7,6,5,0,7,6,7,2,6,4,7,6,5,0,7,6,7


In [44]:
final_df.to_csv("../../results/cf-trustyai.csv")