# TrustyAI

## Load data

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd

df = pd.read_csv("../../datasets/FICO/heloc_dataset_v1.csv")

In [3]:
import utils

bounds = utils.data_bounds(df)

## Initialise TrustyAI

In [4]:
import trustyai
import os
import site

# Directory under the first site-packages entry in which the JARs listed below are looked up.
DEFAULT_DEP_PATH = os.path.join(site.getsitepackages()[0], "trustyai", "dep")

# JAR locations, written relative to DEFAULT_DEP_PATH.
_BUNDLED_JARS = (
    "org/slf4j/slf4j-api/1.7.30/slf4j-api-1.7.30.jar",
    "org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar",
    "org/optaplanner/optaplanner-core-impl/8.18.0.Final/optaplanner-core-impl-8.18.0.Final.jar",
    "org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1.jar",
    "org/kie/kie-api/8.18.0.Beta/kie-api-8.18.0.Beta.jar",
    "io/micrometer/micrometer-core/1.8.2/micrometer-core-1.8.2.jar",
)

# Path entries handed to TrustyAI: the local deps wildcard first, then each JAR above.
CORE_DEPS = ["../../deps/*"]
CORE_DEPS.extend(f"{DEFAULT_DEP_PATH}/{jar}" for jar in _BUNDLED_JARS)

trustyai.init(path=CORE_DEPS)

## XGBoost

### Load model

In [5]:
from xgboost.sklearn import XGBClassifier

model = XGBClassifier()

In [6]:
model.load_model("../../models/xgboost.json")

### Selecting inputs

Select the input $X$ with a probability of negative outcome closest to $75\%$.

In [7]:
X = utils.get_negative_closest(model, 0.75)

In [8]:
X.to_dict()

{'ExternalRiskEstimate': 65,
 'MSinceOldestTradeOpen': 43,
 'MSinceMostRecentTradeOpen': 2,
 'AverageMInFile': 26,
 'NumSatisfactoryTrades': 15,
 'NumTrades60Ever2DerogPubRec': 1,
 'NumTrades90Ever2DerogPubRec': 1,
 'PercentTradesNeverDelq': 100,
 'MSinceMostRecentDelq': -7,
 'MaxDelq2PublicRecLast12M': 7,
 'MaxDelqEver': 8,
 'NumTotalTrades': 17,
 'NumTradesOpeninLast12M': 2,
 'PercentInstallTrades': 25,
 'MSinceMostRecentInqexcl7days': 0,
 'NumInqLast6M': 4,
 'NumInqLast6Mexcl7days': 4,
 'NetFractionRevolvingBurden': 31,
 'NetFractionInstallBurden': 83,
 'NumRevolvingTradesWBalance': 5,
 'NumInstallTradesWBalance': 1,
 'NumBank2NatlTradesWHighUtilization': 0,
 'PercentTradesWBalance': 75}

Check the model's output for $X$

In [9]:
model.predict_proba(X.to_numpy().reshape(1, -1))

array([[0.7499573 , 0.25004265]], dtype=float32)

Define a TrustyAI-compatible prediction function that wraps the XGBoost model.

In [10]:
from org.kie.kogito.explainability.model import PredictionInput, PredictionOutput
from trustyai.model import output
import numpy as np

TARGET = "RiskPerformance"

def predict(inputs):
    """Score a batch of prediction inputs with the XGBoost model.

    Every element of ``inputs`` is scored (the previous version only looked at
    ``inputs[0]`` and dropped the rest), and an empty batch yields an empty list
    instead of raising.

    Args:
        inputs: Sequence of prediction inputs; each exposes ``features`` whose
            items provide ``value.as_number()``.

    Returns:
        A list with one ``PredictionOutput`` per input, in input order. Each
        holds a single output named ``TARGET`` whose value is the winning class
        (``0`` when the first probability is strictly larger, otherwise ``1``)
        and whose score is that class's probability.
    """
    rows = [
        [_feature.value.as_number() for _feature in _input.features]
        for _input in inputs
    ]
    if not rows:
        # Nothing to score; avoid calling the model with an empty array.
        return []

    # One model call for the whole batch; row i corresponds to inputs[i].
    probabilities = model.predict_proba(np.array(rows))

    predictions = []
    for bad_prob, good_prob in probabilities:
        # Ties resolve to class 1, matching the original strict comparison.
        if bad_prob > good_prob:
            _prediction = (0, bad_prob)
        else:
            _prediction = (1, good_prob)
        _output = output(name=TARGET, dtype="number", value=_prediction[0], score=_prediction[1])
        predictions.append(PredictionOutput([_output]))
    return predictions

In [11]:
from trustyai.model import Model

provider = Model(predict)

Build input features with a fixed search bound of $[-20, 1000]$

In [12]:
from trustyai.model import feature

# One numeric feature per entry of X, each given the fixed domain (-20, 1000).
input_dict = X.to_dict()
input_feature = [
    feature(name=name, value=value, dtype="number", domain=(-20, 1000))
    for name, value in input_dict.items()
]

In [13]:
predict([PredictionInput(input_feature)])[0].outputs.get(0).toString()

'Output{value=0, type=number, score=0.7499573230743408, name='RiskPerformance'}'

Define the counterfactual goal as the label `1` ("Good" assessment)

In [14]:
goal = [output(name=TARGET, dtype="number", value=1)]

In [15]:
goal[0].toString()

'Output{value=1, type=number, score=1.0, name='RiskPerformance'}'

In [16]:
from trustyai.model import counterfactual_prediction

prediction = counterfactual_prediction(
    input_features=input_feature,
    outputs=goal)

Create the explainer with a maximum of $100{,}000$ iterations.

In [17]:
from trustyai.explainers import CounterfactualExplainer

explainer = CounterfactualExplainer(steps=100_000)

Bad value in file '/Users/rui/.matplotlib/matplotlibrc', line 1 ('backend: inline'): Key backend: 'inline' is not a valid value for backend; supported values are ['GTK3Agg', 'GTK3Cairo', 'GTK4Agg', 'GTK4Cairo', 'MacOSX', 'nbAgg', 'QtAgg', 'QtCairo', 'Qt5Agg', 'Qt5Cairo', 'TkAgg', 'TkCairo', 'WebAgg', 'WX', 'WXAgg', 'WXCairo', 'agg', 'cairo', 'pdf', 'pgf', 'ps', 'svg', 'template']
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.


Run the explainer

In [18]:
%%time

explanation = explainer.explain(prediction, provider)

CPU times: user 10min 24s, sys: 22min 52s, total: 33min 17s
Wall time: 3min 54s


Extract the features from the counterfactual explanation

In [19]:
expl_features = [e.asFeature() for e in explanation.entities]

In [26]:
# Re-score the counterfactual features with the wrapper and take the first output.
cf_output = predict([PredictionInput(expl_features)])[0].outputs.get(0)
# Summarise the output's value, name and score as single-element lists.
cf_p = dict(
    GoalValue=[cf_output.getValue().asNumber()],
    GoalName=[str(cf_output.getName())],
    GoalScore=[cf_output.getScore()],
)
print(cf_p)

{'GoalValue': [1.0], 'GoalName': ['RiskPerformance'], 'GoalScore': [0.557973325252533]}


In [27]:
def show_changes(explanation, original):
    """Print each feature whose value differs between the original input and the counterfactual.

    Args:
        explanation: Object exposing ``entities``; the entity at position ``i``
            is compared, via ``as_feature()``, with ``original[i]``.
        original: Sequence of features providing ``name`` and ``value.as_number()``.
    """
    counterfactual_entities = explanation.entities
    for position, before in enumerate(original):
        feature_name = before.name
        value_before = before.value.as_number()
        value_after = counterfactual_entities[position].as_feature().value.as_number()
        if value_before != value_after:
            print(f"Feature '{feature_name}': {value_before} -> {value_after}")


show_changes(explanation, input_feature)

Feature 'NumTrades60Ever2DerogPubRec': 1.0 -> 0.0
Feature 'MaxDelq2PublicRecLast12M': 7.0 -> 6.0
Feature 'MSinceMostRecentInqexcl7days': 0.0 -> -8.0
Feature 'NetFractionRevolvingBurden': 31.0 -> 32.0
Feature 'NumRevolvingTradesWBalance': 5.0 -> 4.0
Feature 'NumInstallTradesWBalance': 1.0 -> 2.0
Feature 'NumBank2NatlTradesWHighUtilization': 0.0 -> 1.0
Feature 'PercentTradesWBalance': 75.0 -> 76.0


In [28]:
# Counterfactual values keyed by feature name with a "Cf" prefix; each numeric value
# is wrapped in a one-element list.
d_cf = {f"Cf{f.name}": [f.value.as_number()] for f in expl_features}

In [29]:
import utils

# Pass the original input, the counterfactual values and the goal summary to the
# project helper, tagged with the method and model names.
# NOTE(review): presumably returns a pandas DataFrame (it is transposed below) — confirm in utils.
cf_df = utils.save_result(
    original=X, cf=d_cf, score=cf_p, method=["TrustyAI"], model=["XGBoost"]
)
# Display the transposed result.
cf_df.T

Unnamed: 0,0
ExternalRiskEstimate,65
MSinceOldestTradeOpen,43
MSinceMostRecentTradeOpen,2
AverageMInFile,26
NumSatisfactoryTrades,15
NumTrades60Ever2DerogPubRec,1
NumTrades90Ever2DerogPubRec,1
PercentTradesNeverDelq,100
MSinceMostRecentDelq,-7
MaxDelq2PublicRecLast12M,7


## MLP/Adam

In [30]:
cf_df.to_csv("../../results/cf-trustyai.csv")