# TrustyAI

## Load data

In [1]:
import warnings
# Suppress every Python warning from this point on, for all later cells.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd

# Read the HELOC CSV into a DataFrame; the path is relative to the working directory.
DATASET_PATH = "../../datasets/FICO/heloc_dataset_v1.csv"
df = pd.read_csv(DATASET_PATH)

In [3]:
# Name of the label column; every other column is used as a model feature.
TARGET = 'RiskPerformance'

In [4]:
# Replace the label values with integer codes. pandas numbers the codes in
# order of first appearance, so which label becomes 0 depends on the row order
# of the CSV.
# NOTE(review): later cells treat class 0 as the "bad" outcome - confirm this
# matches the encoding the saved model was trained with.
target_codes = df[TARGET].factorize()[0]
df[TARGET] = target_codes

In [5]:
# Per-column summary statistics, transposed so each row is one column of df.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
RiskPerformance,10459.0,0.478057,0.499542,0.0,0.0,0.0,1.0,1.0
ExternalRiskEstimate,10459.0,67.425758,21.121621,-9.0,63.0,71.0,79.0,94.0
MSinceOldestTradeOpen,10459.0,184.205373,109.683816,-9.0,118.0,178.0,249.5,803.0
MSinceMostRecentTradeOpen,10459.0,8.543455,13.301745,-9.0,3.0,5.0,11.0,383.0
AverageMInFile,10459.0,73.843293,38.782803,-9.0,52.0,74.0,95.0,383.0
NumSatisfactoryTrades,10459.0,19.428052,13.004327,-9.0,12.0,19.0,27.0,79.0
NumTrades60Ever2DerogPubRec,10459.0,0.042738,2.51391,-9.0,0.0,0.0,1.0,19.0
NumTrades90Ever2DerogPubRec,10459.0,-0.142843,2.367397,-9.0,0.0,0.0,0.0,19.0
PercentTradesNeverDelq,10459.0,86.661536,25.999584,-9.0,87.0,96.0,100.0,100.0
MSinceMostRecentDelq,10459.0,6.762406,20.50125,-9.0,-7.0,-7.0,14.0,83.0


## Load model

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows as the test set; the fixed random_state makes the split repeatable.
train, test = train_test_split(df, test_size=0.25, random_state=42)

In [7]:
# Training features are every column of `train` except the label. The column
# selection is derived from `train` alone, so it never depends on the column
# order of a different frame.
train_x = train.drop(columns=[TARGET])
# Training labels.
train_y = train[TARGET]

In [8]:
from xgboost.sklearn import XGBClassifier

# Unfitted classifier object; its trained state is loaded from disk in the next cell.
model = XGBClassifier()

In [9]:
# Load a saved model from JSON into the classifier instead of fitting it here.
model.load_model("../../models/xgboost.json")

## Counterfactual search

### Selecting inputs

In [10]:
# Split the held-out rows into the feature columns and the label column.
test_x = test.drop(columns=[TARGET])
test_y = test[TARGET]

In [11]:
# Predicted class probabilities for every held-out row (one row per sample,
# one column per class); column 0 is used as the "bad" probability below.
probabilities = model.predict_proba(test_x)

In [12]:
import numpy as np

# How far each test row's class-0 probability is from 0.75, and the position
# of the row that comes closest to it.
bad_probs = np.abs(0.75 - probabilities[:, 0])
index = np.argmin(bad_probs)

In [13]:
# Feature values of the selected test row, as a plain dict.
test_x.iloc[index].to_dict()

{'ExternalRiskEstimate': 65,
 'MSinceOldestTradeOpen': 43,
 'MSinceMostRecentTradeOpen': 2,
 'AverageMInFile': 26,
 'NumSatisfactoryTrades': 15,
 'NumTrades60Ever2DerogPubRec': 1,
 'NumTrades90Ever2DerogPubRec': 1,
 'PercentTradesNeverDelq': 100,
 'MSinceMostRecentDelq': -7,
 'MaxDelq2PublicRecLast12M': 7,
 'MaxDelqEver': 8,
 'NumTotalTrades': 17,
 'NumTradesOpeninLast12M': 2,
 'PercentInstallTrades': 25,
 'MSinceMostRecentInqexcl7days': 0,
 'NumInqLast6M': 4,
 'NumInqLast6Mexcl7days': 4,
 'NetFractionRevolvingBurden': 31,
 'NetFractionInstallBurden': 83,
 'NumRevolvingTradesWBalance': 5,
 'NumInstallTradesWBalance': 1,
 'NumBank2NatlTradesWHighUtilization': 0,
 'PercentTradesWBalance': 75}

In [14]:
# Model probabilities for the selected row only; `[[index]]` keeps it a one-row DataFrame.
model.predict_proba(test_x.iloc[[index]])

array([[0.7499573 , 0.25004265]], dtype=float32)

### Define problem

In [15]:
import trustyai
import os
import site

# Locate the JARs bundled with the installed trustyai package through the
# package's own location, so the path is right whichever site-packages
# directory (system, virtualenv, user) the package was installed into.
DEFAULT_DEP_PATH = os.path.join(os.path.dirname(trustyai.__file__), "dep")

# Classpath entries handed to trustyai.init: the repository-local ../../deps/*
# glob plus selected JARs found under DEFAULT_DEP_PATH.
CORE_DEPS = [
    "../../deps/*",
    f"{DEFAULT_DEP_PATH}/org/slf4j/slf4j-api/1.7.30/slf4j-api-1.7.30.jar",
    f"{DEFAULT_DEP_PATH}/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar",
    f"{DEFAULT_DEP_PATH}/org/optaplanner/optaplanner-core-impl/8.18.0.Final/"
    f"optaplanner-core-impl-8.18.0.Final.jar",
    f"{DEFAULT_DEP_PATH}/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1.jar",
    f"{DEFAULT_DEP_PATH}/org/kie/kie-api/8.18.0.Beta/kie-api-8.18.0.Beta.jar",
    f"{DEFAULT_DEP_PATH}/io/micrometer/micrometer-core/1.8.2/micrometer-core-1.8.2.jar",
]

# NOTE(review): presumably this starts the JVM with the classpath above, which
# the later `org.kie...` import would rely on - confirm against the TrustyAI docs.
trustyai.init(path=CORE_DEPS)

In [16]:
from org.kie.kogito.explainability.model import PredictionInput, PredictionOutput
from trustyai.model import output
import numpy as np

def predict(inputs):
    """Score a batch of prediction inputs with the loaded classifier.

    Args:
        inputs: sequence of prediction inputs; each item exposes ``features``,
            and every feature's ``value.as_number()`` gives its numeric value.

    Returns:
        A list with one ``PredictionOutput`` per input, in input order. Each
        holds a single output named ``TARGET`` whose value is the winning
        class (0 when the first probability is strictly larger, otherwise 1)
        and whose score is that class's probability. An empty batch yields
        an empty list.
    """
    if len(inputs) == 0:
        return []

    # One row of numeric feature values per input, scored in a single call.
    rows = [
        [_feature.value.as_number() for _feature in _input.features]
        for _input in inputs
    ]
    results = model.predict_proba(np.array(rows))

    predictions = []
    for bad_prob, good_prob in results:
        # Ties go to class 1.
        if bad_prob > good_prob:
            _prediction = (0, bad_prob)
        else:
            _prediction = (1, good_prob)
        _output = output(name=TARGET, dtype="number", value=_prediction[0], score=_prediction[1])
        predictions.append(PredictionOutput([_output]))
    return predictions

In [17]:
from trustyai.model import Model

# Wrap the Python predict function in a TrustyAI Model; it is passed to the explainer later.
provider = Model(predict)

In [18]:
# Summary statistics with one row per column; the min/max columns feed the feature bounds below.
summary = df.describe().T

In [19]:
# Nested dict of observed limits, looked up as bounds['min'][column] and bounds['max'][column].
bounds = summary[['min', 'max']].to_dict()

In [20]:
from trustyai.model import feature

# Turn the selected test row into TrustyAI features. Each feature's search
# domain runs from the column's observed minimum up to its observed maximum
# plus 20.
input_dict = test_x.iloc[index].to_dict()
input_feature = [
    feature(
        name=column,
        value=current_value,
        dtype="number",
        domain=(bounds['min'][column], bounds['max'][column] + 20),
    )
    for column, current_value in input_dict.items()
]

In [21]:
# Run the wrapper on the original row and show the resulting output as text.
predict([PredictionInput(input_feature)])[0].outputs.get(0).toString()

'Output{value=0, type=number, score=0.7499573230743408, name='RiskPerformance'}'

In [22]:
# Desired outcome for the counterfactual: value 1 on the target output.
goal = [output(name=TARGET, dtype="number", value=1)]

In [23]:
# Show the goal output as text.
goal[0].toString()

'Output{value=1, type=number, score=1.0, name='RiskPerformance'}'

In [24]:
from trustyai.model import counterfactual_prediction

# Bundle the original features with the desired outcome into a counterfactual prediction.
prediction = counterfactual_prediction(
    input_features=input_feature,
    outputs=goal)

In [26]:
from trustyai.explainers import CounterfactualExplainer

# Counterfactual explainer configured with steps=10_000.
# NOTE(review): presumably the search step budget - confirm against the TrustyAI docs.
explainer = CounterfactualExplainer(steps=10_000)

In [27]:
# Search for a counterfactual for `prediction`, with `provider` supplied as the model.
explanation = explainer.explain(prediction, provider)

In [28]:
# Convert each entity of the explanation back into a feature.
expl_features = [e.asFeature() for e in explanation.entities]

In [29]:
# Re-score the counterfactual features with the same wrapper to check the predicted class.
predict([PredictionInput(expl_features)])[0].outputs.get(0).toString()

'Output{value=1, type=number, score=0.514225959777832, name='RiskPerformance'}'

In [30]:
def show_changes(explanation, original):
    """Print every feature whose value differs in the counterfactual.

    Args:
        explanation: object whose ``entities`` can be indexed by position and
            whose items provide ``as_feature()``.
        original: sequence of the original features, in the same order as
            ``explanation.entities``.

    Prints one line per changed feature in the form
    ``Feature '<name>': <old> -> <new>``; unchanged features are skipped.
    """
    counterfactual_entities = explanation.entities
    for position, original_feature in enumerate(original):
        before = original_feature.value.as_number()
        after = counterfactual_entities[position].as_feature().value.as_number()
        if before == after:
            continue
        print(f"Feature '{original_feature.name}': {before} -> {after}")


# List the features whose values differ between the original input and the counterfactual.
show_changes(explanation, input_feature)

Feature 'ExternalRiskEstimate': 65.0 -> 78.0
