# How to contribute?

This is a short guide to the key features of RobustX that you need in order to implement your own (robust) counterfactual explanation method.

# Setup preparation

In [2]:
# Import necessary components
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import torch

from robustx.lib.models.pytorch_models.SimpleNNModel import SimpleNNModel
from robustx.datasets.ExampleDatasets import get_example_dataset
from robustx.lib.tasks.ClassificationTask import ClassificationTask
from robustx.generators.CE_methods.Wachter import Wachter
from robustx.generators.CEGenerator import CEGenerator
from robustx.generators.robust_CE_methods.EntropicRiskCE import EntropicRiskCE
from robustx.lib.models.pytorch_models.EnsembleModelWrapper import EnsembleModelWrapper




In [3]:
# Load and preprocess dataset
dl = get_example_dataset("iris")
dl.preprocess(
    impute_strategy_numeric='mean',  # Impute missing numeric values with mean
    scale_method='minmax',           # Apply min-max scaling
    encode_categorical=False         # No categorical encoding needed (since no categorical features)
)

# Drop the rows whose target label is 2 (the target column itself is kept);
# later cells treat the task as binary classification.
dl.data = dl.data[dl.data['target'] != 2]

# Load model, note some RecourseGenerators may only work with a certain type of model,
# e.g., MCE only works with a SimpleNNModel
# NOTE(review): the positional arguments are presumably (input_dim, hidden_dims, output_dim)
# -- confirm against SimpleNNModel.
n_models = 10
model_ensemble = [SimpleNNModel(4, [10], 2, seed=0) for _ in range(n_models)]

target_column = "target"
X_train, X_test, y_train, y_test = train_test_split(
    dl.data.drop(columns=[target_column]),
    dl.data[target_column],
    test_size=0.35,
    random_state=0,
)

# Train each model in the ensemble on its own random 80% subset of the training rows.
# A seeded generator (instead of the global NumPy random state) makes the drawn
# subsets identical on every Restart & Run All.
SEED = 0
rng = np.random.default_rng(SEED)
all_indexes = np.arange(X_train.shape[0])
subset_size = int(0.8 * len(all_indexes))
for model in model_ensemble:
    rng.shuffle(all_indexes)  # in-place shuffle; the first `subset_size` entries form this model's subset
    sampled_indexes = all_indexes[:subset_size]
    model.train(X_train.iloc[sampled_indexes], y_train.iloc[sampled_indexes], epochs=50, batch_size=16, verbose=0)
    print(f"model accuracy: {model.compute_accuracy(X_test.values, y_test.values):0.4f}")

# Wrap the underlying `_model` object of every trained member into a single ensemble model
emodel = EnsembleModelWrapper(
    model_ensemble=[simple_model._model for simple_model in model_ensemble],
    device='cpu',
    ensemble_type=EnsembleModelWrapper.ENSEMBLE_SINGLE_OUTPUT_SINGLE_TARGET_LIST,
    aggregation_method=EnsembleModelWrapper.AGGREGATION_MAJORITY_VOTE
)
# Ensemble output on the test features; not used by the cells shown below, kept for inspection
preds = emodel.predict_tensor(torch.tensor(X_test.to_numpy(), dtype=torch.float), apply_softmax=True)
print(f"ensemble accuracy: {emodel.compute_accuracy(X_test, y_test):0.4f}")


# Create task
task = ClassificationTask(emodel, dl)

model accuracy: 0.9143
model accuracy: 0.9429
model accuracy: 0.9143
model accuracy: 0.9714
model accuracy: 0.9714
model accuracy: 0.8286
model accuracy: 0.9429
model accuracy: 0.9429
model accuracy: 0.9714
model accuracy: 0.8286
ensemble accuracy: 0.9429


# Example of an already implemented CE generation method in RobustX

In [4]:
# Run the ensemble on every row of the dataset, with the target column excluded from the inputs
features_only = dl.data.drop(columns=['target'])
emodel.predict(features_only)

Unnamed: 0,prediction
0,0
1,0
2,0
3,0
4,0
...,...
95,1
96,1
97,1
98,1


In [5]:
# Each counterfactual explanation generator takes the task on creation. It can also take a
# custom distance function, but for now we will use the default one.
ce_gen = EntropicRiskCE(task)

# For this walkthrough the query is simply the first row of the dataset (features only);
# its predicted class is printed below so you can check that it is a negative instance.
# To select the negative instances instead you can use, e.g.,
#   negs = dl.get_negative_instances(neg_value=0, column_name="target")
# The default column_name is always "target" but you can set it to the name of your
# dataset's target variable.
negs = dl.data.drop(columns=['target']).head(1)
instance = negs.head(1)

# Query the ensemble once for this instance and reuse the value in every printout below
original_output = emodel.predict(instance).values.item()

print("Negative instances shape: ", negs.shape)
print("Example of a prediction for a negative instance:\n")
print(negs.head(1))
print("Output: ", original_output)
print("Class: ", int(original_output > 0.5))  # Assuming binary classification with threshold 0.5

# Generate a counterfactual explanation for the single instance selected above
print("\nGenerating counterfactual explanations using EntropicRiskCF for the first 1 negative instances:")
ce = ce_gen.generate_for_instance(
    instance=instance,
    target_weights=None,
    tau=0.45,
    max_iter=50,
    device='cpu',
    verbose=True
)
print(ce)
print("Original Output: ", original_output)
print("CF Output: ", emodel.predict(ce).values.item())

Negative instances shape:  (1, 4)
Example of a prediction for a negative instance:

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0           0.222222             0.625           0.067797          0.041667
Output:  0
Class:  0

Generating counterfactual explanations using EntropicRiskCF for the first 1 negative instances:
Iteration 01: Entropic risk = 0.5128
Current CE: [[0.21222223 0.635      0.07779661 0.05166667]]
Iteration 02: Entropic risk = 0.5105
Current CE: [[0.20222218 0.64500004 0.08779666 0.06166672]]
Iteration 03: Entropic risk = 0.5083
Current CE: [[0.19222207 0.65500015 0.09779678 0.07166684]]
Iteration 04: Entropic risk = 0.5060
Current CE: [[0.18222189 0.6650003  0.10779699 0.08166704]]
Iteration 05: Entropic risk = 0.5037
Current CE: [[0.17222165 0.67500055 0.11779728 0.09166731]]
Iteration 06: Entropic risk = 0.5014
Current CE: [[0.16222136 0.68500084 0.12779763 0.10166764]]
Iteration 07: Entropic risk = 0.4991
Current CE: [[0.15222105 0.

In [12]:
# A generator can also expose a method that produces CEs for all the negative instances in one shot.
generation_kwargs = dict(
    target_weights=None,
    tau=0.8,
    max_iter=50,
    device='cpu',
    verbose=False,
)
ces = ce_gen.generate_for_all(**generation_kwargs)

# NOTE(review): `negs` was built with `.head(1)` in an earlier cell, so the two "query" checks
# below look at a single row -- confirm this is intended.
queries_all_positive = np.all(emodel.predict(negs) > 0.5)
print("All query outputs are positive? ", queries_all_positive)

queries_all_negative = np.all(emodel.predict(negs) < 0.5)
print("All query outputs are negative? ", queries_all_negative)

ces_all_positive = np.all(emodel.predict(ces) > 0.5)
print("All CF outputs are positive? ", ces_all_positive)

All query outputs are positive?  False
All query outputs are negative?  True
All CF outputs are positive?  False


# Benchmarking your method

After you have finished implementing your method, you can add it to the DefaultBenchmark.py file and test it against the other methods supported by the library using these lines of code:

In [14]:
from robustx.lib.DefaultBenchmark import default_benchmark

# Names of the CE methods to compare and of the metrics to report
methods = ["KDTreeNNCE", "EntropicRiskCE"]
evaluations = ["Validity", "Distance"]

# NOTE(review): this call previously also passed `base_cf_gen_class=base_cf_gen_class` and
# `base_cf_gen_args=base_cf_gen_args`, but neither name is defined in any cell of this
# notebook, so the cell raised NameError on a fresh kernel. If your method needs a base CE
# generator, define both names in a cell above and pass them here again -- confirm against
# default_benchmark.
default_benchmark(
    task,
    methods,
    evaluations,
    neg_value=0,
    column_name="target",
    delta=0.005,
    tau=0.8,
    device='cpu',
)

+----------------+----------------------+------------+------------+
| Method         |   Execution Time (s) |   Validity |   Distance |
| KDTreeNNCE     |             0.147826 |   1        |   0.447422 |
+----------------+----------------------+------------+------------+
| EntropicRiskCE |             0.654669 |   0.617284 |   0.270493 |
+----------------+----------------------+------------+------------+
