# How to contribute?

Here we report a simple guide with the key features of RobustX in order to implement your own (robust) counterfactual explanation method.

# Setup preparation

In [1]:
# Import necessary components
from robustx.lib.models.BaseModel import BaseModel
from sklearn.model_selection import train_test_split
from robustx.lib.models.pytorch_models.SimpleNNModel import SimpleNNModel
from robustx.datasets.ExampleDatasets import get_example_dataset
from robustx.lib.tasks.ClassificationTask import ClassificationTask
from robustx.generators.CE_methods.Wachter import Wachter
import torch
from sklearn.metrics import accuracy_score, classification_report
import numpy as np
import pandas as pd
from robustx.generators.CEGenerator import CEGenerator
from robustx.generators.robust_CE_methods.EntropicRiskCE import EntropicRiskCE


class EnsembleModelWrapper(BaseModel):
    def __init__(self, model_ensemble: list[SimpleNNModel], aggregation_method: str = 'majority_vote'):
        super().__init__(EnsembleModelWrapper)
        self.model_ensemble = model_ensemble
        self.pt_model_ensemble = [model._model for model in model_ensemble]
        self.aggregation_method = aggregation_method
    
    def train(self, X: pd.DataFrame, y: pd.DataFrame) -> None:
        print("Training should be done on individual models in the ensemble.")
    
    def predict(self, X: pd.DataFrame) -> pd.DataFrame:
        device = next(self.pt_model_ensemble[0].parameters()).device
        X_tensor = torch.Tensor(X.to_numpy()).to(device)
        preds_ensemble = []
        for model in self.pt_model_ensemble:
            model.eval()
            with torch.no_grad():
                outputs = model(X_tensor).cpu().numpy()
                preds_ensemble.append(outputs)
        preds_ensemble = np.array(preds_ensemble)  # Shape: (n_models, n_samples, n_classes)
        if self.aggregation_method == 'majority_vote':
            final_preds = np.round(np.mean(preds_ensemble, axis=0)) # Shape: (n_samples, n_classes)
        else:
            raise ValueError(f"Unknown aggregation method: {self.aggregation_method}")
        predictions = final_preds.astype(int)
        
        return pd.DataFrame(predictions, columns=['prediction'], index=X.index)
    
    def predict_single(self, X: pd.DataFrame) -> int:
        return self.predict(X).values.item()
    
    def predict_ensemble_proba_tensor(self, X: torch.Tensor) -> torch.Tensor:
        device = next(self.pt_model_ensemble[0].parameters()).device
        X = X.to(device)
        probs_ensemble = []
        for model in self.pt_model_ensemble:
            model.eval()
            outputs = model(X)
            probs_ensemble.append(outputs)
        probs_ensemble = torch.stack(probs_ensemble, dim=0)  # Shape: (n_models, n_samples, n_classes)
        return probs_ensemble
    
    def predict_proba(self, X: pd.DataFrame) -> pd.DataFrame:
        X_tensor = torch.Tensor(X.to_numpy())
        probs_ensemble = self.predict_ensemble_proba_tensor(X_tensor).numpy()  # Shape: (n_models, n_samples, n_classes)
        if self.aggregation_method == 'majority_vote':
            aggregated_probs = np.mean(probs_ensemble, axis=0)
        return pd.DataFrame(aggregated_probs, columns=[f'class_{i}' for i in range(aggregated_probs.shape[1])], index=X.index)
        
    def predict_proba_tensor(self, X: torch.Tensor) -> torch.Tensor:
        X_numpy = X.numpy()
        probabilities = self.predict_proba(X_numpy)
        return torch.tensor(probabilities)
    
    def evaluate(self, X: pd.DataFrame, y: pd.DataFrame):
        y_pred = self.predict(X)
        accuracy = accuracy_score(y, y_pred)
        report = classification_report(y, y_pred)
        return {
            'accuracy': accuracy,
            'classification_report': report
        }
    
    def compute_accuracy(self, X_test, y_test):            
        return self.evaluate(X_test, y_test)['accuracy']

In [8]:
# Load and preprocess dataset
dl = get_example_dataset("iris")
dl.preprocess(
    impute_strategy_numeric='mean',  # Impute missing numeric values with mean
    scale_method='minmax',           # Apply min-max scaling
    encode_categorical=False         # No categorical encoding needed (since no categorical features)
)

# remove the target column from the dataset that has labels 2
dl.data = dl.data[dl.data['target'] != 2]

# Load model, note some RecourseGenerators may only work with a certain type of model,
# e.g., MCE only works with a SimpleNNModel
n_models = 10
model_ensemble = [SimpleNNModel(4, [10], 1, seed=0) for _ in range(n_models)]

target_column = "target"
X_train, X_test, y_train, y_test = train_test_split(dl.data.drop(columns=[target_column]), dl.data[target_column], test_size=0.35, random_state=0)


# Train each model in the ensemble
all_indexes = np.arange(X_train.shape[0])
for model in model_ensemble:
    np.random.shuffle(all_indexes)
    sampled_indexes = all_indexes[:int(0.8 * len(all_indexes))]
    model.train(X_train.iloc[sampled_indexes], y_train.iloc[sampled_indexes], epochs=100, batch_size=16, verbose=0)
    print(f"model accuracy: {model.compute_accuracy(X_test.values, y_test.values):0.4f}")

emodel = EnsembleModelWrapper(model_ensemble=model_ensemble, aggregation_method='majority_vote')
print(f"ensemble accuracy: {emodel.compute_accuracy(X_test, y_test):0.4f}")


# Create task
task = ClassificationTask(emodel, dl)

model accuracy: 0.5143
model accuracy: 1.0000
model accuracy: 0.6000
model accuracy: 0.5143
model accuracy: 1.0000
model accuracy: 0.4286
model accuracy: 0.4286
model accuracy: 0.5143
model accuracy: 0.5714
model accuracy: 0.4571
ensemble accuracy: 0.6571


# Example of an already implemented CE generation method in RobustX

In [9]:
# Each counterfactual explanation generator takes the task on creation, it can also take a custom distance function, but for now we will use the default one.
ce_gen = EntropicRiskCE(task)
base_cf_gen_class = Wachter
base_cf_gen_args = {
    'target_class': 1,
    'max_iter': 100,
    'lr': 0.01,
    'lambda_param': 0.1,
    'device': 'cpu'
}

# Get negative instances, the default column_name is always "target" but you can set it to the name of your dataset's target variable
negs = dl.get_negative_instances(neg_value=0, column_name="target")
print("Negative instances shape: ", negs.shape)
print(f"Example of a prediction for a negative instance:\n")
print(negs.head(1))
print("Output: ", emodel.predict(negs.head(1)).values.item())
print("Class: ", int(emodel.predict(negs.head(1)).values.item() > 0.5))  # Assuming binary classification with threshold 0.5

# You can generate for a set of instances stored in a DataFrame
print("\nGenerating counterfactual explanations using STCE for the first 5 negative instances:")
ce = ce_gen.generate_for_instance(negs.iloc[0],
                                  base_cf_gen_class=base_cf_gen_class,
                                  base_cf_gen_args=base_cf_gen_args,
                                  verbose=True, 
                                  device='cpu')
print(ce)
print("Output: ", model.predict(ce).values.item())
print("Class: ", int(model.predict(ce).values.item() > 0.5))  # Assuming binary classification with threshold 0.5

Negative instances shape:  (50, 4)
Example of a prediction for a negative instance:

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0           0.222222             0.625           0.067797          0.041667
Output:  0
Class:  0

Generating counterfactual explanations using STCE for the first 5 negative instances:
Iteration 01: Entropic risk = 0.4783
Current CE: [[ 0.48739973  0.3623934   0.32340688 -0.22069548]]
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0             0.4874          0.362393           0.323407         -0.220695
Output:  0.5105839967727661
Class:  1


In [10]:
# You can also implement a method to generate CEs for all the negative instance in one shot
ces = ce_gen.generate_for_all(neg_value=0, 
                              column_name="target", 
                              base_cf_gen_class=base_cf_gen_class, 
                              base_cf_gen_args=base_cf_gen_args, 
                              device='cpu')
print("All outputs are positive? ", np.all(model.predict(ces)>0.5))

All outputs are positive?  False


# Benchmarking your method

After you have finished implementing your method, you can include it into DefaultBenchmark.py file and test it against other methods supported in the library using this lines of code:

In [14]:
from robustx.lib.DefaultBenchmark import default_benchmark
methods = ["KDTreeNNCE", "EntropicRiskCE"]
evaluations = ["Validity", "Distance"]
default_benchmark(task, methods, evaluations, 
                  neg_value=0, 
                  column_name="target", 
                  delta=0.005,
                  base_cf_gen_class=base_cf_gen_class,
                  base_cf_gen_args=base_cf_gen_args,
                  tau=0.8,
                  device='cpu')

+----------------+----------------------+------------+------------+
| Method         |   Execution Time (s) |   Validity |   Distance |
| KDTreeNNCE     |             0.147826 |   1        |   0.447422 |
+----------------+----------------------+------------+------------+
| EntropicRiskCE |             0.654669 |   0.617284 |   0.270493 |
+----------------+----------------------+------------+------------+
