In [1]:
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import alibi
print(alibi.__version__)
from alibi.explainers import CEM
#models
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier

2025-05-27 15:00:33.398452: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-05-27 15:00:33.612383: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-27 15:00:33.617246: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-05-27 15:00:33.617268: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc

0.9.6


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
dataset = load_breast_cancer()
X = dataset.data
y = dataset.target
feature_names = dataset.feature_names

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
def get_models():
    return {
        "RandomForest": lambda: RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42),
        "XGBoost": lambda: XGBClassifier(n_estimators=2, max_depth=2, learning_rate=1, objective='binary:logistic'),
        "AdaBoost": lambda: AdaBoostClassifier(n_estimators=100),
        "SVM": lambda: svm.SVC(kernel='linear', probability=True, random_state=42),
        "MLP": lambda: MLPClassifier(hidden_layer_sizes=(100,), activation='relu', solver='adam', max_iter=500, random_state=42)
    }

def apply_cem(instance: np.ndarray, predict_fn, X_train) -> np.ndarray:
    cem = CEM(
        predict_fn,
        mode='PN',
        shape=instance.shape,
        kappa=0.2,
        beta=0.01,
        gamma=0.0,
        c_init=10.0,
        c_steps=10,
        max_iterations=1000,
        feature_range=(X_train.min(axis=0), X_train.max(axis=0)),
        clip=(X_train.min(axis=0), X_train.max(axis=0)),
        learning_rate_init=1e-2
    )
    cem.fit(instance)
    explanation = cem.explain(instance)

    return explanation

def predict(x: np.ndarray, model) -> np.ndarray:
    preds = model.predict_proba(x)
    return np.atleast_2d(preds)

def train_models():
    model_fns = get_models()
    trained_models = {}
    for name, model_fn in model_fns.items():
        model = model_fn()
        model.fit(X_train, y_train)
        trained_models[name] = model
    return trained_models

def run_cem():
    trained_models = train_models()

    for model_name, model in trained_models.items():
        res = []
        test = [10, 20, 30]
        for i in test: # size: 569
            instance = X_test[i].reshape(1, -1)
            original_pred = predict(instance, model).argmax(axis=1)[0]
            original_prob = predict(instance, model)[0][original_pred]

            cem_explanation = apply_cem(instance, lambda x: predict(x, model), X_train)

            counterfactual = cem_explanation.PN if cem_explanation.PN is not None else instance
            success = 1 if cem_explanation.PN is not None else 0

            cf_pred = predict(counterfactual, model).argmax(axis=1)[0]
            cf_prob = predict(counterfactual, model)[0][cf_pred]

            row = {
                "model": model_name,
                "success": success,
                "original_pred": original_pred,
                "original_prob": original_prob,
                "cf_pred": cf_pred,
                "cf_prob": cf_prob,
            }
            diff = (counterfactual - instance)[0]
            for j, name in enumerate(feature_names):
                row[f"Δ_{name}"] = diff[j]

            res.append(row)

        df = pd.DataFrame(res)
        df.to_csv(f"cem_{model_name}_breastcancer.csv", index=False)



In [6]:
if __name__ == "__main__":
    run_cem()




2025-05-27 15:01:58.516669: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-05-27 15:01:58.595637: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:357] MLIR V1 optimization pass is not enabled
No PN found!
No PN found!
No PN found!
No PN found!
No PN found!
No PN found!
No PN found!
No PN found!
No PN found!
