In [44]:
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
import alibi
print(alibi.__version__)
from alibi.explainers import CEM
#models
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier

0.9.6


In [45]:
dataset = load_breast_cancer()
X = dataset.data
y = dataset.target
feature_names = dataset.feature_names

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [46]:
def get_models():
    return {
        "RandomForest": lambda: RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42),
        "XGBoost": lambda: XGBClassifier(n_estimators=2, max_depth=2, learning_rate=1, objective='binary:logistic'),
        "AdaBoost": lambda: AdaBoostClassifier(n_estimators=100),
        "SVM": lambda: svm.SVC(kernel='linear', probability=True, random_state=42),
        "MLP": lambda: MLPClassifier(hidden_layer_sizes=(100,), activation='relu', solver='adam', max_iter=500, random_state=42)
    }

def apply_cem(instance: np.ndarray, predict_fn, X_train) -> np.ndarray:
    cem = CEM(
        predict_fn,
        mode='PN',
        shape=instance.shape,
        kappa=0.2,
        beta=0.01,
        gamma=0.0,
        c_init=10.0,
        c_steps=10,
        max_iterations=1000,
        feature_range=(X_train.min(axis=0), X_train.max(axis=0)),
        clip=(X_train.min(axis=0), X_train.max(axis=0)),
        learning_rate_init=1e-2
    )
    cem.fit(instance)
    explanation = cem.explain(instance)

    return explanation

def predict(x: np.ndarray, model) -> np.ndarray:
    preds = model.predict_proba(x)
    return np.atleast_2d(preds)

def run_cem():
    for model_name, model in get_models().items():
        model = model()
        model.fit(X_train, y_train)

        res = []
        test = [10, 20, 30]
        for i in test: # size: 569
            instance = X_test[i].reshape(1, -1)
            original_pred = predict(instance, model).argmax(axis=1)[0]
            original_prob = predict(instance, model)[0][original_pred]

            cem_explanation = apply_cem(instance, lambda x: predict(x, model), X_train)

            counterfactual = cem_explanation.PN if cem_explanation.PN is not None else instance
            success = 1 if cem_explanation.PN is not None else 0

            cf_pred = predict(counterfactual, model).argmax(axis=1)[0]
            cf_prob = predict(counterfactual, model)[0][cf_pred]

            row = {
                "model": model_name,
                "success": success,
                "original_pred": original_pred,
                "original_prob": original_prob,
                "cf_pred": cf_pred,
                "cf_prob": cf_prob,
            }
            diff = (counterfactual - instance)[0]
            for j, name in enumerate(feature_names):
                row[f"Δ_{name}"] = diff[j]

            res.append(row)

        df = pd.DataFrame(res)
        df.to_csv(f"cem_{model_name}_breastcancer.csv", index=False)



In [47]:
if __name__ == "__main__":
    run_cem()

No PN found!
No PN found!
No PN found!
No PN found!
No PN found!
No PN found!
No PN found!
No PN found!
No PN found!
