In [9]:
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import numpy as np
import pandas as pd
import shap
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris, load_breast_cancer
from alibi.explainers import Counterfactual

In [43]:
tf.compat.v1.reset_default_graph()

iris = load_iris(as_frame=True)
X = iris['data']
y = iris['target']
feature_names = iris['feature_names']
target_names = iris['target_names']
X.shape, y.shape

((150, 4), (150,))

In [44]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [45]:
model = XGBClassifier()
model.fit(X_train, y_train)
model.score(X_train, y_train), model.score(X_test, y_test)

(1.0, 1.0)

In [46]:
instance_to_explain = X_test.iloc[[0]]
original_prediction_idx = model.predict(instance_to_explain)[0]
instance_to_explain, original_prediction_idx, target_names[original_prediction_idx]

(    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
 73                6.1               2.8                4.7               1.2,
 1,
 'versicolor')

In [47]:
target_prediction = 2

cf_explainer = Counterfactual(model.predict_proba, shape=(1, X_test.shape[1]), target_class=target_prediction)
                              #tol=0.05, max_iter=1000, lam_init=0.01, max_lam_steps=10)
cf_explainer.fit(X_train.values, y_train.values)

Counterfactual(meta={
  'name': 'Counterfactual',
  'type': ['blackbox', 'tensorflow', 'keras'],
  'explanations': ['local'],
  'params': {
              'shape': (1, 4),
              'distance_fn': 'l1',
              'target_proba': 1.0,
              'target_class': 2,
              'max_iter': 1000,
              'early_stop': 50,
              'lam_init': 0.1,
              'max_lam_steps': 10,
              'tol': 0.05,
              'learning_rate_init': 0.1,
              'eps': 0.01,
              'init': 'identity',
              'decay': True,
              'write_dir': None,
              'debug': False,
              'feature_range': (-10000000000.0, 10000000000.0),
              'is_model': False}
            ,
  'version': '0.9.6'}
)

In [48]:
explanation = cf_explainer.explain(instance_to_explain.values)
explanation

Explanation(meta={
  'name': 'Counterfactual',
  'type': ['blackbox', 'tensorflow', 'keras'],
  'explanations': ['local'],
  'params': {
              'shape': (1, 4),
              'distance_fn': 'l1',
              'target_proba': 1.0,
              'target_class': 2,
              'max_iter': 1000,
              'early_stop': 50,
              'lam_init': 0.1,
              'max_lam_steps': 10,
              'tol': 0.05,
              'learning_rate_init': 0.1,
              'eps': 0.01,
              'init': 'identity',
              'decay': True,
              'write_dir': None,
              'debug': False,
              'feature_range': (-10000000000.0, 10000000000.0),
              'is_model': False}
            ,
  'version': '0.9.6'}
, data={
  'cf': {
          'X': array([[6.1009345, 2.7996817, 5.208495 , 1.2      ]], dtype=float32),
          'distance': 0.5097482204437256,
          'lambda': 0.07525000000000001,
          'index': 1864,
          'class': 2,
          '

In [49]:
cf = explanation.cf['X']
explanation.cf, cf, model.predict(cf)[0]

({'X': array([[6.1009345, 2.7996817, 5.208495 , 1.2      ]], dtype=float32),
  'distance': 0.5097482204437256,
  'lambda': 0.07525000000000001,
  'index': 1864,
  'class': 2,
  'proba': array([[0.0049561 , 0.03232411, 0.9627198 ]], dtype=float32),
  'loss': 0.039748367042984345},
 array([[6.1009345, 2.7996817, 5.208495 , 1.2      ]], dtype=float32),
 2)

In [50]:
diff = cf - instance_to_explain.values
diff

array([[ 9.34505463e-04, -3.18336487e-04,  5.08495140e-01,
         4.76837159e-08]])

In [51]:
changed_indices = np.where(np.abs(diff[0]) > 1e-2)[0]
changed_indices

array([2], dtype=int64)

In [52]:
for idx in changed_indices:
    print(f"- {feature_names[idx]}: {instance_to_explain.values[0, idx]:.2f}  ->  {cf[0, idx]:.2f}")

- petal length (cm): 4.70  ->  5.21


In [53]:
explanation.all

{0: [{'X': array([[6.3229117, 2.5853248, 5.2001143, 1.2      ]], dtype=float32),
   'distance': 0.9377014636993408,
   'lambda': 0.0505,
   'index': 11,
   'class': 2,
   'proba': array([[0.00318627, 0.0168295 , 0.9799842 ]], dtype=float32),
   'loss': 0.04775455521199902},
  {'X': array([[6.3437514, 2.5691135, 5.2197075, 1.2      ]], dtype=float32),
   'distance': 0.9943456649780273,
   'lambda': 0.0505,
   'index': 12,
   'class': 2,
   'proba': array([[0.00318627, 0.0168295 , 0.9799842 ]], dtype=float32),
   'loss': 0.05061508737657269},
  {'X': array([[6.361905 , 2.5553868, 5.2366757, 1.2      ]], dtype=float32),
   'distance': 1.04319429397583,
   'lambda': 0.0505,
   'index': 13,
   'class': 2,
   'proba': array([[0.00318627, 0.0168295 , 0.9799842 ]], dtype=float32),
   'loss': 0.05308194314096173},
  {'X': array([[6.3776603, 2.5438845, 5.251298 , 1.2      ]], dtype=float32),
   'distance': 1.0850739479064941,
   'lambda': 0.0505,
   'index': 14,
   'class': 2,
   'proba': array(

In [54]:
tf.compat.v1.reset_default_graph()

# --- 1. 데이터 로드 및 모델 학습 ---
cancer = load_breast_cancer()
X = pd.DataFrame(cancer.data, columns=cancer.feature_names)
y = cancer.target
feature_names = X.columns.tolist()
target_names = cancer.target_names

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 이진 분류 모델 학습
model = XGBClassifier(random_state=42)
model.fit(X_train, y_train)


# --- 2. 설명할 인스턴스 선택 ---
# 예시: 모델이 '악성(Malignant)'(Class 0)으로 예측한 첫 번째 테스트 데이터
instance_to_explain = X_test.iloc[[0]]
original_prediction_idx = model.predict(instance_to_explain)[0]
print(f"원본 데이터의 예측: Class {original_prediction_idx} ({target_names[original_prediction_idx]})")
print("="*50)


# --- 3. Alibi 카운터팩추얼 설명기 생성 ---
# 목표: 예측을 '양성(Benign)'(Class 1)으로 바꾸고 싶다
target_prediction = 0

cf_explainer = Counterfactual(
    model.predict_proba,
    shape=(1, X_train.shape[1]),
    target_class='other',
    tol=0.05,
    max_iter=10000,
    lam_init=0.001,
    max_lam_steps=10,
    learning_rate_init=0.01
)

# 데이터 분포 학습
cf_explainer.fit(X_train.values, y_train)


# --- 4. 카운터팩추얼 설명 생성 ---
print("카운터팩추얼을 탐색합니다...")
# DataFrame을 NumPy 배열로 변환하여 전달
explanation = cf_explainer.explain(instance_to_explain.values)


# --- 5. 결과 확인 ---
if explanation.cf is None:
    print("\n지정된 조건 내에서 카운터팩추얼을 찾지 못했습니다.")
else:
    counterfactual = explanation.cf['X']
    cf_prediction_idx = model.predict(counterfactual)[0]

    print(f"\n카운터팩추얼의 예측: Class {cf_prediction_idx} ({target_names[cf_prediction_idx]})")
    print("\n--- 예측을 바꾸기 위해 변경된 피처 값 ---")

    diff = counterfactual - instance_to_explain.values
    changed_indices = np.where(np.abs(diff[0]) > 1e-4)[0]

    if len(changed_indices) == 0:
        print("최소한의 변경으로 예측을 바꿀 수 없어, 원본과 동일한 값이 반환되었습니다.")
    else:
        for idx in changed_indices:
            print(f"- {feature_names[idx]}: {instance_to_explain.values[0, idx]:.2f}  ->  {counterfactual[0, idx]:.2f}")

원본 데이터의 예측: Class 1 (benign)
카운터팩추얼을 탐색합니다...


No appropriate lambda range found, try decreasing lam_init



지정된 조건 내에서 카운터팩추얼을 찾지 못했습니다.


In [55]:
explanation.all

{0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: []}