In [None]:
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import numpy as np
import pandas as pd
import shap
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from alibi.explainers import CounterFactual

In [2]:
iris = load_iris(as_frame=True)
X = iris['data']
y = iris['target']
feature_names = iris['feature_names']
target_names = iris['target_names']
X.shape, y.shape

((150, 4), (150,))

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [4]:
model = XGBClassifier()
model.fit(X_train, y_train)
model.score(X_train, y_train), model.score(X_test, y_test)

(1.0, 1.0)

In [5]:
instance_to_explain = X_test.iloc[[0]]
original_prediction_idx = model.predict(instance_to_explain)[0]
instance_to_explain, original_prediction_idx

(     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
 114                5.8               2.8                5.1               2.4,
 2)

In [6]:
target_prediction = 1

In [None]:
cf_explainer = CounterFactual(model.predict_proba, shape=(1, X_train.shape[1]), target_class=target_prediction)
cf_explainer.fit(X_train.values, y_train)



Counterfactual(meta={
  'name': 'Counterfactual',
  'type': ['blackbox', 'tensorflow', 'keras'],
  'explanations': ['local'],
  'params': {
              'shape': (1, 4),
              'distance_fn': 'l1',
              'target_proba': 1.0,
              'target_class': 1,
              'max_iter': 1000,
              'early_stop': 50,
              'lam_init': 0.1,
              'max_lam_steps': 10,
              'tol': 0.05,
              'learning_rate_init': 0.1,
              'eps': 0.01,
              'init': 'identity',
              'decay': True,
              'write_dir': None,
              'debug': False,
              'feature_range': (-10000000000.0, 10000000000.0),
              'is_model': False}
            ,
  'version': '0.9.6'}
)

In [12]:
explanation = cf_explainer.explain(instance_to_explain.values)
explanation

No appropriate lambda range found, try decreasing lam_init


Explanation(meta={
  'name': 'Counterfactual',
  'type': ['blackbox', 'tensorflow', 'keras'],
  'explanations': ['local'],
  'params': {
              'shape': (1, 4),
              'distance_fn': 'l1',
              'target_proba': 1.0,
              'target_class': 1,
              'max_iter': 1000,
              'early_stop': 50,
              'lam_init': 0.1,
              'max_lam_steps': 10,
              'tol': 0.05,
              'learning_rate_init': 0.1,
              'eps': 0.01,
              'init': 'identity',
              'decay': True,
              'write_dir': None,
              'debug': False,
              'feature_range': (-10000000000.0, 10000000000.0),
              'is_model': False}
            ,
  'version': '0.9.6'}
, data={
  'cf': None,
  'all': {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: []},
  'orig_class': 2,
  'orig_proba': 0.9886096715927124,
  'success': None}
)

In [13]:
if explanation.cf is None:
    print("\n지정된 조건 내에서 카운터팩추얼을 찾지 못했습니다.")
else:
    counterfactual = explanation.cf['X']
    cf_prediction_idx = model.predict(counterfactual)[0]

    print(f"\n카운터팩추얼의 예측: Class {cf_prediction_idx} ({target_names[cf_prediction_idx]})")
    print("\n--- 예측을 바꾸기 위해 변경된 피처 값 ---")

    diff = counterfactual - instance_to_explain.values
    changed_indices = np.where(np.abs(diff[0]) > 1e-4)[0]

    if len(changed_indices) == 0:
        print("최소한의 변경으로 예측을 바꿀 수 없어, 원본과 동일한 값이 반환되었습니다.")
    else:
        for idx in changed_indices:
            print(f"- {feature_names[idx]}: {instance_to_explain.values[0, idx]:.2f}  ->  {counterfactual[0, idx]:.2f}")


지정된 조건 내에서 카운터팩추얼을 찾지 못했습니다.


In [16]:
# --- 1. 데이터 로드 및 모델 학습 ---
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = iris.target
feature_names = X.columns.tolist()
target_names = iris.target_names

# 훈련/테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# XGBoost 다중 클래스 분류 모델 학습
model = XGBClassifier(random_state=42)
model.fit(X_train, y_train)


# --- 2. 설명할 인스턴스 선택 ---
instance_to_explain = X_test.iloc[[0]]
original_prediction_idx = model.predict(instance_to_explain)[0]
print(f"원본 데이터의 예측: Class {original_prediction_idx} ({target_names[original_prediction_idx]})")
print("="*50)


# --- 3. Alibi 카운터팩추얼 설명기 생성 ---
target_prediction = 2

cf_explainer = CounterFactual(
    model.predict_proba,
    shape=(1, X_train.shape[1]),
    target_class=target_prediction
)

# --- 이 부분이 수정되었습니다 ---
# 데이터 분포 학습 시 y_train도 함께 전달
cf_explainer.fit(X_train.values, y_train)


# --- 4. 카운터팩추얼 설명 생성 ---
print("카운터팩추얼을 탐색합니다...")
explanation = cf_explainer.explain(instance_to_explain.values)


# --- 5. 결과 확인 ---
if explanation.cf is None:
    print("\n지정된 조건 내에서 카운터팩추얼을 찾지 못했습니다.")
else:
    counterfactual = explanation.cf['X']
    cf_prediction_idx = model.predict(counterfactual)[0]

    print(f"\n카운터팩추얼의 예측: Class {cf_prediction_idx} ({target_names[cf_prediction_idx]})")
    print("\n--- 예측을 바꾸기 위해 변경된 피처 값 ---")

    diff = counterfactual - instance_to_explain.values
    changed_indices = np.where(np.abs(diff[0]) > 1e-4)[0]

    if len(changed_indices) == 0:
        print("최소한의 변경으로 예측을 바꿀 수 없어, 원본과 동일한 값이 반환되었습니다.")
    else:
        for idx in changed_indices:
            print(f"- {feature_names[idx]}: {instance_to_explain.values[0, idx]:.2f}  ->  {counterfactual[0, idx]:.2f}")

원본 데이터의 예측: Class 1 (versicolor)
카운터팩추얼을 탐색합니다...





카운터팩추얼의 예측: Class 2 (virginica)

--- 예측을 바꾸기 위해 변경된 피처 값 ---
- sepal length (cm): 6.10  ->  6.10
- sepal width (cm): 2.80  ->  2.80
- petal length (cm): 4.70  ->  5.21
