# AUCの計算と解釈

このノートブックでは、ROC曲線の下の面積であるAUC（Area Under the Curve）の計算方法と解釈を学習します。


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc, roc_auc_score

# Global matplotlib defaults: font family and default figure size.
plt.rcParams.update({
    'font.family': 'DejaVu Sans',
    'figure.figsize': (12, 8),
})

# Data preparation: load the breast-cancer dataset and hold out 20% of the
# samples as a test split (fixed seed so the split is reproducible).
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train logistic regression and keep column index 1 of predict_proba for the
# test split; these scores are what the ROC/AUC computations consume.
model = LogisticRegression(random_state=42, max_iter=1000).fit(
    X_train_scaled, y_train
)
y_pred_proba = model.predict_proba(X_test_scaled)[:, 1]

# AUC computation
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)

# Genuinely manual AUC: trapezoidal rule over the ROC points, i.e. the sum of
# (width of each FPR step) * (mean TPR across that step). Computing it by hand
# makes the comparison below an independent check; sklearn's `auc` helper is
# the same routine `roc_auc_score` relies on for the binary case, so comparing
# those two against each other would always print a difference of zero.
auc_manual = float(np.sum(np.diff(fpr) * (tpr[1:] + tpr[:-1]) / 2.0))

# Library reference value computed directly from labels and scores.
auc_sklearn = roc_auc_score(y_test, y_pred_proba)

print('=== AUCの計算 ===')
print(f'手動実装: {auc_manual:.4f}')
print(f'scikit-learn: {auc_sklearn:.4f}')
print(f'差: {abs(auc_manual - auc_sklearn):.6f}')

# Visualize the AUC: draw the ROC curve, shade the area underneath it, and
# add the diagonal as the random-guess baseline.
roc_fig, roc_ax = plt.subplots(figsize=(10, 8))

roc_ax.plot(fpr, tpr, 'b-', lw=2, label='ROC curve')
# The shaded region between the curve and the x-axis is the AUC itself.
roc_ax.fill_between(fpr, tpr, alpha=0.3, label=f'AUC = {auc_sklearn:.3f}')
roc_ax.plot([0, 1], [0, 1], 'k--', lw=2, label='Random')

roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('ROC Curve with AUC')
roc_ax.legend()
roc_ax.grid(True, alpha=0.3)

plt.show()

print('=== AUCの解釈 ===')
print(f'AUC = {auc_sklearn:.4f}')
print('\n解釈:')
# Rule-of-thumb quality bands, checked from the highest threshold down.
if auc_sklearn >= 0.9:
    print('  優れた分類器')
elif auc_sklearn >= 0.8:
    print('  良い分類器')
elif auc_sklearn >= 0.7:
    print('  まあまあの分類器')
elif auc_sklearn >= 0.6:
    print('  悪い分類器')
elif auc_sklearn >= 0.5:
    print('  ランダムな分類器')
else:
    # An AUC below 0.5 is not "random": the scores rank negatives above
    # positives more often than not, which usually points to inverted labels
    # or scores taken from the wrong class column.
    print('  ランダムより悪い分類器（予測が反転している可能性）')

# Probabilistic reading of AUC: the chance that a randomly drawn positive
# sample receives a higher score than a randomly drawn negative sample.
print('\n確率的解釈:')
print('ランダムに選んだ正例と負例のペアで、')
print(f'正例のスコアが負例より高い確率: {auc_sklearn:.1%}')
