# F1-scoreの実装と活用

このノートブックでは、F1-scoreの理論から実装、活用方法まで詳しく学習します。

## 学習目標
- F1-scoreの理論的理解
- 手動実装とscikit-learnの比較
- 実データでのF1-score計算
- 実務での活用方法


In [None]:
# 必要なライブラリのインポート
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (f1_score, precision_score, recall_score,
                           accuracy_score, classification_report)
import pandas as pd
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides any deprecation or metric
# warnings emitted by the libraries used in later cells.
warnings.filterwarnings('ignore')

# Plot defaults: font family and default figure size, applied in one call.
# NOTE(review): the previous comment described this as a Japanese font
# setup, but the family selected is 'DejaVu Sans'; the plot labels visible
# in this notebook are English. Confirm the font choice before adding
# Japanese text to a figure.
plt.rcParams.update({
    'font.family': 'DejaVu Sans',
    'figure.figsize': (12, 8),
})
sns.set_style("whitegrid")


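## 1. F1-scoreの理論

F1-scoreはPrecision（適合率）とRecall（再現率）の調和平均です。

$$F_1 = \frac{2 \cdot \mathrm{Precision} \cdot \mathrm{Recall}}{\mathrm{Precision} + \mathrm{Recall}}$$

これを一般化したF-beta scoreでは、重み $\beta$ によってRecallをPrecisionに対してどの程度重視するかを調整します。

$$F_\beta = \frac{(1 + \beta^2) \cdot \mathrm{Precision} \cdot \mathrm{Recall}}{\beta^2 \cdot \mathrm{Precision} + \mathrm{Recall}}$$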


In [None]:
def f1_score_manual(y_true, y_pred):
    """
    Compute F1-score, precision and recall for binary labels by hand.

    The positive class is the label ``1``; every other label value is
    treated as negative. Precision and recall are derived directly from
    the TP / FP / FN counts, so the result does not depend on the library
    implementation it is later compared against.

    Parameters:
    y_true: array-like of ground-truth labels
    y_pred: array-like of predicted labels, same length as ``y_true``

    Returns:
    (f1, precision, recall): tuple of three floats. A metric whose
    denominator is zero (no predicted positives, no actual positives,
    or no true positives) is reported as 0.0.

    Raises:
    ValueError: if ``y_true`` and ``y_pred`` contain a different number
    of labels.
    """
    true_labels = np.asarray(y_true).ravel()
    pred_labels = np.asarray(y_pred).ravel()
    if true_labels.size != pred_labels.size:
        raise ValueError(
            "y_true and y_pred must have the same number of samples "
            f"(got {true_labels.size} and {pred_labels.size})"
        )

    actual_pos = true_labels == 1
    predicted_pos = pred_labels == 1

    # Confusion-matrix counts for the positive class
    tp = int(np.count_nonzero(actual_pos & predicted_pos))
    fp = int(np.count_nonzero(~actual_pos & predicted_pos))
    fn = int(np.count_nonzero(actual_pos & ~predicted_pos))

    # Precision = TP / (TP + FP), Recall = TP / (TP + FN); 0.0 when undefined
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0

    # F1 is the harmonic mean; it is 0.0 when both components are 0
    if precision + recall == 0:
        f1 = 0.0
    else:
        f1 = 2 * (precision * recall) / (precision + recall)

    return f1, precision, recall

def f_beta_score_manual(y_true, y_pred, beta=1):
    """
    Compute the F-beta score for binary labels by hand.

    The positive class is the label ``1``; every other label value is
    treated as negative. Precision and recall are derived directly from
    the TP / FP / FN counts and combined as

        F_beta = (1 + beta^2) * P * R / (beta^2 * P + R)

    Parameters:
    y_true: array-like of ground-truth labels
    y_pred: array-like of predicted labels, same length as ``y_true``
    beta: non-negative weight; beta < 1 favours precision, beta > 1
          favours recall, beta = 1 gives the F1-score

    Returns:
    f_beta: F-beta score as a float; 0.0 when the denominator of the
    formula is zero (there are no true positives)

    Raises:
    ValueError: if ``beta`` is negative, or if ``y_true`` and ``y_pred``
    contain a different number of labels.
    """
    if beta < 0:
        raise ValueError(f"beta must be non-negative (got {beta})")

    true_labels = np.asarray(y_true).ravel()
    pred_labels = np.asarray(y_pred).ravel()
    if true_labels.size != pred_labels.size:
        raise ValueError(
            "y_true and y_pred must have the same number of samples "
            f"(got {true_labels.size} and {pred_labels.size})"
        )

    actual_pos = true_labels == 1
    predicted_pos = pred_labels == 1

    # Confusion-matrix counts for the positive class
    tp = int(np.count_nonzero(actual_pos & predicted_pos))
    fp = int(np.count_nonzero(~actual_pos & predicted_pos))
    fn = int(np.count_nonzero(actual_pos & ~predicted_pos))

    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0

    # Guard on the denominator that is actually divided by
    beta_sq = beta ** 2
    denominator = beta_sq * precision + recall
    if denominator == 0:
        return 0.0

    return (1 + beta_sq) * (precision * recall) / denominator

# Worked example: a small binary problem on which the hand-written F1 is
# checked against scikit-learn's f1_score.
y_true_example = np.array([1, 0, 1, 1, 0, 1, 0, 0, 1, 0])
y_pred_example = np.array([1, 0, 0, 1, 0, 1, 1, 0, 1, 0])

f1_manual, precision_manual, recall_manual = f1_score_manual(y_true_example, y_pred_example)
f1_sklearn = f1_score(y_true_example, y_pred_example)

print("=== F1-scoreの基本例 ===")
print(f"実際のラベル: {y_true_example}")
print(f"予測ラベル: {y_pred_example}")
print(f"\n手動実装:")
print(f"  Precision: {precision_manual:.4f}")
print(f"  Recall: {recall_manual:.4f}")
print(f"  F1-score: {f1_manual:.4f}")
print(f"\nscikit-learn:")
print(f"  F1-score: {f1_sklearn:.4f}")
print(f"  実装の一致: {abs(f1_manual - f1_sklearn) < 1e-10}")

# F-beta visualization.
# The example above has 4 TP, 1 FP and 1 FN, i.e. precision == recall == 0.8.
# Whenever P == R the F-beta formula collapses to F_beta == P for every beta,
# so that prediction alone plots as a flat line and shows nothing about the
# effect of beta. Two contrasting predictions for the same ground truth are
# plotted next to it so the precision/recall trade-off becomes visible.
betas = np.arange(0.1, 3.0, 0.1)
f_beta_scores = [f_beta_score_manual(y_true_example, y_pred_example, beta) for beta in betas]

# 2 TP, 0 FP, 3 FN -> precision 1.00, recall 0.40
y_pred_high_precision = np.array([1, 0, 0, 1, 0, 0, 0, 0, 0, 0])
# 5 TP, 3 FP, 0 FN -> precision 0.625, recall 1.00
y_pred_high_recall = np.array([1, 1, 1, 1, 0, 1, 1, 1, 1, 0])

f_beta_curves = [
    ('F-beta Score (P = R = 0.80)', 'b-', f_beta_scores),
    ('High precision (P = 1.00, R = 0.40)', 'g-',
     [f_beta_score_manual(y_true_example, y_pred_high_precision, beta) for beta in betas]),
    ('High recall (P = 0.625, R = 1.00)', 'm-',
     [f_beta_score_manual(y_true_example, y_pred_high_recall, beta) for beta in betas]),
]

# Same curves twice: full beta range on the left, zoomed range on the right
panels = [
    ('F-beta Score vs Beta', None),
    ('F-beta Score vs Beta (Zoomed)', (0.5, 2.0)),
]

_, axes = plt.subplots(1, 2, figsize=(12, 5))
for ax, (title, x_limits) in zip(axes, panels):
    for curve_label, line_style, scores in f_beta_curves:
        ax.plot(betas, scores, line_style, linewidth=2, label=curve_label)
    ax.axvline(x=1, color='r', linestyle='--', alpha=0.7, label='F1-score (β=1)')
    ax.set_xlabel('Beta (β)')
    ax.set_ylabel('F-beta Score')
    ax.set_title(title)
    if x_limits is not None:
        ax.set_xlim(*x_limits)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\n=== F1-scoreの理論 ===")
print("F1-score = 2 × (Precision × Recall) / (Precision + Recall)")
print("F-beta score = (1 + β²) × (Precision × Recall) / (β² × Precision + Recall)")
print("\nβの意味:")
print("  β < 1: Precisionを重視")
print("  β = 1: PrecisionとRecallを等しく重視 (F1-score)")
print("  β > 1: Recallを重視")


## 2. 実データでのF1-score


In [None]:
# F1-score on the breast cancer dataset
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target

# Stratified 80/20 hold-out split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Standardize features; the scaler is fitted on the training split only and
# then applied to both splits
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a logistic regression classifier
model = LogisticRegression(random_state=42, max_iter=1000)
model.fit(X_train_scaled, y_train)

# Hard predictions plus the probability column for label 1 (used for the
# threshold analysis)
y_pred = model.predict(X_test_scaled)
y_pred_proba = model.predict_proba(X_test_scaled)[:, 1]

# Evaluation metrics on the test split.
# NOTE(review): these calls use scikit-learn's default positive label (1);
# check `cancer.target_names` to confirm which diagnosis that label denotes.
accuracy_cancer = accuracy_score(y_test, y_pred)
precision_cancer = precision_score(y_test, y_pred)
recall_cancer = recall_score(y_test, y_pred)
f1_cancer = f1_score(y_test, y_pred)

print("=== 乳がんデータセットでのF1-score ===")
print("データセットの情報:")
print(f"  サンプル数: {len(y_test)}")
print(f"  クラス分布: {np.bincount(y_test)}")
print(f"  正例の割合: {np.mean(y_test):.2%}")

print("\n評価指標:")
metric_rows = (
    ("Accuracy", accuracy_cancer),
    ("Precision", precision_cancer),
    ("Recall", recall_cancer),
    ("F1-score", f1_cancer),
)
for metric_name, metric_value in metric_rows:
    print(f"  {metric_name}: {metric_value:.4f}")

# 異なる閾値でのF1-score
def evaluate_f1_thresholds(y_true, y_pred_proba, thresholds):
    """
    Score the hard predictions obtained at each decision threshold.

    For every threshold, a sample is labelled 1 when its probability is
    greater than or equal to the threshold and 0 otherwise; F1, precision
    and recall are then computed against ``y_true``.

    Parameters:
    y_true: ground-truth labels
    y_pred_proba: predicted probabilities; must support ``>=`` comparison
                  followed by ``.astype(int)`` (e.g. a NumPy array)
    thresholds: iterable of threshold values

    Returns:
    results: list with one dict per threshold, in input order, holding the
    keys 'threshold', 'f1', 'precision' and 'recall'
    """
    def _score_at(cutoff):
        # Binarize the probabilities at this cutoff, then score the labels
        hard_labels = (y_pred_proba >= cutoff).astype(int)
        return {
            'threshold': cutoff,
            'f1': f1_score(y_true, hard_labels),
            'precision': precision_score(y_true, hard_labels),
            'recall': recall_score(y_true, hard_labels),
        }

    return [_score_at(cutoff) for cutoff in thresholds]

# Sweep thresholds from 0.10 in steps of 0.05, stopping before 1.0
thresholds_f1 = np.arange(0.1, 1.0, 0.05)
results_f1 = evaluate_f1_thresholds(y_test, y_pred_proba, thresholds_f1)

# One row per threshold
df_f1 = pd.DataFrame(results_f1)

# One panel per metric: (DataFrame column, display name, line color)
metric_panels = (
    ('f1', 'F1-score', 'green'),
    ('precision', 'Precision', 'blue'),
    ('recall', 'Recall', 'red'),
)

_, axes = plt.subplots(1, 3, figsize=(15, 5))
for ax, (column, display_name, line_color) in zip(axes, metric_panels):
    ax.plot(df_f1['threshold'], df_f1[column], 'o-',
            color=line_color, linewidth=2, label=display_name)
    ax.set_xlabel('Threshold')
    ax.set_ylabel(display_name)
    ax.set_title(f'{display_name} vs Threshold')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Threshold with the highest F1-score.
# NOTE(review): the threshold is selected on the same test split that is
# used for reporting, so the reported best F1 is likely optimistic; consider
# selecting it on a separate validation split.
best_f1_idx = df_f1['f1'].idxmax()
best_row = df_f1.loc[best_f1_idx]
best_threshold = best_row['threshold']
best_f1 = best_row['f1']

print("\n=== 最適なF1-score ===")
print(f"最適なF1-score: {best_f1:.4f} (閾値: {best_threshold:.2f})")
print(f"その時のPrecision: {best_row['precision']:.4f}")
print(f"その時のRecall: {best_row['recall']:.4f}")

# Per-class report for the default predictions from model.predict
print("\n=== 詳細な分類レポート ===")
print(classification_report(y_test, y_pred, target_names=cancer.target_names))


## 3. 多クラス分類でのF1-score


In [None]:
# Synthetic 3-class dataset: 1000 samples, 4 features, all informative
X_multi, y_multi = make_classification(
    n_samples=1000,
    n_features=4,
    n_redundant=0,
    n_informative=4,
    n_clusters_per_class=1,
    n_classes=3,
    random_state=42,
)
X_train_multi, X_test_multi, y_train_multi, y_test_multi = train_test_split(
    X_multi, y_multi, test_size=0.2, random_state=42, stratify=y_multi
)

# Standardize features; the scaler is fitted on the training split only
scaler_multi = StandardScaler()
X_train_multi_scaled = scaler_multi.fit_transform(X_train_multi)
X_test_multi_scaled = scaler_multi.transform(X_test_multi)

# Multi-class model.
# The explicit `multi_class='multinomial'` argument was dropped: the
# parameter is deprecated in recent scikit-learn releases (the deprecation
# warning is hidden when warnings are globally ignored), and the default
# solver already fits a multinomial model when there are 3 or more classes.
model_multi = LogisticRegression(random_state=42, max_iter=1000)
model_multi.fit(X_train_multi_scaled, y_train_multi)
y_pred_multi = model_multi.predict(X_test_multi_scaled)

# F1-score under the three averaging strategies
f1_macro = f1_score(y_test_multi, y_pred_multi, average='macro')
f1_micro = f1_score(y_test_multi, y_pred_multi, average='micro')
f1_weighted = f1_score(y_test_multi, y_pred_multi, average='weighted')

print("=== 多クラス分類でのF1-score ===")
print(f"Macro F1-score: {f1_macro:.4f}")
print(f"Micro F1-score: {f1_micro:.4f}")
print(f"Weighted F1-score: {f1_weighted:.4f}")
print(f"\n分類レポート:")
print(classification_report(y_test_multi, y_pred_multi))


## まとめ

**学習した内容**：
- F1-scoreの理論と実装
- F-beta scoreの理解
- 実データでのF1-score計算
- 多クラス分類でのF1-score

**重要なポイント**：
- F1-scoreはPrecisionとRecallの調和平均
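- PrecisionとRecallの両方が高いときにのみ高くなるため、クラス不均衡なデータではAccuracyよりも信頼できる、バランスの取れた評価指標
- 多クラス分類では平均化方法（macro / micro / weighted）によって値が変わるため、目的に合った方法の選択が重要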

**次のステップ**：
- ROC曲線とAUCの学習
- より高度な評価手法
