# 5.5 Leave-One-Study-Out Summary

**任务**: 汇总 5.0 - 5.4 的实验结果，对比不同模型架构在 Zero-shot 和 LOO 设置下的泛化性能。

In [None]:
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
# Silence every warning category for the rest of the session (presumably to
# keep the notebook output clean).
# NOTE(review): this also hides deprecation warnings raised by the plotting
# and data-frame calls below; consider narrowing the filter.
warnings.filterwarnings("ignore")

In [None]:
# Directory under which every model's run folder lives (relative to the
# notebook's working directory).
ROOT_DIR = "../models"

# Display label -> run folder name under ROOT_DIR. The label is what ends up
# in the "Model" column and on the plot axes.
MODELS = {
    "Z-Score": "LOO_vCross_zscore",
    "Baseline": "LOO_vCross_baseline",
    "OnlyValue": "LOO_vCross_onlyValue",
    "Value+Cond": "LOO_vCross_value_condition",
    "Base (Full)": "LOO_vCross_base",
}

# Held-out study identifiers; each one is also the name of the per-study
# sub-directory that contains a metrics.json file.
# (Presumably NCBI BioProject accessions - confirm with the data owners.)
STUDIES = [
    "PRJNA439311",
    "PRJNA282010",
    "PRJNA296567",
    "PRJNA632472",
    "PRJNA1108737",
    "PRJNA820972",
]

In [None]:
# Collect one record per (model, study, setting) from the metrics.json files
# laid out as: ROOT_DIR/<model folder>/<setting dir>/<study>/metrics.json

# "Setting" label -> sub-directory of the model folder holding that setting.
SETTING_DIRS = {
    "Zero-shot": "zero_shot",
    "LOO Fine-tuning": "finetuned",
}

# Output column -> key read from metrics.json (NaN when the key is absent).
METRIC_KEYS = {
    "AUC": "auc",
    "F1": "f1_weighted",
    "Acc": "accuracy",
}

results = []
missing_paths = []  # expected metrics files that do not exist on disk

for model_name, folder in MODELS.items():
    for study in STUDIES:
        for setting, subdir in SETTING_DIRS.items():
            metrics_path = os.path.join(ROOT_DIR, folder, subdir, study, "metrics.json")
            if not os.path.exists(metrics_path):
                # Keep going (best effort), but remember the gap so it can be
                # reported instead of silently shrinking the averages.
                missing_paths.append(metrics_path)
                continue

            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(metrics_path, "r", encoding="utf-8") as f:
                metrics = json.load(f)

            record = {"Model": model_name, "Study": study, "Setting": setting}
            for column, key in METRIC_KEYS.items():
                record[column] = metrics.get(key, np.nan)
            results.append(record)

# Passing the columns explicitly keeps the expected schema even when no
# metrics file was found, so later column lookups such as df["Setting"] do
# not fail with KeyError on an empty frame.
df = pd.DataFrame(results, columns=["Model", "Study", "Setting", *METRIC_KEYS])
print(f"Loaded {len(df)} records.")
if missing_paths:
    print(f"Skipped {len(missing_paths)} missing metrics files:")
    for metrics_path in missing_paths:
        print(f"  {metrics_path}")
df.head()

In [None]:
def plot_comparison(metric="AUC"):
    """Draw side-by-side box plots of one metric for both evaluation settings.

    The left panel shows the "Zero-shot" rows and the right panel the
    "LOO Fine-tuning" rows of the module-level ``df``; each panel has one box
    per value of the "Model" column.

    Args:
        metric: Name of the ``df`` column to plot on the y axis.
    """
    plt.figure(figsize=(14, 6))

    # Panel positions are 1-based: 1 = left, 2 = right.
    for panel, setting in enumerate(("Zero-shot", "LOO Fine-tuning"), start=1):
        plt.subplot(1, 2, panel)
        rows = df[df["Setting"] == setting]
        sns.boxplot(data=rows, x="Model", y=metric, palette="Set2")
        plt.title(f"{setting} Performance ({metric})")
        plt.xticks(rotation=45)
        plt.grid(axis='y', linestyle='--', alpha=0.7)

    plt.tight_layout()
    plt.show()

# Render the comparison figure once per metric, AUC first.
for metric_name in ("AUC", "F1"):
    plot_comparison(metric_name)

In [None]:
print("=== Average Performance ===")
# Mean of each metric over the rows of every (Setting, Model) pair, rounded to
# four decimals.
by_setting_and_model = df.groupby(["Setting", "Model"])
summary = by_setting_and_model[["AUC", "F1", "Acc"]].mean().round(4)
# Bare expression so the notebook renders the table as the cell output.
summary