In [2]:
import os
import json
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, balanced_accuracy_score

# Collect every JSON result file located directly inside the input directory.
input_dir = "../results_coarse/cnndm_tune"
# os.listdir() yields names in arbitrary, filesystem-dependent order. Sorting
# makes the processing order reproducible, and with it the relative order of
# files that end up with identical balanced accuracy in the final report.
input_files = [
    os.path.join(input_dir, f) for f in sorted(os.listdir(input_dir))
    if f.endswith(".json") and os.path.isfile(os.path.join(input_dir, f))
]

print(input_files)

# Accumulates the metric dicts produced for each evaluated file.
results = []

# Strategy 1: balanced-accuracy-based threshold (chosen among the validation scores)
def choose_threshold_by_bacc(scores, labels):
    """Pick the score value that maximizes balanced accuracy when used as a cut-off.

    Each distinct value in ``scores`` is tried in ascending order. An item is
    predicted 1 when its score is strictly greater than the candidate,
    otherwise 0, and the predictions are scored with
    ``balanced_accuracy_score(labels, predictions)``.

    Args:
        scores: Scores to threshold; also the pool of candidate cut-offs.
        labels: Ground-truth labels aligned with ``scores``.

    Returns:
        A ``(threshold, balanced_accuracy)`` tuple. It stays at
        ``(0.0, 0.0)`` when no candidate scores above 0.0 (for instance when
        ``scores`` is empty). On ties the smallest threshold wins because a
        candidate only replaces the current best when it is strictly better.
    """
    winner = (0.0, 0.0)  # (cut-off, balanced accuracy) of the best candidate so far

    for cutoff in sorted(set(scores)):
        predicted = [1 if value > cutoff else 0 for value in scores]
        quality = balanced_accuracy_score(labels, predicted)
        if quality > winner[1]:
            winner = (cutoff, quality)

    return winner

# Strategy 2: sqrt(TPR * (1 - FPR))-based threshold (chosen among the validation scores)
def choose_threshold_by_sqrt_tpr(scores, labels):
    """Pick the score value that maximizes sqrt(TPR * (1 - FPR)) when used as a cut-off.

    Each distinct value in ``scores`` is tried in ascending order; an item is
    predicted positive when its score is strictly greater than the candidate.
    Only labels equal to 1 or 0 contribute to the confusion counts.

    Args:
        scores: Scores to threshold; also the pool of candidate cut-offs.
        labels: Ground-truth labels aligned with ``scores``.

    Returns:
        A ``(threshold, score)`` tuple. It stays at ``(0.0, 0.0)`` when no
        candidate scores above 0.0. On ties the smallest threshold wins.
    """
    chosen_cutoff, top_score = 0.0, 0.0

    for cutoff in sorted(set(scores)):
        # Build the confusion counts for this cut-off in a single pass.
        hits = false_alarms = misses = rejections = 0
        for value, truth in zip(scores, labels):
            flagged = value > cutoff
            if truth == 1:
                if flagged:
                    hits += 1
                else:
                    misses += 1
            elif truth == 0:
                if flagged:
                    false_alarms += 1
                else:
                    rejections += 1

        positives = hits + misses
        negatives = false_alarms + rejections
        # A rate whose denominator is empty is treated as 0.
        tpr = hits / positives if positives > 0 else 0
        fpr = false_alarms / negatives if negatives > 0 else 0

        current = np.sqrt(tpr * (1 - fpr))
        if current > top_score:
            top_score = current
            chosen_cutoff = cutoff

    return chosen_cutoff, top_score

# Test-set evaluation
def evaluate_predictions(labels, preds):
    """Score predictions against ground truth with five classification metrics.

    Args:
        labels: Ground-truth labels.
        preds: Predicted labels aligned with ``labels``.

    Returns:
        A dict with the keys ``accuracy``, ``precision``, ``recall``, ``f1``
        and ``balanced_accuracy`` (inserted in that order).
    """
    report = {"accuracy": accuracy_score(labels, preds)}

    # These three are called with zero_division=0.
    for key, scorer in (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1", f1_score),
    ):
        report[key] = scorer(labels, preds, zero_division=0)

    report["balanced_accuracy"] = balanced_accuracy_score(labels, preds)
    return report

# Per-file evaluation
def evaluate_file(file_path):
    """Tune a threshold on a file's validation entries and score its test entries.

    The file is loaded as JSON and iterated. Entries whose ``"cut"`` is
    ``"val"`` or ``"test"`` contribute their ``"score"`` and ``"label"`` to
    the matching split; all other entries are ignored. The threshold is
    selected with ``choose_threshold_by_bacc`` on the validation split, and
    test items scoring strictly above it are predicted as 1.

    Args:
        file_path: Path of the JSON result file.

    Returns:
        A one-element list holding the ``evaluate_predictions`` metrics dict
        extended with ``filename``, ``strategy``, ``threshold`` and
        ``val_score``. An empty list is returned when the file is not valid
        JSON or when either split has no entries.

    Raises:
        KeyError: If a val/test entry lacks ``"score"`` or ``"label"``.
        OSError: If the file cannot be opened (not caught here).
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError:
        # A single corrupt or half-written file must not abort the whole
        # sweep; report it and move on.
        print(f"❌ JSON Decode Error 발생: {file_path} 스킵합니다.")
        return []

    val_scores, val_labels = [], []
    test_scores, test_labels = [], []

    for entry in data:
        cut = entry.get("cut")
        if cut == "val":
            val_scores.append(entry["score"])
            val_labels.append(entry["label"])
        elif cut == "test":
            test_scores.append(entry["score"])
            test_labels.append(entry["label"])

    # Both splits are required: one to pick the threshold, one to report on.
    if not val_scores or not test_scores:
        return []

    # Strategy: balanced accuracy. The threshold comes from the validation
    # split only and is then applied unchanged to the test split.
    thresh1, bacc1 = choose_threshold_by_bacc(val_scores, val_labels)
    preds1 = [1 if s > thresh1 else 0 for s in test_scores]
    eval1 = evaluate_predictions(test_labels, preds1)
    eval1.update({
        "filename": os.path.basename(file_path),
        "strategy": "Balanced Accuracy (val score 기반)",
        "threshold": thresh1,
        "val_score": bacc1
    })

    # Only the balanced-accuracy strategy is reported (the sqrt strategy was removed).
    return [eval1]


# Evaluate every collected file and pool the per-file results.
for file in input_files:
    result = evaluate_file(file)
    results += result

# Rank by test balanced accuracy, best first.
results = sorted(results, key=lambda row: row["balanced_accuracy"], reverse=True)

# Save the report: one block per result, each followed by a blank line.
output_txt_path = "coarse_cnndm.txt"
with open(output_txt_path, "w", encoding="utf-8") as fout:
    for r in results:
        fout.write(
            f"File: {r['filename']}\n"
            f"Strategy: {r['strategy']}\n"
            f"  Threshold (from val set): {r['threshold']:.4f}\n"
            f"  Validation Strategy Score: {r['val_score']:.4f}\n"
            f"  Test Accuracy: {r['accuracy']:.4f}\n"
            f"  Test Precision: {r['precision']:.4f}\n"
            f"  Test Recall: {r['recall']:.4f}\n"
            f"  Test F1 Score: {r['f1']:.4f}\n"
            f"  Test Balanced Accuracy: {r['balanced_accuracy']:.4f}\n"
            "\n"
        )

print(f"✅ 모든 평가 결과가 {output_txt_path}에 저장되었습니다.")


['../results_coarse/cnndm_tune/coarse_wr0p3_wb0p7_wc1_wm0p3_ww0p7.json', '../results_coarse/cnndm_tune/coarse_wr0p3_wb0p7_wc1_wm0p5_ww0p5.json', '../results_coarse/cnndm_tune/coarse_wr0p3_wb0p7_wc1_wm0p7_ww0p3.json', '../results_coarse/cnndm_tune/coarse_wr0p3_wb0p7_wc1_wm0_ww1.json', '../results_coarse/cnndm_tune/coarse_wr0p3_wb0p7_wc1_wm1_ww0.json', '../results_coarse/cnndm_tune/coarse_wr0p5_wb0p5_wc1_wm0p3_ww0p7.json', '../results_coarse/cnndm_tune/coarse_wr0p5_wb0p5_wc1_wm0p5_ww0p5.json', '../results_coarse/cnndm_tune/coarse_wr0p5_wb0p5_wc1_wm0_ww1.json', '../results_coarse/cnndm_tune/coarse_wr0p7_wb0p3_wc1_wm0p3_ww0p7.json', '../results_coarse/cnndm_tune/coarse_wr0p7_wb0p3_wc1_wm0p5_ww0p5.json', '../results_coarse/cnndm_tune/coarse_wr0p7_wb0p3_wc1_wm0p7_ww0p3.json', '../results_coarse/cnndm_tune/coarse_wr0p7_wb0p3_wc1_wm0_ww1.json', '../results_coarse/cnndm_tune/coarse_wr0p7_wb0p3_wc1_wm1_ww0.json', '../results_coarse/cnndm_tune/coarse_wr0_wb1_wc1_wm0p3_ww0p7.json', '../results_coa

In [1]:
import os
import json
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, balanced_accuracy_score

# Collect every JSON result file located directly inside the input directory.
input_dir = "../results/eunk"
# os.listdir() yields names in arbitrary, filesystem-dependent order. Sorting
# makes the processing order reproducible, and with it the relative order of
# files that end up with identical balanced accuracy in the final report.
input_files = [
    os.path.join(input_dir, f) for f in sorted(os.listdir(input_dir))
    if f.endswith(".json") and os.path.isfile(os.path.join(input_dir, f))
]

print(input_files)

# Accumulates the metric dicts produced for each evaluated file.
results = []

# Balanced-accuracy-based threshold (chosen among the validation scores)
def choose_threshold_by_bacc(scores, labels):
    """Search the observed score values for the cut-off with the best balanced accuracy.

    Candidates are the distinct values of ``scores`` in ascending order. For
    each one, items scoring strictly above it are predicted 1 and the rest 0,
    and the result is scored with ``balanced_accuracy_score``.

    Args:
        scores: Scores to threshold; also the pool of candidate cut-offs.
        labels: Ground-truth labels aligned with ``scores``.

    Returns:
        A ``(threshold, balanced_accuracy)`` tuple; ``(0.0, 0.0)`` when no
        candidate achieves a balanced accuracy above 0.0. Ties keep the
        smallest threshold.
    """
    def _binarize(cutoff):
        # Strictly-greater comparison: a score equal to the cut-off is negative.
        return [1 if s > cutoff else 0 for s in scores]

    chosen_cutoff = 0.0
    top_bacc = 0.0
    for cutoff in sorted(set(scores)):
        current = balanced_accuracy_score(labels, _binarize(cutoff))
        if current > top_bacc:
            chosen_cutoff, top_bacc = cutoff, current
    return chosen_cutoff, top_bacc

# Test-set evaluation
def evaluate_predictions(labels, preds):
    """Compute accuracy, precision, recall, F1 and balanced accuracy for predictions.

    Args:
        labels: Ground-truth labels.
        preds: Predicted labels aligned with ``labels``.

    Returns:
        A dict keyed by ``accuracy``, ``precision``, ``recall``, ``f1`` and
        ``balanced_accuracy`` (in that order).
    """
    # Shared option for precision, recall and F1.
    zero_div = {"zero_division": 0}

    accuracy = accuracy_score(labels, preds)
    precision = precision_score(labels, preds, **zero_div)
    recall = recall_score(labels, preds, **zero_div)
    f1 = f1_score(labels, preds, **zero_div)
    balanced = balanced_accuracy_score(labels, preds)

    return dict(
        accuracy=accuracy,
        precision=precision,
        recall=recall,
        f1=f1,
        balanced_accuracy=balanced,
    )

# Per-file evaluation
def evaluate_file(file_path):
    """Tune a threshold on a file's validation entries and score its test entries.

    Entries whose ``"cut"`` is ``"val"`` or ``"test"`` contribute their
    ``"score"`` and ``"label"`` to the matching split; other entries are
    ignored. The threshold comes from ``choose_threshold_by_bacc`` on the
    validation split, and test items scoring strictly above it are predicted 1.

    Args:
        file_path: Path of the JSON result file.

    Returns:
        A one-element list holding the ``evaluate_predictions`` metrics dict
        extended with ``filename``, ``strategy``, ``threshold`` and
        ``val_score``. An empty list is returned when the file cannot be
        decoded as JSON (a message is printed) or when either split is empty.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            records = json.load(handle)
    except json.JSONDecodeError:
        print(f"❌ JSON Decode Error 발생: {file_path} 스킵합니다.")
        return []  # nothing to report for an undecodable file

    # Each split is kept as a (scores, labels) pair of parallel lists.
    val_split = ([], [])
    test_split = ([], [])

    for record in records:
        cut = record.get("cut")
        if cut == "val":
            split = val_split
        elif cut == "test":
            split = test_split
        else:
            continue
        split[0].append(record["score"])
        split[1].append(record["label"])

    val_scores, val_labels = val_split
    test_scores, test_labels = test_split

    # Both splits are needed: one to pick the threshold, one to report on.
    if not (val_scores and test_scores):
        return []

    # Strategy: balanced accuracy.
    threshold, val_bacc = choose_threshold_by_bacc(val_scores, val_labels)
    test_preds = [1 if score > threshold else 0 for score in test_scores]

    report = evaluate_predictions(test_labels, test_preds)
    report["filename"] = os.path.basename(file_path)
    report["strategy"] = "Balanced Accuracy (val score 기반)"
    report["threshold"] = threshold
    report["val_score"] = val_bacc

    return [report]

# Evaluate every collected file and pool the per-file results.
for file in input_files:
    result = evaluate_file(file)
    results += result

# Rank by test balanced accuracy, best first.
results = sorted(results, key=lambda row: row["balanced_accuracy"], reverse=True)

# Save the report: one block per result, each followed by a blank line.
output_txt_path = "eunk.txt"
with open(output_txt_path, "w", encoding="utf-8") as fout:
    for r in results:
        fout.writelines([
            f"File: {r['filename']}\n",
            f"Strategy: {r['strategy']}\n",
            f"  Threshold (from val set): {r['threshold']:.4f}\n",
            f"  Validation Strategy Score: {r['val_score']:.4f}\n",
            f"  Test Accuracy: {r['accuracy']:.4f}\n",
            f"  Test Precision: {r['precision']:.4f}\n",
            f"  Test Recall: {r['recall']:.4f}\n",
            f"  Test F1 Score: {r['f1']:.4f}\n",
            f"  Test Balanced Accuracy: {r['balanced_accuracy']:.4f}\n",
            "\n",
        ])

print(f"✅ 모든 평가 결과가 {output_txt_path}에 저장되었습니다.")

['../results/eunk/original_xsum_infuse.json', '../results/eunk/original_cnndm_infuse.json', '../results/eunk/original_cnndm_infuse_ablated.json', '../results/eunk/original_xsum_infuse_ablated.json']
✅ 모든 평가 결과가 eunk.txt에 저장되었습니다.
