In [4]:
import os
import json
import numpy as np

# 현재 디렉토리 내 모든 JSON 파일 필터링
input_dir = "./"
input_files = [
    os.path.join(input_dir, f) for f in os.listdir(input_dir)
    if f.endswith(".json") and os.path.isfile(os.path.join(input_dir, f))
]

# 결과 저장
for file_path in input_files:
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        
        var_scores_label_0 = [item["var_score"] for item in data if item.get("label") == 0 and "var_score" in item]
        var_scores_label_1 = [item["var_score"] for item in data if item.get("label") == 1 and "var_score" in item]
        var_scores_all = [item["var_score"] for item in data if "var_score" in item]

        avg_0 = np.mean(var_scores_label_0) if var_scores_label_0 else None
        avg_1 = np.mean(var_scores_label_1) if var_scores_label_1 else None
        avg_all = np.mean(var_scores_all) if var_scores_all else None

        print(f"📄 {os.path.basename(file_path)}")
        print(f"  ▶ label=0 평균 var_score: {avg_0}")
        print(f"  ▶ label=1 평균 var_score: {avg_1}")
        print(f"  ▶ 전체 평균 var_score: {avg_all}")
        print()

    except Exception as e:
        print(f"⚠️ 파일 처리 오류: {file_path} → {e}")


📄 fizz_original_xsumfaith_e_min.json
  ▶ label=0 평균 var_score: 0.06225821029217383
  ▶ label=1 평균 var_score: 0.05218711764295149
  ▶ 전체 평균 var_score: 0.06123498727901284

📄 fizz_original_frank_e-c_mean.json
  ▶ label=0 평균 var_score: 0.11732147984667203
  ▶ label=1 평균 var_score: 0.025648303362380432
  ▶ 전체 평균 var_score: 0.08685483550092384

📄 fizz_original_xsumfaith_e-c_min.json
  ▶ label=0 평균 var_score: 0.12257852405360056
  ▶ label=1 평균 var_score: 0.08150942682583785
  ▶ 전체 평균 var_score: 0.11840590377525986

📄 fizz_original_frank_e_mean.json
  ▶ label=0 평균 var_score: 0.05685804429996666
  ▶ label=1 평균 var_score: 0.014599536615264645
  ▶ 전체 평균 var_score: 0.04281386067300907

📄 fizz_original_factcc_e_min.json
  ▶ label=0 평균 var_score: 0.03674570167984871
  ▶ label=1 평균 var_score: 0.005752452769549995
  ▶ 전체 평균 var_score: 0.010146769478636386

📄 fizz_original_factcc_e-c_min.json
  ▶ label=0 평균 var_score: 0.08148113981927804
  ▶ label=1 평균 var_score: 0.009330928816630276
  ▶ 전체 평균 var_sco

In [1]:
import os
import json
import numpy as np
from collections import defaultdict

# 현재 디렉토리 내 모든 JSON 파일 필터링
input_dir = "./"
input_files = [
    os.path.join(input_dir, f) for f in os.listdir(input_dir)
    if f.endswith(".json") and os.path.isfile(os.path.join(input_dir, f))
]

# 결과 저장
for file_path in input_files:
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # length_of_summary에 따라 그룹핑
        grouped = defaultdict(lambda: {"label_0": [], "label_1": [], "all": []})

        for item in data:
            if "var_score" not in item or "length_of_summary" not in item:
                continue
            length = item["length_of_summary"]
            var_score = item["var_score"]
            label = item.get("label", None)

            grouped[length]["all"].append(var_score)
            if label == 0:
                grouped[length]["label_0"].append(var_score)
            elif label == 1:
                grouped[length]["label_1"].append(var_score)

        # 출력
        print(f"📄 {os.path.basename(file_path)}")
        for length, group in sorted(grouped.items()):
            avg_0 = np.mean(group["label_0"]) if group["label_0"] else None
            avg_1 = np.mean(group["label_1"]) if group["label_1"] else None
            avg_all = np.mean(group["all"]) if group["all"] else None

            print(f"  ▶ length_of_summary = {length}")
            print(f"    - label=0 평균 var_score: {avg_0}")
            print(f"    - label=1 평균 var_score: {avg_1}")
            print(f"    - 전체 평균 var_score: {avg_all}")
        print()

    except Exception as e:
        print(f"⚠️ 파일 처리 오류: {file_path} → {e}")


📄 fizz_original_xsumfaith_e_min.json
  ▶ length_of_summary = 1
    - label=0 평균 var_score: 0.0
    - label=1 평균 var_score: 0.0
    - 전체 평균 var_score: 0.0
  ▶ length_of_summary = 2
    - label=0 평균 var_score: 0.045825338724132254
    - label=1 평균 var_score: 0.035422385186078306
    - 전체 평균 var_score: 0.044520308710397895
  ▶ length_of_summary = 3
    - label=0 평균 var_score: 0.06227295582412078
    - label=1 평균 var_score: 0.056129085876222916
    - 전체 평균 var_score: 0.06155993416672452
  ▶ length_of_summary = 4
    - label=0 평균 var_score: 0.0753788830961029
    - label=1 평균 var_score: 0.061771430817575704
    - 전체 평균 var_score: 0.07439088326069829
  ▶ length_of_summary = 5
    - label=0 평균 var_score: 0.0781822486053144
    - label=1 평균 var_score: 0.08265797857275706
    - 전체 평균 var_score: 0.07852987811734878
  ▶ length_of_summary = 6
    - label=0 평균 var_score: 0.09110048906118684
    - label=1 평균 var_score: None
    - 전체 평균 var_score: 0.09110048906118684
  ▶ length_of_summary = 7
    - l

In [2]:
import os
import json
import numpy as np
from collections import defaultdict

# 현재 디렉토리 내 모든 JSON 파일 필터링
input_dir = "./"
input_files = [
    os.path.join(input_dir, f) for f in os.listdir(input_dir)
    if f.endswith(".json") and os.path.isfile(os.path.join(input_dir, f))
]

# 결과 저장
for file_path in input_files:
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # length_of_summary에 따라 그룹핑
        grouped = defaultdict(lambda: {
            "label_0": [],
            "label_1": [],
            "all": [],
            "count_0": 0,
            "count_1": 0
        })

        for item in data:
            if "var_score" not in item or "length_of_summary" not in item:
                continue
            length = item["length_of_summary"]
            var_score = item["var_score"]
            label = item.get("label", None)

            grouped[length]["all"].append(var_score)
            if label == 0:
                grouped[length]["label_0"].append(var_score)
                grouped[length]["count_0"] += 1
            elif label == 1:
                grouped[length]["label_1"].append(var_score)
                grouped[length]["count_1"] += 1

        # 출력
        print(f"📄 {os.path.basename(file_path)}")
        for length, group in sorted(grouped.items()):
            total_count = group["count_0"] + group["count_1"]
            avg_0 = np.mean(group["label_0"]) if group["label_0"] else None
            avg_1 = np.mean(group["label_1"]) if group["label_1"] else None
            avg_all = np.mean(group["all"]) if group["all"] else None

            print(f"  ▶ length_of_summary = {length} (총 {total_count}개)")
            print(f"    - label=0: {group['count_0']}개, 평균 var_score: {avg_0}")
            print(f"    - label=1: {group['count_1']}개, 평균 var_score: {avg_1}")
            print(f"    - 전체 평균 var_score: {avg_all}")
        print()

    except Exception as e:
        print(f"⚠️ 파일 처리 오류: {file_path} → {e}")


📄 fizz_original_xsumfaith_e_min.json
  ▶ length_of_summary = 1 (총 30개)
    - label=0: 28개, 평균 var_score: 0.0
    - label=1: 2개, 평균 var_score: 0.0
    - 전체 평균 var_score: 0.0
  ▶ length_of_summary = 2 (총 279개)
    - label=0: 244개, 평균 var_score: 0.045825338724132254
    - label=1: 35개, 평균 var_score: 0.035422385186078306
    - 전체 평균 var_score: 0.044520308710397895
  ▶ length_of_summary = 3 (총 517개)
    - label=0: 457개, 평균 var_score: 0.06227295582412078
    - label=1: 60개, 평균 var_score: 0.056129085876222916
    - 전체 평균 var_score: 0.06155993416672452
  ▶ length_of_summary = 4 (총 303개)
    - label=0: 281개, 평균 var_score: 0.0753788830961029
    - label=1: 22개, 평균 var_score: 0.061771430817575704
    - 전체 평균 var_score: 0.07439088326069829
  ▶ length_of_summary = 5 (총 103개)
    - label=0: 95개, 평균 var_score: 0.0781822486053144
    - label=1: 8개, 평균 var_score: 0.08265797857275706
    - 전체 평균 var_score: 0.07852987811734878
  ▶ length_of_summary = 6 (총 15개)
    - label=0: 15개, 평균 var_score: 0.09110048

In [1]:
import os
import json
import numpy as np
from collections import defaultdict

# 현재 디렉토리 내 모든 JSON 파일 필터링
input_dir = "./"
input_files = [
    os.path.join(input_dir, f) for f in os.listdir(input_dir)
    if f.endswith(".json") and os.path.isfile(os.path.join(input_dir, f))
]

# 결과 저장
for file_path in input_files:
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # length_of_summary에 따라 그룹핑
        grouped = defaultdict(lambda: {
            "label_0_var": [],
            "label_1_var": [],
            "label_0_mean": [],
            "label_1_mean": [],
            "all_var": [],
            "all_mean": [],
            "count_0": 0,
            "count_1": 0
        })

        for item in data:
            if "var_score" not in item or "length_of_summary" not in item or "mean_score" not in item:
                continue
            length = item["length_of_summary"]
            var_score = item["var_score"]
            mean_score = item["mean_score"]
            label = item.get("label", None)

            grouped[length]["all_var"].append(var_score)
            grouped[length]["all_mean"].append(mean_score)
            if label == 0:
                grouped[length]["label_0_var"].append(var_score)
                grouped[length]["label_0_mean"].append(mean_score)
                grouped[length]["count_0"] += 1
            elif label == 1:
                grouped[length]["label_1_var"].append(var_score)
                grouped[length]["label_1_mean"].append(mean_score)
                grouped[length]["count_1"] += 1

        # 출력
        print(f"📄 {os.path.basename(file_path)}")
        for length, group in sorted(grouped.items()):
            total_count = group["count_0"] + group["count_1"]

            # var_score 평균
            avg_var_0 = np.mean(group["label_0_var"]) if group["label_0_var"] else None
            avg_var_1 = np.mean(group["label_1_var"]) if group["label_1_var"] else None
            avg_var_all = np.mean(group["all_var"]) if group["all_var"] else None

            # mean_score 평균
            avg_mean_0 = np.mean(group["label_0_mean"]) if group["label_0_mean"] else None
            avg_mean_1 = np.mean(group["label_1_mean"]) if group["label_1_mean"] else None
            avg_mean_all = np.mean(group["all_mean"]) if group["all_mean"] else None

            print(f"  ▶ length_of_summary = {length} (총 {total_count}개)")
            print(f"    - label=0: {group['count_0']}개")
            print(f"        · 평균 var_score: {avg_var_0}")
            print(f"        · 평균 mean_score: {avg_mean_0}")
            print(f"    - label=1: {group['count_1']}개")
            print(f"        · 평균 var_score: {avg_var_1}")
            print(f"        · 평균 mean_score: {avg_mean_1}")
            print(f"    - 전체 평균 var_score: {avg_var_all}")
            print(f"    - 전체 평균 mean_score: {avg_mean_all}")
        print()

    except Exception as e:
        print(f"⚠️ 파일 처리 오류: {file_path} → {e}")


📄 fizz_original_factcc_e-c_min.json
  ▶ length_of_summary = 1 (총 70개)
    - label=0: 18개
        · 평균 var_score: 0.0
        · 평균 mean_score: 0.2631594410114404
    - label=1: 52개
        · 평균 var_score: 0.0
        · 평균 mean_score: 0.9369321889602221
    - 전체 평균 var_score: 0.0
    - 전체 평균 mean_score: 0.7636763394876782
  ▶ length_of_summary = 2 (총 346개)
    - label=0: 57개
        · 평균 var_score: 0.10148429826699011
        · 평균 mean_score: 0.4803799584598828
    - label=1: 289개
        · 평균 var_score: 0.009712075532176503
        · 평균 mean_score: 0.9312817836568045
    - 전체 평균 var_score: 0.024830620896004168
    - 전체 평균 mean_score: 0.8570002691012423
  ▶ length_of_summary = 3 (총 336개)
    - label=0: 42개
        · 평균 var_score: 0.07934407841346378
        · 평균 mean_score: 0.5295425392174538
    - label=1: 294개
        · 평균 var_score: 0.00803740880105808
        · 평균 mean_score: 0.9367903402956732
    - 전체 평균 var_score: 0.016950742502608793
    - 전체 평균 mean_score: 0.8858843651608959
  ▶