In [None]:
# remove LLM-detected errors from original VariErr

import json

varierr_file = '/Users/phoebeeeee/ongoing/LLM_AED/dataset/varierr/varierr.json'
model_file = '/Users/phoebeeeee/ongoing/LLM_AED/new_processing/validation_result/all/llama_8b_all/threshold/with_validation_0.8.jsonl'
output_file = '/Users/phoebeeeee/ongoing/LLM_AED/dataset/varierr/varierr_original.json'

# Maps the one-letter codes found in the model file's 'error' lists to full label names.
# Codes that are not in the map are passed through unchanged.
label_map = {'e': 'entailment', 'n': 'neutral', 'c': 'contradiction'}


def _load_jsonl_by_id(path):
    """Read a JSON-lines file into a dict keyed by each record's 'id'.

    Each line is parsed exactly once, blank lines are skipped, and when two
    records share an id the later one wins.

    Raises:
        KeyError: if a record has no 'id' field.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    records = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            record = json.loads(line)
            records[record['id']] = record
    return records


varierr_data = _load_jsonl_by_id(varierr_file)
model_data = _load_jsonl_by_id(model_file)

merged = []

# Note: the entries of varierr_data are modified in place by this loop.
for uid, var_entry in varierr_data.items():
    # Entries with no matching model record are treated as having no detected errors.
    model_entry = model_data.get(uid, {})

    # Drop these four fields from the output record.
    for key in ('entailment', 'contradiction', 'neutral', 'idk'):
        var_entry.pop(key, None)

    if 'error' in model_entry:
        # `or []` keeps an explicit null 'error' from raising TypeError.
        error_raw = model_entry['error'] or []
        error_mapped = [label_map.get(lbl, lbl) for lbl in error_raw]
        var_entry['error_llm'] = error_mapped
    if 'not_validated_exp' in model_entry:
        var_entry['not_validated_exp_llm'] = model_entry['not_validated_exp']

    # Remaining labels = round-1 labels minus the labels flagged as errors.
    original_labels = set(var_entry.get('label_set_round_1', []))
    error_labels = set(var_entry.get('error_llm', []))
    label_set_llm = sorted(original_labels - error_labels)

    var_entry['label_set_llm'] = label_set_llm
    merged.append(var_entry)

# One JSON object per line; non-ASCII text is written as-is (UTF-8).
with open(output_file, 'w', encoding='utf-8') as out:
    for item in merged:
        json.dump(item, out, ensure_ascii=False)
        out.write('\n')

print("Done.")


Done.


In [18]:
import json
from collections import Counter

# Input: JSON-lines file whose records are read below for their "label_set_llm" field.
# NOTE(review): the first cell writes to .../dataset/varierr/varierr_original.json, not to
# this path — confirm how this input file was produced.
input_path = "/Users/phoebeeeee/ongoing/LLM_AED/new_processing/validation_result/all/llama_8b_all/without_llm_error/varierr_without_0.8.json"
# Output: the same records, one per line, with a "label" field added.
output_path = "/Users/phoebeeeee/ongoing/LLM_AED/new_processing/validation_result/all/llama_8b_all/without_llm_error/varierr_without_0.8_dist.json"

def convert_label_list_to_dist(label_list):
    """Turn a list of label names into an [entailment, neutral, contradiction] distribution.

    Args:
        label_list: iterable of label strings, or None. Labels other than
            "entailment", "neutral" and "contradiction" are ignored.

    Returns:
        A list of three floats that sum to 1, or [0.0, 0.0, 0.0] when none of
        the three labels occurs in the input.
    """
    tallies = Counter(label_list or [])
    # A Counter yields 0 for labels it has never seen.
    counts = [tallies[name] for name in ("entailment", "neutral", "contradiction")]
    denominator = sum(counts)
    if denominator == 0:
        return [0.0, 0.0, 0.0]
    return [count / denominator for count in counts]

# Stream the input records, attach a label distribution to each one, and write them out.
# Both files are opened as UTF-8 explicitly so the result does not depend on the
# platform's default encoding.
with open(input_path, "r", encoding="utf-8") as infile, \
     open(output_path, "w", encoding="utf-8") as outfile:

    for line in infile:
        # Tolerate blank lines (e.g. a trailing newline at the end of the file).
        if not line.strip():
            continue
        item = json.loads(line)

        # convert_label_list_to_dist returns [0.0, 0.0, 0.0] for a missing or
        # empty list, so no separate fallback branch is needed here.
        item["label"] = convert_label_list_to_dist(item.get("label_set_llm"))
        json.dump(item, outfile)
        outfile.write("\n")

print("Done")


Done


In [19]:
## clean

import json
from pathlib import Path
from tqdm import tqdm

def normalize_label_dist(chaos_dict):
    """Normalize per-label values into an [entailment, neutral, contradiction] list.

    Args:
        chaos_dict: mapping that may contain the keys "entailment", "neutral"
            and "contradiction". Missing keys and falsy values (None, 0) count
            as 0.0.

    Returns:
        The three values divided by their sum, or [0.0, 0.0, 0.0] when the sum
        is zero.
    """
    weights = []
    for key in ("entailment", "neutral", "contradiction"):
        raw = chaos_dict.get(key)
        weights.append(raw if raw else 0.0)

    mass = sum(weights)
    if mass == 0:
        return [0.0, 0.0, 0.0]
    return [weight / mass for weight in weights]


def process_file(input_path, output_path):
    """Reduce every JSON-lines record in ``input_path`` to four fields and write it to ``output_path``.

    Each output line is a JSON object with the keys ``uid``, ``premise``,
    ``hypothesis`` and ``label``. They are taken from the input record's ``id``
    (falling back to ``uid`` when there is no ``id`` key), ``context``,
    ``statement`` and ``label``. Fields missing from the input are written as null.

    Args:
        input_path: path of the JSON-lines file to read (UTF-8).
        output_path: path of the JSON-lines file to write (UTF-8, overwritten).
    """
    with open(input_path, "r", encoding="utf-8") as fin:
        with open(output_path, "w", encoding="utf-8") as fout:
            progress = tqdm(fin, desc=f"Processing {input_path}")
            for line in progress:
                record = json.loads(line)
                slim = {"uid": record.get("id", record.get("uid"))}
                slim["premise"] = record.get("context")
                slim["hypothesis"] = record.get("statement")
                slim["label"] = record.get("label")
                # Keep non-ASCII text readable in the output file.
                serialized = json.dumps(slim, ensure_ascii=False)
                fout.write(serialized + "\n")

# Entry point: reduce the *_dist.json file to the four-field records written by
# process_file and store them in the *_cleaned.json file.
if __name__ == "__main__":
    input_path =  "/Users/phoebeeeee/ongoing/LLM_AED/new_processing/validation_result/all/llama_8b_all/without_llm_error/varierr_without_0.8_dist.json"
    output_path = "/Users/phoebeeeee/ongoing/LLM_AED/new_processing/validation_result/all/llama_8b_all/without_llm_error/varierr_without_0.8_cleaned.json"
    process_file(input_path, output_path)


Processing /Users/phoebeeeee/ongoing/LLM_AED/new_processing/validation_result/all/llama_8b_all/without_llm_error/varierr_without_0.8_dist.json: 500it [00:00, 102480.06it/s]


In [21]:
import json
from collections import Counter
from pathlib import Path
from tqdm import tqdm

# Fixed paths
# VARIERR_FILE: JSON-lines VariErr file; MODEL_DIR: directory holding the
# with_validation_<threshold>.jsonl model outputs; OUT_DIR: where the cleaned files go.
VARIERR_FILE = "/Users/phoebeeeee/ongoing/LLM_AED/dataset/varierr/varierr.json"
MODEL_DIR = "/Users/phoebeeeee/ongoing/LLM_AED/new_processing/validation_result/all/llama_8b_all/threshold"
OUT_DIR = "/Users/phoebeeeee/ongoing/LLM_AED/new_processing/validation_result/all/llama_8b_all/threshold/without_llm_error"

# Threshold strings "0.1" ... "0.9"; they are interpolated into the input and output file names.
THRESHOLDS = [f"{x/10:.1f}" for x in range(1, 10)]  # 0.1 ~ 0.9

# One-letter label codes from the model's 'error' lists -> full label names.
label_map_short2long = {'e': 'entailment', 'n': 'neutral', 'c': 'contradiction'}
# NOTE(review): label_order does not appear to be referenced by the functions in this cell — confirm before removing.
label_order = ["entailment", "neutral", "contradiction"]

def load_jsonl_to_dict_by_id(path: Path):
    """Load a UTF-8 JSON-lines file into a dict keyed by each record's "id".

    Blank or whitespace-only lines are skipped. When several records share an
    id, the last one read is kept.

    Args:
        path: ``pathlib.Path`` of the JSON-lines file.

    Returns:
        dict mapping ``record["id"]`` to the parsed record.

    Raises:
        KeyError: if a record has no "id" field.
    """
    with path.open("r", encoding="utf-8") as handle:
        parsed = (json.loads(row) for row in handle if row.strip())
        return {record["id"]: record for record in parsed}

def convert_label_list_to_dist(label_list):
    """Convert label names into relative frequencies ordered entailment, neutral, contradiction.

    Args:
        label_list: iterable of label strings, or None. Any label other than
            the three NLI labels is ignored.

    Returns:
        A three-element list of floats; all zeros when none of the three
        labels is present.
    """
    frequency = Counter(label_list or [])
    per_label = (
        frequency.get("entailment", 0),
        frequency.get("neutral", 0),
        frequency.get("contradiction", 0),
    )
    total = sum(per_label)
    if total == 0:
        return [0.0, 0.0, 0.0]
    return [hits / total for hits in per_label]

def make_clean_record(raw):
    """Project a record onto the four output fields.

    Args:
        raw: dict-like record. ``id`` is used for the uid when the key is
            present, otherwise ``uid``.

    Returns:
        dict with the keys ``uid``, ``premise`` (from ``context``),
        ``hypothesis`` (from ``statement``) and ``label``, in that order.
        Fields missing from ``raw`` are None.
    """
    cleaned = {"uid": raw.get("id", raw.get("uid"))}
    field_sources = (
        ("premise", "context"),
        ("hypothesis", "statement"),
        ("label", "label"),
    )
    for target, source in field_sources:
        cleaned[target] = raw.get(source)
    return cleaned

def process_one_threshold(thr_str: str):
    """Build the cleaned, error-filtered VariErr file for one threshold.

    Reads ``VARIERR_FILE`` and ``MODEL_DIR/with_validation_<thr_str>.jsonl``,
    removes the labels listed in the model's ``error`` field from each record's
    ``label_set_round_1``, converts the remaining labels to a distribution, and
    writes the four-field records to
    ``OUT_DIR/varierr_without_<thr_str>_cleaned.json`` (one JSON object per line).

    Args:
        thr_str: threshold as it appears in the file names, e.g. ``"0.8"``.

    Returns:
        None. Prints a warning and returns early if the model file does not exist.
    """
    model_file = Path(MODEL_DIR) / f"with_validation_{thr_str}.jsonl"
    out_file_cleaned = Path(OUT_DIR) / f"varierr_without_{thr_str}_cleaned.json"

    if not model_file.exists():
        print(f"[WARN] Model file not found for threshold {thr_str}: {model_file}")
        return

    # 1) Load VariErr and the model output
    print(f"\n==> Processing threshold {thr_str}")
    out_file_cleaned.parent.mkdir(parents=True, exist_ok=True)
    varierr_records = load_jsonl_to_dict_by_id(Path(VARIERR_FILE))
    model_records = load_jsonl_to_dict_by_id(Path(model_file))

    dropped_keys = ('entailment', 'contradiction', 'neutral', 'idk')
    cleaned_records = []

    # 2) Merge and remove the errors detected by the model
    for uid, source_entry in varierr_records.items():
        # Work on a filtered shallow copy so the loaded record itself is not modified.
        entry = {key: value for key, value in source_entry.items() if key not in dropped_keys}

        # Records without a model entry are treated as having no detected errors.
        model_entry = model_records.get(uid, {})

        # Attach the model's error labels (short code -> full name)
        if 'error' in model_entry:
            short_codes = model_entry['error'] or []
            entry['error_llm'] = [label_map_short2long.get(code, code) for code in short_codes]

        # Attach the non-validated explanations (optional)
        if 'not_validated_exp' in model_entry:
            entry['not_validated_exp_llm'] = model_entry.get('not_validated_exp', {})

        # Label set that remains after removing the error labels
        remaining = set(entry.get('label_set_round_1', [])) - set(entry.get('error_llm', []))
        entry['label_set_llm'] = sorted(remaining)

        # 3) Convert the remaining labels to a distribution
        entry["label"] = convert_label_list_to_dist(entry['label_set_llm'])

        # 4) Reduce to the minimal record
        cleaned_records.append(make_clean_record(entry))

    # Write the cleaned output only after every record has been built.
    with out_file_cleaned.open("w", encoding="utf-8") as fout:
        fout.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in cleaned_records
        )

    print(f"[OK] Saved cleaned: {out_file_cleaned}")

def main():
    """Run process_one_threshold once for each entry in THRESHOLDS, in order."""
    for threshold in THRESHOLDS:
        process_one_threshold(threshold)

# Entry point: process every threshold when this code is run as the main module.
if __name__ == "__main__":
    main()



==> Processing threshold 0.1
[OK] Saved cleaned: /Users/phoebeeeee/ongoing/LLM_AED/new_processing/validation_result/all/llama_8b_all/threshold/without_llm_error/varierr_without_0.1_cleaned.json

==> Processing threshold 0.2
[OK] Saved cleaned: /Users/phoebeeeee/ongoing/LLM_AED/new_processing/validation_result/all/llama_8b_all/threshold/without_llm_error/varierr_without_0.2_cleaned.json

==> Processing threshold 0.3
[OK] Saved cleaned: /Users/phoebeeeee/ongoing/LLM_AED/new_processing/validation_result/all/llama_8b_all/threshold/without_llm_error/varierr_without_0.3_cleaned.json

==> Processing threshold 0.4
[OK] Saved cleaned: /Users/phoebeeeee/ongoing/LLM_AED/new_processing/validation_result/all/llama_8b_all/threshold/without_llm_error/varierr_without_0.4_cleaned.json

==> Processing threshold 0.5
[OK] Saved cleaned: /Users/phoebeeeee/ongoing/LLM_AED/new_processing/validation_result/all/llama_8b_all/threshold/without_llm_error/varierr_without_0.5_cleaned.json

==> Processing threshold