In [None]:
## add id to original label guided dataset

import json
from tqdm import tqdm
from pathlib import Path

file_a_path = "../VariErr-Label-Guided-longest.json"
file_b_path = "../varierr.json"
output_path = "../VariErr-Label-Guided-longest-with-ID.json"

# Both inputs are read as JSON Lines (one JSON object per line). The encoding
# is pinned to UTF-8 instead of relying on the platform default, and blank
# lines are skipped so a trailing empty line does not abort the whole run.
with open(file_b_path, "r", encoding="utf-8") as f:
    full_dataset = [json.loads(line) for line in f if line.strip()]

# Lookup from the whitespace-stripped (context, statement) text pair to the
# sample id. If the same pair occurs more than once, the last id wins, so the
# count printed below is the number of distinct pairs.
pair_to_id = {
    (sample["context"].strip(), sample["statement"].strip()): sample["id"]
    for sample in full_dataset
}

print(f"We have {len(pair_to_id)} pairs.")

with open(file_a_path, "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

# json.dumps(..., ensure_ascii=False) emits non-ASCII characters verbatim, so
# the output file must be opened with an encoding able to represent them.
with open(output_path, "w", encoding="utf-8") as f_out:
    for sample in tqdm(data):
        premise = sample["premise"].strip()
        hypothesis = sample["hypothesis"].strip()
        key = (premise, hypothesis)

        if key in pair_to_id:
            sample["id"] = pair_to_id[key]
        else:
            # Unmatched samples are still written, with an explicit null id.
            print(f"Not found: premise='{premise}...', hypothesis='{hypothesis}...'")
            sample["id"] = None

        f_out.write(json.dumps(sample, ensure_ascii=False) + "\n")

print("Done.")


In [None]:
# integrate explanations generated by LLMs into a single file

import json, re
from pathlib import Path
from tqdm import tqdm

explanation_root = Path("../gpt_4.1_generation_raw")
input_jsonl = Path("../varierr.json")
output_jsonl = Path("../gpt_4.1_explanation_raw.jsonl")

# suffix = ".txt"

def clean_explanation(text: str) -> str:
    """Remove one leading list marker from ``text`` and trim whitespace.

    A marker is recognised only at the very start of the string (optionally
    preceded by whitespace) and only when it is followed by whitespace. The
    accepted forms are: digits followed by ``.`` or ``)``; a single ``-``,
    ``•`` or ``*``; a single ASCII letter followed by ``.`` or ``)``; or
    word characters wrapped in parentheses.

    Args:
        text: One raw line of generated output.

    Returns:
        The line without the marker and without surrounding whitespace.
    """
    leading_marker = r"^\s*(?:[\d]+[\.\)]|[-•*]|[a-zA-Z][\.\)]|\(\w+\))\s+"
    without_marker = re.sub(leading_marker, "", text)
    return without_marker.strip()

label_map = {"E": "e", "N": "n", "C": "c"}

# Read every instance up front; the input holds one JSON object per line.
with open(input_jsonl, "r", encoding="utf-8") as source:
    instances = [json.loads(raw_line) for raw_line in source]

with open(output_jsonl, "w", encoding="utf-8") as fout:
    for instance in tqdm(instances, desc="Inject explanations"):
        # Each instance has its own sub-folder, named after its id.
        subfolder = explanation_root / str(instance["id"])
        new_comments = []

        if subfolder.exists():
            for label in ("E", "N", "C"):
                # Only the "<label>_third.txt" file is consulted; the
                # "_second.txt", "_first.txt" and bare-label fallbacks are
                # currently disabled.
                candidates = [f"{label}_third.txt"]

                for candidate_name in candidates:
                    candidate = subfolder / candidate_name
                    if not candidate.exists():
                        continue
                    # Every non-blank line is one explanation, stored as a
                    # [text, lower-case label] pair.
                    with open(candidate, "r", encoding="utf-8") as handle:
                        for raw_line in handle:
                            if raw_line.strip():
                                new_comments.append(
                                    [clean_explanation(raw_line), label_map[label]]
                                )
                    break
                else:
                    # The loop ended without a break: no candidate existed.
                    print(f"No file found for {label} in {subfolder}")
        else:
            print(f"missing folder: {subfolder}")

        # A record is written even when nothing was found for the instance;
        # "generated_explanations" is then an empty list.
        new_instance = {
            "id": instance["id"],
            "premise": instance["context"],
            "hypothesis": instance["statement"],
            "generated_explanations": new_comments,
        }
        fout.write(json.dumps(new_instance, ensure_ascii=False) + "\n")

print("Done.")


In [None]:
# count the number of generated lines per label (E/N/C) across all sample folders

import os
import re
import pandas as pd

ROOT_FOLDER = "../llama_elaborate_generation_raw"

def count_generations(file_path):
    """Return how many lines of a UTF-8 text file are not blank.

    A line counts as blank when it is empty or contains only whitespace.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The number of non-blank lines as an int.
    """
    non_blank = 0
    with open(file_path, 'r', encoding='utf-8') as handle:
        for row in handle:
            if row.strip():
                non_blank += 1
    return non_blank

def count_all_generations(root_folder=None):
    """Print the per-label totals of generated lines under a root folder.

    Every direct sub-directory of the root is inspected for the files
    ``E_third.txt``, ``N_third.txt`` and ``C_third.txt``. The non-blank lines
    of each file are counted with ``count_generations``; a missing file
    contributes 0. The three label totals and their sum are printed.

    Args:
        root_folder: Directory to scan. Defaults to the module-level
            ``ROOT_FOLDER`` when omitted.

    Returns:
        None. The summary is printed.
    """
    if root_folder is None:
        root_folder = ROOT_FOLDER

    labels = ["E", "N", "C"]
    records = []
    total = {label: 0 for label in labels}

    # Sorted so that the per-folder rows come out in a stable order.
    for subfolder in sorted(os.listdir(root_folder)):
        sub_path = os.path.join(root_folder, subfolder)
        if not os.path.isdir(sub_path):
            continue

        row = {"folder": subfolder}
        for label in labels:
            file_path = os.path.join(sub_path, f"{label}_third.txt")
            count = count_generations(file_path) if os.path.isfile(file_path) else 0
            row[label] = count
            total[label] += count

        records.append(row)

    # The columns are given explicitly so the frame has the expected shape
    # even when no sub-folder was found; otherwise the TOTAL row assignment
    # below fails on a frame without columns.
    df = pd.DataFrame(records, columns=["folder"] + labels)
    df.loc["TOTAL"] = ["TOTAL"] + [total[label] for label in labels]
    # print(df)

    print(f"E={total['E']}，N={total['N']}，C={total['C']}，total: {total['E'] + total['N'] + total['C']}")

count_all_generations()


In [None]:
# get avg score for instance

import json
from collections import defaultdict

# The scores file is a single JSON object mapping string keys to scores.
with open('../scores.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Keys are grouped by everything before their LAST '-'; the part after it is
# discarded (presumably a per-explanation index -- confirm against the
# producer of scores.json).
groups = defaultdict(list)
for key, value in data.items():
    id_label, separator, _ = key.rpartition('-')
    if not separator:
        # No '-' in the key at all: report it and leave it out of the average.
        print(f"'{key}' does not match.")
        continue
    groups[id_label].append(value)

# Every group holds at least one value, so the division is always defined.
averaged_data = {k: sum(v) / len(v) for k, v in groups.items()}

# NOTE: the extension used to be ".jsonn", which looks like a typo; update any
# consumer that still reads the old file name.
with open('../avg_llama3.1_scores.json', 'w', encoding='utf-8') as f:
    json.dump(averaged_data, f, indent=2)


In [None]:
## Thresholding for ChaosNLI
import json

def process_label_distribution(label_probs, threshold=0.2):
    """Turn a label distribution into a uniform one over its retained labels.

    A label is retained when its probability is at least ``threshold``. The
    retained labels share the probability mass equally and all others get 0.0.

    Args:
        label_probs: Iterable of probabilities, one per label position.
        threshold: Minimum probability for a label to be retained.

    Returns:
        A list of exactly three floats, for positions 0, 1 and 2. It is all
        zeros when no label reaches the threshold.
    """
    kept = {idx for idx, prob in enumerate(label_probs) if prob >= threshold}
    if not kept:
        return [0.0, 0.0, 0.0]

    share = 1.0 / len(kept)
    uniform = []
    for idx in range(3):
        uniform.append(share if idx in kept else 0.0)
    return uniform

input_file = '../dev_cleaned.json'
output_file = '../dev_cleaned_20.json'

# Stream the input one JSON object per line, replace each record's 'label'
# with its thresholded version, and write the record straight back out.
with open(input_file, 'r', encoding='utf-8') as fin:
    with open(output_file, 'w', encoding='utf-8') as fout:
        for record_line in fin:
            item = json.loads(record_line)
            item['label'] = process_label_distribution(item['label'])
            serialized = json.dumps(item, ensure_ascii=False)
            fout.write(serialized + '\n')
