In [None]:
import json

# Paths of the file read and the file written by this cell.
input_file = "/llama-8b/scores.json"
output_file = "/8b_ranking.json"

# Full label name -> one-letter code. Insertion order is the canonical label
# order, so LABELS is derived from it to keep the two definitions in sync.
LABEL_SHORT = {
    "entailment": "e",
    "neutral": "n",
    "contradiction": "c",
}
LABELS = list(LABEL_SHORT)

def compute_rank_by_amount(count_dict, use_short=True):
    """Dense-rank the three labels by descending count.

    Args:
        count_dict: Mapping from label to count. Keys are "e"/"n"/"c" when
            ``use_short`` is true, otherwise the full names listed in
            ``LABELS``. A missing label or a ``None`` count is treated as 0.
            ``None`` itself is accepted and treated as an empty mapping.
        use_short: Whether ``count_dict`` is keyed by one-letter codes
            (default) or by full label names.

    Returns:
        A dict mapping each one-letter label to its rank. Rank 1 is the
        largest count, labels with equal counts share a rank, and the next
        distinct count gets the next consecutive rank (no gaps). Keys are
        inserted in order of descending count, ties broken alphabetically.
    """
    if count_dict is None:
        # A record may carry the field as JSON null; rank it like "no votes".
        count_dict = {}

    if use_short:
        counts = {code: count_dict.get(code) or 0 for code in ("e", "n", "c")}
    else:
        counts = {
            LABEL_SHORT[label]: count_dict.get(label) or 0 for label in LABELS
        }

    # Highest count first; the label itself is the deterministic tie-breaker.
    ordered = sorted(counts.items(), key=lambda pair: (-pair[1], pair[0]))

    ranks = {}
    rank = 0
    previous_count = None
    for label, count in ordered:
        # Advance the rank only when the count actually changes (dense rank).
        if rank == 0 or count != previous_count:
            rank += 1
        ranks[label] = rank
        previous_count = count

    return ranks

# Read the JSON Lines input, attach a label ranking for each of the two rounds
# to every record, then write the augmented records back out as JSON Lines.
data = []
with open(input_file, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines in the input
        item = json.loads(line)

        # `or {}` also covers records where the field is present but null,
        # which would otherwise reach the ranking helper as None.
        r1_counts = item.get("label_count_round_1") or {}
        r2_counts = item.get("label_count_round_2") or {}
        # chaosnli_counts = item.get("chaosnli_labels", {})

        item["ranking_round_1"] = compute_rank_by_amount(r1_counts)
        item["ranking_round_2"] = compute_rank_by_amount(r2_counts)
        # item["ranking_chaosnli"] = compute_rank_by_amount(chaosnli_counts, use_short=True)

        data.append(item)

with open(output_file, "w", encoding="utf-8") as f:
    # One JSON document per line; non-ASCII text is written unescaped.
    f.writelines(json.dumps(item, ensure_ascii=False) + "\n" for item in data)


In [None]:
## LLM vs. ChaosNLI: per-item Kendall's tau between round-1 label rankings

import json
import pandas as pd
from scipy.stats import kendalltau

# The two ranking files compared in this cell.
file1 = "../gpt_ranking.json"
file2 = "../varierr_ranking.json"

# Normalises a label key to its one-letter code: full names are shortened,
# and keys that are already one-letter codes map to themselves.
LABEL_MAP = dict(
    entailment="e",
    contradiction="c",
    neutral="n",
    e="e",
    n="n",
    c="c",
)

def load_llm_rankings(filepath):
    """Load the round-1 label rankings from a JSON Lines file.

    Args:
        filepath: Path to a UTF-8 file holding one JSON object per line.

    Returns:
        A dict mapping each record's ``"id"`` to ``{"round_1": ranking}``,
        where ``ranking`` is the record's ``"ranking_round_1"`` mapping with
        its keys normalised through ``LABEL_MAP`` (unknown keys are kept
        unchanged). Records without a truthy ``"id"`` are skipped; a missing
        or null ranking yields an empty dict.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = {}
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) would make
                # json.loads raise, so skip them.
                continue
            item = json.loads(line)
            id_ = item.get("id")
            if not id_:
                continue
            # `or {}` guards against the field being present but null.
            raw_r1 = item.get("ranking_round_1") or {}
            r1 = {LABEL_MAP.get(k, k): v for k, v in raw_r1.items()}
            # r2 = {LABEL_MAP.get(k, k): v for k, v in item.get("ranking_round_2", {}).items()}
            # data[id_] = {"round_1": r1, "round_2": r2}
            data[id_] = {"round_1": r1}
    return data

def load_chaosnli_rankings(filepath):
    """Load the reference label rankings from a JSON Lines file.

    Args:
        filepath: Path to a UTF-8 file holding one JSON object per line.

    Returns:
        A dict mapping each record's ``"id"`` directly to its
        ``"ranking_round_1"`` mapping, with keys normalised through
        ``LABEL_MAP`` (unknown keys are kept unchanged). Records without a
        truthy ``"id"`` are skipped; a missing or null ranking yields an
        empty dict.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = {}
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) would make
                # json.loads raise, so skip them.
                continue
            item = json.loads(line)
            id_ = item.get("id")
            if not id_:
                continue
            # `or {}` guards against the field being present but null.
            raw_rank = item.get("ranking_round_1") or {}
            data[id_] = {LABEL_MAP.get(k, k): v for k, v in raw_rank.items()}
    return data

def compute_tau(vec1, vec2):
    """Return Kendall's tau between two equally long rank vectors.

    Returns ``None`` when the vectors differ in length or hold fewer than
    two entries; otherwise returns the statistic reported by
    ``scipy.stats.kendalltau`` (its p-value is discarded).
    """
    size = len(vec1)
    comparable = size == len(vec2) and size >= 2
    if not comparable:
        return None

    statistic, _pvalue = kendalltau(vec1, vec2)
    return statistic

llm_rankings = load_llm_rankings(file1)
chaosnli_rankings = load_chaosnli_rankings(file2)

# Fixed label order so both rank vectors are aligned position by position.
desired_order = ['e', 'n', 'c']
# Explicit column list: keeps the CSV header and the "kendall_tau" column
# present even when no item produced a result row.
result_columns = ["id", "round", "labels", "llm_ranks", "chaosnli_ranks", "kendall_tau"]

results = []

# Only ids present in both files can be compared; sorting keeps the output
# order deterministic.
for id_ in sorted(set(llm_rankings) & set(chaosnli_rankings)):
    chaos = chaosnli_rankings[id_]
    # for round_name in ["round_1", "round_2"]:
    for round_name in ["round_1"]:
        llm_rank = llm_rankings[id_].get(round_name, {})

        common_keys = [k for k in desired_order if k in llm_rank and k in chaos]
        if len(common_keys) < 2:
            # Kendall's tau needs at least two paired observations.
            continue

        llm_vec = [llm_rank[k] for k in common_keys]
        chaos_vec = [chaos[k] for k in common_keys]

        tau = compute_tau(llm_vec, chaos_vec)

        results.append({
            "id": id_,
            "round": round_name,
            "labels": ",".join(common_keys),
            "llm_ranks": llm_vec,
            "chaosnli_ranks": chaos_vec,
            "kendall_tau": tau
        })
df = pd.DataFrame(results, columns=result_columns)
df.to_csv("kendall_tau_gpt_vs_varierr_before.csv", index=False)

# With no result rows there is nothing to average; report NaN instead of
# failing on the column lookup.
avg_tau = df["kendall_tau"].mean() if results else float("nan")
print("Avg Kendall’s Tau:", avg_tau)
