## check numbers in label-count-round

In [None]:
import json
from collections import defaultdict

# JSON-lines file passed to sum_label_frequencies() in the call below.
input_path = "../varierr_with_peer.json"

def sum_label_frequencies(input_file):
    """Sum the per-label values of ``label_count_round_3`` over a JSON-lines file.

    Every non-blank line of *input_file* is parsed as one JSON object. For
    each of the labels entailment / neutral / contradiction, the value found
    under ``label_count_round_3`` is added to a running total. A missing
    label, a ``null`` value, or a missing/``null`` ``label_count_round_3``
    field contributes 0. The totals are printed to stdout.

    Args:
        input_file: Path of the UTF-8 encoded JSON-lines file to read.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    labels = ["entailment", "neutral", "contradiction"]
    total_freq = defaultdict(float)

    with open(input_file, 'r', encoding='utf-8') as fin:
        for line in fin:
            line = line.strip()
            if not line:
                # Blank lines are padding, not records; json.loads("") would raise.
                continue
            data = json.loads(line)
            # `or {}` also covers records where the field is present but null.
            label_counts = data.get("label_count_round_3") or {}

            for label in labels:
                # A null count is treated the same as an absent one.
                total_freq[label] += label_counts.get(label) or 0.0

    print("label_count_round_3 ：")
    for label in labels:
        print(f"{label:<15}: {total_freq[label]}")


# Run the check on the configured file; the per-label totals are printed.
sum_label_frequencies(input_path)


## check numbers in label-count-round, but aggregated

In [None]:
import json
from collections import defaultdict

# JSON-lines file passed to count_label_occurrences() in the call below.
input_path = "../varierr_with_peer.json"

def count_label_occurrences(input_file):
    """Count, per label, the records whose ``label_count_round_3`` has a value.

    Every non-blank line of *input_file* is parsed as one JSON object. For
    each of the labels entailment / neutral / contradiction, the counter is
    incremented once per record in which ``label_count_round_3`` holds a
    non-null value for that label. Records where the field is missing or
    ``null`` count for no label. The counts are printed to stdout.

    Args:
        input_file: Path of the UTF-8 encoded JSON-lines file to read.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    labels = ["entailment", "neutral", "contradiction"]
    label_counter = defaultdict(int)

    with open(input_file, 'r', encoding='utf-8') as fin:
        for line in fin:
            line = line.strip()
            if not line:
                # Blank lines are padding, not records; json.loads("") would raise.
                continue
            data = json.loads(line)
            # `or {}` also covers records where the field is present but null.
            label_counts = data.get("label_count_round_3") or {}

            for label in labels:
                # Presence is what counts here, not magnitude: any non-null
                # value (including 0) marks the label as occurring once.
                if label_counts.get(label) is not None:
                    label_counter[label] += 1

    # The header names the field actually read above (round 3).
    print("label_count_round_3 aggregated：")
    for label in labels:
        print(f"{label:<15}: {label_counter[label]}")

# Run the check on the configured file; the per-label counts are printed.
count_label_occurrences(input_path)


## check numbers in label-set-round (should be the same as the previous block)

In [None]:
import json
from collections import defaultdict

# JSON-lines file passed to count_label_occurrences() in the call below.
input_path = "../varierr_with_peer.json"

def count_label_occurrences(input_file):
    """Tally how often each label appears in ``label_set_round_3``.

    Each line of *input_file* is parsed as one JSON object. Every element of
    its ``label_set_round_3`` list that is one of entailment / neutral /
    contradiction adds one to that label's tally; other elements are
    ignored, and a record without the field contributes nothing. The tallies
    are printed to stdout.

    Args:
        input_file: Path of the UTF-8 encoded JSON-lines file to read.
    """
    tracked = ("entailment", "neutral", "contradiction")
    tally = {name: 0 for name in tracked}

    with open(input_file, 'r', encoding='utf-8') as handle:
        # Lazily decode one record per line, in file order.
        records = (json.loads(raw.strip()) for raw in handle)
        for record in records:
            for name in record.get("label_set_round_3", []):
                if name not in tracked:
                    continue
                tally[name] += 1

    print("label_set_round_3 ：")
    for name, count in tally.items():
        print(f"{name:<15}: {count}")

# Run the check on the configured file; the per-label counts are printed.
count_label_occurrences(input_path)


## check error numbers

In [None]:
import json
from collections import defaultdict

# JSON-lines file passed to count_error_peer_labels() in the call below.
input_path = "../varierr_with_peer_validated.json"

def count_error_peer_labels(input_file):
    """Tally how often each label appears in the ``error_labels`` lists.

    Each line of *input_file* is parsed as one JSON object. Every element of
    its ``error_labels`` list that is one of entailment / neutral /
    contradiction adds one to that label's tally; other elements are
    ignored, and a record without the field contributes nothing. The tallies
    are printed to stdout.

    Args:
        input_file: Path of the UTF-8 encoded JSON-lines file to read.
    """
    known_labels = ("entailment", "neutral", "contradiction")
    counts = dict.fromkeys(known_labels, 0)

    with open(input_file, "r", encoding="utf-8") as source:
        for raw_line in source:
            record = json.loads(raw_line)
            # Keep only the entries that are one of the three known labels.
            hits = [
                lab for lab in record.get("error_labels", [])
                if lab in known_labels
            ]
            for lab in hits:
                counts[lab] += 1

    print("number of labels in error_peer:")
    for lab in known_labels:
        print(f"{lab:<15}: {counts[lab]}")

# Run the check on the configured file; the per-label counts are printed.
count_error_peer_labels(input_path)


In [None]:
import json
from collections import defaultdict

# Count peer-validated explanations in a JSON-lines file. An explanation
# entry counts as peer-validated when at least two judgments from annotators
# other than the entry's own annotator have "makes_sense" set to true.
input_path = "../varierr.json"
labels = ["entailment", "neutral", "contradiction"]

# Per label: number of peer-validated explanation entries (every entry counts).
peer_validated_repeated = defaultdict(int)

# Per label: ids of the records with at least one peer-validated entry.
peer_validated_aggregated = defaultdict(set)

with open(input_path, "r", encoding="utf-8") as infile:
    for line in infile:
        data = json.loads(line)
        data_id = data.get("id", "unknown")

        for label in labels:
            label_has_peer_validated = False

            for entry in data.get(label, []):
                # Entries that are not JSON objects carry no judgments.
                if not isinstance(entry, dict):
                    continue
                author = entry.get("annotator")

                # Only "yes" votes cast by someone other than the author count.
                peer_yes_votes = sum(
                    1
                    for judgment in entry.get("judgments", [])
                    if isinstance(judgment, dict)
                    and judgment.get("annotator") != author
                    and judgment.get("makes_sense") is True
                )

                if peer_yes_votes < 2:
                    continue
                peer_validated_repeated[label] += 1
                label_has_peer_validated = True

            if label_has_peer_validated:
                peer_validated_aggregated[label].add(data_id)

print("Peer-validated — repeated:")
for label in labels:
    print(f"  {label:<15}: {peer_validated_repeated[label]} explanations")

print("Peer-validated — aggregated:")
for label in labels:
    print(f"  {label:<15}: {len(peer_validated_aggregated[label])} labels")
