In [1]:
import json
import os

# Directory that holds the VQAv2 prediction-record files analysed below.
path_prefix = "/home/jyc/prefix-learning/analysis/records/vqav2"

# One entry per dataset: "path" lists the record files to read and "data"
# starts empty (the loading loop that follows fills it, keyed by file path).
data = {"vqav2": {"path": [], "data": {}}}
data["vqav2"]["path"].extend(
    os.path.join(path_prefix, file_name)
    for file_name in (
        "icl-32-shot.json",
        "licv.json",
        "lora-r-16.json",
        "mimic.json",
    )
)
# Read every listed record file and store its content under
# data[<dataset>]["data"][<file path>].
for name, item in data.items():
    for p in item["path"]:
        # Use a context manager so the file handle is closed as soon as the
        # JSON is parsed (the bare json.load(open(p)) form leaked the handle).
        # JSON interchange files are UTF-8, so do not rely on the locale default.
        with open(p, encoding="utf-8") as f:
            raw_file = json.load(f)
        # The ICL file is stored exactly as loaded; every other file keeps its
        # entries under a top-level "records" key.
        if p != os.path.join(path_prefix, "icl-32-shot.json"):
            data[name]["data"][p] = raw_file["records"]
        else:
            data[name]["data"][p] = raw_file

In [24]:
import re


def post_process(text):
    """Normalize a raw model prediction so it can be compared as a short answer.

    The text is cut at the first occurrence of "Question", a newline, "Short"
    or "Answer" (only the part before it is kept); every character that is
    neither a word character nor whitespace is removed; the result is stripped
    of surrounding whitespace and lower-cased.

    Args:
        text: Raw prediction string.

    Returns:
        The normalized prediction (possibly an empty string).
    """
    # maxsplit is passed by keyword: supplying it positionally to re.split is
    # deprecated since Python 3.13.
    head = re.split(r"Question|\n|Short|Answer", text, maxsplit=1)[0]
    return re.sub(r"[^\w\s]", "", head).strip().lower()


def collect_yes_no_hallucination(record):
    """Collect predictions whose yes/no-ness disagrees with the answer type.

    Args:
        record: An iterable of record dicts, or a dict whose values are such
            records. Each record needs "answer_type", "prediction" and
            "question_id"; "question" is optional and defaults to "".

    Returns:
        A ``(yes_no_but_other, other_but_yes_no)`` tuple of lists of metadata
        dicts. The first list holds records typed "yes/no" whose normalized
        prediction is neither "yes" nor "no"; the second holds records of any
        other type whose normalized prediction is "yes" or "no".
    """
    entries = record.values() if isinstance(record, dict) else record

    expected_yes_no = []  # answer type is yes/no, prediction is something else
    expected_other = []  # answer type is not yes/no, prediction is yes/no

    for entry in entries:
        kind = entry["answer_type"]
        normalized = post_process(entry["prediction"])
        summary = {
            "answer_type": kind,
            "prediction": entry["prediction"],
            "question_id": entry["question_id"],
            "question": entry.get("question", ""),
        }

        wants_yes_no = kind == "yes/no"
        says_yes_no = normalized in ("yes", "no")
        if wants_yes_no == says_yes_no:
            # Prediction form matches the expected answer type: nothing to log.
            continue

        if wants_yes_no:
            expected_yes_no.append(summary)
        else:
            expected_other.append(summary)

    return expected_yes_no, expected_other


def collect_number_hallucination(record):
    """Collect predictions whose number-ness disagrees with the answer type.

    A normalized prediction counts as a number when it consists only of digits
    or is one of the spelled-out words listed in ``manual_mapping``.

    Args:
        record: An iterable of record dicts, or a dict whose values are such
            records. Each record needs "answer_type", "prediction" and
            "question_id"; "question" is optional and defaults to "".

    Returns:
        A ``(number_but_other, other_but_number)`` tuple of lists of metadata
        dicts. The first list holds records typed "number" whose prediction is
        not a number; the second holds records of any other type whose
        prediction is a number.
    """
    other_but_number = []
    number_but_other = []
    if isinstance(record, dict):
        record = record.values()
    # Only the keys are consulted (membership test); the digit values document
    # what each word stands for.
    manual_mapping = {
        "none": "0",
        "zero": "0",
        "one": "1",
        "two": "2",
        "three": "3",
        "four": "4",
        "five": "5",
        "six": "6",
        "seven": "7",
        "eight": "8",
        "nine": "9",
        "ten": "10",
    }
    for item in record:
        answer_type = item["answer_type"]
        pred = post_process(item["prediction"])
        meta_info = {
            "answer_type": answer_type,
            "prediction": item["prediction"],
            "question_id": item["question_id"],
            # Some record files carry no question text; fall back to "" the
            # same way collect_yes_no_hallucination does instead of raising.
            "question": item.get("question", ""),
        }
        looks_like_number = pred.isdigit() or pred in manual_mapping
        if answer_type != "number" and looks_like_number:
            other_but_number.append(meta_info)
        elif answer_type == "number" and not looks_like_number:
            # Previously appended to other_but_number, which left this list
            # permanently empty and polluted the other one.
            number_but_other.append(meta_info)

    return number_but_other, other_but_number


# For every loaded record file, print its name, the two lists of yes/no
# mismatches, and finally the size of each list.
for name, item in data.items():
    for p in item["path"]:
        print(os.path.basename(p))
        yes_no_but_other, other_but_yes_no = collect_yes_no_hallucination(item["data"][p])
        # One call with a newline separator emits the same two lines as two prints.
        print(yes_no_but_other, other_but_yes_no, sep="\n")
        print(len(yes_no_but_other), len(other_but_yes_no))

icl-32-shot.json
[{'answer_type': 'yes/no', 'prediction': 'turtles', 'question_id': 152499003, 'question': ''}, {'answer_type': 'yes/no', 'prediction': 'blender', 'question_id': 210108002, 'question': ''}, {'answer_type': 'yes/no', 'prediction': 'old Question', 'question_id': 248019008, 'question': ''}, {'answer_type': 'yes/no', 'prediction': 'crumbs', 'question_id': 417023010, 'question': ''}, {'answer_type': 'yes/no', 'prediction': 'white Question', 'question_id': 479440017, 'question': ''}]
[{'answer_type': 'other', 'prediction': 'no Question', 'question_id': 19047002, 'question': ''}, {'answer_type': 'other', 'prediction': 'no Question', 'question_id': 36557000, 'question': ''}, {'answer_type': 'other', 'prediction': 'no Question', 'question_id': 38321004, 'question': ''}, {'answer_type': 'number', 'prediction': 'yes Question', 'question_id': 38685000, 'question': ''}, {'answer_type': 'other', 'prediction': 'no Question', 'question_id': 50124014, 'question': ''}, {'answer_type': 'n