In [None]:
import json
import re
import ast
import pandas as pd

def parse_raw_json(raw):
    # Handle empty input
    if not raw or pd.isna(raw):
        return {"answer": None, "confidence": None, "rationale": None}

    raw = str(raw)

    # Handle dictionary-like strings using AST
    if raw.strip().startswith("{'raw'") or raw.strip().startswith('{"raw"'):
        try:
            raw_dict = ast.literal_eval(raw)
            raw = raw_dict.get("raw", "")
        except Exception as e:
            print("AST eval failed:", e)
            return {"answer": None, "confidence": None, "rationale": None}

    # Normalize newlines and tabs
    raw = raw.replace("\n", " ").replace("\t", " ")

    # Replace double double-quotes with single double-quote
    raw = raw.replace('""', '"')

    # Remove ```json or ``` markers
    raw = re.sub(r"```(?:json)?", "", raw).strip()

    # Remove any remaining line breaks
    raw = " ".join(raw.splitlines())

    # Try to extract JSON-like content
    match = re.search(r"\{.*\}", raw, re.DOTALL)
    if not match:
        return {"answer": None, "confidence": None, "rationale": None}

    json_str = match.group(0)

    # Escape backslashes
    json_str = json_str.replace('\\', '\\\\')

    # Count quotes inside JSON string
    num_quotes = json_str.count('"')
    if num_quotes % 2 != 0:
        # If odd, add a closing quote at the end
        json_str += '"'

    try:
        data = json.loads(json_str)
        return {
            "answer": data.get("answer"),
            "confidence": data.get("confidence"),
            "rationale": data.get("rationale"),
        }
    except json.JSONDecodeError as e:
        print("JSON decode failed:", e)
        return {"answer": None, "confidence": None, "rationale": None}


In [63]:
import pandas as pd

models = ["llava-v1.6-mistral-7b-hf", "Qwen2.5-VL-7B-Instruct"]
datasets = ["cvr", "bp", "marsvqa", "raven"]
ver = "ver1"
strategies = ["classification", "direct", "contrastive", "descriptive"]
results = ["results"]

for dataset in datasets:
    for strategy in strategies:
        for model in models:
            for result in results:
                print(f"Processing: {dataset} | {strategy} | {model} | {ver}")
                df = pd.read_csv(
                    f"../results/{dataset}/{strategy}/{model}/{ver}/{result}.csv",
                    dtype={"problem_id": str}, 
                )

                df["problem_id"] = df["problem_id"].str.strip()

                mask = df["answer"].isna() | df["confidence"].isna() | df["rationale"].isna() | (df["answer"] == '')

                parsed = df.loc[mask, "raw_response"].apply(parse_raw_json)

                df.loc[mask, "answer"] = parsed.apply(lambda x: x["answer"])
                df.loc[mask, "confidence"] = parsed.apply(lambda x: x["confidence"])
                df.loc[mask, "rationale"] = parsed.apply(lambda x: x["rationale"])

                df.to_csv(f"../results/{dataset}/{strategy}/{model}/{ver}/{result}.csv", index=False)


Processing: cvr | classification | llava-v1.6-mistral-7b-hf | ver1
Processing: cvr | classification | Qwen2.5-VL-7B-Instruct | ver1
Processing: cvr | direct | llava-v1.6-mistral-7b-hf | ver1
Processing: cvr | direct | Qwen2.5-VL-7B-Instruct | ver1
Processing: cvr | contrastive | llava-v1.6-mistral-7b-hf | ver1
Processing: cvr | contrastive | Qwen2.5-VL-7B-Instruct | ver1
Processing: cvr | descriptive | llava-v1.6-mistral-7b-hf | ver1
Processing: cvr | descriptive | Qwen2.5-VL-7B-Instruct | ver1
Processing: bp | classification | llava-v1.6-mistral-7b-hf | ver1
Processing: bp | classification | Qwen2.5-VL-7B-Instruct | ver1
Processing: bp | direct | llava-v1.6-mistral-7b-hf | ver1
Processing: bp | direct | Qwen2.5-VL-7B-Instruct | ver1
Processing: bp | contrastive | llava-v1.6-mistral-7b-hf | ver1
Processing: bp | contrastive | Qwen2.5-VL-7B-Instruct | ver1
Processing: bp | descriptive | llava-v1.6-mistral-7b-hf | ver1
Processing: bp | descriptive | Qwen2.5-VL-7B-Instruct | ver1
Processi

  df.loc[mask, "confidence"] = parsed.apply(lambda x: x["confidence"])
  df.loc[mask, "confidence"] = parsed.apply(lambda x: x["confidence"])
  df.loc[mask, "confidence"] = parsed.apply(lambda x: x["confidence"])
  df.loc[mask, "confidence"] = parsed.apply(lambda x: x["confidence"])
  df.loc[mask, "confidence"] = parsed.apply(lambda x: x["confidence"])
  df.loc[mask, "confidence"] = parsed.apply(lambda x: x["confidence"])
  df.loc[mask, "confidence"] = parsed.apply(lambda x: x["confidence"])
  df.loc[mask, "confidence"] = parsed.apply(lambda x: x["confidence"])
  df.loc[mask, "confidence"] = parsed.apply(lambda x: x["confidence"])
  df.loc[mask, "confidence"] = parsed.apply(lambda x: x["confidence"])
  df.loc[mask, "confidence"] = parsed.apply(lambda x: x["confidence"])
  df.loc[mask, "confidence"] = parsed.apply(lambda x: x["confidence"])
  df.loc[mask, "confidence"] = parsed.apply(lambda x: x["confidence"])
  df.loc[mask, "confidence"] = parsed.apply(lambda x: x["confidence"])
  df.l