In [None]:
import glob
import os

import pandas as pd

# Case studies covered by this notebook. The names are looked up as substrings
# of model-output file names and also key the human-label files below.
CASESTUDIES = ["wellbeing", "achievement", "delinquency", "ptsd"]

# Display label -> file-name fragments that identify that model.
# MODELS is the flattened lookup (fragment -> label); fragments are compared
# case-insensitively against output file names. Insertion order is kept
# exactly as listed here.
_MODEL_ALIASES = (
    ("gpt-5-mini", ("gpt-5-mini",)),
    ("Phi-4", ("phi-4", "microsoft_phi-4")),
    ("Qwen-3-30B", ("Qwen_Qwen3-30B", "Qwen_Qwen3-30B-A3B-Instruct-2507")),
    ("Qwen-Next-80B", ("Qwen_Qwen3-Next-80B-A3B-Instruct",)),
    ("Llama-3.3-70B", ("meta-llama_Llama-3.3-70B-Instruct",)),
    ("Magistral-Small", ("mistralai_Magistral-Small-2509",)),
)
MODELS = {
    fragment: label for label, fragments in _MODEL_ALIASES for fragment in fragments
}

# NOTE(review): not referenced by the loading cell that follows; presumably the
# question-set ids of interest for later analysis -- confirm.
QUESTION_SET = [0, 4]

# One semicolon-separated human-label CSV per case study.
HUMAN_FILES = {name: f"./human_labels/{name}.csv" for name in CASESTUDIES}

In [None]:
def parse_output_filename(path, casestudies, models):
    """Derive ``(casestudy, model, question_set)`` from a model-output CSV path.

    Only the file name is inspected; directories and the extension (of any
    casing) are ignored.

    Parameters
    ----------
    path : str
        Path to an output file whose stem ends in ``_<question_set>``.
    casestudies : iterable of str
        Case-study names; the first one found as a substring of the
        lower-cased stem is returned.
    models : mapping of str to str
        File-name fragment -> display label. Fragments are matched
        case-insensitively as substrings of the stem.

    Returns
    -------
    tuple of (str, str, int)
        ``casestudy`` and ``model`` fall back to ``"unknown"`` when nothing
        matches.

    Raises
    ------
    ValueError
        If the last ``_``-separated token of the stem is not an integer.
    """
    # splitext removes only the real extension, unlike str.replace(".csv", "")
    # which also strips ".csv" occurring mid-name and misses ".CSV".
    stem = os.path.splitext(os.path.basename(path))[0]
    stem_lower = stem.lower()

    suffix = stem.rsplit("_", 1)[-1]
    try:
        question_set = int(suffix)
    except ValueError as exc:
        raise ValueError(
            f"Cannot parse question_set from {path!r}: expected the file name "
            f"to end in '_<integer>', got {suffix!r}"
        ) from exc

    casestudy = next((cs for cs in casestudies if cs in stem_lower), "unknown")

    # Prefer the longest matching fragment so a specific key always wins over
    # a shorter key it contains, independent of dict order. Ties keep the
    # first key in iteration order.
    matches = [key for key in models if key.lower() in stem_lower]
    model = models[max(matches, key=len)] if matches else "unknown"

    return casestudy, model, question_set


# Sorted so the row order of the combined table is reproducible; plain
# glob.glob returns files in arbitrary, filesystem-dependent order.
files = sorted(glob.glob("./outputs/*.csv"))

# One dataframe per model-output file, each tagged with metadata parsed from
# its file name.
dfs = []

for file in files:
    casestudy, model, question_set = parse_output_filename(file, CASESTUDIES, MODELS)

    df = pd.read_csv(file).assign(
        model=model,
        casestudy=casestudy,
        question_set=question_set,
    )

    dfs.append(df)


# Human annotations: each file is read as a semicolon-separated wide table
# with a `paper_id` column; every other column header is treated as a
# question id and must therefore be castable to int.
human_dfs = []
for casestudy, path in HUMAN_FILES.items():
    df = pd.read_csv(path, sep=";")

    # Wide -> long (one row per paper/question pair), then tag each row with
    # the same metadata columns the model outputs carry. Humans provide no
    # reasoning/evidence text, so "-" is used as a placeholder.
    # NOTE(review): non-ptsd labels are tagged question_set 3, a value that is
    # not in QUESTION_SET ([0, 4]) -- confirm 3 is intended.
    df_long = (
        df.melt(id_vars=["paper_id"], var_name="question_id", value_name="answer")
        .assign(
            casestudy=casestudy,
            model="human",
            question_set=0 if casestudy == "ptsd" else 3,
            reasoning="-",
            evidence="-",
            question_id=lambda frame: frame["question_id"].astype(int),
        )
    )[
        [
            "casestudy",
            "model",
            "question_set",
            "paper_id",
            "question_id",
            "reasoning",
            "evidence",
            "answer",
        ]
    ]

    human_dfs.append(df_long)

# Stack the per-case-study human labels into a single long table.
human_combined = pd.concat(human_dfs, ignore_index=True)

# Column layout shared by the model rows and the human rows.
column_order = [
    "casestudy",
    "model",
    "question_set",
    "paper_id",
    "question_id",
    "reasoning",
    "evidence",
    "answer",
]

# Stack every model-output frame and keep only the shared columns, in order.
combined_df = pd.concat(dfs, ignore_index=True)[column_order]

# Append the human annotations below the model results.
final_df = pd.concat([combined_df, human_combined], ignore_index=True)

# Write the full table to an Excel workbook, without the index column.
final_df.to_excel("./combined.xlsx", index=False)