# In [None]:
import ast
import json

import pandas as pd
from google.colab import drive
from tqdm import tqdm

# ===== Input locations =====
# Directory that every CSV below is read from.
folder_path = "/content/drive/MyDrive/phi-3-mini"

# Baseline CSV; it supplies the "Prompts_entities" column.
baseline_file = "DrawBenchPrompts_entities.csv"

# Model name -> CSV supplying that model's "Meta Caption_entities" column.
files = {
    model: f"meta_captions_{model}_entities.csv"
    for model in ("Flux-Dev", "sd_2", "sdxl")
}

# ===== Load baseline =====
# NOTE(review): assumes Google Drive is already mounted at /content/drive;
# nothing in this script mounts it -- confirm against the rest of the notebook.
baseline_df = pd.read_csv(f"{folder_path}/{baseline_file}")

# Each "Prompts_entities" cell is expected to be the text of a Python list.
# Parse it with ast.literal_eval instead of eval: literal_eval accepts only
# Python literals, so a CSV cell can never execute code. Missing cells
# (NaN after read_csv) become an empty list.
baseline_df["Prompts_entities"] = baseline_df["Prompts_entities"].apply(
    lambda cell: ast.literal_eval(cell) if pd.notna(cell) else []
)

# ===== Function to compute MHalDetect for one model =====
def compute_mhal(model_file, model_name, baseline=None, base_dir=None):
    """Return the average hallucinated-entity ratio of one model's captions.

    For every row of the baseline, the caption entities stored under the same
    index label in ``model_file`` are compared with the prompt entities as
    sets (exact, case-sensitive matching). A row's score is the fraction of
    caption entities that do not appear among the prompt entities, or 0.0
    when the caption has no entities. The mean of the row scores is printed
    and returned.

    Args:
        model_file: CSV file name, relative to ``base_dir``, containing a
            ``Meta Caption_entities`` column of Python list literals.
        model_name: Label used in the printed summary line.
        baseline: DataFrame whose ``Prompts_entities`` column already holds
            parsed entity collections. Defaults to the module-level
            ``baseline_df``.
        base_dir: Directory containing ``model_file``. Defaults to the
            module-level ``folder_path``.

    Returns:
        The mean row score as a float; 0.0 when the baseline has no rows.

    Raises:
        KeyError: If ``model_file`` has no row for a baseline index label.
        ValueError, SyntaxError: If a caption cell is not a Python literal.
    """
    if baseline is None:
        baseline = baseline_df
    if base_dir is None:
        base_dir = folder_path

    df = pd.read_csv(f"{base_dir}/{model_file}")
    # ast.literal_eval only accepts Python literals, so unlike eval a CSV
    # cell can never execute code. Missing cells become an empty list.
    captions = df["Meta Caption_entities"].apply(
        lambda cell: ast.literal_eval(cell) if pd.notna(cell) else []
    )

    # Fail up front, naming the offending rows, instead of part-way through
    # the loop with a bare label in the KeyError.
    missing = baseline.index.difference(captions.index)
    if len(missing) > 0:
        raise KeyError(
            f"{model_file} has no row for baseline index label(s): "
            f"{missing[:5].tolist()}"
        )

    mhal_scores = []
    for idx, prompt_entities in baseline["Prompts_entities"].items():
        baseline_entities = set(prompt_entities)
        model_entities = set(captions.loc[idx])

        if not model_entities:
            # No caption entities: nothing can be hallucinated.
            score = 0.0
        else:
            # Hallucinated entities = model entities NOT in baseline
            hallucinated = model_entities - baseline_entities
            score = len(hallucinated) / len(model_entities)
        mhal_scores.append(score)

    # An empty baseline would otherwise divide by zero.
    avg_mhal = sum(mhal_scores) / len(mhal_scores) if mhal_scores else 0.0
    print(f"{model_name} → Average MHalDetect: {avg_mhal:.4f}")
    return avg_mhal

# ===== Score every configured model =====
# Maps each model name in `files` to the average returned by compute_mhal.
results = {name: compute_mhal(csv_name, name) for name, csv_name in files.items()}

# ===== Write the scores next to the input CSVs as JSON =====
out_path = f"{folder_path}/MHalDetect_scores.json"
serialized = json.dumps(results, indent=4)
with open(out_path, "w") as fh:
    fh.write(serialized)

print(f"MHalDetect scores saved to {out_path}")
