In [1]:
import ast
import json

import pandas as pd
from tqdm import tqdm

# ===== File paths =====
# Directory that every CSV below is read from.
folder_path = "/content/drive/MyDrive/phi3mini"
# Model label -> CSV file name (relative to folder_path) holding that model's
# caption entities.
files = {
    "Flux-Dev": "fluxdev_meta_entities.csv",
    "sd_2": "sd2_meta_entities.csv",
    "sdxl": "sdxl_meta_entities.csv",
}
baseline_file = "mscoco_captions_entities.csv"

# ===== Load baseline =====
baseline_df = pd.read_csv(f"{folder_path}/{baseline_file}")
# The entity column is stored as the text of a Python literal. Parse it with
# ast.literal_eval rather than eval so that a malformed or malicious cell can
# never execute code; missing cells become an empty list.
baseline_df["mscoco_caption_entities"] = baseline_df["mscoco_caption_entities"].apply(
    lambda cell: ast.literal_eval(cell) if pd.notna(cell) else []
)

# ===== Function to compute MHalDetect for one model =====
def compute_mhal(model_file: str, model_name: str) -> float:
    """Compute and print the average MHalDetect score for one model.

    Reads ``{folder_path}/{model_file}``, parses its ``Meta Caption_entities``
    column, and inner-joins it with the module-level ``baseline_df`` on
    ``image_name``. For every joined row the score is the fraction of the
    model's distinct entities that are absent from the baseline's entities;
    a row whose model entity set is empty scores 0.0.

    Args:
        model_file: CSV file name, relative to ``folder_path``.
        model_name: Label used in the progress bar and the printed summary.

    Returns:
        The mean of the per-row scores, or 0.0 when the join yields no rows.

    Raises:
        ValueError, SyntaxError: If an entity cell is not a valid Python
            literal (raised by ``ast.literal_eval``).
    """
    df = pd.read_csv(f"{folder_path}/{model_file}")
    # literal_eval only accepts Python literals, so text coming from the CSV
    # cannot execute arbitrary code the way eval would allow.
    df["Meta Caption_entities"] = df["Meta Caption_entities"].apply(
        lambda cell: ast.literal_eval(cell) if pd.notna(cell) else []
    )

    # Merge baseline and model by image_name; images missing from either side
    # are dropped by the inner join.
    merged = pd.merge(
        baseline_df, df, on="image_name", how="inner", suffixes=("_baseline", "_model")
    )

    # Only the two entity columns are needed, so iterate over them directly
    # instead of building a Series per row with iterrows().
    entity_pairs = zip(
        merged["mscoco_caption_entities"], merged["Meta Caption_entities"]
    )

    mhal_scores = []
    for baseline_raw, model_raw in tqdm(
        entity_pairs,
        total=len(merged),
        desc=f"Computing MHalDetect for {model_name}",
    ):
        baseline_entities = set(baseline_raw)
        model_entities = set(model_raw)

        if not model_entities:
            # No entities were produced, so nothing can be hallucinated.
            score = 0.0
        else:
            # Hallucinated entities = model entities NOT in baseline.
            # NOTE(review): this is an exact string match; differences in case
            # or whitespace count as hallucinations — confirm that is intended.
            hallucinated = model_entities - baseline_entities
            score = len(hallucinated) / len(model_entities)

        mhal_scores.append(score)

    avg_mhal = sum(mhal_scores) / len(mhal_scores) if mhal_scores else 0.0
    print(f"{model_name} → Average MHalDetect: {avg_mhal:.4f}")
    return avg_mhal

# ===== Score every model listed in `files` =====
results = {}
for model_name in files:
    model_file = files[model_name]
    results[model_name] = compute_mhal(model_file, model_name)

# ===== Persist the per-model averages as JSON =====
out_path = folder_path + "/MHalDetect_scores.json"
with open(out_path, "w") as f:
    # Serialise first, then write the finished text in one call.
    f.write(json.dumps(results, indent=4))

print("MHalDetect scores saved to " + out_path)


Computing MHalDetect for Flux-Dev: 100%|██████████| 200/200 [00:00<00:00, 8132.59it/s]


Flux-Dev → Average MHalDetect: 0.9572


Computing MHalDetect for sd_2: 100%|██████████| 195/195 [00:00<00:00, 14802.35it/s]


sd_2 → Average MHalDetect: 0.9659


Computing MHalDetect for sdxl: 100%|██████████| 200/200 [00:00<00:00, 16555.05it/s]

sdxl → Average MHalDetect: 0.9856
MHalDetect scores saved to /content/drive/MyDrive/phi3mini/MHalDetect_scores.json



