In [4]:
import ast
import json

import pandas as pd
from tqdm import tqdm

# ===== File paths =====
# Every CSV below is looked up inside this Drive folder.
folder_path = "/content/drive/MyDrive/phi3mini"

# Reference entities that each model's entities are scored against.
baseline_file = "mscoco_captions_entities.csv"

# Model label -> CSV holding that model's extracted caption entities.
# Insertion order is the order in which models are processed and reported.
files = {
    "Flux-Dev": "fluxdev_meta_entities.csv",
    "sd_2": "sd2_meta_entities.csv",
    "sdxl": "sdxl_meta_entities.csv",
}

# ===== Load baseline =====
baseline_df = pd.read_csv(f"{folder_path}/{baseline_file}")
# Entity cells are stored as the text of a Python literal. literal_eval
# parses literals only, so a cell containing arbitrary code raises instead of
# being executed (which eval() would do). Missing cells become an empty list.
baseline_df["mscoco_caption_entities"] = baseline_df["mscoco_caption_entities"].apply(
    lambda x: ast.literal_eval(x) if pd.notna(x) else []
)

# ===== Function to compute ALOHA for one model =====
def compute_aloha(model_file, model_name):
    """Compute and print the average ALOHA score of one model vs. the baseline.

    Reads ``{folder_path}/{model_file}``, parses its ``Meta Caption_entities``
    column, inner-joins it with the module-level ``baseline_df`` on
    ``image_name`` and scores every joined row as the fraction of baseline
    entities that also occur among the model's entities.

    Args:
        model_file: CSV file name, resolved relative to ``folder_path``.
        model_name: Label shown in the progress bar and the printed summary.

    Returns:
        The mean per-row score as a float, or ``0.0`` when the join is empty.

    Raises:
        ValueError, SyntaxError: If a non-null entity cell is not the text of
            a valid Python literal.
    """
    df = pd.read_csv(f"{folder_path}/{model_file}")
    # literal_eval accepts literals only, so CSV content can never be executed
    # as code the way it could with eval(). Missing cells become an empty list.
    df["Meta Caption_entities"] = df["Meta Caption_entities"].apply(
        lambda x: ast.literal_eval(x) if pd.notna(x) else []
    )

    # Merge baseline and model by image_name; images missing on either side
    # are dropped by the inner join.
    merged = pd.merge(
        baseline_df, df, on="image_name", how="inner", suffixes=("_baseline", "_model")
    )

    # Only the two entity columns are needed, so iterate them directly rather
    # than materialising a Series per row with iterrows().
    entity_pairs = zip(
        merged["mscoco_caption_entities"], merged["Meta Caption_entities"]
    )

    aloha_scores = []
    for baseline_raw, model_raw in tqdm(
        entity_pairs, total=len(merged), desc=f"Computing ALOHA for {model_name}"
    ):
        baseline_entities = set(baseline_raw)
        model_entities = set(model_raw)

        if not baseline_entities:
            # Nothing to match against: full score only if the model also
            # produced no entities.
            score = 0.0 if model_entities else 1.0
        else:
            score = len(baseline_entities & model_entities) / len(baseline_entities)

        aloha_scores.append(score)

    avg_aloha = sum(aloha_scores) / len(aloha_scores) if aloha_scores else 0.0
    print(f"{model_name} → Average ALOHA: {avg_aloha:.4f}")
    return avg_aloha

# ===== Compute for all models =====
# Maps each model label to its average score, in the order listed in `files`.
results = {
    model_name: compute_aloha(model_file, model_name)
    for model_name, model_file in files.items()
}

# ===== Save to JSON =====
# The scores are written next to the input CSVs.
out_path = f"{folder_path}/aloha_scores.json"
serialized = json.dumps(results, indent=4)
with open(out_path, "w") as f:
    f.write(serialized)

print(f"ALOHA scores saved to {out_path}")


Computing ALOHA for Flux-Dev: 100%|██████████| 200/200 [00:00<00:00, 9709.37it/s]


Flux-Dev → Average ALOHA: 0.1693


Computing ALOHA for sd_2: 100%|██████████| 195/195 [00:00<00:00, 9472.12it/s]


sd_2 → Average ALOHA: 0.1351


Computing ALOHA for sdxl: 100%|██████████| 200/200 [00:00<00:00, 4324.85it/s]

sdxl → Average ALOHA: 0.0513
ALOHA scores saved to /content/drive/MyDrive/phi3mini/aloha_scores.json



