In [3]:
import nltk

# Download the NLTK resources needed for METEOR scoring (WordNet corpora).
# The last call is the cell's final expression, so its return value is what
# the notebook displays as the cell output.
nltk.download('wordnet')
nltk.download('omw-1.4')  # WordNet multilingual data; only some setups need it alongside 'wordnet' (TODO confirm for the installed NLTK version)


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [9]:
import os
import pandas as pd
import json
from nltk.translate.meteor_score import meteor_score
from tqdm import tqdm
import nltk

# Make sure the NLTK WordNet corpus is present before any scoring happens in
# this cell; when the package is already installed NLTK just reports it as
# up-to-date.
nltk.download('wordnet')

# ===== Paths =====
# Every input and output of this cell lives under one Drive folder.
root_csv = "/content/drive/MyDrive/csvs"
baseline_file = os.path.join(root_csv, "DrawBenchPrompts.csv")

# Generated captions: model label -> full path of that model's caption CSV
generated_files = {
    label: os.path.join(root_csv, csv_name)
    for label, csv_name in (
        ("Flux-Dev", "meta_captions_Flux-Dev.csv"),
        ("SDXL", "meta_captions_sdxl.csv"),
        ("SD2", "meta_captions_sd_2.csv"),
    )
}

# JSON file that receives the per-model average scores
output_json = os.path.join(root_csv, "meteor_average_scores.json")

# ===== Load baseline prompts =====
# Read the baseline prompt table and build an image_name -> prompt lookup.
# If an image_name repeats, the later row wins.
baseline_df = pd.read_csv(baseline_file)
baseline_prompts_dict = {
    image_name: prompt
    for image_name, prompt in zip(baseline_df["image_name"], baseline_df["Prompts"])
}

# ===== Function to compute METEOR for one model =====
def compute_meteor(file_path):
    """Compute a METEOR score for every usable row of one model's caption CSV.

    The CSV must contain the columns ``"Prompts"`` (reference text) and
    ``"Meta Caption"`` (candidate text). Rows where either value is missing,
    or blank after stripping whitespace, are skipped. Both texts are
    tokenized by splitting on whitespace before scoring.

    Args:
        file_path: Path of the CSV file to read with ``pd.read_csv``.

    Returns:
        list: One METEOR score per retained row, in file order. The list is
        empty when no row has both a prompt and a caption.

    Raises:
        KeyError: If either required column is absent from the CSV.
    """
    df = pd.read_csv(file_path)

    # Drop rows where either 'Prompts' or 'Meta Caption' is NaN
    df = df.dropna(subset=["Prompts", "Meta Caption"])

    # Cast to str before touching the .str accessor. A column that is blank
    # in every row is parsed as float64, and an all-numeric column as a
    # numeric dtype; .str raises AttributeError on those (even when no rows
    # remain), which would crash instead of yielding an empty/valid result.
    prompts = df["Prompts"].astype(str)
    captions = df["Meta Caption"].astype(str)

    # Discard rows where either text is whitespace-only
    keep = (prompts.str.strip() != "") & (captions.str.strip() != "")
    prompts = prompts[keep]
    captions = captions[keep]

    pairs = tqdm(
        zip(prompts, captions),
        total=len(prompts),
        desc=f"Calculating METEOR for {os.path.basename(file_path)}",
    )

    # meteor_score takes a list of tokenized references plus one tokenized
    # candidate; each row has exactly one reference (its prompt).
    return [
        meteor_score([reference.split()], candidate.split())
        for reference, candidate in pairs
    ]

# ===== Compute for all models =====
# Score each model's caption file and keep only the mean per model.
average_scores = {}
for model_name, file_path in generated_files.items():
    scores = compute_meteor(file_path)
    if scores:
        avg = sum(scores) / len(scores)
    else:
        # No usable rows for this model: record 0 instead of dividing by zero
        avg = 0
    average_scores[model_name] = avg

# ===== Save averages to JSON =====
# Serialize first, then write the text out in a single call.
with open(output_json, "w") as f:
    f.write(json.dumps(average_scores, indent=4))

print("Average METEOR scores saved to:", output_json)
print(average_scores)


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
Calculating METEOR for meta_captions_Flux-Dev.csv: 100%|██████████| 170/170 [00:00<00:00, 520.32it/s]
Calculating METEOR for meta_captions_sdxl.csv: 100%|██████████| 200/200 [00:00<00:00, 246.98it/s]
Calculating METEOR for meta_captions_sd_2.csv: 100%|██████████| 200/200 [00:00<00:00, 272.05it/s]

Average METEOR scores saved to: /content/drive/MyDrive/csvs/meteor_average_scores.json
{'Flux-Dev': 0.18172076704307052, 'SDXL': 0.15261947975059628, 'SD2': 0.14240416027352182}



