# Evaluate Generated Captions

In [None]:
!pip install evaluate

In [2]:
import json

def load_captions(filename, model):
    references = []
    predictions = []
    for i, line in enumerate(open(filename, "r").readlines()):
        caption_dict = json.loads(line)
        references.append([caption_dict["caption_0"]])
        references.append([caption_dict["caption_1"]])
        predictions.append(caption_dict[f"{model}_caption_0"])
        predictions.append(caption_dict[f"{model}_caption_1"])
    return references, predictions

In [4]:
path = "../results/captions/"

models = [
    (path + "ofa_tiny.jsonl", "OFA$_{Tiny}$", 20, 20, "single-stream", "merged"),
    (path + "ofa_base.jsonl", "OFA$_{Base}$", 20, 20, "single-stream", "merged"),
    (path + "ofa_medium.jsonl", "OFA$_{Medium}$", 20, 20, "single-stream", "merged"),
    (path + "ofa_large.jsonl", "OFA$_{Large}$", 20, 20, "single-stream", "merged"),
    (path + "ofa_huge.jsonl", "OFA$_{Huge}$", 20, 20, "single-stream", "merged"),
    (path + "blip_b16.jsonl", "BLIP (ViT-B/16)", 129, 130, "dual-stream", "modality-specific, merged"),
    (path + "blip_l16.jsonl", "BLIP (ViT-L/16)", 129, 130, "dual-stream", "modality-specific"),
]

In [5]:
from collections import defaultdict
import evaluate

bleu = evaluate.load("bleu")
scores = defaultdict(dict)

for model in models:
    name = "blip" if "blip" in model[0] else "ofa"
    references, predictions = load_captions(model[0], name)
    for i in range(1, 5):
        results = bleu.compute(predictions=predictions, references=references, max_order=i)
        scores[model[1]][f"bleu-{i}"] = round(results["bleu"]*100, 2)

In [6]:
import pandas as pd

scores_df = pd.DataFrame(scores).transpose()
scores_df

Unnamed: 0,bleu-1,bleu-2,bleu-3,bleu-4
OFA$_{Tiny}$,14.4,5.76,2.5,1.3
OFA$_{Base}$,16.68,7.12,3.26,1.58
OFA$_{Medium}$,16.28,6.47,2.84,1.39
OFA$_{Large}$,15.1,6.45,3.03,1.53
OFA$_{Huge}$,15.73,6.94,3.06,1.35
BLIP (ViT-B/16),17.8,8.1,3.96,2.01
BLIP (ViT-L/16),17.96,8.31,4.36,2.5


In [7]:
scores_df.to_latex()

'\\begin{tabular}{lrrrr}\n\\toprule\n{} &  bleu-1 &  bleu-2 &  bleu-3 &  bleu-4 \\\\\n\\midrule\nOFA\\$\\_\\{Tiny\\}\\$    &   14.40 &    5.76 &    2.50 &    1.30 \\\\\nOFA\\$\\_\\{Base\\}\\$    &   16.68 &    7.12 &    3.26 &    1.58 \\\\\nOFA\\$\\_\\{Medium\\}\\$  &   16.28 &    6.47 &    2.84 &    1.39 \\\\\nOFA\\$\\_\\{Large\\}\\$   &   15.10 &    6.45 &    3.03 &    1.53 \\\\\nOFA\\$\\_\\{Huge\\}\\$    &   15.73 &    6.94 &    3.06 &    1.35 \\\\\nBLIP (ViT-B/16) &   17.80 &    8.10 &    3.96 &    2.01 \\\\\nBLIP (ViT-L/16) &   17.96 &    8.31 &    4.36 &    2.50 \\\\\n\\bottomrule\n\\end{tabular}\n'