# Model Evaluation

---

## Setup


In [None]:
# Move up to the project root directory (parent directory) so that the `src`
# package used by the module imports below can be resolved.
import os

# Guard the move: an unconditional os.chdir("../") climbs one more level every
# time this cell is re-run (or when the kernel already starts at the project
# root), which breaks the `src` imports and the relative data paths used later.
# Only step up while the `src/` directory is not visible from the current
# working directory.
if not os.path.isdir("src"):
    os.chdir("../")

# Current working directory should now be project root
print("Current working directory:", os.getcwd())

In [None]:
# Imports

import json
import tempfile

import matplotlib.pyplot as plt
import torch
from transformers import set_seed

from src.dataset import CocoDataset
from src.eval import compute_caption_metrics, evaluate_captions, generate_and_evaluate
from src.models import ImageCaptioningModel, TransformerMappingNetwork
from src.visualize import create_captioning_dataset
from src.utils import load_gpt2_tokenizer

In [None]:
# Device: use CUDA when PyTorch reports it as available, otherwise the CPU
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Bare last expression so the notebook echoes the selected device
DEVICE

In [None]:
# Seed
# Fixed value shared by the whole notebook so repeated runs start from the
# same random state.
SEED = 42

# Helper function (imported from transformers) that sets the seed in all
# relevant libraries with a single call.
set_seed(seed=SEED)

In [None]:
# Maximum caption length handed to the dataset (reused for generation later on)
MAX_CAPTION_LENGTH = 50

# Directory layout; all paths are relative to the current working directory
DATA_DIR = "coco_data/"
EMBEDDINGS_PATH = f"{DATA_DIR}embeddings/"
ANNOTATIONS_PATH = f"{DATA_DIR}annotations/"
CHECKPOINTS_PATH = "checkpoints/"

# Tokenizer
gpt2_tokenizer = load_gpt2_tokenizer()

# Validation Dataset
val_dataset = CocoDataset(
    embeddings_path=f"{EMBEDDINGS_PATH}val_clip_embeddings.pt",
    annotations_path=f"{ANNOTATIONS_PATH}captions_val2017.json",
    tokenizer=gpt2_tokenizer,
    max_length=MAX_CAPTION_LENGTH,
    normalize_embeddings=False,
)

# Note: there is no test dataset because the test annotations are not publicly available

---

## Prepare Model


In [None]:
# Load Trained Model
# NOTE(review): these hyperparameters presumably have to match the ones the
# checkpoint was trained with — confirm against the training notebook.

# Transformer Mapping Network Params
EMBED_DIM = 512  # Embedding dimension
GPT_DIM = 768  # GPT-2 embedding dimension
PREFIX_LENGTH = 40
HIDDEN_LENGTH = 40

# Image Captioning Model Params
# Going by the flag name, True freezes the GPT-2 weights, i.e. GPT-2 is NOT
# fine-tuned alongside the mapping network.
FREEZE_GPT_WEIGHTS = True
PREFIX_TASK_PROMPT: str | None = None

# Mapping Network
mapping_network = TransformerMappingNetwork(
    prefix_length=PREFIX_LENGTH,
    hidden_length=HIDDEN_LENGTH,
    embed_dim=EMBED_DIM,
    gpt_dim=GPT_DIM,
)

# Image Captioning Model, moved to the selected device right after construction
model = ImageCaptioningModel(
    tokenizer=gpt2_tokenizer,
    mapping_network=mapping_network,
    freeze_gpt_weights=FREEZE_GPT_WEIGHTS,
    prefix_task_prompt=PREFIX_TASK_PROMPT,
).to(DEVICE)

# Load checkpoint
# NOTE(review): nothing in this cell switches the model to eval mode; confirm
# that the evaluation helper takes care of it.
model.load_saved_parameters(f"{CHECKPOINTS_PATH}base_model_epoch_1.pth")

# View model architecture
print(model)

---

## Validate Model


In [None]:
# Validation Params

# Batching
EVAL_BATCH_SIZE = 64

# Caption generation settings (passed as max_length / temperature / top_p below)
EVAL_MAX_CAPTION_LENGTH = MAX_CAPTION_LENGTH
EVAL_TEMPERATURE = 1.0
EVAL_TOP_P = 0.9

# NOTE(review): not referenced by the evaluation cells visible here; looks like
# a leftover from the training setup — confirm before removing.
EVAL_EVERY_EPOCH = 1

In [None]:
# Generate captions for the validation set and score them against the
# val2017 annotations; the helper returns a (predictions, metrics) pair.
predictions, metrics = generate_and_evaluate(
    # What to evaluate
    model=model,
    dataset=val_dataset,
    annotations_path=f"{ANNOTATIONS_PATH}captions_val2017.json",
    # Generation settings
    max_length=EVAL_MAX_CAPTION_LENGTH,
    temperature=EVAL_TEMPERATURE,
    top_p=EVAL_TOP_P,
    # Data loading / execution
    batch_size=EVAL_BATCH_SIZE,
    num_workers=0,
    device=DEVICE,
)

In [None]:
# Echo both results as a single tuple in the cell output.
# NOTE(review): `predictions` may be large for the full validation set —
# consider displaying only a small sample alongside `metrics`.
(predictions, metrics)