## Compare the Models (with Unseen/Test Data)


In [1]:
# -----------------------------------------
# ✅ Mount Google Drive & move to repo
# -----------------------------------------
from google.colab import drive
drive.mount('/content/drive')

# Root Directory of the project
%cd /content/drive/MyDrive/TalentSprint/Project/MMRCS/image-captioning

# -----------------------------------------
# ✅ Install your package (editable mode)
# -----------------------------------------
#!pip install -e .

# -----------------------------------------
# ✅ Import & run your compare function
# -----------------------------------------
import sys
sys.path.append(".")  # ensure current repo in sys.path




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/TalentSprint/Project/MMRCS/image-captioning


##  Compare Models: Training Data
Uses the metadata file created during training

1. Scratch CNN + LSTM: CNN built from scratch (no pre-trained features) + LSTM decoder
2. ResNet (pretrained) + LSTM: ResNet50 fully frozen (feature extractor only) + LSTM decoder
3. ResNet (finetuned) + LSTM: ResNet50 fine-tuned (layer 3 & layer 4) with Dropout+BatchNorm head + LSTM decoder
4. ResNet (finetune 2) + LSTM: ResNet50 fine-tuned (layers 2, 3 & 4) with Dropout+BatchNorm head + LSTM decoder
5. ResNet (finetune 2) + Attention + LSTM





**Models trained using the Flickr8k dataset**

The output files are saved in the /output/flickr8k folder.
The model files and the vocab file are saved in the /artifacts/flickr8k folder.

1. The best model is saved in the /artifacts/flickr8k folder.
2. These model.pth files can be used for evaluation, comparison between the models, image captioning, etc.
3. The metadata saved during training includes training loss, validation loss, time for each epoch, best epoch, number of epochs, etc.
4. Outputs including PNG, CSV, and metadata files are saved in /outputs/flickr8k.




Select the models & define dataset
Uses the metadata file created during training

In [2]:
import os
import sys
from pathlib import Path
import kagglehub

from imgcapgen.utils.common_utils import prepare_unseen_data

# add the models here
from imgcapgen.models import (
    ScratchCNN_LSTM,
    ResNet_LSTM,
    ResNetFineTune_LSTM,
    ResNetFineTune2_LSTM,
    ResNetFineTune2_Attention_LSTM
)

# Models to evaluate: display label -> (identifier string, model class).
# NOTE(review): the identifier string presumably selects the saved artifacts
# for that model — confirm against evaluate_models_on_unseen.
available_models = {
    "ScratchCNN+LSTM": ("scratchcnn_lstm", ScratchCNN_LSTM),
    "ResNet+LSTM": ("resnet_lstm", ResNet_LSTM),
    "ResNetFineTune+LSTM": ("resnetfinetune_lstm", ResNetFineTune_LSTM),
    "ResNetFineTune2+LSTM": ("resnetfinetune2_lstm", ResNetFineTune2_LSTM),
    "ResNetFineTune2+Attention+LSTM": ("resnetfinetune2_attention3_lstm", ResNetFineTune2_Attention_LSTM),
    "ResNetFineTune2+Attention(LRS)+LSTM": ("resnetfinetune2_attention4_lstm", ResNetFineTune2_Attention_LSTM),
}

# Dataset the models were trained on, its config file, and how many samples
# the evaluation should plot.
trained_dataset = "flickr8k"
config_file = "config/config_flickr8k.yaml"
num_samples_to_plot = 5

# Unseen data (downloaded from Kaggle)
data_source = "kaggle"
dataset_url = "srinivasac/flickr30k-validation-set-100-images"
unseen_dataset = "flickr30k_validation_set_100_images"
image_path = "Images"
captions_path = "captions.csv"

# Normalise any backslashes so the dataset handle always uses "/" separators.
dataset_source = str(dataset_url).replace("\\", "/")
download_path = kagglehub.dataset_download(dataset_source, force_download=True)
print(f"[INFO] Kaggle dataset downloaded to: {download_path}")
download_path = Path(download_path)

# The images and the captions CSV both live in a sub-folder named after the dataset.
unseen_root = download_path / unseen_dataset
image_dir_path = unseen_root / image_path
captions_csv_path = unseen_root / captions_path

# Fail fast with a clear message if the download layout is not what we expect,
# instead of failing later inside the data preparation / evaluation code.
if not image_dir_path.is_dir():
    raise FileNotFoundError(f"Image directory not found: {image_dir_path}")
if not captions_csv_path.is_file():
    raise FileNotFoundError(f"Captions file not found: {captions_csv_path}")

print("\n✅ Paths configured:")
print(f"📂 Unseen dataset path:    {download_path}")
print(f"📂 Image path:    {image_dir_path}")
print(f"📝 Captions path: {captions_csv_path}")


# Create your test dataset (unseen data with ~100 or 500 images)
# [(image_path, [list of ground truth captions]), ...]
unseen_data = prepare_unseen_data(captions_csv_path, image_dir_path)

# Show only a short preview: printing the whole list floods the cell output
# with every image path and caption.
preview_count = 3
print(f"\nUnseen Data (first {preview_count} entries): ")
for entry in unseen_data[:preview_count]:
    print(entry)
print(f"Unseen Data Size: {len(unseen_data)}")


[INFO] Kaggle dataset downloaded to: /kaggle/input/flickr30k-validation-set-100-images

✅ Paths configured:
📂 Unseen dataset path:    /kaggle/input/flickr30k-validation-set-100-images
📂 Image path:    /kaggle/input/flickr30k-validation-set-100-images/flickr30k_validation_set_100_images/Images
📝 Captions path: /kaggle/input/flickr30k-validation-set-100-images/flickr30k_validation_set_100_images/captions.csv
✅ Prepared unseen_data with 100 images

Unseen Data: 
[('/kaggle/input/flickr30k-validation-set-100-images/flickr30k_validation_set_100_images/Images/4567734402.jpg', ['A man and a woman are about to kiss .', 'A woman and a man getting closer .', 'A couple that is about to kiss .', 'Two young people are kissing .', 'A man in a blue and red shirt hugs a woman in a blue top .']), ('/kaggle/input/flickr30k-validation-set-100-images/flickr30k_validation_set_100_images/Images/2137071442.jpg', ['A boy receives a light saber for Christmas .', 'Young boy holding a light saber in front of a C

## 1. Compare Models: Models trained with the flickr8k dataset & validated on unseen data created from flickr30k (100 images)


In [8]:
# Project script that runs the unseen-data evaluation for the selected models.
from imgcapgen.scripts.evaluate_models_on_unseen import evaluate_models_on_unseen

# Every argument was defined in the configuration cell earlier in the notebook;
# they are passed positionally, so the order below matters.
evaluate_models_on_unseen(
    config_file,
    available_models,
    unseen_data,
    trained_dataset,
    num_samples_to_plot,
)

Output hidden; open in https://colab.research.google.com to view.

## 2. Compare Models: Models trained with the flickr30k dataset & validated on unseen data created from flickr8k

In [None]:
from imgcapgen.scripts.compare_models import compare_models_across_metrics

# Dataset that the models being compared were trained on.
selected_dataset = "flickr30k"

# NOTE(review): the section heading mentions validation on unseen data, while
# this cell announces a comparison on the training data — confirm which one
# is intended.
print(f"\n🚀 Comparison of Models (Training Data)...\n")

# root_dir="." points the comparison at the current working directory.
compare_models_across_metrics(
    selected_dataset,
    available_models,
    root_dir=".",
)