## Run train.py

Mount Google Drive

In [None]:
# -----------------------------------------
# ✅ Mount Google Drive & move to repo
# -----------------------------------------
# Colab-only cell: mounts the user's Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Root Directory of the project
# `%cd` is an IPython magic (not plain Python); it changes the working
# directory of the notebook process. NOTE(review): later cells pass relative
# paths such as "config/...", which presumably rely on this directory.
%cd /content/drive/MyDrive/TalentSprint/Project/MMRCS/image-captioning

# -----------------------------------------
# ✅ Install your package (editable mode)
# -----------------------------------------
# Disabled by default; presumably to be uncommented on a fresh runtime where
# the project package is not yet installed -- TODO confirm.
#!pip install -e .


Mounted at /content/drive
/content/drive/MyDrive/TalentSprint/Project/MMRCS/image-captioning


# Training the PyTorch models:

1.   Scratch CNN + LSTM : CNN built from scratch (no pre-trained features) + LSTM decoder
2.   ResNet (pretrained) + LSTM : ResNet50 fully frozen (feature extractor only) + LSTM decoder
3.   ResNet (fine-tuned) + LSTM : ResNet50 fine-tuned (layer3 & layer4) with Dropout+BatchNorm head + LSTM decoder
4.   ResNet (Fine Tune 2) + LSTM : ResNet50 fine-tuned (layer2, layer3 & layer4) with Dropout+BatchNorm head + LSTM decoder
5.   ResNet (Fine Tune 2) + Attention + LSTM decoder





Train the model and save it in the Artifacts folder

*   Training creates the model and saves it in the Artifacts folder (model.pth: best model & last epoch)
*   These files will be used for image captioning

All outputs (png, csv, metadata) are saved in the /outputs folder



In [None]:
# Run this to train the model
#!python scripts/train.py
from imgcapgen.scripts.train import run_training_pipeline
# -----------------------------------------
# ✅ Choose dataset & model
# -----------------------------------------
# Dataset name -> YAML config path (relative to the current working directory).
available_configs = {
    dataset: f"config/config_{dataset}.yaml"
    for dataset in ("flickr8k", "flickr30k")
}

# Display name -> model identifier handed to run_training_pipeline.
# Every identifier is simply the lower-cased display name.
available_models = {
    display_name: display_name.lower()
    for display_name in (
        "ScratchCNN_LSTM",
        "Resnet_LSTM",
        "ResnetFinetune_LSTM",
        "ResnetFinetune2_LSTM",
        "ResnetFinetune2_Attention_LSTM",
        "ResnetFinetune2_Attention2_LSTM",
        "ResnetFinetune2_Attention3_LSTM",
        "ResnetFinetune2_Attention4_LSTM",
    )
}

## 1. Scratch CNN + LSTM model Training

In [None]:
# Scratch CNN + LSTM model training.
# Both selections are keys into `available_configs` / `available_models`.
selected_dataset = "flickr8k"    # or "flickr30k"
selected_model = "ScratchCNN_LSTM"

# The emoji was previously mis-encoded (UTF-8 bytes shown as cp1252 mojibake).
print(f"\n🚀 Training {selected_model} on {selected_dataset} dataset...\n")

# Pass the config path and the model identifier for the selections above.
trained_model = run_training_pipeline(
    available_configs[selected_dataset],
    available_models[selected_model],
)

## 2. ResNET (Pre-train) + LSTM model Training

In [None]:
# ResNet (pretrained) + LSTM model training.
# Both selections are keys into `available_configs` / `available_models`.
selected_dataset = "flickr8k"    # or "flickr30k"
selected_model = "Resnet_LSTM"

# The emoji was previously mis-encoded (UTF-8 bytes shown as cp1252 mojibake).
print(f"\n🚀 Training {selected_model} on {selected_dataset} dataset...\n")

# Pass the config path and the model identifier for the selections above.
trained_model = run_training_pipeline(
    available_configs[selected_dataset],
    available_models[selected_model],
)

## 3. ResNET (Fine Tune) + LSTM model Training

In [None]:
# ResNet (fine-tuned) + LSTM model training.
# Both selections are keys into `available_configs` / `available_models`.
selected_dataset = "flickr8k"    # or "flickr30k"
selected_model = "ResnetFinetune_LSTM"

# The emoji was previously mis-encoded (UTF-8 bytes shown as cp1252 mojibake).
print(f"\n🚀 Training {selected_model} on {selected_dataset} dataset...\n")

# Pass the config path and the model identifier for the selections above.
trained_model = run_training_pipeline(
    available_configs[selected_dataset],
    available_models[selected_model],
)

## 4. ResNET (Fine Tune 2) + LSTM model Training

In [None]:
# ResNet (Fine Tune 2) + LSTM model training.
# Both selections are keys into `available_configs` / `available_models`.
selected_dataset = "flickr8k"    # or "flickr30k"
selected_model = "ResnetFinetune2_LSTM"

# The emoji was previously mis-encoded (UTF-8 bytes shown as cp1252 mojibake).
print(f"\n🚀 Training {selected_model} on {selected_dataset} dataset...\n")

# Pass the config path and the model identifier for the selections above.
trained_model = run_training_pipeline(
    available_configs[selected_dataset],
    available_models[selected_model],
)

## 5. ResNET (Fine Tune 2) + Attention + LSTM model Training

In [None]:
# ResNet (Fine Tune 2) + Attention + LSTM model training.
# Both selections are keys into `available_configs` / `available_models`.
selected_dataset = "flickr8k"    # or "flickr30k"
selected_model = "ResnetFinetune2_Attention_LSTM"

# The emoji was previously mis-encoded (UTF-8 bytes shown as cp1252 mojibake).
print(f"\n🚀 Training {selected_model} on {selected_dataset} dataset...\n")

# Pass the config path and the model identifier for the selections above.
trained_model = run_training_pipeline(
    available_configs[selected_dataset],
    available_models[selected_model],
)

Output hidden; open in https://colab.research.google.com to view.

## 5A. ResNET (Fine Tune 2) + Attention + LSTM model Training (30 Epochs)

In [None]:
# ResNet (Fine Tune 2) + Attention + LSTM model training -- "Attention2"
# variant (the 30-epoch run, per this section's heading).
# Both selections are keys into `available_configs` / `available_models`.
selected_dataset = "flickr8k"    # or "flickr30k"
selected_model = "ResnetFinetune2_Attention2_LSTM"

# The emoji was previously mis-encoded (UTF-8 bytes shown as cp1252 mojibake).
print(f"\n🚀 Training {selected_model} on {selected_dataset} dataset...\n")

# Pass the config path and the model identifier for the selections above.
trained_model = run_training_pipeline(
    available_configs[selected_dataset],
    available_models[selected_model],
)

Output hidden; open in https://colab.research.google.com to view.

## 5B. ResNET (Fine Tune 2) + Attention + LSTM model Training (30 Epochs) (Attention 3)

In [None]:
# ResNet (Fine Tune 2) + Attention + LSTM model training -- "Attention3"
# variant (30 epochs, per this section's heading).
# Both selections are keys into `available_configs` / `available_models`.
selected_dataset = "flickr8k"    # or "flickr30k"
selected_model = "ResnetFinetune2_Attention3_LSTM"

# The emoji was previously mis-encoded (UTF-8 bytes shown as cp1252 mojibake).
print(f"\n🚀 Training {selected_model} on {selected_dataset} dataset...\n")

# Pass the config path and the model identifier for the selections above.
trained_model = run_training_pipeline(
    available_configs[selected_dataset],
    available_models[selected_model],
)

Output hidden; open in https://colab.research.google.com to view.

## 5C. ResNET (Fine Tune 2) + Attention + LSTM model Training (Attention + Scheduler LR)

In [None]:
# ResNet (Fine Tune 2) + Attention + LSTM model training -- "Attention4"
# variant (attention + learning-rate scheduler, per this section's heading).
# Both selections are keys into `available_configs` / `available_models`.
selected_dataset = "flickr8k"    # or "flickr30k"
selected_model = "ResnetFinetune2_Attention4_LSTM"

# The emoji was previously mis-encoded (UTF-8 bytes shown as cp1252 mojibake).
print(f"\n🚀 Training {selected_model} on {selected_dataset} dataset...\n")

# Pass the config path and the model identifier for the selections above.
trained_model = run_training_pipeline(
    available_configs[selected_dataset],
    available_models[selected_model],
)

Output hidden; open in https://colab.research.google.com to view.