<a href="https://colab.research.google.com/github/soul11zz/imagecaption/blob/main/fine_tune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Train with PyTorch Lightning

In [1]:
import os
import os.path as osp
# Later cells use paths relative to the repository root ("train/requirements.txt",
# "train"), so step into the clone when the notebook was started from its parent.
# isdir (rather than exists) avoids calling chdir on a regular file that happens
# to be named "imagecaption", which would raise NotADirectoryError.
if osp.isdir("imagecaption"):
    os.chdir("imagecaption")

In [None]:
!pip install -r train/requirements.txt

In [2]:
import os.path as osp
import sys
# Put the absolute path of ./train on the import search path (once), so the
# modules imported right below can be resolved.
train_dir = osp.abspath("train")
if sys.path.count(train_dir) == 0:
    sys.path.append(train_dir)
    
import pl_module
import pytorch_lightning as pl
from huggingface_hub import HfApi, notebook_login
from fine_tune import get_input_model_name

# Fix the global random seed to 42 for reproducible runs; workers=True asks
# Lightning to seed dataloader worker processes as well.
pl.seed_everything(42, workers=True)


Global seed set to 42


42

### Login to HuggingFace

In [3]:

# Show the Hugging Face login widget. The "Upload Best Model" cell pushes to
# the Hub, which presumably needs this session to be authenticated.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

### Load the model

In [None]:
from datasets import load_dataset
from torch.utils.data import DataLoader
from pl_module import ImageCaptioningModule
from dataset import ImageCaptioningDataset
from transformers import GitProcessor, GitForCausalLM

from pytorch_lightning.loggers.tensorboard import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint

# Hub identifiers: the dataset id doubles as the id of the output model repo,
# and `in_model` is the pretrained checkpoint that gets fine-tuned.
dataset_desc_only = "soul11zz/image-caption-desc-only"
out_model = dataset_desc_only
in_model = "microsoft/git-large"
# model_name = get_input_model_name(out_model)

# Fetch the train / validation / test splits, in that order.
dt_train, dt_val, dt_test = (
    load_dataset(dataset_desc_only, split=split_name)
    for split_name in ("train", "validation", "test")
)

# One processor instance is shared by all three dataset wrappers.
processor = GitProcessor.from_pretrained(in_model)
train_dataset, val_dataset, test_dataset = (
    ImageCaptioningDataset(split_data, processor)
    for split_data in (dt_train, dt_val, dt_test)
)


### Prelims for PL Module

In [17]:
# Training configuration.
batch_size = 16
model_dir = "tmp/model"  # checkpoint directory (passed to ModelCheckpoint below)
epochs = 10

# exist_ok=True makes this idempotent on re-runs and removes the
# check-then-create race of a separate exists() test.
os.makedirs(model_dir, exist_ok=True)

# Windows ("nt") loads data in the main process (0 workers); elsewhere use one
# worker per CPU. os.cpu_count() may return None when the count cannot be
# determined, which DataLoader would reject, so fall back to 0 in that case.
num_workers = (os.cpu_count() or 0) if os.name != "nt" else 0

# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

# Pretrained weights to fine-tune, loaded from the `in_model` checkpoint.
model = GitForCausalLM.from_pretrained(in_model)

# Callback list for the Trainer; starts empty and is extended further down.
callbacks = list()

# Lightning module bundling the processor, the model and the train/val loaders.
pl_train_module = ImageCaptioningModule(
    processor,
    model,
    train_loader,
    val_loader,
    learning_rate=1e-4,
)

### Trainer
# TensorBoard event files go under tb_logs/image-captioning.
logger = TensorBoardLogger("tb_logs", name="image-captioning")

# Keep the two checkpoints with the lowest monitored "val_loss" in `model_dir`.
checkpoint = ModelCheckpoint(
    dirpath=model_dir,
    filename="imcap-{epoch:02d}-{val_loss:.2f}",
    monitor="val_loss",
    mode="min",
    save_top_k=2,
)

callbacks.append(checkpoint)


### Create PL Trainer

In [18]:
# Main Trainer for fine-tuning: single CUDA device, 16-bit mixed precision,
# validation after every epoch, and one sanity-check validation batch up front.
# The device is selected with accelerator/devices (the same flags the LR-tuning
# Trainer below uses) instead of the deprecated `gpus=` argument.
trainer = pl.Trainer(
    logger=logger,
    accelerator="cuda",
    devices=1,
    callbacks=callbacks,
    max_epochs=epochs,
    check_val_every_n_epoch=1,
    # val_check_interval=50,
    precision=16,
    num_sanity_val_steps=1,
)


Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


### (optional) Tune Learning Rate

In [14]:
# Separate Trainer used only for the learning-rate search; it shares no logger
# or callbacks with the training `trainer`.
tuner = pl.Trainer(
    accelerator="cuda",
    devices=1,
    max_epochs=1,
    auto_lr_find=True,
)
tuner.tune(pl_train_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Learning rate set to 0.0009120108393559097
Restoring states from the checkpoint path at /notebooks/imagecaption/.lr_find_915fbacf-b635-4397-850a-07eaf1675ebe.ckpt


{'lr_find': <pytorch_lightning.tuner.lr_finder._LRFinder at 0x7f6222823040>}

In [19]:
# Display the learning rate currently stored on the module.
# NOTE(review): the recorded output (0.0001) equals the constructor value, not
# the tuner's suggestion; the execution counts suggest the module was rebuilt
# after tuning. Confirm which learning rate is intended for training.
pl_train_module.lr


0.0001

### Run Training

In [None]:
# Run fine-tuning. No dataloaders are passed here; the module was constructed
# with train_loader/val_loader and presumably serves them itself. TODO confirm.
trainer.fit(pl_train_module)

### Upload Best Model

In [24]:
# Local export directory for the best model and its processor.
best_model_dir = "tb_logs/image-captioning/best_model"

# Rebuild the Lightning module from the best checkpoint tracked by the
# ModelCheckpoint callback, handing it the same constructor arguments.
pl_model_best = ImageCaptioningModule.load_from_checkpoint(
    checkpoint.best_model_path,
    processor=processor,
    model=model,
    train_dataloader=train_loader,
    val_dataloader=val_loader,
)

# Save model and processor side by side and push both to the `out_model` repo.
pl_model_best.model.save_pretrained(best_model_dir, push_to_hub=True, repo_id=out_model)
pl_model_best.processor.save_pretrained(best_model_dir, push_to_hub=True, repo_id=out_model)


pytorch_model.bin:   0%|          | 0.00/707M [00:00<?, ?B/s]

Upload 1 LFS files:   0%|          | 0/1 [00:00<?, ?it/s]

### Evaluate Best Model

In [None]:
# Evaluate the module restored from the best checkpoint (`pl_model_best`), as
# the section title says, rather than the in-training module. `test_loader` is
# passed explicitly: it is built in the setup cell but never handed to the
# module, so the held-out test split would otherwise go unused.
trainer.test(pl_model_best, dataloaders=test_loader)