In [36]:
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

import lightning as pl
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [13]:
# Every path below is resolved against the directory the notebook runs in.
root_dir = os.getcwd()

# Training and validation images live side by side under ./dataset.
train_dir, val_dir = (
    os.path.join(root_dir, 'dataset', split) for split in ('train', 'val')
)

# The saved initial weights are stored as ./base_model/base.
base_model_dir = os.path.join(root_dir, 'base_model')
base_model_path = os.path.join(base_model_dir, 'base')

print(train_dir)

C:\Users\Siam\desktop\465\dataset\train


In [81]:
def get_dataloader(img_folder, batch_size, num_workers=2, shuffle=True):
    """Build a ``DataLoader`` over the images found under ``img_folder``.

    Each image is resized to 128x128, converted to a tensor and normalised
    per channel with the mean/std listed below (the values commonly quoted
    for ImageNet).

    Args:
        img_folder: Root directory passed to ``datasets.ImageFolder``.
        batch_size: Number of samples per batch.
        num_workers: Number of loader worker processes; ``0`` loads data in
            the calling process.
        shuffle: Whether to reshuffle the data every epoch. Defaults to
            ``True`` (the previous, fixed behaviour); pass ``False`` when a
            stable order is wanted, e.g. for evaluation.

    Returns:
        A ``torch.utils.data.DataLoader`` over the folder.
    """
    preprocessing_transformation = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    dataset = datasets.ImageFolder(root=img_folder, transform=preprocessing_transformation)

    return DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        # DataLoader raises ValueError for persistent_workers=True when there
        # are no workers, so only request it when workers actually exist.
        persistent_workers=num_workers > 0,
    )

In [53]:
# Layer recipe read left to right: an int is the number of output channels of
# a 3x3 conv block, 'M' is a 2x2 max-pool.
default_config = [8, 'M', 8, 'M', 16, 'M', 16, 'M', 32, 32, 'M', 64, 64, 'M']


class CustomVGG(nn.Module):
    """Small VGG-style CNN: conv/batch-norm/ReLU blocks with max-pooling, then an MLP head.

    The submodule names (``conv_layers``, ``fc_layers``) and their layer order
    are part of the saved ``state_dict`` layout and must stay as they are.
    """

    def __init__(self, num_classes=10, config=default_config, input_size=128):
        """Build the network.

        Args:
            num_classes: Size of the output layer.
            config: Sequence of layer tokens. An ``int`` adds a 3x3 conv block
                with that many output channels; any other entry (``'M'`` by
                convention) adds a 2x2 max-pool.
            input_size: Height/width of the square input images. It is only
                used to size the first linear layer; the default of 128
                together with ``default_config`` gives the original
                ``64 * 2 * 2`` input features.

        Raises:
            ValueError: If ``config`` pools the feature map down to nothing
                for the given ``input_size``.
        """
        super().__init__()
        config = list(config)
        self.conv_layers = self.create_conv_layers(config)

        # Derive the flattened feature size instead of hard-coding 64*2*2, so
        # that other configs / resolutions do not fail in forward().
        conv_widths = [entry for entry in config if isinstance(entry, int)]
        num_pools = len(config) - len(conv_widths)
        last_channels = conv_widths[-1] if conv_widths else 3
        # Convs use padding=1 with a 3x3 kernel (size preserved); each pool
        # floor-halves the spatial size.
        spatial = input_size // (2 ** num_pools)
        if spatial < 1:
            raise ValueError(
                f"input_size={input_size} is too small for {num_pools} pooling layers"
            )

        self.fc_layers = nn.Sequential(
            nn.Linear(last_channels * spatial * spatial, 64),
            nn.ReLU(),
            nn.Dropout(p=0.25),
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        """Run the conv stack, flatten everything but the batch axis, apply the head."""
        x = self.conv_layers(x)
        x = x.flatten(start_dim=1)
        return self.fc_layers(x)

    def create_conv_layers(self, config):
        """Translate ``config`` into an ``nn.Sequential`` feature extractor.

        The first convolution expects 3 input channels.
        """
        layers = []
        in_channels = 3

        for layer in config:
            if isinstance(layer, int):
                out_channels = layer
                layers.append(nn.Conv2d(in_channels, out_channels,
                                        kernel_size=3, stride=1, padding=1))
                layers.append(nn.BatchNorm2d(out_channels))
                layers.append(nn.ReLU())
                in_channels = out_channels
            else:
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

        return nn.Sequential(*layers)

In [24]:
def load_base_model(base_model_path, map_location='cpu'):
    """Create a default ``CustomVGG`` and load saved weights into it.

    Args:
        base_model_path: Path to a ``state_dict`` file written by ``torch.save``.
        map_location: Forwarded to ``torch.load``. Defaults to ``'cpu'`` so a
            checkpoint that was saved from a GPU also loads on a machine
            without CUDA; the freshly built model lives on the CPU anyway.

    Returns:
        The ``CustomVGG`` instance with the loaded weights.
    """
    model = CustomVGG()
    # SECURITY: torch.load unpickles the file, which can execute arbitrary
    # code. Only load checkpoints from a trusted source (consider
    # weights_only=True where the installed PyTorch supports it).
    state_dict = torch.load(base_model_path, map_location=map_location)
    model.load_state_dict(state_dict)
    print(f"Model loaded from {base_model_path}")
    return model

In [134]:
# Start from the saved initial weights. Re-run this cell before every new
# training run, because fitting updates `model` in place.
model = load_base_model(base_model_path=base_model_path)

Model loaded from C:\Users\Siam\desktop\465\base_model\base


In [135]:
class vggTrainer(pl.LightningModule):
    """LightningModule that trains ``model`` with cross-entropy on an image folder.

    The optimizer is created by the caller; this module attaches a
    ``ReduceLROnPlateau`` schedule to it and logs the mean batch loss of each
    epoch as ``train_loss``.
    """

    def __init__(self, model, train_dir, batch_size, optimizer):
        """Store the collaborators and build the training loader.

        Args:
            model: Network to train; called as ``model(x)`` in ``forward``.
            train_dir: Image folder passed to ``get_dataloader``.
            batch_size: Batch size passed to ``get_dataloader``.
            optimizer: Ready-made optimizer returned from
                ``configure_optimizers``.
        """
        super().__init__()
        self.model = model
        self.optimizer = optimizer
        self.loss_fn = nn.CrossEntropyLoss()
        self.train_loader = get_dataloader(train_dir, batch_size, num_workers=2)
        # Per-batch loss values of the epoch currently in progress.
        self.train_step_losses = []

    def forward(self, x):
        """Delegate to the wrapped model."""
        return self.model(x)

    def train_dataloader(self):
        """Return the loader built in ``__init__``."""
        return self.train_loader

    def training_step(self, batch, batch_idx):
        """Compute the cross-entropy loss for one batch and record its value."""
        inputs, targets = batch
        step_loss = self.loss_fn(self.forward(inputs), targets)
        # Keep a plain float for the epoch average; the tensor itself is
        # returned for the backward pass.
        self.train_step_losses.append(step_loss.item())
        return step_loss

    def on_train_epoch_end(self):
        """Log the unweighted mean of this epoch's batch losses, then reset the buffer."""
        recorded = self.train_step_losses
        epoch_mean = sum(recorded) / len(recorded)
        self.log('train_loss', epoch_mean, on_epoch=True, prog_bar=True)
        recorded.clear()

    def configure_optimizers(self):
        """Pair the supplied optimizer with a plateau scheduler monitoring ``train_loss``."""
        # NOTE(review): `verbose` is reported as deprecated by newer PyTorch
        # releases; confirm against the installed version.
        plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, mode='min', factor=0.5, patience=5, verbose=True
        )
        scheduler_config = {
            'scheduler': plateau,
            'interval': 'epoch',
            'monitor': 'train_loss',
        }
        return {'optimizer': self.optimizer, 'lr_scheduler': scheduler_config}

In [136]:
# Both callbacks watch the epoch-level `train_loss` metric.
early_stopping = EarlyStopping(
    monitor='train_loss',
    min_delta=0.005,
    patience=15,
)
checkpoint_callback = ModelCheckpoint(
    monitor="train_loss",
    save_top_k=1,
    save_last=True,
)

In [137]:
batch_size = 24

# The optimizer is built here and handed to the LightningModule, which
# returns it from configure_optimizers().
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
trainer = vggTrainer(
    model=model,
    train_dir=train_dir,
    batch_size=batch_size,
    optimizer=optimizer,
)

# max_epochs=-1 removes the epoch limit (per the Lightning Trainer docs), so
# the EarlyStopping callback decides when training ends.
pl_trainer = pl.Trainer(max_epochs=-1, callbacks=[early_stopping, checkpoint_callback])

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [138]:
# Train until the EarlyStopping callback halts the run (no epoch cap is set).
pl_trainer.fit(model=trainer)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params
---------------------------------------------
0 | model   | CustomVGG        | 91.2 K
1 | loss_fn | CrossEntropyLoss | 0     
---------------------------------------------
91.2 K    Trainable params
0         Non-trainable params
91.2 K    Total params
0.365     Total estimated model params size (MB)
C:\Users\Siam\anaconda3\envs\env1.0\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:298: The number of training batches (42) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                      | 0/? [00:00<…

Epoch 00116: reducing learning rate of group 0 to 5.0000e-03.
Epoch 00142: reducing learning rate of group 0 to 2.5000e-03.
Epoch 00159: reducing learning rate of group 0 to 1.2500e-03.
Epoch 00168: reducing learning rate of group 0 to 6.2500e-04.
Epoch 00179: reducing learning rate of group 0 to 3.1250e-04.
Epoch 00190: reducing learning rate of group 0 to 1.5625e-04.
Epoch 00196: reducing learning rate of group 0 to 7.8125e-05.
