In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms as T
import torch.nn.functional as F


from pytorch_lightning import LightningModule, Trainer
from sklearn.model_selection import train_test_split

import kornia as K


In [2]:
class PreProcess(nn.Module):
    """Turn a PIL image into a resized, scaled float16 tensor using Kornia.

    The pipeline is: PIL image -> tensor (``PILToTensor``) -> float32 ->
    resize to 112x112 -> divide by 255 -> float16.  For 8-bit images the
    result therefore lies in ``[0, 1]``.

    Args:
        keepdim: Forwarded to ``K.augmentation.Resize``; with the default
            ``True`` the output keeps the dimensionality of the input instead
            of gaining a batch axis.
    """

    def __init__(self, keepdim=True) -> None:
        super().__init__()
        self.preproc = nn.Sequential(
            K.augmentation.Resize((112, 112), keepdim=keepdim),
        )
        # Built once and reused by every forward() call.
        self.to_tensor = T.PILToTensor()

    @torch.no_grad()  # disable gradients for efficiency
    def forward(self, inp: "PIL.Image.Image") -> torch.Tensor:
        """Pre-process a single image.

        Args:
            inp: Image to convert; ``PILToTensor`` requires a PIL image.

        Returns:
            The resized image as a ``torch.float16`` tensor.
        """
        x_out = self.to_tensor(inp)
        # Resize in float32 and only down-cast after scaling by 1/255.
        x_out = self.preproc(x_out.to(torch.float)) / 255.0
        return x_out.to(torch.float16)


In [3]:
# Root directory of the image dataset; it is passed to ImageFolder in the next cell.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
DATA = '/root/workspace/work/Digital-Tashkent/Signs/traffic-sign-recognition/classification/data/signs'

In [4]:
# Dataset rooted at DATA; every loaded image goes through PreProcess.
ds = ImageFolder(root=DATA, transform=PreProcess())

In [5]:
# Training loader.  ImageFolder lists its samples grouped by class, so without
# shuffling a batch of 1000 would hold only one or a few classes; reshuffle
# every epoch so each batch is a mix of classes.
loader = DataLoader(ds, batch_size=1000, shuffle=True, num_workers=24)

In [6]:
from pytorch_lightning.callbacks import ModelCheckpoint

# Keep the two best checkpoints under ../lightning_logs/cls/.
# The LightningModule in this notebook only logs "train_loss_step" (there is no
# validation loop), so that is the metric to rank checkpoints by; lower is
# better, hence mode="min".
# NOTE(review): the Trainer created further down is not given this callback;
# it has to be passed via Trainer(callbacks=[checkpoint_callback]) to take effect.
checkpoint_callback = ModelCheckpoint(
    dirpath="../lightning_logs/cls/",
    save_top_k=2,
    monitor="train_loss_step",
    mode="min",
)


In [7]:
from torchvision import models as M

In [8]:
def get_resnet(NUM_CLASSES):
    """Build a ResNet-50 whose classification head outputs ``NUM_CLASSES`` logits.

    The network is created with torchvision's default arguments and its final
    fully connected layer is replaced by a fresh ``nn.Linear`` that keeps the
    same input width.

    Args:
        NUM_CLASSES: Number of output features of the new head.

    Returns:
        The modified ResNet-50 module.
    """
    backbone = M.resnet50()
    # Reuse the feature width of the stock head for the replacement layer.
    head_width = backbone.fc.in_features
    backbone.fc = nn.Linear(in_features=head_width, out_features=NUM_CLASSES)
    return backbone

In [9]:
# Number of classes found in the dataset (shown as the cell output).
len(ds.classes)

140

In [10]:
# One output logit per class found in the dataset.
model = get_resnet(NUM_CLASSES=len(ds.classes))

In [11]:
# Single-GPU trainer running 16-bit mixed precision for at most 100 epochs.
# No callbacks are passed here, so Lightning's default behaviour applies.
trainer = Trainer(
    accelerator="gpu",
    devices=1,
    max_epochs=100,
    precision=16,
    log_every_n_steps=5,
    default_root_dir="../lightning_logs/cls/",
)

Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [14]:
class FasterRCNN(LightningModule):
    """Lightning module that trains an image classifier with cross-entropy loss.

    NOTE(review): despite its name, this module wraps a classification network
    (the notebook builds a ResNet-50), not a Faster R-CNN detector.  The name
    is kept so existing references keep working.

    Args:
        net: Network mapping a batch of images to class logits.  Defaults to
            the notebook-level ``model``.
        num_classes: Number of target classes.  Defaults to
            ``len(ds.classes)`` of the notebook-level dataset ``ds``.
        lr: Initial learning rate for Adam.
    """

    def __init__(self, net=None, num_classes=None, lr=1e-3):
        super().__init__()
        # Fall back to the notebook globals so a bare ``FasterRCNN()`` behaves
        # exactly as before.
        self.model = model if net is None else net
        self.lr = lr
        self.loss_fn = nn.CrossEntropyLoss()
        self.num_classes = len(ds.classes) if num_classes is None else num_classes

    def forward(self, imgs):
        """Return the class logits produced by the wrapped network."""
        return self.model(imgs)

    def configure_optimizers(self):
        """Create Adam plus a ReduceLROnPlateau scheduler.

        Returns:
            A dict with the optimizer and a scheduler config whose ``monitor``
            is the training-loss key logged in ``training_step``.
        """
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer=optimizer, mode='min', factor=0.3, patience=3)
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                'scheduler': plateau,
                # Must match the key logged in training_step.
                'monitor': 'train_loss_step',
            },
        }

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one batch.

        Args:
            batch: ``(images, targets)`` pair; ``targets`` are passed to
                ``CrossEntropyLoss`` as-is (no one-hot encoding is applied).
            batch_idx: Index of the batch (unused).

        Returns:
            The loss tensor to back-propagate.
        """
        images, targets = batch
        logits = self(images)
        loss = self.loss_fn(logits, targets)
        # Log a detached copy so the logger does not hold on to the graph.
        self.log('train_loss_step', loss.detach(), on_step=True)
        return loss


In [15]:
# Train on the full dataset loader; no validation dataloader is supplied.
trainer.fit(FasterRCNN(), loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params
---------------------------------------------
0 | model   | ResNet           | 23.8 M
1 | loss_fn | CrossEntropyLoss | 0     
---------------------------------------------
23.8 M    Trainable params
0         Non-trainable params
23.8 M    Total params
47.590    Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [16]:
# Inverse of ds.class_to_idx.  Despite the name, the keys are numeric class
# indices and the values are the class-name strings.
label2num = {index: label for label, index in ds.class_to_idx.items()}

In [23]:
# Pick one sample by position and display its ground-truth class name.
# ds[i] yields an (image, class_index) pair; element 1 is the class index.
i=10000
label2num[ds[i][1]]

'5.16.2'

In [24]:
# Predict the class of sample ``i`` and display its class name.
# eval() makes BatchNorm use its running statistics instead of the statistics
# of this single-image batch.
model.eval()
# Run on whatever device the model currently lives on rather than assuming CUDA.
device = next(model.parameters()).device
with torch.no_grad():
    # The dataset yields float16 images; cast to float32 to match the model
    # weights and add the batch axis the network expects.
    logits = model(ds[i][0].to(device).float().unsqueeze(0))
label2num[logits.argmax().item()]

'5.16.2'