<a href="https://colab.research.google.com/github/qmaruf/play/blob/master/cifar10_using_pytorch_lightning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
! pip install pytorch-lightning



In [6]:
import os
import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST, CIFAR10
from torchvision import transforms
import pytorch_lightning as pl
import torch.nn as nn 
from torchvision import models
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np
import torch.nn as nn
import torch
from tqdm import tqdm
from scipy.spatial import distance
from torch.optim.lr_scheduler import ReduceLROnPlateau
%matplotlib inline

op_sigmoid = nn.Softmax()

In [7]:
checkpoint_callback = ModelCheckpoint(
    filepath=os.getcwd(),
    save_top_k=1,
    verbose=True,
    monitor='val_loss',
    mode='min',
    prefix=''
)

early_stop_callback = EarlyStopping(
   monitor='val_loss',
   min_delta=0.00,
   patience=7,
   verbose=False,
   mode='min',
)

transform_func = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])])



In [9]:
class CIFAR10Model(pl.LightningModule):

    def __init__(self):
        super(CIFAR10Model, self).__init__()        
        self.model = models.resnet18(pretrained=True)        
        self.model.fc = nn.Linear(512, 10)
        self.criterion = nn.CrossEntropyLoss()   
        self.learning_rate = 0.0001     

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_nb):
        x, y = batch
        y_hat = self.forward(x)
        loss = self.criterion(y_hat, y)
        tensorboard_logs = {'train_loss': loss}
        return {'loss': loss, 'log': tensorboard_logs}

    def validation_step(self, batch, batch_nb):
        x, y = batch
        y_hat = self.forward(x)
        return {'val_loss': F.cross_entropy(y_hat, y)}

    def validation_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        tensorboard_logs = {'val_loss': avg_loss}
        return {'avg_val_loss': avg_loss, 'log': tensorboard_logs}

    def test_step(self, batch, batch_nb):
        x, y = batch
        y_hat = self.forward(x)
        return {'test_loss': self.criterion(y_hat, y)}

    def test_end(self, outputs):
        avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
        logs = {'test_loss': avg_loss}
        return {'avg_test_loss': avg_loss, 'log': logs, 'progress_bar': logs}

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3, verbose=True)
        return [optimizer], [scheduler]

    @pl.data_loader
    def train_dataloader(self):
        return DataLoader(CIFAR10(os.getcwd(), train=True, download=True, transform=transform_func), batch_size=128)

    @pl.data_loader
    def val_dataloader(self):
        return DataLoader(CIFAR10(os.getcwd(), train=False, download=True, transform=transform_func), batch_size=128)

    @pl.data_loader
    def test_dataloader(self):
        return DataLoader(CIFAR10(os.getcwd(), train=False, download=True, transform=transform_func), batch_size=128)

In [10]:
model = CIFAR10Model()
trainer = pl.Trainer(max_epochs=128,
                    gpus=1, 
                    check_val_every_n_epoch=5,
                    checkpoint_callback=checkpoint_callback,
                    early_stop_callback=early_stop_callback,
                    auto_lr_find=True)    
trainer.fit(model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | ResNet           | 11 M  
1 | criterion | CrossEntropyLoss | 0     


Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


HBox(children=(FloatProgress(value=0.0, description='Finding best initial lr', style=ProgressStyle(description…

Learning rate set to 0.001584893192461114

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | ResNet           | 11 M  
1 | criterion | CrossEntropyLoss | 0     


Files already downloaded and verified


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

Files already downloaded and verified
Files already downloaded and verified


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…


Epoch 00004: val_loss reached 0.73456 (best 0.73456), saving model to /content/epoch=4_v1.ckpt as top 1


Epoch     5: reducing learning rate of group 0 to 7.9245e-04.


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…


Epoch 00009: val_loss  was not in top 1


Epoch     9: reducing learning rate of group 0 to 3.9622e-04.
Epoch    13: reducing learning rate of group 0 to 1.9811e-04.




1

In [47]:
correct = 0
total = 0
with torch.no_grad():
    for batch in model.test_dataloader():
        data, labels = batch        
        data = data.cuda()
        labels = labels.cuda()
        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the %d test images: %d %%' % (len(model.test_dataloader().dataset), 100 * correct / total))

Files already downloaded and verified
Files already downloaded and verified
Accuracy of the network on the 10000 test images: 78 %
