# Train MNIST Classifier using PyTorch Lightning

In [1]:
import torch 
from torch.nn import functional as F 
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
import pytorch_lightning as pl

In [2]:
from IPython.display import Image
Image(url='https://www.learnopencv.com/wp-content/uploads/2020/05/Annotation-2020-05-03-184411.png',width=800,height=400)

In [3]:
class MNISTModel(pl.LightningModule):
    
    def __init__(self):
        super(MNISTModel,self).__init__()
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1,28,kernel_size = 5),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size = 2)
        )
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(28,10,kernel_size = 2),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size = 2)
        )

        self.dropout1 = torch.nn.Dropout(0.25)
        self.fc1 = torch.nn.Linear(250,18)
        self.dropout2 = torch.nn.Dropout(0.08)
        self.fc2 = torch.nn.Linear(18,10)


    def forward(self,x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.dropout1(x)
        x = torch.relu(self.fc1(x.view(x.size(0),-1)))
        x = F.leaky_relu(self.dropout2(x))

        return F.softmax(self.fc2(x))


    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters())


    def training_step(self,batch,batch_idx):
        x,labels = batch
        pred = self.forward(x)
        loss = F.nll_loss(pred,labels)
        logs = {'train_loss':loss}

        #REQUIRED: It ie required for us to return "loss"
        output = {
                'loss':loss,
                'log':logs
        }

        return output

    def prepare_data(self):
        MNIST(os.getcwd(),train=True,download=True)
        MNIST(os.getcwd(),train=False,download=True)

    def train_dataloader(self):
        
        mnist_train = MNIST(os.getcwd(),train=True,download=False,transform = transforms.ToTensor())
        self.train_set, self.val_set = random_split(mnist_train,[55000,5000])
        return DataLoader(self.train_set,batch_size = 128)

    def val_dataloader(self):
        return DataLoader(self.val_set,batch_size=128)

    def test_dataloader(self):
        return DataLoader(MNIST(os.getcwd(),train=False,download=False,transform = transforms.ToTensor()),batch_size=128)

In [4]:
from datetime import datetime
start = datetime.now()

mytrainer = pl.Trainer(gpus =1,max_epochs = 10)
model = MNISTModel()
mytrainer.fit(model)

print(f'Time Taken: {datetime.now() -start}')

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type       | Params
----------------------------------------
0 | layer1   | Sequential | 728   
1 | layer2   | Sequential | 1 K   
2 | dropout1 | Dropout    | 0     
3 | fc1      | Linear     | 4 K   
4 | dropout2 | Dropout    | 0     
5 | fc2      | Linear     | 190   
Epoch 9: 100%|██████████| 430/430 [00:06<00:00, 63.82it/s, loss=-0.957, v_num=19]
Time Taken: 0:01:12.345696


In [5]:
import gc
gc.collect()

51

In [7]:
start = datetime.now()

# Using Fp16 (16-Bit) precision
mytrainer = pl.Trainer(gpus =1,max_epochs = 10,precision =16)
model = MNISTModel()
mytrainer.fit(model)

print(f'Time Taken: {datetime.now() -start}')

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name     | Type       | Params
----------------------------------------
0 | layer1   | Sequential | 728   
1 | layer2   | Sequential | 1 K   
2 | dropout1 | Dropout    | 0     
3 | fc1      | Linear     | 4 K   
4 | dropout2 | Dropout    | 0     
5 | fc2      | Linear     | 190   
Epoch 9: 100%|██████████| 430/430 [00:07<00:00, 58.87it/s, loss=-0.949, v_num=20]
Time Taken: 0:01:16.279126


In [9]:
loaded_model =MNISTModel.load_from_checkpoint(checkpoint_path='lightning_logs/version_20/checkpoints/epoch=9.ckpt')

In [10]:
loaded_model

MNISTModel(
  (layer1): Sequential(
    (0): Conv2d(1, 28, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(28, 10, kernel_size=(2, 2), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dropout1): Dropout(p=0.25, inplace=False)
  (fc1): Linear(in_features=250, out_features=18, bias=True)
  (dropout2): Dropout(p=0.08, inplace=False)
  (fc2): Linear(in_features=18, out_features=10, bias=True)
)

In [11]:
mytrainer.test()

1

In [13]:
x = torch.randn(1, 1, 28, 28)
out = loaded_model(x)

In [17]:
print(torch.argmax(out))

tensor(2)
