## Imports

In [1]:
# You may also need to install matplotlib, torchvision and pytorch_lightning
# !pip install matplotlib
# !pip install torchvision
# !pip install pytorch_lightning

In [2]:
import torch
from torchvision import models

import padl
from padl import transform

## Using PADL with Pytorch Lightning

## Kaggle Digit Recognizer dataset:
This notebook uses the Kaggle Digit Recognizer dataset, which can be downloaded from the Kaggle link below.

https://www.kaggle.com/c/digit-recognizer

Details on the structure of the data can be found at the link above. The most important information about the data structure is given in the excerpt below.

> The data files train.csv and test.csv contain gray-scale images of hand-drawn digits, from zero through nine.
Each image is 28 pixels in height and 28 pixels in width, for a total of 784 pixels in total. Each pixel has a single pixel-value associated with it, indicating the lightness or darkness of that pixel, with higher numbers meaning darker. This pixel-value is an integer between 0 and 255, inclusive.
The training data set, (train.csv), has 785 columns. The first column, called "label", is the digit that was drawn by the user. The rest of the columns contain the pixel-values of the associated image.


### 0. Reading `csv` files for training and testing
Note: `test.csv` does not contain labels in the Kaggle dataset. It is intended to be used for submission to the Kaggle competition. Here, we can use it for quick inference.

In [3]:
train_csv = 'mnist/train.csv'
test_csv = 'mnist/test.csv'
n_valid = 1000  # number of labelled rows held out for validation

with open(train_csv) as f:
    train_data = f.readlines()
# Skip the header row (and any blank lines) and parse every remaining row into ints.
full_train_array = torch.tensor(
    [list(map(int, line.split(','))) for line in train_data[1:] if line.strip()]
)

# Both splits are sliced from the full labelled array so that they are disjoint:
# the last `n_valid` rows are used for validation, everything before them for training.
valid_array = full_train_array[-n_valid:]
train_array = full_train_array[:-n_valid]

with open(test_csv) as f:
    test_data = f.readlines()
# Same parsing for the unlabelled test file.
test_array = torch.tensor(
    [list(map(int, line.split(','))) for line in test_data[1:] if line.strip()]
)

### 1. Model Definition

In [4]:
import torch.nn.functional as F
import torchvision.models.resnet 
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.optim import lr_scheduler


@transform
class SimpleNet(torch.nn.Module):
    """Convolutional classifier: five conv + batch-norm stages, dropout, two linear layers.

    The layer sizes are chosen for single-channel 28x28 inputs. ``forward``
    returns log-probabilities over 10 classes.
    """

    def __init__(self):
        super().__init__()

        # Stage 1 (3x3 kernel): 1 x 28 x 28 -> 32 x 26 x 26
        self.conv1 = torch.nn.Conv2d(1, 32, kernel_size=3)
        self.batchnorm1 = torch.nn.BatchNorm2d(32)

        # Stage 2 (3x3 kernel): 32 x 26 x 26 -> 32 x 24 x 24
        self.conv2 = torch.nn.Conv2d(32, 32, kernel_size=3)
        self.batchnorm2 = torch.nn.BatchNorm2d(32)

        # Stage 3 (2x2 kernel, stride 2): 32 x 24 x 24 -> 32 x 12 x 12
        self.conv3 = torch.nn.Conv2d(32, 32, kernel_size=2, stride=2)
        self.batchnorm3 = torch.nn.BatchNorm2d(32)

        # Stage 4 (5x5 kernel): 32 x 12 x 12 -> 64 x 8 x 8
        self.conv4 = torch.nn.Conv2d(32, 64, kernel_size=5)
        self.batchnorm4 = torch.nn.BatchNorm2d(64)

        # Stage 5 (2x2 kernel, stride 2): 64 x 8 x 8 -> 64 x 4 x 4 (= 1024 values when flattened)
        self.conv5 = torch.nn.Conv2d(64, 64, kernel_size=2, stride=2)
        self.batchnorm5 = torch.nn.BatchNorm2d(64)

        # Channel-wise dropout applied to the output of the last conv stage
        self.conv5_drop = torch.nn.Dropout2d()

        # Classifier head: 1024 -> 128 -> 10
        self.fc1 = torch.nn.Linear(1024, 128)
        self.fc2 = torch.nn.Linear(128, 10)

    def forward(self, x):
        """Run the network on a batch and return per-class log-probabilities."""
        conv_stages = (
            (self.conv1, self.batchnorm1),
            (self.conv2, self.batchnorm2),
            (self.conv3, self.batchnorm3),
            (self.conv4, self.batchnorm4),
            (self.conv5, self.batchnorm5),
        )
        # Every stage is convolution -> ReLU -> batch norm, in that order.
        for conv, norm in conv_stages:
            x = norm(F.relu(conv(x)))

        dropped = self.conv5_drop(x)
        flat = dropped.view(-1, 1024)
        hidden = F.relu(self.fc1(flat))
        return F.log_softmax(self.fc2(hidden), dim=1)

In [5]:
# Per-sample preprocessing. Each csv row is [label, pixel_0, ..., pixel_783].
preprocess = (
    # integer row -> float tensor
    padl.this.type(torch.FloatTensor)
    # fan out into (pixels, label)
    >> padl.this[1:] + padl.this[0]
    # pixels -> (1, 28, 28) image; label passed through unchanged
    >> padl.this.reshape(-1, 28, 28) / padl.Identity()
)

simplenet = SimpleNet()
loss_func = transform(F.nll_loss)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Device to be used: ', device)

# Full training pipeline: preprocess -> batch -> (network output, integer labels) -> loss.
train_model = (
    preprocess
    >> padl.Batchify()
    # nll_loss needs integer class targets, so the labels are cast back to long
    >> simplenet / padl.this.type(torch.long)
    # reuse the loss transform defined above instead of wrapping F.nll_loss a second time
    >> loss_func
)

train_model.pd_to(device)

Device to be used:  cuda


[1mCompose[0m:

   [32m   │
      ▼ args[0m
   [1m0: [0mtype(<class 'torch.FloatTensor'>)
   [32m   └───────────────────────────────────┐
      │                                   │
      ▼ args                              ▼ args[0m
   [1m1: [0m__getitem__(slice(1, None, None)) [32m+[0m __getitem__(0)   
   [32m   │                                   │
      ▼ args                              ▼ args[0m
   [1m2: [0mreshape(-1, 28, 28)               [32m/[0m padl.Identity()  
   [32m   │
      ▼ args[0m
   [1m3: [0mBatchify(dim=0)                  
   [32m   │└──────────────────────────────────┐
      │                                   │
      ▼ x                                 ▼ args[0m
   [1m4: [0mSimpleNet()                       [32m/[0m type(torch.int64)
   [32m   │
      ▼ (input, target, weight, size_average, ignore_index, reduce, reduction)[0m
   [1m5: [0mnll_loss                         

### 2.1 Creating a Lightning Module using `PADLLightning`
If your `train_model` has the loss function as its final step, you can build the `PADLLightning` object directly from it, as shown below.

In [6]:
from padl import PADLLightning

In [16]:
# IPython introspection: display the docstring of PADLLightning.
PADLLightning?

In [13]:
# Batch size and number of loader workers passed to PADLLightning
batch_size, num_workers = 256, 4

# Wrap the PADL pipeline together with its training and validation data.
padl_lightning_module = PADLLightning(
    train_model,   # pipeline whose last step is the loss
    train_array,   # training data points
    valid_array,   # validation data points
    num_workers=num_workers,
    batch_size=batch_size,
)

### 2.2 Inherit from `PADLLightning` to customize it in the same way as a `LightningModule`

In [8]:
batch_size = 256
num_workers = 4
# Learning rate used by MyModule's optimizer. Previously this constant was defined
# but ignored (the optimizer hard-coded 1e-4); it now holds the value that was
# actually in effect and is the single place to tune it.
learning_rate = 1e-4


class MyModule(PADLLightning):
    """PADLLightning subclass that overrides the optimizer configuration."""

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters, using `learning_rate`."""
        optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
        return optimizer

In [10]:
# Batch size and number of loader workers passed to MyModule
batch_size, num_workers = 256, 4

# Same construction as for PADLLightning, but with the customized subclass.
padl_lightning_module = MyModule(
    train_model,   # pipeline whose last step is the loss
    train_array,   # training data points
    valid_array,   # validation data points
    num_workers=num_workers,
    batch_size=batch_size,
)

### 3. Training and validating the `train_model` with the PADL-Pytorch Lightning Connector

In [11]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

log_interval = 10  # log metrics every `log_interval` training steps
nepoch = 2         # maximum number of training epochs

trainer = pl.Trainer(
    # Use one GPU when CUDA is available and fall back to CPU (gpus=0) otherwise,
    # so the cell also runs on machines without a GPU.
    gpus=1 if torch.cuda.is_available() else 0,
    # Stop training early based on the monitored "val_loss" metric
    callbacks=[EarlyStopping(monitor="val_loss")],
    max_epochs=nepoch,
    default_root_dir='test',
    log_every_n_steps=log_interval
)
trainer.fit(padl_lightning_module)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name    | Type      | Params
--------------------------------------
0 | layer_0 | SimpleNet | 214 K 
--------------------------------------
214 K     Trainable params
0         Non-trainable params
214 K     Total params
0.857     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: -1it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

saving torch module to test/lightning_logs/version_7/checkpoints/model.padl/model_10.pt


Validating: 0it [00:00, ?it/s]

saving torch module to test/lightning_logs/version_7/checkpoints/epoch=0-step=160.padl/epoch=0-step=160_10.pt
