<a href="https://colab.research.google.com/github/kimsooyoung/practical_jetson_examples/blob/main/Digits%20Recognition%20with%20MNIST/%5BPractical%20Jetson%20%231-1%5D%20Digits_Recognition_with_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Practical Exercise with MNIST Example #1-1 - "Train Part"**

**[!important] Check your Runtime type before running this example (Runtime > Change runtime type)**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

import easydict
import numpy as np
from copy import deepcopy

# **GPU usability check**

In [None]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

# **Model Implementation**

* In this code, a simple `Block` module is defined as a reusable building block. Reusing it saves a lot of time when stacking layers.

In [None]:
class Block(nn.Module):
    """Reusable fully connected block: Linear -> LeakyReLU -> regularizer.

    The regularizer is ``nn.BatchNorm1d(output_size)`` when ``use_batch_norm``
    is true, otherwise ``nn.Dropout(dropout_p)``.

    Args:
        input_size: number of input features of the linear layer.
        output_size: number of output features of the linear layer.
        use_batch_norm: select batch normalization (True) or dropout (False).
        dropout_p: drop probability; only used when ``use_batch_norm`` is False.
    """

    def __init__(self,
                input_size,
                output_size,
                use_batch_norm=True,
                dropout_p=.4):
        # Initialize nn.Module before touching any attribute so that its
        # internal registries exist whatever kind of value gets assigned.
        super().__init__()

        self.input_size = input_size
        self.output_size = output_size
        self.use_batch_norm = use_batch_norm
        self.dropout_p = dropout_p

        self.block = nn.Sequential(
            nn.Linear(input_size, output_size),
            nn.LeakyReLU(),
            # BatchNorm1d normalizes each of the `output_size` features over
            # the batch; Dropout is the alternative regularizer.
            nn.BatchNorm1d(output_size) if use_batch_norm else nn.Dropout(dropout_p),
        )

    def forward(self, x):
        """Apply the block to ``x`` and return the result."""
        # |x| = (batch_size, input_size)
        y = self.block(x)
        # |y| = (batch_size, output_size)

        return y

* Define the actual model, named `ImageClassifier`.
* This structure is based on [this book](https://www.yes24.com/Product/Goods/112198327) and [this link](https://kh-kim.github.io/nlp_with_deep_learning_blog/docs/1-15-practical_exercise/08-predict_exercise/)

> Copyright © 2017-2020 Patrick Marsceill. Distributed by an MIT license

In [None]:
class ImageClassifier(nn.Module):
    """Stack of ``Block`` layers followed by a linear layer and log-softmax.

    Args:
        input_size: number of features of each input sample.
        output_size: number of output classes.
        hidden_sizes: output size of each hidden ``Block``, in order; must
            contain at least one entry.
        use_batch_norm: forwarded to every ``Block``.
        dropout_p: forwarded to every ``Block``.

    Raises:
        AssertionError: if ``hidden_sizes`` is empty.
    """

    def __init__(self,
                input_size,
                output_size,
                # Immutable default: a list default would be one shared object
                # across every call of the constructor.
                hidden_sizes=(500, 400, 300, 200, 100),
                use_batch_norm=True,
                dropout_p=.4):

        super().__init__()

        assert len(hidden_sizes) > 0, "You need to specify hidden layers"

        # Each block maps one layer width to the next:
        # input_size -> hidden_sizes[0] -> hidden_sizes[1] -> ...
        layer_sizes = [input_size, *hidden_sizes]
        blocks = [
            Block(in_size, out_size, use_batch_norm, dropout_p)
            for in_size, out_size in zip(layer_sizes[:-1], layer_sizes[1:])
        ]

        self.layers = nn.Sequential(
            *blocks,
            nn.Linear(layer_sizes[-1], output_size),
            # Log-probabilities over the last dimension.
            nn.LogSoftmax(dim=-1),
        )

    def forward(self, x):
        """Run ``x`` through all layers and return the log-softmax output."""
        # |x| = (batch_size, input_size)
        y = self.layers(x)
        # |y| = (batch_size, output_size)

        return y

# **Train helper Implementation**

In [None]:
class Trainer():
    """Mini-batch training loop around a model, an optimizer and a criterion.

    Args:
        model: module called as ``model(x_batch)``; also switched between
            ``train()`` and ``eval()`` mode.
        optimizer: object whose ``zero_grad()`` / ``step()`` are called once
            per training mini-batch.
        crit: loss called as ``crit(y_hat_batch, y_batch)``.
    """

    def __init__(self, model, optimizer, crit):
        self.model = model
        self.optimizer = optimizer
        self.crit = crit

        super().__init__()

    @staticmethod
    def _squeeze_targets(y_i):
        """Drop singleton dimensions of a target batch but keep the batch axis.

        A bare ``squeeze()`` turns a one-sample batch (e.g. shape ``(1,)``)
        into a 0-dim tensor, which no longer matches the batch dimension of
        the model output. For batches with more than one sample the result is
        exactly ``y_i.squeeze()``.
        """
        squeezed = y_i.squeeze()
        if y_i.size(0) == 1:
            squeezed = squeezed.unsqueeze(0)
        return squeezed

    def _batchify(self, x, y, batch_size, random_split=True):
        """Split ``x`` and ``y`` along dim 0 into chunks of ``batch_size``.

        When ``random_split`` is true, both tensors are first reordered with
        the same random permutation so that samples and targets stay aligned.
        Returns a pair of tuples ``(x_chunks, y_chunks)``.
        """
        if random_split:
            indices = torch.randperm(x.size(0), device=x.device)
            x = torch.index_select(x, dim=0, index=indices)
            y = torch.index_select(y, dim=0, index=indices)

        x = x.split(batch_size, dim=0)
        y = y.split(batch_size, dim=0)

        return x, y

    def _train(self, x, y, config):
        """Run one training epoch and return the mean of the per-batch losses."""
        # Turn train mode on. (default: model.train)
        self.model.train()

        x, y = self._batchify(x, y, config.batch_size)
        total_loss = 0

        for i, (x_i, y_i) in enumerate(zip(x, y)):
            y_hat_i = self.model(x_i)
            loss_i = self.crit(y_hat_i, self._squeeze_targets(y_i))

            # Initialize the gradients of the model.
            self.optimizer.zero_grad()
            loss_i.backward()

            self.optimizer.step()

            if config.verbose >= 2:  # per-iteration loss only at verbosity 2 or higher
                print("Train Iteration(%d/%d): loss=%.4e" % (i + 1, len(x), float(loss_i)))

            # float() keeps a plain number, not the loss tensor itself.
            total_loss += float(loss_i)

        return total_loss / len(x)

    def _validate(self, x, y, config):
        """Evaluate without gradient tracking; return the mean per-batch loss."""
        # Turn evaluation mode on.
        self.model.eval()

        # Turn on the no_grad mode to run more efficiently.
        with torch.no_grad():
            x, y = self._batchify(x, y, config.batch_size, random_split=False)
            total_loss = 0

            for i, (x_i, y_i) in enumerate(zip(x, y)):
                y_hat_i = self.model(x_i)
                loss_i = self.crit(y_hat_i, self._squeeze_targets(y_i))

                if config.verbose >= 2:
                    print("Valid Iteration(%d/%d): loss=%.4e" % (i + 1, len(x), float(loss_i)))

                total_loss += float(loss_i)

            return total_loss / len(x)

    def train(self, train_data, valid_data, config):
        """Train for ``config.n_epochs`` epochs and keep the best weights.

        Args:
            train_data: pair ``(x, y)`` used for training.
            valid_data: pair ``(x, y)`` used for validation.
            config: object exposing ``n_epochs``, ``batch_size`` and ``verbose``.

        After the last epoch the model is reset to the weights that reached
        the lowest validation loss. If no epoch produced a snapshot (zero
        epochs, or every validation loss was NaN) the model is left as is.
        """
        # Track the best validation loss and the weights that produced it.
        lowest_loss = np.inf
        best_model = None

        for epoch_index in range(config.n_epochs):
            train_loss = self._train(train_data[0], train_data[1], config)
            valid_loss = self._validate(valid_data[0], valid_data[1], config)

            # You must use deep copy to take a snapshot of current best weights.
            if valid_loss <= lowest_loss:
                lowest_loss = valid_loss
                best_model = deepcopy(self.model.state_dict())

            print("Epoch(%d/%d): train_loss=%.4e  valid_loss=%.4e  lowest_loss=%.4e" % (
                epoch_index + 1,
                config.n_epochs,
                train_loss,
                valid_loss,
                lowest_loss,
            ))

        # Restore to best model (nothing to restore if no snapshot was taken).
        if best_model is not None:
            self.model.load_state_dict(best_model)


# **DataLoader**

In [None]:
def load_mnist(is_train=True, flatten=True):
    """Load MNIST through torchvision and return ``(x, y)`` tensors.

    Args:
        is_train: load the training split when True, the test split otherwise.
        flatten: when True, reshape every sample into a single vector.

    Returns:
        ``x``: ``dataset.data`` converted to float and divided by 255.
        ``y``: ``dataset.targets`` (ground-truth labels).
    """
    # NOTE(review): the transform is handed to the dataset, but the pixels are
    # read straight from `.data` below, so it presumably has no effect on the
    # returned tensors - confirm before removing it.
    to_tensor = transforms.Compose([
        transforms.ToTensor(),
    ])
    mnist = datasets.MNIST(
        '../data',          # location of the MNIST files
        train=is_train,
        download=True,
        transform=to_tensor,
    )

    # Rescale the 255-level pixel values to the 0-1 range.
    images = mnist.data.float() / 255.
    labels = mnist.targets

    if not flatten:
        return images, labels

    # One row per sample (28*28 = 784 values for a 2D MNIST image).
    return images.view(images.size(0), -1), labels


def split_data(x, y, train_ratio=.8):
    """Shuffle ``x`` and ``y`` together and cut them into train/valid parts.

    Args:
        x: tensor of samples, indexed along dim 0.
        y: tensor of targets, aligned with ``x`` along dim 0.
        train_ratio: fraction of the samples that goes to the training part.

    Returns:
        ``(x_parts, y_parts)`` where each is a ``(train, valid)`` tuple.
    """
    total = x.size(0)
    n_train = int(total * train_ratio)
    part_sizes = [n_train, total - n_train]

    # A single permutation is applied to both tensors so pairs stay aligned.
    order = torch.randperm(total)

    x_shuffled = x.index_select(0, order)
    y_shuffled = y.index_select(0, order)

    return x_shuffled.split(part_sizes, dim=0), y_shuffled.split(part_sizes, dim=0)


def get_hidden_sizes(input_size, output_size, n_layers):
    """Return ``n_layers - 1`` hidden sizes stepping evenly down from ``input_size``.

    The step is ``(input_size - output_size) / n_layers`` truncated to an
    integer; the k-th hidden size is ``input_size - k * step``.
    """
    step = int((input_size - output_size) / n_layers)

    return [input_size - step * k for k in range(1, n_layers)]


# **Main Loop and Final Training**

In [None]:
# Handle the training configuration through EasyDict
def define_argparser():
    """Return the training configuration as an ``EasyDict``.

    The keys mirror command-line options, so the result can be used with
    attribute access (``config.batch_size``) like an argparse namespace.
    """
    config = easydict.EasyDict({
        "model_fn": './model_test.pth',   # where the trained weights are saved
        "gpu_id": 0,

        "train_ratio": 0.8,               # share of samples used for training

        "batch_size": 256,
        "n_epochs": 10,

        "n_layers": 5,
        # This used to hold the argparse action name 'store_true'. As a
        # non-empty string it was truthy, so dropout was what actually ran.
        # It is now an explicit boolean with the same effect; set it to False
        # to use batch normalization instead.
        "use_dropout": True,
        "dropout_p": 0.3,

        "verbose": 1,
    })

    return config


def main(config):
    """Train an ``ImageClassifier`` on MNIST and save the best weights.

    Args:
        config: object exposing ``train_ratio``, ``n_layers``, ``use_dropout``,
            ``dropout_p``, ``verbose`` and ``model_fn``, plus the fields read
            by ``Trainer.train`` (``n_epochs``, ``batch_size``).

    Side effects:
        Prints the split shapes, then writes a checkpoint with the model
        weights, the optimizer state and ``config`` to ``config.model_fn``.
    """
    # Use the GPU when CUDA is available, otherwise the CPU.
    # (config.gpu_id is not consulted here.)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    x, y = load_mnist(is_train=True, flatten=True)

    x, y = split_data(x, y, train_ratio=config.train_ratio)
    print("Train:", x[0].shape, y[0].shape)
    print("Valid:", x[1].shape, y[1].shape)

    input_size = int(x[0].shape[-1])
    # Largest training label + 1. Tensor.max() reduces in one call instead of
    # iterating over every element with the Python builtin max().
    output_size = int(y[0].max()) + 1

    model = ImageClassifier(
        input_size=input_size,
        output_size=output_size,
        hidden_sizes=get_hidden_sizes(input_size,
                                    output_size,
                                    config.n_layers),
        use_batch_norm=not config.use_dropout,
        dropout_p=config.dropout_p,
    ).to(device)
    optimizer = optim.Adam(model.parameters())
    # NLLLoss pairs with the LogSoftmax output of the model.
    crit = nn.NLLLoss()

    if config.verbose > 1:
        print(model)
        print(optimizer)
        print(crit)

    trainer = Trainer(model, optimizer, crit)

    # Move both splits to the selected device once, before training.
    train_x, train_y = x[0].to(device), y[0].to(device)
    valid_x, valid_y = x[1].to(device), y[1].to(device)

    trainer.train(
        train_data=(train_x, train_y),
        valid_data=(valid_x, valid_y),
        config=config
    )

    # Save best model weights.
    torch.save({
        'model': trainer.model.state_dict(),
        'opt': optimizer.state_dict(),
        'config': config,
    }, config.model_fn)

## **Start training, then export the `.pth` file from the Files tab on the left side**

In [None]:
# Build the default configuration, then train; main() saves the checkpoint
# to config.model_fn when training finishes.
config = define_argparser()
main(config)

Train: torch.Size([48000, 784]) torch.Size([48000])
Valid: torch.Size([12000, 784]) torch.Size([12000])
Epoch(1/10): train_loss=5.0496e-01  valid_loss=1.6475e-01  lowest_loss=1.6475e-01
Epoch(2/10): train_loss=1.6713e-01  valid_loss=1.2158e-01  lowest_loss=1.2158e-01
Epoch(3/10): train_loss=1.2270e-01  valid_loss=9.5086e-02  lowest_loss=9.5086e-02
Epoch(4/10): train_loss=9.9114e-02  valid_loss=8.9470e-02  lowest_loss=8.9470e-02
Epoch(5/10): train_loss=8.0746e-02  valid_loss=7.7420e-02  lowest_loss=7.7420e-02
Epoch(6/10): train_loss=7.1243e-02  valid_loss=7.7732e-02  lowest_loss=7.7420e-02
Epoch(7/10): train_loss=5.9378e-02  valid_loss=7.0156e-02  lowest_loss=7.0156e-02
Epoch(8/10): train_loss=5.6176e-02  valid_loss=7.0591e-02  lowest_loss=7.0156e-02
Epoch(9/10): train_loss=5.0510e-02  valid_loss=6.6637e-02  lowest_loss=6.6637e-02
Epoch(10/10): train_loss=4.4377e-02  valid_loss=7.7233e-02  lowest_loss=6.6637e-02
