## Readings

modules, losses, activations, etc. :
 - https://pytorch.org/docs/stable/nn.html
 
optimizers and schedulers:
 - https://pytorch.org/docs/stable/optim.html
 
examples:
 - https://github.com/pytorch/examples/blob/master/vae/main.py
 - https://github.com/pytorch/examples/blob/master/mnist/main.py

In [2]:
from importlib import reload

import torch
from torch import nn
from torch import optim
from torch.utils.data import random_split, DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

from Models.cnn_classifier import Classifier
import utils

## Model

In [6]:
cls = Classifier(input_size=28, num_classes=10, in_channel=1,
                 out_channels=[8,16,32], kernels=[5,5,3],
                 strides=[2,2,1], dropouts=[.1,.2,.3])

# cls.cuda()
print(cls)

Classifier(
  (net): Sequential(
    (0): Conv2d(1, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Conv2d(8, 16, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (5): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.2, inplace=False)
    (8): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): Dropout(p=0.3, inplace=False)
    (12): Flatten()
    (13): Linear(in_features=1568, out_features=10, bias=True)
    (14): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (15): Softmax(dim=None)
  )
)


In [7]:
utils.get_n_params(cls)

23886

## Dataset

In [8]:
mnist = MNIST(root='data', train=False, transform=ToTensor(), download=True)
len(mnist)

10000

In [9]:
train_data, test_data = random_split(mnist, [7000,3000])
len(train_data), len(test_data)

(7000, 3000)

In [10]:
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=True)

## Training

In [11]:
optimizer = optim.Adam(cls.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

In [12]:
utils.train(cls, train_loader, test_loader, criterion, optimizer, device='cpu', epochs=10, verbose=1)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

  input = module(input)


1 train_loss:2.1844346371563996 train_acc:0.4583333333333333
val_loss:2.0741699604277914 val_acc:0.75

2 train_loss:2.024366061253981 train_acc:0.7083333333333334
val_loss:1.987453359238645 val_acc:0.8035714285714286

3 train_loss:1.9679070884531193 train_acc:0.6666666666666666
val_loss:1.946497729484071 val_acc:0.7857142857142857

4 train_loss:1.9316285978664052 train_acc:0.8333333333333334
val_loss:1.9217698117519946 val_acc:0.8214285714285714

5 train_loss:1.90823885419152 train_acc:0.8333333333333334
val_loss:1.8984166485198 val_acc:0.875

6 train_loss:1.8880571777170354 train_acc:0.875
val_loss:1.882584105146692 val_acc:0.8214285714285714

7 train_loss:1.8695084127512844 train_acc:0.875
val_loss:1.8619685299853062 val_acc:0.8571428571428571

8 train_loss:1.8554998517036438 train_acc:0.7916666666666666
val_loss:1.8480621677763918 val_acc:0.8392857142857143

9 train_loss:1.8445734869350086 train_acc:0.7083333333333334
val_loss:1.84285694994825 val_acc:0.875

10 train_loss:1.83323216