In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [18]:
from model import Net

In [19]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 26, 26]             288
              ReLU-2           [-1, 32, 26, 26]               0
       BatchNorm2d-3           [-1, 32, 26, 26]              64
         Dropout2d-4           [-1, 32, 26, 26]               0
         MaxPool2d-5           [-1, 32, 13, 13]               0
            Conv2d-6           [-1, 32, 13, 13]           1,024
              ReLU-7           [-1, 32, 13, 13]               0
            Conv2d-8           [-1, 32, 11, 11]           9,216
              ReLU-9           [-1, 32, 11, 11]               0
      BatchNorm2d-10           [-1, 32, 11, 11]              64
        Dropout2d-11           [-1, 32, 11, 11]               0
        MaxPool2d-12             [-1, 32, 5, 5]               0
    

In [4]:


# Seed torch's global RNG so runs from a fresh kernel are repeatable.
torch.manual_seed(1)
batch_size = 1024

# Normalisation constants (the commonly quoted MNIST pixel mean / std),
# defined once so the train and test pipelines cannot drift apart.
MNIST_MEAN = (0.1307,)
MNIST_STD = (0.3081,)

# Worker / pinned-memory options are only passed when running on CUDA.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# Training pipeline: light augmentation followed by tensor conversion and
# normalisation.
train_transform = transforms.Compose([
    # With probability 0.1 keep only the central 22x22 patch ...
    transforms.RandomApply([transforms.CenterCrop(22), ], p=0.1),
    # ... and bring every image (cropped or not) back to 28x28.
    transforms.Resize((28, 28)),
    # Random rotation within +/-15 degrees; exposed corners are filled with 0.
    transforms.RandomRotation((-15., 15.), fill=0),
    transforms.ToTensor(),
    transforms.Normalize(MNIST_MEAN, MNIST_STD),
])

# Evaluation pipeline: no augmentation, same normalisation as training.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(MNIST_MEAN, MNIST_STD),
])

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=train_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation does not need shuffling: a fixed order keeps test batches
# identical from epoch to epoch and avoids drawing extra values from the
# global RNG that the training pass also relies on.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=test_transform),
    batch_size=batch_size, shuffle=False, **kwargs)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 92555835.86it/s]


Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 26788079.13it/s]


Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 22267091.44it/s]


Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 14665534.08it/s]


Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



In [5]:
from utils import test, train

In [20]:

def _log_lr(lr_scheduler):
    """Print the learning rate currently set for each optimizer parameter group.

    Uses the same message format that the schedulers' ``verbose=True`` option
    produced, so logs stay comparable with earlier runs.
    """
    for group_idx, lr in enumerate(lr_scheduler.get_last_lr()):
        print('Adjusting learning rate of group {} to {:.4e}.'.format(group_idx, lr))


# Start from a freshly initialised network so re-running this cell never
# continues training a previously trained model.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Multiply the learning rate by 0.1 every 5 epochs. The `verbose` argument is
# deprecated in recent PyTorch releases (since 2.2) in favour of
# `get_last_lr()`, so the learning rate is logged explicitly instead.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
_log_lr(scheduler)

for epoch in range(1, 21):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader, epoch)
    # Advance the schedule once per epoch, after the optimizer updates.
    scheduler.step()
    _log_lr(scheduler)

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.2121088057756424 batch_id=58: 100%|██████████| 59/59 [00:33<00:00,  1.76it/s]



Test set: Average loss: 0.0001, Accuracy: 9671/10000 (96.71%) Epoch: 1

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.1290278434753418 batch_id=58: 100%|██████████| 59/59 [00:26<00:00,  2.20it/s]



Test set: Average loss: 0.0001, Accuracy: 9761/10000 (97.61%) Epoch: 2

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.1234467625617981 batch_id=58: 100%|██████████| 59/59 [00:28<00:00,  2.10it/s]



Test set: Average loss: 0.0001, Accuracy: 9804/10000 (98.04%) Epoch: 3

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.09566562622785568 batch_id=58: 100%|██████████| 59/59 [00:27<00:00,  2.17it/s]



Test set: Average loss: 0.0001, Accuracy: 9829/10000 (98.29%) Epoch: 4

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.05244730785489082 batch_id=58: 100%|██████████| 59/59 [00:27<00:00,  2.15it/s]



Test set: Average loss: 0.0000, Accuracy: 9853/10000 (98.53%) Epoch: 5

Adjusting learning rate of group 0 to 1.0000e-03.


loss=0.07786116003990173 batch_id=58: 100%|██████████| 59/59 [00:28<00:00,  2.06it/s]



Test set: Average loss: 0.0000, Accuracy: 9856/10000 (98.56%) Epoch: 6

Adjusting learning rate of group 0 to 1.0000e-03.


loss=0.07531975954771042 batch_id=58: 100%|██████████| 59/59 [00:29<00:00,  2.01it/s]



Test set: Average loss: 0.0000, Accuracy: 9860/10000 (98.60%) Epoch: 7

Adjusting learning rate of group 0 to 1.0000e-03.


loss=0.06424839049577713 batch_id=58: 100%|██████████| 59/59 [00:28<00:00,  2.08it/s]



Test set: Average loss: 0.0000, Accuracy: 9860/10000 (98.60%) Epoch: 8

Adjusting learning rate of group 0 to 1.0000e-03.


loss=0.13328468799591064 batch_id=58: 100%|██████████| 59/59 [00:28<00:00,  2.11it/s]



Test set: Average loss: 0.0000, Accuracy: 9863/10000 (98.63%) Epoch: 9

Adjusting learning rate of group 0 to 1.0000e-03.


loss=0.05735478922724724 batch_id=58: 100%|██████████| 59/59 [00:27<00:00,  2.13it/s]



Test set: Average loss: 0.0000, Accuracy: 9862/10000 (98.62%) Epoch: 10

Adjusting learning rate of group 0 to 1.0000e-04.


loss=0.06688350439071655 batch_id=58: 100%|██████████| 59/59 [00:27<00:00,  2.14it/s]



Test set: Average loss: 0.0000, Accuracy: 9862/10000 (98.62%) Epoch: 11

Adjusting learning rate of group 0 to 1.0000e-04.


loss=0.0818043202161789 batch_id=58: 100%|██████████| 59/59 [00:28<00:00,  2.08it/s]



Test set: Average loss: 0.0000, Accuracy: 9862/10000 (98.62%) Epoch: 12

Adjusting learning rate of group 0 to 1.0000e-04.


loss=0.06326936185359955 batch_id=58: 100%|██████████| 59/59 [00:27<00:00,  2.15it/s]



Test set: Average loss: 0.0000, Accuracy: 9861/10000 (98.61%) Epoch: 13

Adjusting learning rate of group 0 to 1.0000e-04.


loss=0.09473282843828201 batch_id=58: 100%|██████████| 59/59 [00:27<00:00,  2.15it/s]



Test set: Average loss: 0.0000, Accuracy: 9862/10000 (98.62%) Epoch: 14

Adjusting learning rate of group 0 to 1.0000e-04.


loss=0.07888049632310867 batch_id=58: 100%|██████████| 59/59 [00:27<00:00,  2.12it/s]



Test set: Average loss: 0.0000, Accuracy: 9863/10000 (98.63%) Epoch: 15

Adjusting learning rate of group 0 to 1.0000e-05.


loss=0.07122716307640076 batch_id=58: 100%|██████████| 59/59 [00:26<00:00,  2.20it/s]



Test set: Average loss: 0.0000, Accuracy: 9862/10000 (98.62%) Epoch: 16

Adjusting learning rate of group 0 to 1.0000e-05.


loss=0.08236567676067352 batch_id=58: 100%|██████████| 59/59 [00:27<00:00,  2.12it/s]



Test set: Average loss: 0.0000, Accuracy: 9862/10000 (98.62%) Epoch: 17

Adjusting learning rate of group 0 to 1.0000e-05.


loss=0.08614931255578995 batch_id=58: 100%|██████████| 59/59 [00:27<00:00,  2.15it/s]



Test set: Average loss: 0.0000, Accuracy: 9862/10000 (98.62%) Epoch: 18

Adjusting learning rate of group 0 to 1.0000e-05.


loss=0.06726453453302383 batch_id=58: 100%|██████████| 59/59 [00:27<00:00,  2.15it/s]



Test set: Average loss: 0.0000, Accuracy: 9862/10000 (98.62%) Epoch: 19

Adjusting learning rate of group 0 to 1.0000e-05.


loss=0.08887985348701477 batch_id=58: 100%|██████████| 59/59 [00:26<00:00,  2.21it/s]



Test set: Average loss: 0.0000, Accuracy: 9862/10000 (98.62%) Epoch: 20

Adjusting learning rate of group 0 to 1.0000e-06.
