In [1]:
# !pip install --upgrade torch torchvision torchinfo tqdm matplotlib

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchinfo import summary

In [3]:
# Device
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print("Device Selected:", device)

Device Selected: cuda


In [4]:
torch.manual_seed(42)
batch_size = 64

kwargs = {'num_workers': 3, 'pin_memory': True} if device != 'cpu' else {}

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=False, **kwargs)



In [5]:
DROP = 0.05

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.cblock1 = nn.Sequential(
            nn.Conv2d(1, 8, 3, padding=1, bias=False),    # Input -  28x28x1, Output -  28x28x8, RF - 3x3
            nn.Dropout(DROP),
            nn.ReLU(),
            nn.BatchNorm2d(8),
            nn.Conv2d(8, 8, 3, padding=1, bias=False),    # Input -  28x28x8, Output -  28x28x8, RF - 5x5
            nn.Dropout(DROP),
            nn.ReLU()
        )

        self.tblock1 = nn.Sequential(
            nn.MaxPool2d(2, 2)                            # Input -  28x28x8, Output -  14x14x8, RF - 6x6
        )

        self.cblock2 = nn.Sequential(
            nn.BatchNorm2d(8),
            nn.Conv2d(8, 16, 3, padding=1, bias=False),   # Input -  14x14x8, Output - 14x14x16, RF - 10x10
            nn.Dropout(DROP),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Conv2d(16, 16, 3, padding=1, bias=False),  # Input - 14x14x16, Output - 14x14x16, RF - 14x14
            nn.Dropout(DROP),
            nn.ReLU()
        )

        self.tblock2 = nn.Sequential(
            nn.MaxPool2d(2, 2)                            # Input - 14x14x16, Output -   7x7x16, RF - 16x16
        )

        self.cblock3 = nn.Sequential(
            nn.BatchNorm2d(16),
            nn.Conv2d(16, 32, 3, padding=1, bias=False),  # Input -   7x7x16, Output -   7x7x32, RF - 24x24
            nn.Dropout(DROP),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 32, 3, padding=1, bias=False),  # Input -   7x7x32, Output -   7x7x32, RF - 32x32
            nn.Dropout(DROP),
            nn.ReLU()
        )

        self.outblock = nn.Sequential(
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 32, 1, bias=False),             # Input -   7x7x32, Output -   7x7x32, RF - 32x32
            nn.Dropout(DROP),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 10, 1),                         # Input -   7x7x32, Output -   7x7x10, RF - 32x32
            nn.ReLU(),
            nn.AvgPool2d(7, 7),                           # Input -   7x7x10, Output -   1x1x10
            nn.Flatten(),
            nn.LogSoftmax()
        )

    def forward(self, x):
        x = self.cblock1(x)
        x = self.tblock1(x)
        x = self.cblock2(x)
        x = self.tblock2(x)
        x = self.cblock3(x)
        x = self.outblock(x)
        return x

In [6]:
model = Net().to(device)
summary(model, input_size=(batch_size, 1, 28, 28),
        col_names=["input_size", "output_size", "num_params", "params_percent"])

  input = module(input)


Layer (type:depth-idx)                   Input Shape               Output Shape              Param #                   Param %
Net                                      [64, 1, 28, 28]           [64, 10]                  --                             --
├─Sequential: 1-1                        [64, 1, 28, 28]           [64, 8, 28, 28]           --                             --
│    └─Conv2d: 2-1                       [64, 1, 28, 28]           [64, 8, 28, 28]           72                          0.37%
│    └─Dropout: 2-2                      [64, 8, 28, 28]           [64, 8, 28, 28]           --                             --
│    └─ReLU: 2-3                         [64, 8, 28, 28]           [64, 8, 28, 28]           --                             --
│    └─BatchNorm2d: 2-4                  [64, 8, 28, 28]           [64, 8, 28, 28]           16                          0.08%
│    └─Conv2d: 2-5                       [64, 8, 28, 28]           [64, 8, 28, 28]           576               

In [7]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        pbar.set_description(desc= f'Train set: Average loss={loss.item()} batch_id={batch_idx}')

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

    return test_loss

In [8]:
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=1, verbose=True)

for epoch in range(0, 20):
    print("Epoch {}: ".format(epoch + 1))
    train(model, device, train_loader, optimizer, epoch)
    test_loss = test(model, device, test_loader)
    scheduler.step(test_loss)

Epoch 1: 


Train set: Average loss=0.3112333118915558 batch_id=937: 100%|██████████| 938/938 [00:25<00:00, 36.14it/s]



Test set: Average loss: 0.0700, Accuracy: 9800/10000 (98.00%)

Epoch 2: 


Train set: Average loss=0.09517934918403625 batch_id=937: 100%|██████████| 938/938 [00:20<00:00, 45.48it/s]



Test set: Average loss: 0.0476, Accuracy: 9856/10000 (98.56%)

Epoch 3: 


Train set: Average loss=0.0555470809340477 batch_id=937: 100%|██████████| 938/938 [00:20<00:00, 44.77it/s]



Test set: Average loss: 0.0340, Accuracy: 9896/10000 (98.96%)

Epoch 4: 


Train set: Average loss=0.05665958672761917 batch_id=937: 100%|██████████| 938/938 [00:21<00:00, 42.86it/s]



Test set: Average loss: 0.0375, Accuracy: 9893/10000 (98.93%)

Epoch 5: 


Train set: Average loss=0.005381383001804352 batch_id=937: 100%|██████████| 938/938 [00:22<00:00, 41.28it/s]



Test set: Average loss: 0.0240, Accuracy: 9930/10000 (99.30%)

Epoch 6: 


Train set: Average loss=0.08005742728710175 batch_id=937: 100%|██████████| 938/938 [00:22<00:00, 42.51it/s]



Test set: Average loss: 0.0254, Accuracy: 9924/10000 (99.24%)

Epoch 7: 


Train set: Average loss=0.003386575961485505 batch_id=937: 100%|██████████| 938/938 [00:22<00:00, 42.51it/s]



Test set: Average loss: 0.0327, Accuracy: 9896/10000 (98.96%)

Epoch 00007: reducing learning rate of group 0 to 1.0000e-03.
Epoch 8: 


Train set: Average loss=0.0019814581610262394 batch_id=937: 100%|██████████| 938/938 [00:21<00:00, 43.88it/s]



Test set: Average loss: 0.0191, Accuracy: 9944/10000 (99.44%)

Epoch 9: 


Train set: Average loss=0.06423667818307877 batch_id=937: 100%|██████████| 938/938 [00:20<00:00, 44.69it/s]



Test set: Average loss: 0.0206, Accuracy: 9934/10000 (99.34%)

Epoch 10: 


Train set: Average loss=0.07756365835666656 batch_id=937: 100%|██████████| 938/938 [00:21<00:00, 43.56it/s]



Test set: Average loss: 0.0189, Accuracy: 9945/10000 (99.45%)

Epoch 11: 


Train set: Average loss=0.011073358356952667 batch_id=937: 100%|██████████| 938/938 [00:22<00:00, 42.38it/s]



Test set: Average loss: 0.0194, Accuracy: 9939/10000 (99.39%)

Epoch 12: 


Train set: Average loss=0.08013952523469925 batch_id=937: 100%|██████████| 938/938 [00:22<00:00, 41.73it/s]



Test set: Average loss: 0.0202, Accuracy: 9936/10000 (99.36%)

Epoch 00012: reducing learning rate of group 0 to 1.0000e-04.
Epoch 13: 


Train set: Average loss=0.005549655761569738 batch_id=937: 100%|██████████| 938/938 [00:22<00:00, 42.49it/s]



Test set: Average loss: 0.0194, Accuracy: 9942/10000 (99.42%)

Epoch 14: 


Train set: Average loss=0.005162339191883802 batch_id=937: 100%|██████████| 938/938 [00:22<00:00, 42.23it/s]



Test set: Average loss: 0.0190, Accuracy: 9944/10000 (99.44%)

Epoch 00014: reducing learning rate of group 0 to 1.0000e-05.
Epoch 15: 


Train set: Average loss=0.0011867674766108394 batch_id=937: 100%|██████████| 938/938 [00:21<00:00, 43.07it/s]



Test set: Average loss: 0.0193, Accuracy: 9938/10000 (99.38%)

Epoch 16: 


Train set: Average loss=0.08149804919958115 batch_id=937: 100%|██████████| 938/938 [00:21<00:00, 44.41it/s]



Test set: Average loss: 0.0193, Accuracy: 9943/10000 (99.43%)

Epoch 00016: reducing learning rate of group 0 to 1.0000e-06.
Epoch 17: 


Train set: Average loss=0.0007700475980527699 batch_id=937: 100%|██████████| 938/938 [00:21<00:00, 44.63it/s]



Test set: Average loss: 0.0180, Accuracy: 9948/10000 (99.48%)

Epoch 18: 


Train set: Average loss=0.01887049525976181 batch_id=937: 100%|██████████| 938/938 [00:22<00:00, 42.54it/s]



Test set: Average loss: 0.0178, Accuracy: 9944/10000 (99.44%)

Epoch 19: 


Train set: Average loss=0.006549663841724396 batch_id=937: 100%|██████████| 938/938 [00:22<00:00, 42.20it/s]



Test set: Average loss: 0.0194, Accuracy: 9943/10000 (99.43%)

Epoch 20: 


Train set: Average loss=0.02715657837688923 batch_id=937: 100%|██████████| 938/938 [00:22<00:00, 42.28it/s]



Test set: Average loss: 0.0177, Accuracy: 9948/10000 (99.48%)

