In [1]:

%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import collections

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
class_names = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']

In [3]:
from torchvision import datasets, transforms
data_path = '../data-unversioned/p1ch6/'
cifar10 = datasets.CIFAR10(
    data_path, train=True, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data-unversioned/p1ch6/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting ../data-unversioned/p1ch6/cifar-10-python.tar.gz to ../data-unversioned/p1ch6/


In [4]:
cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

Files already downloaded and verified


In [5]:
class resblock(nn.Module):
    def __init__(self, n):
        super(resblock, self).__init__()
        self.conv = nn.Conv2d(n, n, kernel_size=3,
                              padding=1, bias=False)  
        self.batch_norm = nn.BatchNorm2d(num_features=n)
        '''torch.nn.init.kaiming_normal_(self.conv.weight,
                                      nonlinearity='relu')  # <2>
        torch.nn.init.constant_(self.batch_norm.weight, 0.5)
        torch.nn.init.zeros_(self.batch_norm.bias)'''

    def forward(self, x):
        out = self.conv(x)
        out = self.batch_norm(out)
        out = torch.relu(out)
        return out + x

In [6]:
class deep(nn.Module):
    def __init__(self, n1=32, n_blocks=10):
        super().__init__()
        self.n1 = n1
        self.conv1 = nn.Conv2d(3, n1, kernel_size=3, padding=1)
        self.resblocks = nn.Sequential(
            *(n_blocks * [resblock(n=n1)]))
        self.fc1 = nn.Linear(8 * 8 * n1, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(-1, 8 * 8 * self.n1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [7]:

device = (torch.device('cuda') if torch.cuda.is_available()
          else torch.device('cpu'))
print(f"Training on device {device}.")

Training on device cuda.


In [8]:
import datetime

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)  # <1>
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()

        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))

In [9]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=True)

model =  deep().to(device=device)# <1>
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

In [10]:
training_loop(  
    n_epochs = 100,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


2021-07-28 12:46:48.611952 Epoch 1, Training loss 2.3159351675101862
2021-07-28 12:48:51.426712 Epoch 10, Training loss 1.2141340057868177
2021-07-28 12:51:07.649465 Epoch 20, Training loss 0.8525501969449051
2021-07-28 12:53:23.922241 Epoch 30, Training loss 0.7394904084598927
2021-07-28 12:55:43.709836 Epoch 40, Training loss 0.6055725093578439
2021-07-28 12:57:58.111331 Epoch 50, Training loss 0.5347025550882835
2021-07-28 13:00:12.421280 Epoch 60, Training loss 0.49188517281771316
2021-07-28 13:02:29.205341 Epoch 70, Training loss 0.4443377943333153
2021-07-28 13:04:44.428484 Epoch 80, Training loss 0.4130946373009621
2021-07-28 13:07:01.022181 Epoch 90, Training loss 0.3808805112872282
2021-07-28 13:09:15.847943 Epoch 100, Training loss 0.3639952096888019


In [12]:

train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                         shuffle=False)

def validate(model, train_loader, val_loader):
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():  # <1>
            for imgs, labels in loader:
                imgs = imgs.to(device=device)
                labels = labels.to(device=device)
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1) # <2>
                total += labels.shape[0]  # <3>
                correct += int((predicted == labels).sum())  # <4>

        print("Accuracy {}: {:.2f}".format(name , correct / total))

validate(model, train_loader, val_loader)

Accuracy train: 0.87
Accuracy val: 0.69


In [13]:
numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list

(76074, [864, 32, 9216, 32, 32, 65536, 32, 320, 10])