In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import collections
import datetime
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

torch.set_printoptions(edgeitems=2)
torch.manual_seed(123)

device = (torch.device('cuda') if torch.cuda.is_available()
          else torch.device('cpu'))
print(f"Training on device {device}.")

Training on device cuda.


In [2]:
#Setting up the data needed
class_names = ['airplane','automobile','bird','cat','deer',
'dog','frog','horse','ship','truck']
#getting data
data_path="C:/Users/rosam/OneDrive/Desktop/cifar-10-batches-py"
tensor_cifar10 = datasets.CIFAR10(data_path, train=True, download=True, 
                                  transform=transforms.Compose([
                                                                transforms.ToTensor(),
                                                                transforms.Normalize((0.4915, 0.4823, 0.4468), (0.2470, 0.2435, 0.2616))]))
imgs = torch.stack([img_t for img_t, _ in tensor_cifar10], dim=3)
#imgs.view(3, -1).mean(dim=1) => tensor([0.4914, 0.4822, 0.4465])
#imgs.view(3, -1).std(dim=1) => tensor([0.2470, 0.2435, 0.2616])

tensor_cifar10Val = datasets.CIFAR10(data_path, train=False, download=False, 
                                     transform=transforms.Compose([
                                                                   transforms.ToTensor(),
                                                                   transforms.Normalize((0.4942, 0.4851, 0.4504), (0.2467, 0.2429, 0.2616))]))
imgsV = torch.stack([img_t for img_t, _ in tensor_cifar10Val], dim=3)
#imgsV.view(3, -1).mean(dim=1) => tensor([0.4942, 0.4851, 0.4504])
#imgsV.view(3, -1).std(dim=1) => tensor([0.2467, 0.2429, 0.2616])

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to C:/Users/rosam/OneDrive/Desktop/cifar-10-batches-py/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting C:/Users/rosam/OneDrive/Desktop/cifar-10-batches-py/cifar-10-python.tar.gz to C:/Users/rosam/OneDrive/Desktop/cifar-10-batches-py


In [3]:
#ResNet training for adding weight decay
#Use ResNet10 model
def training_loop_WD(n_epochs, optimizer, model, loss_fn,
                        train_loader):
    training_start_time = time.time()
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            l2_lambda = 0.001
            l2_norm = sum(p.pow(2.0).sum()
                          for p in model.parameters())  # <1>
            loss = loss + l2_lambda * l2_norm

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            loss_train += loss.item()
        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))
    print('Training finished, took {:.2f}s'.format(time.time() - training_start_time))

#Defining function to validate accuracy
def validate(model, train_loader, val_loader):
    accdict = {}
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():
            for imgs, labels in loader:
                imgs = imgs.to(device=device)
                labels = labels.to(device=device)
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1) # <1>
                total += labels.shape[0]
                correct += int((predicted == labels).sum())

        print("Accuracy {}: {:.2f}".format(name , correct / total))
        accdict[name] = correct / total
    return accdict

In [4]:
#Setting up Dataloader
#used for training, shuffle and oraganize data in minibatches
train_loader = torch.utils.data.DataLoader(tensor_cifar10, batch_size=64,shuffle=True)
#used for accuracy measurement
val_loader = torch.utils.data.DataLoader(tensor_cifar10Val, batch_size=64, shuffle=False)
acc_train_loader = torch.utils.data.DataLoader(tensor_cifar10, batch_size=64, shuffle=False)

In [5]:
#ResNet block
class ResBlock(nn.Module):
    def __init__(self, n_chans):
        super(ResBlock, self).__init__()
        self.conv = nn.Conv2d(n_chans, n_chans, kernel_size=3, padding=1, bias=False)
        self.batch_norm = nn.BatchNorm2d(num_features=n_chans)
        torch.nn.init.kaiming_normal_(self.conv.weight, nonlinearity='relu')
        torch.nn.init.constant_(self.batch_norm.weight, 0.5)
        torch.nn.init.zeros_(self.batch_norm.bias)

    def forward(self, x):
        out = self.conv(x)
        out = self.batch_norm(out)
        out = torch.relu(out)
        return out + x

#ResNet model
class ResNet10(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.resblocks = nn.Sequential(*(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

model =  ResNet10(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()
#Size of model
numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list

(76074, [864, 32, 9216, 32, 32, 65536, 32, 320, 10])

In [6]:
training_loop_WD(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2022-03-30 21:09:46.337679 Epoch 1, Training loss 1.8742518589624664
2022-03-30 21:16:17.023369 Epoch 10, Training loss 1.188954090828176
2022-03-30 21:23:28.312102 Epoch 20, Training loss 0.9812968999071194
2022-03-30 21:30:38.597176 Epoch 30, Training loss 0.8571398002869638
2022-03-30 21:37:49.445415 Epoch 40, Training loss 0.7673872204311668
2022-03-30 21:44:59.600929 Epoch 50, Training loss 0.6983939158489637
2022-03-30 21:52:08.851069 Epoch 60, Training loss 0.6387291414963315
2022-03-30 21:59:17.797558 Epoch 70, Training loss 0.5820658166923791
2022-03-30 22:06:28.041871 Epoch 80, Training loss 0.5360758844620127
2022-03-30 22:13:36.220965 Epoch 90, Training loss 0.4912638781625596
2022-03-30 22:20:44.719023 Epoch 100, Training loss 0.45131040267322375
2022-03-30 22:27:56.805827 Epoch 110, Training loss 0.4134503111167027
2022-03-30 22:35:09.825251 Epoch 120, Training loss 0.38932373325157044
2022-03-30 22:42:22.227369 Epoch 130, Training loss 0.356737741652657
2022-03-30 22:49:

In [7]:
validate(model,  acc_train_loader, val_loader)

Accuracy train: 0.98
Accuracy val: 0.64


{'train': 0.9793, 'val': 0.6395}