In [1]:
import numpy as np
import torch

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt
import torchvision
import torchvision.transforms as transforms

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
batch_size = 128

In [3]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [27]:
class ResLayer(nn.Module):
    def __init__(self, in_channels, out_channels, kernel=3):
        super().__init__()
        
        stride = out_channels // in_channels
        self.non_linear_pipe = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
        )
        
        if stride == 1:
            self.shortcut_pipe = nn.Sequential()
        else:
            self.shortcut_pipe = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels),
            )          
    
    def forward(self, x):
        Fx = self.non_linear_pipe(x)
        y = Fx + self.shortcut_pipe(x)
        Hx = F.relu(y)
        return Hx

class ResStack(nn.Module):
    def __init__(self, n_layers, n_filters, in_channels):
        super().__init__()
        
        layers = []
        in_channel_list = [in_channels] + (n_layers-1)*[n_filters]
        for id_layer in range(0, n_layers):
            layers.append(ResLayer(in_channel_list[id_layer], n_filters))
        self.layers = nn.Sequential(*layers) 
    
    def forward(self, x):
        return self.layers(x)
    

class ResNet18(nn.Module):
    def __init__(self, n_filters_0=16, input_size=32, n_classes=10):
        super().__init__()
        
        self.initial_pipe = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=n_filters_0, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(n_filters_0),
        )
        
        stacks = []
        for id_stack in range(0,3):
            n_layers = 2
            in_channels = n_filters_0*(2**max(0,id_stack-1))
            n_filters = n_filters_0*(2**id_stack)
            stacks.append(ResStack(n_layers, n_filters, in_channels))
        self.stack = nn.Sequential(*stacks)
        
        self.avg_pool_layer = nn.AvgPool2d(input_size//(2**2))
        self.linear_layers = nn.Linear(n_filters_0*(2**2), n_classes)
    
    def forward(self, x):
        y = self.initial_pipe(x)
        y = self.stack(y)
        y = self.avg_pool_layer(y)
        y = self.linear_layers(y.view(y.size(0),-1))
        return y

In [36]:
resnet = ResNet18().to(device)

In [37]:
res_optimizer = optim.SGD(resnet.parameters(), lr=0.0001, momentum=0.9)

In [38]:
for epoch in range(164):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        res_optimizer.zero_grad()

        # forward + backward + optimize
        outputs = resnet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        res_optimizer.step()

        # print statistics
        running_loss += (loss.item() - running_loss)/(i+1)
        print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss), end="\r")
            
    print('')

print('Finished Training')

[1,   391] loss: 2.253
[2,   391] loss: 2.127
[3,   391] loss: 2.026
[4,   391] loss: 1.950
[5,   391] loss: 1.891
[6,   391] loss: 1.840
[7,   391] loss: 1.794
[8,   391] loss: 1.753
[9,   391] loss: 1.717
[10,   391] loss: 1.687
[11,   391] loss: 1.659
[12,   391] loss: 1.634
[13,   391] loss: 1.614
[14,   391] loss: 1.591
[15,   391] loss: 1.571
[16,   391] loss: 1.552
[17,   391] loss: 1.534
[18,   391] loss: 1.514
[19,   391] loss: 1.498
[20,   391] loss: 1.480
[21,   391] loss: 1.465
[22,   391] loss: 1.448
[23,   391] loss: 1.431
[24,   391] loss: 1.415
[25,   391] loss: 1.400
[26,   391] loss: 1.385
[27,   391] loss: 1.371
[28,   391] loss: 1.357
[29,   391] loss: 1.342
[30,   391] loss: 1.326
[31,   391] loss: 1.311
[32,   391] loss: 1.297
[33,   391] loss: 1.285
[34,   391] loss: 1.270
[35,   391] loss: 1.257
[36,   391] loss: 1.243
[37,   391] loss: 1.228
[38,   391] loss: 1.216
[39,   391] loss: 1.203
[40,   391] loss: 1.191
[41,   391] loss: 1.177
[42,   391] loss: 1.166
[

In [None]:
PATH_shallowRN = './cifar_resnet_shallow.pth'
torch.save(resnet.state_dict(), PATH_shallowRN)

In [None]:
correct_RN = 0
total_RN = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = resnet(images)
        _, predicted = torch.max(outputs.data, 1)
        total_RN += labels.size(0)
        correct_RN += (predicted == labels).sum().item()

print('Accuracy of ResNet18 on the 10000 test images: %d %%' % (
    100 * correct_RN / total_RN))