In [None]:
import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# 1. Hyper-parameters and Dataset

In [None]:
# Device configuration: run on the GPU when CUDA is available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Reproducibility: seed PyTorch's global random number generator so that weight
# initialisation and DataLoader shuffling repeat after "Restart & Run All".
# NOTE(review): this alone does not guarantee bit-identical results on GPU;
# see the PyTorch reproducibility notes if that is required.
seed = 42
torch.manual_seed(seed)

# Hyper-parameters
num_epochs = 80        # full passes over the training set
batch_size = 100       # samples per mini-batch (used for both loaders)
learning_rate = 0.001  # initial learning rate handed to the optimizer

In [None]:
# Mount Google Drive at /content/drive; the dataset directory used later in this
# notebook lives under that mount point.
# NOTE(review): `google.colab` is presumably only importable inside the Colab
# runtime, so this cell would fail elsewhere — confirm the target environment.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 1.1 Image preprocessing

**According to the paper:**  [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf).
1. We follow the simple data augmentation for training: 4 pixels are padded on each side, and a 32×32 crop is randomly sampled from the padded image or its horizontal flip. 
2. For testing, we only evaluate the single view of the original 32×32 image.

In [None]:
# Folder on the mounted Drive that holds (or will receive) the CIFAR-10 files
data_dir = '/content/drive/My Drive/PyTorch/Github_Series/02-intermediate/'

# Training-time augmentation, applied in this order:
#   pad 4 px per side -> random horizontal flip -> random 32x32 crop -> tensor
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),  # default flip probability is 0.5
    transforms.RandomCrop(32),
    transforms.ToTensor(),
])

# CIFAR-10 splits: the training split is augmented and downloaded on demand;
# the test split is only converted to tensors (single view of the original image).
train_dataset = torchvision.datasets.CIFAR10(
    root=data_dir, train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.CIFAR10(
    root=data_dir, train=False, transform=transforms.ToTensor())

# Mini-batch iterators: reshuffle the training data, keep test order fixed
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)

Files already downloaded and verified


# 2. Modeling and Training

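The architecture follows the paper [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf): an initial 3×3 convolution, then three stages of residual blocks with 16, 32 and 64 channels (the last two stages halve the spatial resolution), followed by global average pooling and a fully connected classifier.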

In [None]:
# 3x3 convolution
def conv3x3(in_channels, out_channels, stride=1):
  """Create a bias-free 3x3 convolution layer.

  padding=1 keeps the spatial size of the output equal to the input
  when stride is 1.
  """
  layer = nn.Conv2d(
      in_channels=in_channels,
      out_channels=out_channels,
      kernel_size=3,
      stride=stride,
      padding=1,
      bias=False,
  )
  return layer

# 1x1 convolution: used to match dimensions
def conv1x1(in_channels, out_channels, stride=1):
    """Create a bias-free 1x1 convolution layer, used to match dimensions."""
    layer = nn.Conv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
    return layer

# Residual block
class ResidualBlock(nn.Module):
    """Residual block: conv3x3 -> BN -> ReLU -> conv3x3 -> BN, add shortcut, ReLU.

    Args:
        in_channels: number of channels of the block input.
        out_channels: number of channels produced by both convolutions.
        stride: stride of the first convolution (the second always uses 1).
        downsample: optional module applied to the block input to build the
            shortcut branch. ``None`` means the input itself is the shortcut.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        # Batch normalization (BN) comes right after each convolution and
        # before the activation.
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(F(x) + shortcut(x))`` for input tensor ``x``."""
        # Compare against None explicitly instead of relying on truthiness:
        # a module that defines __len__ and reports length 0 is falsy, which
        # would silently skip a shortcut the caller asked for.
        if self.downsample is None:
            residual = x
        else:
            residual = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += residual
        # The second nonlinearity is applied after the addition, as shown in
        # Figure 5 of the paper.
        out = self.relu(out)
        return out

# ResNet
class ResNet(nn.Module):
    """Small ResNet: a 3x3 conv stem, three residual stages, pooling and a linear head.

    Args:
        layers: sequence whose first three entries give the number of residual
            blocks in stage 1, 2 and 3.
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, layers, num_classes=10):
        super().__init__()
        # Stem: 3 input channels -> 16 feature maps
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # Three stages of residual blocks; stages 2 and 3 start with stride 2
        self.layer1 = self.__make_layer(16, 16, layers[0])
        self.layer2 = self.__make_layer(16, 32, layers[1], 2)
        self.layer3 = self.__make_layer(32, 64, layers[2], 2)
        # Head: global average pooling followed by the classifier
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def __make_layer(self, in_channels, out_channels, layer_num, stride=1):
        """Stack residual blocks into one stage.

        The first block maps in_channels -> out_channels with the given stride;
        the remaining ``layer_num - 1`` blocks keep out_channels and stride 1.
        """
        # The first block needs a projection shortcut (1x1 conv + BN) whenever
        # it changes the resolution or the channel count.
        shape_preserved = (stride == 1) and (in_channels == out_channels)
        if shape_preserved:
            shortcut = None
        else:
            shortcut = nn.Sequential(
                conv1x1(in_channels, out_channels, stride),
                nn.BatchNorm2d(out_channels),
            )

        blocks = [ResidualBlock(in_channels, out_channels, stride, shortcut)]
        blocks.extend(
            ResidualBlock(out_channels, out_channels)
            for _ in range(layer_num - 1)
        )
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Map a batch of images to class scores of shape (batch, num_classes)."""
        out = self.relu(self.bn(self.conv(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            out = stage(out)
        out = self.avg_pool(out)
        out = out.flatten(start_dim=1)
        return self.fc(out)

In [None]:
# Build the residual network (two blocks per stage) on the selected device
model = ResNet([2, 2, 2]).to(device)

# Objective and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


def update_lr(optimizer, lr):
  """Set the learning rate of every parameter group in `optimizer` to `lr`."""
  for group in optimizer.param_groups:
    group['lr'] = lr


# Training loop
total_step = len(train_loader)
curr_lr = learning_rate
for epoch in range(num_epochs):
  for batch_id, (images, labels) in enumerate(train_loader):
    # Keep model and data on the same device
    images, labels = images.to(device), labels.to(device)

    # Forward pass
    output = model(images)
    loss = loss_fn(output, labels)

    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the current mini-batch loss every 100 steps
    step = batch_id + 1
    if step % 100 == 0:
      message = 'Epoch: [{}/{}], Step: [{}/{}], Loss: {:.4f}'
      print(message.format(epoch+1, num_epochs, step, total_step, loss.item()))

  # Learning-rate decay: divide by 3 after every 20th epoch
  if (epoch+1) % 20 == 0:
    curr_lr /= 3
    update_lr(optimizer, curr_lr)

# 3. Test the model

In [None]:
# Evaluate the trained model on the test split
model.eval()  # switch layers such as BatchNorm to inference behaviour
correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for evaluation
  for images, labels in test_loader:
    # Keep model and data on the same device
    images, labels = images.to(device), labels.to(device)

    # Predicted class = index of the largest score along the class dimension
    output = model(images)
    pred = output.max(dim=1)[1]

    total += labels.size(0)
    correct += (pred == labels).sum().item()

print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))

Accuracy of the model on the test images: 88.6 %
