In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data as D
from torch.utils.data.sampler import SubsetRandomSampler

import torchsummary
import torchvision
import torchvision.transforms as transforms
from torchvision.utils import save_image

import numpy as np
import os
import glob
import PIL
from PIL import Image
import random

In [2]:
print(torch.__version__)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

1.7.0+cu101
cuda:0


In [32]:
# Hyperparameters
random_seed = 10
batch_size = 64
validation_ratio = 0.1
learning_rate = 0.1 # learning rate decay
num_epoch = 30 # originally, 300

In [4]:
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),(0.2470, 0.2435, 0.2616))])
                           
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),(0.2470, 0.2435, 0.2616))])                    

train_set = torchvision.datasets.CIFAR10(
    root="./data", train=True, download=True, transform=transform_train)
valid_set = torchvision.datasets.CIFAR10(
    root="./data", train=True, download=True, transform=transform_test)
test_set = torchvision.datasets.CIFAR10(
    root="./data", train=False, download=True, transform=transform_test)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified


In [6]:
num_train = len(train_set)
indices = list(range(num_train))
split = int(np.floor(validation_ratio * num_train))

np.random.seed(random_seed)
np.random.shuffle(indices)

train_idx, valid_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(
    valid_set, batch_size=batch_size, sampler=valid_sampler)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=batch_size)

classes = ("plane", "car", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck")

Basic Modules

In [7]:
# composite function
class bn_relu_conv(nn.Module):
  def __init__(self, nin, nout, kernel_size, stride, padding, bias=False):
    super(bn_relu_conv, self).__init__()
    self.batch_norm = nn.BatchNorm2d(nin)
    self.relu = nn.ReLU(inplace = True)
    self.conv = nn.Conv2d(nin, nout, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias)
  
  def forward(self, x):
    out = self.batch_norm(x)
    out = self.relu(out)
    out = self.conv(out)
    return out

In [8]:
# bottleneck
class bottleneck_layer(nn.Sequential):
  def __init__(self, nin, growth_rate, drop_rate=0.2):
    super(bottleneck_layer, self).__init__()
    self.add_module("conv_1x1", bn_relu_conv(nin=nin, nout=growth_rate*4, kernel_size=1, stride=1, padding=0, bias=False))
    self.add_module("conv 3x3", bn_relu_conv(nin=growth_rate*4, nout=growth_rate, kernel_size=3, stride=1, padding=1, bias=False))
    self.drop_rate = drop_rate
  
  def forward(self, x):
    bottleneck_output = super(bottleneck_layer, self).forward(x)
    # inspired by stochastic depth of ResNet
    if self.drop_rate > 0:
      bottleneck_output = F.dropout(bottleneck_output, p=self.drop_rate, training=self.training)
    
    # concatenation instead of addition of ResNet
    bottleneck_output = torch.cat((x, bottleneck_output), 1)
    return bottleneck_output

In [9]:
# transition
class transition_layer(nn.Sequential):
  def __init__(self, nin, theta=0.5):
    super(transition_layer, self).__init__()
    self.add_module("conv 1x1", bn_relu_conv(nin=nin, nout=int(nin*theta), kernel_size=1, stride=1, padding=0, bias=False))
    self.add_module("avg_pool 2x2", nn.AvgPool2d(kernel_size=2, stride=2, padding=0))

In [14]:
# dense block
class DenseBlock(nn.Sequential):
  def __init__(self, nin, num_bottleneck_layers, growth_rate, drop_rate=0.2):
    super(DenseBlock, self).__init__()

    for i in range(num_bottleneck_layers):
      nin_bottleneck_layer = nin + growth_rate * i
      self.add_module("bottleneck_layer_%d" %i, bottleneck_layer(nin=nin_bottleneck_layer, growth_rate=growth_rate, drop_rate=drop_rate))

DenseNet

In [24]:
class DenseNet(nn.Module):
  def __init__(self, growth_rate=12, num_layers=100, theta=0.5, drop_rate=0.2, num_classes=10):
    super(DenseNet, self).__init__()
    assert (num_layers - 4) % 6 == 0 

    num_bottleneck_layers = (num_layers - 4) // 6
    
    self.dense_init = nn.Conv2d(3, growth_rate*2, kernel_size=3, stride=1, padding=1, bias=True)
    
    self.dense_block_1 = DenseBlock(nin=growth_rate*2, num_bottleneck_layers=num_bottleneck_layers, growth_rate=growth_rate, drop_rate=drop_rate)
    nin_transition_layer_1 = (growth_rate) * 2 + (growth_rate * num_bottleneck_layers)
    self.transition_layer_1 = transition_layer(nin=nin_transition_layer_1, theta=theta)

    self.dense_block_2 = DenseBlock(nin=int(nin_transition_layer_1*theta), num_bottleneck_layers=num_bottleneck_layers, growth_rate=growth_rate, drop_rate=drop_rate)
    nin_transition_layer_2 = int(nin_transition_layer_1*theta) + (growth_rate * num_bottleneck_layers)
    self.transition_layer_2 = transition_layer(nin=nin_transition_layer_2, theta=theta)

    self.dense_block_3 = DenseBlock(nin=int(nin_transition_layer_2*theta), num_bottleneck_layers=num_bottleneck_layers, growth_rate=growth_rate, drop_rate=drop_rate)
    nin_fc_layer = int(nin_transition_layer_2 * theta) + (growth_rate * num_bottleneck_layers)
    self.fc_layer = nn.Linear(nin_fc_layer, num_classes)
  
  def forward(self, x):
    dense_init_output = self.dense_init(x)

    dense_block_1_output = self.dense_block_1(dense_init_output)
    transition_layer_1_output = self.transition_layer_1(dense_block_1_output)

    dense_block_2_output = self.dense_block_2(transition_layer_1_output)
    transition_layer_2_output = self.transition_layer_2(dense_block_2_output)

    dense_block_3_output = self.dense_block_3(transition_layer_2_output)
    global_avg_pool_output = F.adaptive_avg_pool2d(dense_block_3_output, (1,1))
    global_avg_pool_output_flat = global_avg_pool_output.view(global_avg_pool_output.size(0), -1)

    output = self.fc_layer(global_avg_pool_output_flat)
    return output

In [25]:
def DenseNetBC_100_12():
  return DenseNet(growth_rate=12, num_layers=100)
def DenseNetBC_250_24():
  return DenseNet(growth_rate=24, num_layers=250)
def DenseNetBC_190_40():
  return DenseNet(growth_rate=40, num_layers=190)

In [26]:
net = DenseNetBC_100_12()
net.to(device)

DenseNet(
  (dense_init): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (dense_block_1): DenseBlock(
    (bottleneck_layer_0): bottleneck_layer(
      (conv_1x1): bn_relu_conv(
        (batch_norm): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
      )
      (conv 3x3): bn_relu_conv(
        (batch_norm): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
    )
    (bottleneck_layer_1): bottleneck_layer(
      (conv_1x1): bn_relu_conv(
        (batch_norm): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
      )
   

In [27]:
torchsummary.summary(net, (3,32,32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 24, 32, 32]             672
       BatchNorm2d-2           [-1, 24, 32, 32]              48
              ReLU-3           [-1, 24, 32, 32]               0
            Conv2d-4           [-1, 48, 32, 32]           1,152
      bn_relu_conv-5           [-1, 48, 32, 32]               0
       BatchNorm2d-6           [-1, 48, 32, 32]              96
              ReLU-7           [-1, 48, 32, 32]               0
            Conv2d-8           [-1, 12, 32, 32]           5,184
      bn_relu_conv-9           [-1, 12, 32, 32]               0
      BatchNorm2d-10           [-1, 36, 32, 32]              72
             ReLU-11           [-1, 36, 32, 32]               0
           Conv2d-12           [-1, 48, 32, 32]           1,728
     bn_relu_conv-13           [-1, 48, 32, 32]               0
      BatchNorm2d-14           [-1, 48,

Training

In [33]:
criterion = nn.CrossEntropyLoss()
# stochastic gradient descent
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9) 
# learning rate decay
lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=[int(num_epoch*0.5), int(num_epoch*0.75)])

for epoch in range(num_epoch):
  lr_scheduler.step()

  running_loss = 0.0
  for i, data in enumerate(train_loader, 0):
    inputs, labels = data
    inputs, labels = inputs.to(device), labels.to(device)

    optimizer.zero_grad()
    
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()

    if i % show_period == show_period - 1:
      print("(Epoch, batch): " + str((epoch+1, (i+1) * batch_size)) + " Loss: " + str(running_loss / show_period))
      running_loss = 0.0
    
  
  # validation 
  correct = 0
  total = 0
  for i, data in enumerate(valid_loader, 0):
    inputs, labels = data
    inputs, labels = inputs.to(device), labels.to(device)
    outputs  = net(inputs)

    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
  
  print("Epoch: " + str(epoch+1) +  " Accuracy: " + str(100 * correct / total))

print("Finished Training")



(Epoch, batch): (1, 6400) Loss: 0.6431474420428276
(Epoch, batch): (1, 12800) Loss: 0.6405600637197495
(Epoch, batch): (1, 19200) Loss: 0.647274187207222
(Epoch, batch): (1, 25600) Loss: 0.6351401954889297
(Epoch, batch): (1, 32000) Loss: 0.6190198501944542
(Epoch, batch): (1, 38400) Loss: 0.6107663002610206
(Epoch, batch): (1, 44800) Loss: 0.5876800894737244
Epoch: 1 Accuracy: 75.58
(Epoch, batch): (2, 6400) Loss: 0.6120308634638786
(Epoch, batch): (2, 12800) Loss: 0.5869823995232583
(Epoch, batch): (2, 19200) Loss: 0.5908719611167907
(Epoch, batch): (2, 25600) Loss: 0.5688532775640488
(Epoch, batch): (2, 32000) Loss: 0.5668252521753311
(Epoch, batch): (2, 38400) Loss: 0.5603009551763535
(Epoch, batch): (2, 44800) Loss: 0.5520080584287643
Epoch: 2 Accuracy: 75.78
(Epoch, batch): (3, 6400) Loss: 0.5714077201485633
(Epoch, batch): (3, 12800) Loss: 0.5297363828122615
(Epoch, batch): (3, 19200) Loss: 0.5473501151800155
(Epoch, batch): (3, 25600) Loss: 0.5537824577093124
(Epoch, batch): (3

Test

In [36]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

correct = 0
total = 0

with torch.no_grad():
  for data in test_loader:
    images, labels = data
    images, labels = images.to(device), labels.to(device)
    outputs = net(images)
    _, predicted = torch.max(outputs, 1)
    c = (predicted == labels).squeeze()

    for i in range(labels.shape[0]):
      label = labels[i]
      class_correct[label] += c[i].item()
      class_total[label] += 1

      total += labels.size(0)
      correct += (predicted == labels).sum().item()
  
print("Accuracy of the network on test images: %d" % (100 * correct / total))

Accuracy of the network on test images: 86
