In [1]:
!pip install torch torchvision



In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split
import torchvision
from torchvision import datasets, transforms
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
import torchvision.models as models
# from ray import tune
# from ray.tune import CLIReporter
# from ray.tune.schedulers import ASHAScheduler
import matplotlib.pyplot as plt

In [25]:
def data_loader(batch_size=64):
  normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])

  cifar10_training_data = torchvision.datasets.CIFAR10("/content", 
                                              train=True,
                                              transform = transforms.Compose([
                                                                              transforms.RandomHorizontalFlip(),
                                                                              transforms.RandomCrop(32, 4), # size 32x32, padding 4
                                                                              transforms.ToTensor(),
                                                                              normalize,]), 
                                              download=True)
  
  cifar10_val_data = torchvision.datasets.CIFAR10("/content", 
                                              train=True,
                                              transform = transforms.Compose([transforms.ToTensor(),
                                                                              normalize,]), 
                                              download=True)

  cifar10_testing_data = torchvision.datasets.CIFAR10("/content", 
                                              train=False,
                                              transform = transforms.Compose([transforms.ToTensor(),
                                                                              normalize,]), 
                                              download=True)
  num_train = len(cifar10_training_data)
  indices = list(range(num_train))
  split = 5000 #45k/5k train/val split
  train_idx, valid_idx = indices[split:], indices[:split]
  train_sampler = SubsetRandomSampler(train_idx)
  valid_sampler = SubsetRandomSampler(valid_idx)
  cifar10_training_data_loader = torch.utils.data.DataLoader(cifar10_training_data, batch_size, sampler=train_sampler, shuffle=False)
  cifar10_val_data_loader = torch.utils.data.DataLoader(cifar10_val_data, batch_size, sampler=valid_sampler, shuffle=False)
  cifar10_testing_data_loader = torch.utils.data.DataLoader(cifar10_testing_data, batch_size, shuffle=True)
  return cifar10_training_data_loader, cifar10_val_data_loader, cifar10_testing_data_loader

In [9]:
# https://github.com/kuangliu/pytorch-cifar/blob/master/models/densenet.py
import math

class Bottleneck(nn.Module):
    def __init__(self, in_planes, growth_rate):
        super(Bottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(4*growth_rate)
        self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        out = self.conv1(F.relu(self.bn1(x)))
        out = self.conv2(F.relu(self.bn2(out)))
        out = torch.cat([out,x], 1)
        return out


class Transition(nn.Module):
    def __init__(self, in_planes, out_planes):
        super(Transition, self).__init__()
        self.bn = nn.BatchNorm2d(in_planes)
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)

    def forward(self, x):
        out = self.conv(F.relu(self.bn(x)))
        out = F.avg_pool2d(out, 2)
        return out


class DenseNet(nn.Module):
    def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
        super(DenseNet, self).__init__()
        self.growth_rate = growth_rate

        num_planes = 2*growth_rate
        self.conv1 = nn.Conv2d(1, num_planes, kernel_size=3, padding=1, bias=False)

        self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
        num_planes += nblocks[0]*growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.trans1 = Transition(num_planes, out_planes)
        num_planes = out_planes

        self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
        num_planes += nblocks[1]*growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.trans2 = Transition(num_planes, out_planes)
        num_planes = out_planes

        self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
        num_planes += nblocks[2]*growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.trans3 = Transition(num_planes, out_planes)
        num_planes = out_planes

        self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
        num_planes += nblocks[3]*growth_rate

        self.bn = nn.BatchNorm2d(num_planes)
        self.linear = nn.Linear(num_planes, num_classes)

    def _make_dense_layers(self, block, in_planes, nblock):
        layers = []
        for i in range(nblock):
            layers.append(block(in_planes, self.growth_rate))
            in_planes += self.growth_rate
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.trans3(self.dense3(out))
        out = self.dense4(out)
        out = F.adaptive_avg_pool2d(F.relu(self.bn(out)), (1,1))
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

def DenseNet121(blocks=[6,12,24,16], k=12):
    return DenseNet(Bottleneck, blocks, growth_rate=k)

In [4]:
def test(net, testloader, device, epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    criterion = nn.CrossEntropyLoss()
    test_steps = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            test_steps += 1
    print("epoch {} test_loss {} test_steps {} test_acc {}".format(epoch, test_loss, test_steps, correct / total))

In [4]:
def train_dense(w_decay=0.0001, momentum_arg=0.9, batch_size=64):
  net = DenseNet121()
  
  trainloader, valloader, testloader = data_loader(batch_size)
  classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')
  
  device = "cpu"
  if torch.cuda.is_available():
      device = "cuda:0"
      if torch.cuda.device_count() > 1:
          net = nn.DataParallel(net)
  net.to(device)

  def _lr_lambda(current_step):
      """
      _lr_lambda returns a multiplicative factor given an interger parameter epochs.
      """
      if current_step < 25000:
          _lr = 1
      elif current_step < 37500:
          _lr = .1
      else:
          _lr = .01
      return _lr

  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=momentum_arg, weight_decay=w_decay)
  scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, _lr_lambda, last_epoch=-1, verbose=True)
  
  train_accs = []
  epochs = []

  for epoch in range(300):  # loop over the dataset multiple times
        running_loss = 0.0
        epoch_steps = 0
        train_loss = 0
        correct = 0
        total = 0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_loss += loss.item()
            epoch_steps += 1
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            if i % 10 == 9:  # print every 10 mini-batches
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 10:.3f}')
                #print("{}".format(correct/total))
            running_loss = 0.0
        train_accs.append(correct/total)
           
        
        # Validation loss
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        for i, data in enumerate(valloader, 0):
            with torch.no_grad():
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                loss = criterion(outputs, labels)
                val_loss += loss.cpu().numpy()
                val_steps += 1
        print("{}".format(correct/total))
        val_acc.append(correct/total)
        epochs.append(epoch)


  #final test error rate
  test(net, testloader, device, epoch)
  print("Finished Training")
  return net


In [None]:
net, val_acc, epochs = train_dense()

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[59,    60] loss: 0.032
0.8447916666666667
[59,   100] loss: 0.041
0.845625
[59,   110] loss: 0.067
0.8460227272727273
[59,   120] loss: 0.064
0.84609375
[59,   130] loss: 0.051
0.8456730769230769
[59,   140] loss: 0.069
0.8470982142857143
[59,   150] loss: 0.037
0.8494791666666667
[59,   160] loss: 0.029
0.84853515625
[59,   170] loss: 0.072
0.8474264705882353
[59,   180] loss: 0.034
0.8472222222222222
[59,   190] loss: 0.059
0.8457236842105263
[59,   200] loss: 0.047
0.8465625
[59,   210] loss: 0.030
0.8476190476190476
[59,   220] loss: 0.044
0.8471590909090909
[59,   230] loss: 0.048
0.8466711956521739
[59,   240] loss: 0.040
0.8460286458333334
[59,   250] loss: 0.032
0.8454375
[59,   260] loss: 0.035
0.8466346153846154
[59,   270] loss: 0.050
0.8467592592592592
[59,   280] loss: 0.032
0.8470982142857143
[59,   290] loss: 0.029
0.8466056034482758
[59,   300] loss: 0.025
0.8466145833333333
[59,   310] loss: 0.032
0.8452