In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.utils.data import random_split, DataLoader

import torchvision
import torchvision.transforms as transforms

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [3]:
# Data
print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)

val_ratio = 0.2
train_dataset, val_dataset = random_split(trainset, [int((1 - val_ratio) * len(trainset)),
                                          int(val_ratio * len(trainset))])
batch_size = 32
train_dl = DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True)
val_dl = DataLoader(val_dataset, batch_size, shuffle=True, pin_memory=True)
test_dl = DataLoader(testset, batch_size, shuffle=True, pin_memory=True)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

==> Preparing data..
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 53116466.17it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [4]:
def get_default_device():
  return torch.device('cuda') if torch.cuda.is_available else torch.device('cpu')
    
def to_device(entity, device):
    if isinstance(entity, (list,tuple)):
        return [to_device(x, device) for x in entity]
    return entity.to(device, non_blocking=True)

class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
        
    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl: 
            yield to_device(b, self.device)

    def __len__(self):
        """Number of batches"""
        return len(self.dl)

device = get_default_device()
train_dl = DeviceDataLoader(train_dl, device)
val_dl = DeviceDataLoader(val_dl, device)
test_dl = DeviceDataLoader(test_dl, device)

In [5]:
def conv_block(in_channels, out_channels, pool=False):
    layers = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1), 
              nn.BatchNorm2d(out_channels), 
              nn.ReLU(inplace=True)]
    if pool: layers.append(nn.MaxPool2d(2))
    return nn.Sequential(*layers)

In [6]:
class ResNet9(nn.Module):
    def __init__(self, in_channels, num_Classes):
      super(ResNet9, self).__init__()
      self.conv1 = conv_block(in_channels, 64)
      self.conv2 = conv_block(64, 128, pool=True)
      self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))

      self.conv3 = conv_block(128, 256, pool=True)
      self.conv4 = conv_block(256, 512, pool=True)
      self.res2 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))

      self.classifier = nn.Sequential(nn.MaxPool2d(4), 
                                        nn.Flatten(), 
                                        nn.Linear(512, num_Classes))
        
    def forward(self, x):
      out = self.conv1(x)
      out = self.conv2(out)
      out = self.res1(out) + out
      out = self.conv3(out)
      out = self.conv4(out)
      out = self.res2(out) + out
      out = self.classifier(out)
      return out

In [7]:
model = ResNet9(3, 10)

In [15]:
# Train the model
def accuracy(logits, labels):
    pred, predClassId = torch.max(logits, dim=1)
    return torch.tensor(torch.sum(predClassId == labels).item() / len(logits))

def evaluate(model, dl, loss_func):
  model.eval()
  batch_losses, batch_accs = [], []
  for images, labels in val_dl:
    with torch.no_grad():
      logits = model(images)
    batch_losses.append(loss_func(logits, labels))
    batch_accs.append(accuracy(logits, labels))
  epoch_avg_loss = torch.stack(batch_losses).mean()
  epoch_avg_acc = torch.stack(batch_accs).mean()
  return epoch_avg_loss, epoch_avg_acc

def train(model, train_dl, val_dl, epochs, max_lr, loss_func, optim):
  optimizer = optim(model.parameters(), max_lr)
  scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs*len(train_dl))

  results = []
  for epoch in range(epochs):
    model.train()
    train_losses = []
    lrs = []
    for images, labels in train_dl:
      logits = model(images)
      loss = loss_func(logits, labels)
      train_losses.append(loss)
      loss.backward()
      optimizer.step()
      optimizer.zero_grad()
      lrs.append(optimizer.param_groups[0]["lr"])
      scheduler.step()
    epoch_train_loss = torch.stack(train_losses).mean()

    epoch_avg_loss, epoch_avg_acc = evaluate(model, val_dl, loss_func)

    results.append({'avg_valid_loss': epoch_avg_loss,
                    'avg_valid_acc': epoch_avg_acc,
                    'avg_train_loss': epoch_train_loss,
                    'lr': lrs})
  return results


In [16]:
model = to_device(model, device)
epochs = 8
max_lr = 1e-2
loss_func = nn.functional.cross_entropy
optim = torch.optim.Adam
results = train(model, train_dl, val_dl, epochs, max_lr, loss_func, optim)

In [17]:
for result in results:
  print(result["avg_valid_acc"])

tensor(0.8199)
tensor(0.7507)
tensor(0.8154)
tensor(0.8253)
tensor(0.8633)
tensor(0.8791)
tensor(0.8929)
tensor(0.8990)


In [18]:
_, test_acc = evaluate(model, test_dl, loss_func)

In [19]:
test_acc

tensor(0.8946)