In [None]:
!pip install torch
!pip install torchvision
import os
import sys
import time
import math
import torch.nn.init as init

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Learning rate: lr = 0.01
# Number of epochs: 50, 100, and 200
# Optimizers: SGD and Adam
These settings apply to models 1A, 1E, and 1F.

## Utility functions

In [None]:
def count_parameters(model):
    """Return how many scalar values ``model`` holds in trainable parameters.

    Only parameters whose ``requires_grad`` flag is set contribute to the
    total; frozen parameters are skipped.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

## Building the Model

### ResNet-18-style network with three stages (about 2.8M trainable parameters; the full four-stage ResNet-18 has about 11M)

In [None]:
class BasicBlock(nn.Module):
    """Residual block: two 3x3 convolutions, each followed by batch norm.

    The block input is added to the output of the second batch norm and the
    sum is passed through a ReLU. When ``stride != 1`` or the number of input
    channels differs from ``expansion * planes``, the skip path is a 1x1
    convolution (with the same stride) plus batch norm so both branches have
    matching shapes; otherwise the skip path is an empty ``nn.Sequential``.

    Args:
        in_planes: Number of input channels.
        planes: Number of channels produced by both 3x3 convolutions.
        stride: Stride of the first convolution (and of the 1x1 skip
            convolution when one is created).
    """

    # Output channels of the block are ``expansion * planes``.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main branch. Only the first convolution may change the resolution.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Skip branch: project only when the shapes would not line up.
        needs_projection = stride != 1 or in_planes != out_planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Apply both convolutions, add the skip branch, and return the ReLU of the sum."""
        residual = F.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))
        return F.relu(residual + self.shortcut(x))


In [None]:
class ResNet(nn.Module):
    """Residual network with a 3x3 stem, three stacks of ``block`` and a linear head.

    Args:
        block: Residual block class. It is instantiated as
            ``block(in_planes, planes, stride)`` and must expose an
            ``expansion`` class attribute.
        num_blocks: Sequence whose first three entries give the number of
            blocks in stages 1, 2 and 3.
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running channel count; _make_layer reads it and advances it.
        self.in_planes = 64

        # Stem: 3 input channels -> 64 feature maps at the input resolution.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # Stage widths 64 -> 128 -> 256; stages 2 and 3 open with a stride-2 block.
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)

        # There is no 512-channel fourth stage, so the classifier consumes
        # the 256-channel output of layer3.
        self.linear = nn.Linear(256 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``, the rest use 1."""
        stage = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            stage.append(block(self.in_planes, planes, block_stride))
            # The next block receives what this one produced.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the class scores for a batch of images."""
        features = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)

        # NOTE(review): assuming 32x32 inputs and blocks that only downsample
        # through their stride, the map here is 8x8. A 6x6 window (the stride
        # defaults to the window size) then yields a single 1x1 cell that
        # averages only the top-left 6x6 region. Confirm this is intended
        # rather than a global average.
        pooled = F.avg_pool2d(features, 6)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)

In [None]:
def ResNet18():
    """Build the notebook's network: a ``ResNet`` of ``BasicBlock``s, two blocks per stage."""
    blocks_per_stage = [2, 2, 2]
    return ResNet(BasicBlock, blocks_per_stage)

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Best accuracy seen so far; evaluate() reads and updates this module-level name.
best_acc = 0
# Index of the first epoch (0 for a fresh run, or the epoch of a restored checkpoint).
start_epoch = 0
device

'cuda'

## Data Processing

In [None]:
# Data
print('==> Preparing data..')

# Per-channel normalization constants shared by the training and evaluation pipelines.
norm_mean = (0.4914, 0.4822, 0.4465)
norm_std = (0.2023, 0.1994, 0.2010)

# Training pipeline: random 32x32 crop after 4 pixels of padding and a random
# horizontal flip, then tensor conversion and normalization.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# Evaluation pipeline: no augmentation, only tensor conversion and normalization.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

==> Preparing data..


In [None]:

# Directory the CIFAR-10 archive is downloaded to and read from.
ROOT = '.data'

# Official training split (augmented) and test split (not augmented).
trainset = torchvision.datasets.CIFAR10(
    root=ROOT, train=True, download=True, transform=transform_train)
testset = torchvision.datasets.CIFAR10(
    root=ROOT, train=False, download=True, transform=transform_test)

# NOTE: despite its name, VALID_RATIO is the fraction of `trainset` kept for
# training; the remainder becomes the validation set.
VALID_RATIO = 0.9
n_train_examples = int(len(trainset) * VALID_RATIO)
n_valid_examples = len(trainset) - n_train_examples
print(n_train_examples)
print(n_valid_examples)

# Seeded generator so the train/validation partition is the same on every run.
generator = torch.Generator().manual_seed(42)
split_sizes = [n_train_examples, n_valid_examples]
train_data, valid_data = torch.utils.data.random_split(
    trainset, split_sizes, generator=generator)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to .data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting .data/cifar-10-python.tar.gz to .data
Files already downloaded and verified
45000
5000


In [None]:
import copy
# Give the validation split its own copy of the underlying dataset object
# before changing the transform. `valid_data.dataset` is presumably the same
# `trainset` object that `train_data` wraps (both come from random_split), so
# assigning the transform without the copy would also change training data.
valid_data = copy.deepcopy(valid_data)
# Validation images use the evaluation pipeline (no random crop / flip).
valid_data.dataset.transform = transform_test

In [None]:
# One batch size for all three loaders.
BATCH_SIZE = 256

# Only the training loader shuffles. evaluate() averages per-batch metrics and
# the last validation batch is shorter than the others, so shuffling the
# validation set would change which samples land in that short batch and make
# the reported validation numbers vary from run to run.
trainloader = torch.utils.data.DataLoader(
    train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
validloader = torch.utils.data.DataLoader(
    valid_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

In [None]:
# Human-readable class names; presumably index-aligned with the integer
# labels of the dataset (confirm before using for reporting).
classes = (
    'plane',
    'car',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
)

In [None]:
print('==> Building model..')
net = ResNet18().to(device)

# On a GPU, wrap the model in DataParallel and let cuDNN benchmark its
# convolution algorithms. The wrapper exposes the network as `net.module`.
running_on_gpu = device == 'cuda'
if running_on_gpu:
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True


==> Building model..


In [None]:
# Report the size of the model that was just built.
n_trainable = count_parameters(net)
print("Number of trainable parameters: ", n_trainable)

Number of trainable parameters:  2777674


In [None]:
# Display the module tree of the network (wrapped in DataParallel when running on a GPU).
net

DataParallel(
  (module): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (shortcut): Sequential()
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       

In [None]:
# Initial learning rate handed to the optimizer in the next cell.
lr = 0.01

## The optimizer below is SGD. To use Adam instead, comment out the SGD lines and uncomment the Adam lines.

In [None]:
# Classification loss, moved to the same device as the model.
criterion = nn.CrossEntropyLoss().to(device)

# SGD with momentum and L2 weight decay.
sgd_settings = dict(lr=lr, momentum=0.9, weight_decay=5e-4)
optimizer = optim.SGD(net.parameters(), **sgd_settings)
# Adam alternative (use instead of the SGD optimizer above):
# optimizer = optim.Adam(net.parameters(), lr=lr,
#                       weight_decay=5e-4)

# Cosine learning-rate schedule.
# NOTE(review): T_max is fixed at 200 scheduler steps; keep it in sync with
# EPOCHS when training for a different number of epochs.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

In [None]:
def calculate_accuracy(y_pred, y):
    """Return the fraction of rows of ``y_pred`` whose arg-max along dim 1 equals ``y``.

    Args:
        y_pred: Tensor of scores; the predicted class of each row is the
            index of its largest entry along dimension 1.
        y: Tensor of target labels with one entry per row of ``y_pred``.

    Returns:
        A 0-dim float tensor: number of matches divided by ``y.shape[0]``.
    """
    predicted = y_pred.argmax(dim=1)
    n_correct = torch.eq(predicted, y.view_as(predicted)).sum()
    return n_correct.float() / y.shape[0]

In [None]:
def train(net, iterator, optimizer, criterion, device):
    """Run one optimization pass over ``iterator``.

    Args:
        net: Model to optimize; it is switched to training mode.
        iterator: Sized iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer that updates the parameters of ``net``.
        criterion: Callable ``criterion(outputs, targets)`` returning the loss.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(loss, accuracy)``, each the unweighted mean of the per-batch
        values (the sum divided by ``len(iterator)``).
    """
    net.train()
    batch_losses = []
    batch_accuracies = []

    for batch_inputs, targets in iterator:
        batch_inputs = batch_inputs.to(device)
        targets = targets.to(device)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        outputs = net(batch_inputs)
        loss = criterion(outputs, targets)
        batch_accuracy = calculate_accuracy(outputs, targets)

        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())
        batch_accuracies.append(batch_accuracy.item())

    n_batches = len(iterator)
    return sum(batch_losses) / n_batches, sum(batch_accuracies) / n_batches

In [None]:
def evaluate(net, iterator, criterion, device,
             checkpoint_path='./checkpoint/model_Adam.pt'):
    """Score ``net`` on ``iterator`` and checkpoint it when the accuracy improves.

    The model is put in evaluation mode and run without gradient tracking.
    The epoch accuracy (not the accuracy of the last batch) is compared with
    the module-level ``best_acc``. When it is higher, the model's
    ``state_dict`` is written to ``checkpoint_path`` and ``best_acc`` is
    updated with a plain Python float.

    Args:
        net: Model to evaluate.
        iterator: Sized iterable yielding ``(inputs, targets)`` batches.
        criterion: Callable ``criterion(outputs, targets)`` returning the loss.
        device: Device every batch is moved to before the forward pass.
        checkpoint_path: File the improved model is saved to. Pass ``None``
            to evaluate without touching the checkpoint or ``best_acc``,
            e.g. when scoring the test set. The default is the path this
            function used before the parameter existed.

    Returns:
        ``(loss, accuracy)``, each the unweighted mean of the per-batch
        values (the sum divided by ``len(iterator)``).

    Raises:
        ValueError: If ``iterator`` contains no batches.
    """
    global best_acc

    n_batches = len(iterator)
    if n_batches == 0:
        raise ValueError('evaluate() needs at least one batch')

    epoch_loss = 0.0
    epoch_acc = 0.0
    net.eval()
    with torch.no_grad():
        for x, y in iterator:
            x = x.to(device)
            y = y.to(device)
            y_pred = net(x)
            epoch_loss += criterion(y_pred, y).item()
            epoch_acc += calculate_accuracy(y_pred, y).item()

    mean_loss = epoch_loss / n_batches
    mean_acc = epoch_acc / n_batches

    # Model selection uses the accuracy of the whole pass. A single (possibly
    # short) final batch is too noisy to decide which weights to keep.
    if checkpoint_path is not None and mean_acc > best_acc:
        print('Saving..')
        checkpoint_dir = os.path.dirname(checkpoint_path)
        if checkpoint_dir:
            os.makedirs(checkpoint_dir, exist_ok=True)
        torch.save(net.state_dict(), checkpoint_path)
        best_acc = mean_acc

    return mean_loss, mean_acc

Set `EPOCHS` in the next cell to 50, 100, or 200.

In [None]:
EPOCHS = 200

# Rebuild the cosine schedule so that its length always matches the number of
# epochs trained here. With a fixed T_max of 200, a 50- or 100-epoch run would
# stop part-way down the cosine curve and never reach the low learning rates.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# Per-epoch history, used by the plotting cells.
trainloss = []
trainaccuracy = []
validloss = []
validaccuracy = []

for epoch in range(EPOCHS):
    train_epoch_loss, train_epoch_acc = train(net, trainloader, optimizer, criterion, device)
    val_epoch_loss, val_epoch_acc = evaluate(net, validloader, criterion, device)
    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    print("Epoch", epoch)
    print("Train loss & accuracy", train_epoch_loss, train_epoch_acc)
    print("Validation loss & accuracy", val_epoch_loss, val_epoch_acc)

    trainloss.append(train_epoch_loss)
    trainaccuracy.append(train_epoch_acc)
    validloss.append(val_epoch_loss)
    validaccuracy.append(val_epoch_acc)

In [None]:
# To score the checkpointed weights instead of the current ones, load them first:
# net.load_state_dict(torch.load('./checkpoint/model_Adam.pt'))
testloss, testaccuracy = [], []

# NOTE(review): evaluate() compares its accuracy with best_acc and may write
# the checkpoint file, so running it on the test set can overwrite the
# checkpoint that was selected on the validation set.
test_loss_epoch, test_acc = evaluate(net, testloader, criterion, device)
testloss.append(test_loss_epoch)
testaccuracy.append(test_acc)
print("test loss and accuracy: ", test_loss_epoch, test_acc)

test loss and accuracy:  1.2480855196714402 0.6744140625


In [None]:
!pip install matplotlib
import matplotlib.pyplot as plt

In [None]:
# Training and validation loss per epoch, drawn on one set of axes.
for history, curve_label in ((trainloss, 'Train loss'), (validloss, 'Valid loss')):
    plt.plot(range(len(history)), history, '-', linewidth=3, label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid()
plt.legend()
# Save the figure next to the notebook.
plt.savefig("initial.png")

In [None]:
# Training and validation accuracy per epoch, drawn on one set of axes.
for history, curve_label in ((trainaccuracy, 'Train accuracy'), (validaccuracy, 'Valid accuracy')):
    plt.plot(range(len(history)), history, '-', linewidth=3, label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.grid()
plt.legend()
# Save the figure next to the notebook.
plt.savefig("accuracy1.png")