# ResNet For CIFAR0-10

In [2]:
"""
from google.colab import drive
drive.mount('/content/drive')

%cd /content/drive/My\ Drive/DL_Mini_Proj
!ls
"""

"\nfrom google.colab import drive\ndrive.mount('/content/drive')\n\n%cd /content/drive/My\\ Drive/DL_Mini_Proj\n!ls\n"

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
#import renet

import torchvision
import torchvision.transforms as transforms

import os
from torchsummary import summary
import torch.utils.data as data

### Model Construction

In [3]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes) # ochange

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])

In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = ResNet18().to(device)
print(model.layer1)
model.layer4 = torch.nn.Sequential(*[model.layer4[0]]) # https://discuss.pytorch.org/t/how-to-delete-layer-in-pretrained-model/17648/4
model.layer3 = torch.nn.Sequential(*[model.layer3[0]])
model.layer1 = torch.nn.Sequential(*[model.layer1[0]])
model.layer2 = torch.nn.Sequential(*[model.layer2[0]])

Sequential(
  (0): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (shortcut): Sequential()
  )
  (1): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (shortcut): Sequential()
  )
)
Sequential(
  (0): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1

In [4]:
summary(model, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
            Conv2d-3           [-1, 64, 32, 32]          36,864
       BatchNorm2d-4           [-1, 64, 32, 32]             128
            Conv2d-5           [-1, 64, 32, 32]          36,864
       BatchNorm2d-6           [-1, 64, 32, 32]             128
        BasicBlock-7           [-1, 64, 32, 32]               0
            Conv2d-8          [-1, 128, 16, 16]          73,728
       BatchNorm2d-9          [-1, 128, 16, 16]             256
           Conv2d-10          [-1, 128, 16, 16]         147,456
      BatchNorm2d-11          [-1, 128, 16, 16]             256
           Conv2d-12          [-1, 128, 16, 16]           8,192
      BatchNorm2d-13          [-1, 128, 16, 16]             256
       BasicBlock-14          [-1, 128,

Construction Complete

### Data Preprocessing

In [6]:
class Cutout(object):
    """Randomly mask out one or more patches from an image.
    Args:
    n_holes (int): Number of patches to cut out of each image.
    length (int): The length (in pixels) of each square patch.
    """
    def __init__(self, n_holes, length):
      self.n_holes = n_holes
      self.length = length

    def __call__(self, img):
        h = img.size(1)
        w = img.size(2)

        mask = np.ones((h, w), np.float32)
    
        for n in range(self.n_holes):
            y = np.random.randint(h)
            x = np.random.randint(w)
    
            y1 = np.clip(y - self.length // 2, 0, h)
            y2 = np.clip(y + self.length // 2, 0, h)
            x1 = np.clip(x - self.length // 2, 0, w)
            x2 = np.clip(x + self.length // 2, 0, w)

            mask[y1: y2, x1: x2] = 0.
    

        mask = torch.from_numpy(mask)
          
        mask = mask.expand_as(img)
              
        img = img * mask

    
      
        return img

In [7]:
def load_CIFAR10(batch_size, train_ratio):

  ROOT = '/scratch/hx2214/data'
  trainset = torchvision.datasets.CIFAR10(
      root = ROOT,
      train = True, 
      download = True
  )

  # Compute means and standard deviations
  means = trainset.data.mean(axis=(0,1,2)) / 255
  stds = trainset.data.std(axis=(0,1,2)) / 255
  #print(means, stds)

  # Preprocess setting
  transform_train = transforms.Compose([
      transforms.RandomCrop(32, padding=4),
      transforms.RandomHorizontalFlip(),
      transforms.ToTensor(),
      transforms.Normalize(mean=means, std=stds),
      Cutout(n_holes=1, length=16)
  ])
  transform_test = transforms.Compose([
      transforms.ToTensor(),
      transforms.Normalize(mean=means, std=stds)
  ])

  # Load the dataset
  trainset = torchvision.datasets.CIFAR10(
      root = ROOT, 
      train = True, 
      download = True, 
      transform = transform_train
  )
  testset = torchvision.datasets.CIFAR10(
      root = ROOT, 
      train = False, 
      download = True, 
      transform = transform_test
  )

  train_iterator = data.DataLoader(trainset, batch_size)
  test_iterator = data.DataLoader(testset, batch_size)

  return train_iterator, test_iterator
  """
  # Split trainset for validset
  n_train = int(len(trainset) * train_ratio)
  n_valid = len(trainset) - n_train
  train_dataset, valid_dataset = data.random_split(trainset, [n_train, n_valid])
  
  # Build dataloader
  train_iterator = data.DataLoader(train_dataset, batch_size)
  valid_iterator = data.DataLoader(valid_dataset, batch_size)
  test_iterator = data.DataLoader(testset, batch_size)

  return train_iterator, valid_iterator, test_iterator
  """

In [8]:
# trainloader, validloader, testloader = load_CIFAR10(batch_size=16, train_ratio=1)
trainloader, testloader = load_CIFAR10(batch_size=16, train_ratio=1)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [91]:
"""
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
"""

'\ntransform_train = transforms.Compose([\n    transforms.RandomCrop(32, padding=4),\n    transforms.RandomHorizontalFlip(),\n    transforms.ToTensor(),\n    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),\n])\n\ntransform_test = transforms.Compose([\n    transforms.ToTensor(),\n    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),\n])\n'

In [92]:
"""
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train) # change transform in future
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test) # change transform in future
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')
"""

"\ntrainset = torchvision.datasets.CIFAR10(\n    root='./data', train=True, download=True, transform=transform_train) # change transform in future\ntrainloader = torch.utils.data.DataLoader(\n    trainset, batch_size=128, shuffle=True, num_workers=2)\n\ntestset = torchvision.datasets.CIFAR10(\n    root='./data', train=False, download=True, transform=transform_test) # change transform in future\ntestloader = torch.utils.data.DataLoader(\n    testset, batch_size=100, shuffle=False, num_workers=2)\n\nclasses = ('plane', 'car', 'bird', 'cat', 'deer',\n           'dog', 'frog', 'horse', 'ship', 'truck')\n"

In [14]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)#,weight_decay=1e-3)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

In [15]:
def train(epoch):
    print('\nEpoch: %d' % epoch)
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
    print("Train Set Loss:",train_loss/total)



In [16]:
def test(epoch):
    global best_acc
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    # Save checkpoint.
    acc = 100.*correct/total
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': model.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ckpt.pth')
        best_acc = acc
    print('Test Set Accuracy:',acc)
    return acc


In [17]:
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

for epoch in range(start_epoch, start_epoch+20):
    train(epoch)
    test(epoch)
    scheduler.step()


Epoch: 0
Train Set Loss: 0.11087040110230446
Saving..
Test Set Accuracy: 45.15

Epoch: 1
Train Set Loss: 0.0861809755575657
Saving..
Test Set Accuracy: 59.9

Epoch: 2
Train Set Loss: 0.07349206447184085
Saving..
Test Set Accuracy: 62.27

Epoch: 3
Train Set Loss: 0.06549771600067615
Saving..
Test Set Accuracy: 70.29

Epoch: 4
Train Set Loss: 0.05974413369119167
Saving..
Test Set Accuracy: 72.2

Epoch: 5
Train Set Loss: 0.05427168503075838
Saving..
Test Set Accuracy: 75.34

Epoch: 6
Train Set Loss: 0.04951760059475899
Saving..
Test Set Accuracy: 77.61

Epoch: 7
Train Set Loss: 0.0462010094127059
Saving..
Test Set Accuracy: 78.74

Epoch: 8
Train Set Loss: 0.04342289152622223
Saving..
Test Set Accuracy: 81.82

Epoch: 9
Train Set Loss: 0.04146881461083889
Saving..
Test Set Accuracy: 82.07

Epoch: 10
Train Set Loss: 0.03902833487033844
Test Set Accuracy: 81.9

Epoch: 11
Train Set Loss: 0.03778805713400245
Saving..
Test Set Accuracy: 82.54

Epoch: 12
Train Set Loss: 0.03632956011362374
Savin

In [18]:
state = {
      'net': model.state_dict()
}
if not os.path.isdir('checkpoint'):
  os.mkdir('checkpoint')
torch.save(state, './checkpoint/20Epoch.pth')

In [19]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001,weight_decay=1e-6)

In [20]:
start_epoch = 20  

for epoch in range(start_epoch, start_epoch+20):
    train(epoch)
    test(epoch)
    scheduler.step()


Epoch: 20
Train Set Loss: 0.023478635339643807
Saving..
Test Set Accuracy: 89.11

Epoch: 21
Train Set Loss: 0.0223777623976022
Saving..
Test Set Accuracy: 89.16

Epoch: 22
Train Set Loss: 0.02129191546998918
Saving..
Test Set Accuracy: 89.44

Epoch: 23
Train Set Loss: 0.021413901176527143
Test Set Accuracy: 89.31

Epoch: 24
Train Set Loss: 0.02070578764460981
Saving..
Test Set Accuracy: 89.63

Epoch: 25
Train Set Loss: 0.02046374794024974
Saving..
Test Set Accuracy: 89.7

Epoch: 26
Train Set Loss: 0.02014391047986224
Saving..
Test Set Accuracy: 89.87

Epoch: 27
Train Set Loss: 0.01987136450753547
Test Set Accuracy: 89.82

Epoch: 28
Train Set Loss: 0.01971722053712234
Saving..
Test Set Accuracy: 89.97

Epoch: 29
Train Set Loss: 0.019586234425231816
Test Set Accuracy: 89.9

Epoch: 30
Train Set Loss: 0.0190031138999667
Saving..
Test Set Accuracy: 90.08

Epoch: 31
Train Set Loss: 0.018994125402458012
Test Set Accuracy: 89.87

Epoch: 32
Train Set Loss: 0.018663700077356772
Saving..
Test Se

In [None]:
start_epoch = 40 

for epoch in range(start_epoch, start_epoch+60):
    train(epoch)
    test(epoch)
    scheduler.step()

In [None]:
start_epoch = 100

for epoch in range(start_epoch, start_epoch+70):
    train(epoch)
    test(epoch)
    scheduler.step()