# Training SimpleNN on CIFAR-10
In this project, you will use the SimpleNN model to perform image classification on CIFAR-10. CIFAR-10 orginally contains 60K images from 10 categories. We split it into 45K/5K/10K images to serve as train/valiation/test set. We only release the ground-truth labels of training/validation dataset to you.

## Step 0: Set up the SimpleNN model
As you have practiced to implement simple neural networks in Homework 1, we just prepare the implementation for you.

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import sys 
sys.path.append('/content/drive/MyDrive/ECE_661/661_Final_Project')

In [3]:
!ls

data  drive  sample_data  saved_model


In [4]:
# import necessary dependencies
import argparse
import os
import time
import tools
import datetime
from tqdm import tqdm_notebook as tqdm
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
from tools.dataset import CIFAR10
from torchvision.datasets import CIFAR10 as CIFAR
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from typing import Any, Callable, Optional, Tuple

In [5]:
# define the ResBlock to be used in ResNet;
class ResBlock(nn.Module):
    def __init__(self, input_dim, output_dim, k_size=3, stride_bool=False):
        super(ResBlock, self).__init__()
        if(stride_bool == True):
          self.conv1 = nn.Conv2d(input_dim, output_dim, kernel_size=k_size, stride=2, padding=1)
          self.resconv = nn.Conv2d(input_dim, output_dim, kernel_size=1, stride=2)
        else:
          self.conv1 = nn.Conv2d(input_dim, output_dim, kernel_size=k_size, padding=1)
          self.resconv = nn.Conv2d(input_dim, output_dim, kernel_size=1)

        if input_dim != output_dim:
          input_dim = output_dim
        self.bn1 = nn.BatchNorm2d(output_dim)
        self.conv2 = nn.Conv2d(input_dim, output_dim, kernel_size=k_size, padding=1)
        self.bn2 = nn.BatchNorm2d(output_dim)

    def forward(self, x):
        y = x
      # Note shortcut will just be x if of same shape as out
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out)) 
        if (x.shape == out.shape):
          out = out + y
          out = F.relu(out)
          return out
        else:
          y = self.resconv(y)
          out = out.add(y)
          out = F.relu(out)
          return out

In [6]:
class ResNet(nn.Module):
  def __init__(self):
    super(ResNet, self).__init__()
    self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
    self.bn1 = nn.BatchNorm2d(16)
    # 16 filter res blockss
    self.res1 = ResBlock(16, 16)
    self.res2 = ResBlock(16, 16)
    self.res3 = ResBlock(16, 16)
    # Now 32 filter res blocks
    self.res4 = ResBlock(16, 32, stride_bool=True)
    self.res5 = ResBlock(32, 32)
    self.res6 = ResBlock(32, 32)
    # Now 64 filter res blocks 
    self.res7 = ResBlock(32, 64, stride_bool=True)
    self.res8 = ResBlock(64, 64)
    self.res9 = ResBlock(64, 64)

    self.fc = nn.Linear(64, 10)
    self.fc2 = nn.Linear(64,4)

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))

    # ReLU contained within forward method of res blocks
    
    out = self.res1(out)
    out = self.res2(out)
    out = self.res3(out)

    out = self.res4(out)
    out = self.res5(out)
    out = self.res6(out)

    out = self.res7(out)
    out = self.res8(out)
    out = self.res9(out)

    out = F.adaptive_avg_pool2d(out, 1)
    out = out.view(out.size(0), -1)
    outt = self.fc(out)
    outr = self.fc2(out)
    # out = F.softmax(out)
    return outt, outr

In [7]:
class Cutout(object):
    """Randomly mask out one or more patches from an image.
    Args:
        n_holes (int): Number of patches to cut out of each image.
        length (int): The length (in pixels) of each square patch.
    """
    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        """
        Args:
            img (Tensor): Tensor image of size (C, H, W).
        Returns:
            Tensor: Image with n_holes of dimension length x length cut out of it.
        """
        h = img.size(1)
        w = img.size(2)

        mask = np.ones((h, w), np.float32)

        for n in range(self.n_holes):
            y = np.random.randint(h)
            x = np.random.randint(w)

            y1 = np.clip(y - self.length // 2, 0, h)
            y2 = np.clip(y + self.length // 2, 0, h)
            x1 = np.clip(x - self.length // 2, 0, w)
            x2 = np.clip(x + self.length // 2, 0, w)

            mask[y1: y2, x1: x2] = 0.

        mask = torch.from_numpy(mask)
        mask = mask.expand_as(img)
        img = img * mask

        return img

In [8]:


def make_transform(n_holes, length):
  transform_train = transforms.Compose([transforms.ToTensor(),
                                      transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
                                      Cutout(n_holes=n_holes, length=length)])
                                      # transforms.RandomHorizontalFlip(), 
                                      # transforms.RandomRotation(degrees=15)]) 
                                      # transforms.GaussianBlur(kernel_size=5, sigma = 0.5)])
  return transform_train


def make_train_set(transform_train):
  train_set = CIFAR10(
    root=DATA_ROOT, 
    mode='train', 
    download=True,
    transform=transform_train    # your code
  )
  return train_set 


def make_train_loader(train_set):
  # construct dataloader
  train_loader = DataLoader(
    train_set, 
    batch_size=TRAIN_BATCH_SIZE,  # your code
    shuffle=True,     # your code
    num_workers=4
  )
  return train_loader


transform_val = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010))])

# a few arguments, do NOT change these
DATA_ROOT = "./data"
TRAIN_BATCH_SIZE = 128
VAL_BATCH_SIZE = 100


val_set = CIFAR10(
    root=DATA_ROOT, 
    mode='val', 
    download=True,
    transform=transform_val    # your code
)

val_loader = DataLoader(
    val_set, 
    batch_size=256,  # your code
    shuffle=False,     # your code
    num_workers=4
)
#############################################

Using downloaded and verified file: ./data/cifar10_trainval_F22.zip
Extracting ./data/cifar10_trainval_F22.zip to ./data
Files already downloaded and verified


## Step 3: Instantiate your SimpleNN model and deploy it to GPU devices.
### Question (d)
You may want to deploy your model to GPU device for efficient training. Please assign your model to GPU if possible. If you are training on a machine without GPUs, please deploy your model to CPUs.

Here, you need to:
1. Complete the code below.
2. **In the PDF report**, briefly describe how you verify that your model is indeed deployed on GPU. (Hint: check $\texttt{nvidia-smi}$.)

In [9]:
# specify the device for computation
#############################################
# your code here
# GPU check                
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device =='cuda':
    print("Run on GPU...")
else:
    print("Run on CPU...")
  
    
#############################################

Run on GPU...


In [10]:
def fine_tune(train_loader, holes):
  print(f"Considering holes = {holes}") 
  net = ResNet().to(device)
  # state_dict = torch.load("/content/drive/MyDrive/ECE_661/661_Final_Project/resnet_base_noreg_50.pth")
  # net.load_state_dict(state_dict["state_dict"])

  INITIAL_LR = 0.1

  MOMENTUM = 0.9

  REG = 1e-5

  criterion = nn.CrossEntropyLoss().to(device)

  optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay = REG) #momentum value from AlexNet
  # some hyperparameters
  # total number of training epochs
  EPOCHS = 75
  
  # the folder where the trained model is saved
  CHECKPOINT_FOLDER = "./saved_model"

  # start the training/validation process
  # the process should take about 5 minutes on a GTX 1070-Ti
  # if the code is written efficiently.
  best_val_acc = 0
  current_learning_rate = INITIAL_LR

  # Calculate initial loss 
  initial_loss = 0
  for batch_idx, (inputs, targets) in enumerate(train_loader):
    inputs = inputs.to(device)
    targets = targets.to(device)
    # compute the output and loss
    outputs = net(inputs)[0]
    initial_loss = criterion(outputs, targets)
    # initial_loss += loss
  print("Initial loss: %.4f" %(initial_loss))

  print("==> Training starts!")
  print("="*50)
  for i in range(0, EPOCHS):
  
      #######################
      # your code here
      # switch to train mode
      net.train()
      #######################

      print("Epoch %d:" %i)
      # this help you compute the training accuracy
      total_examples = 0
      correct_examples = 0

      train_loss = 0 # track training loss if you want
      
      # Train the model for 1 epoch.
      for batch_idx, (inputs, targets) in enumerate(train_loader):
          ####################################
          # your code here
          # copy inputs to device
          inputs = inputs.to(device)
          targets = targets.to(device)
          # compute the output and loss
          outputs = net(inputs)[0]
          loss = criterion(outputs, targets)
          train_loss += loss
          # zero the gradient
          optimizer.zero_grad()
          
          # backpropagation
          loss.backward()
          
          # apply gradient and update the weights
          optimizer.step()
          
          # count the number of correctly predicted samples in the current batch
          _, predicted = torch.max(outputs, 1)
          correct = predicted.eq(targets).sum()

          total_examples += targets.shape[0]
          correct_examples += correct.item()
          ####################################
                  
      avg_loss = train_loss / len(train_loader)
      avg_acc = correct_examples / total_examples
      print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))

      # Validate on the validation dataset
      #######################
      # your code here
      # switch to eval mode
      net.eval()
      
      #######################

      # this help you compute the validation accuracy
      total_examples = 0
      correct_examples = 0
      
      val_loss = 0 # again, track the validation loss if you want

      # disable gradient during validation, which can save GPU memory
      with torch.no_grad():
          for batch_idx, (inputs, targets) in enumerate(val_loader):
              ####################################
              # your code here
              # copy inputs to device
              inputs = inputs.to(device)
              targets = targets.to(device)
              # compute the output and loss
              outputs = net(inputs)[0]
              val_loss += criterion(outputs, targets)
              
              # count the number of correctly predicted samples in the current batch
              _, predicted = torch.max(outputs, 1)
              correct = predicted.eq(targets).sum()

              total_examples += targets.shape[0]
              correct_examples += correct.item()
              ####################################

      avg_loss = val_loss / len(val_loader)
      avg_acc = correct_examples / total_examples
      print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))
      
      # save the model checkpoint
      if avg_acc > best_val_acc:
          best_val_acc = avg_acc
          if not os.path.exists(CHECKPOINT_FOLDER):
            os.makedirs(CHECKPOINT_FOLDER)
          print("Saving ...")
          state = {'state_dict': net.state_dict(),
                  'epoch': i,
                  'lr': current_learning_rate}
          torch.save(state, os.path.join(CHECKPOINT_FOLDER, f'resnet_cutout_only_finetune_{holes}.pth'))
          # model.load_state_dict(state_dict["state_dict"])
      print('')

  print("="*50)
  print(f"==> Optimization finished! Best validation accuracy: {best_val_acc:.4f}")

## Sweep cutout lengths


In [11]:
lengths = [2, 4, 8, 16]
# holes = [1, 2, 4]
for length in lengths:
  train_loader = make_train_loader(make_train_set(make_transform(n_holes=1, length = length)))
  fine_tune(train_loader=train_loader, holes="length{}".format(length))

Using downloaded and verified file: ./data/cifar10_trainval_F22.zip
Extracting ./data/cifar10_trainval_F22.zip to ./data
Files already downloaded and verified
Considering holes = length2
Initial loss: 2.4257
==> Training starts!
Epoch 0:
Training loss: 1.4650, Training accuracy: 0.4544
Validation loss: 1.3124, Validation accuracy: 0.5376
Saving ...

Epoch 1:
Training loss: 1.0003, Training accuracy: 0.6408
Validation loss: 1.2617, Validation accuracy: 0.5652
Saving ...

Epoch 2:
Training loss: 0.8135, Training accuracy: 0.7132
Validation loss: 0.9476, Validation accuracy: 0.6638
Saving ...

Epoch 3:
Training loss: 0.6937, Training accuracy: 0.7562
Validation loss: 0.8928, Validation accuracy: 0.6930
Saving ...

Epoch 4:
Training loss: 0.6070, Training accuracy: 0.7887
Validation loss: 0.7174, Validation accuracy: 0.7498
Saving ...

Epoch 5:
Training loss: 0.5449, Training accuracy: 0.8109
Validation loss: 0.6913, Validation accuracy: 0.7648
Saving ...

Epoch 6:
Training loss: 0.4855, T

In [12]:
# lengths = [2, 4, 8, 16]
holes = [1, 2, 4]
for hole in holes:
  train_loader = make_train_loader(make_train_set(make_transform(n_holes=hole, length = 2)))
  fine_tune(train_loader=train_loader, holes="holes{}".format(hole))

Using downloaded and verified file: ./data/cifar10_trainval_F22.zip
Extracting ./data/cifar10_trainval_F22.zip to ./data
Files already downloaded and verified
Considering holes = holes1
Initial loss: 2.4415
==> Training starts!
Epoch 0:
Training loss: 1.4667, Training accuracy: 0.4568
Validation loss: 1.2252, Validation accuracy: 0.5480
Saving ...

Epoch 1:
Training loss: 1.0148, Training accuracy: 0.6345
Validation loss: 0.9756, Validation accuracy: 0.6536
Saving ...

Epoch 2:
Training loss: 0.8174, Training accuracy: 0.7127
Validation loss: 0.9620, Validation accuracy: 0.6692
Saving ...

Epoch 3:
Training loss: 0.6946, Training accuracy: 0.7566
Validation loss: 0.7653, Validation accuracy: 0.7258
Saving ...

Epoch 4:
Training loss: 0.6050, Training accuracy: 0.7882
Validation loss: 0.8444, Validation accuracy: 0.7050

Epoch 5:
Training loss: 0.5383, Training accuracy: 0.8126
Validation loss: 0.8487, Validation accuracy: 0.7152

Epoch 6:
Training loss: 0.4819, Training accuracy: 0.831