# Training SimpleNN on CIFAR-10
In this project, you will use the SimpleNN model to perform image classification on CIFAR-10. CIFAR-10 originally contains 60K images from 10 categories. We split it into 45K/5K/10K images to serve as the train/validation/test sets. We only release the ground-truth labels of the training and validation sets to you.

## Step 0: Set up the SimpleNN model
Since you have already practiced implementing simple neural networks in Homework 1, we provide the implementation for you.

In [None]:
# Mount Google Drive at /content/drive (this import is only available inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import sys 
# Add the project folder on Drive to the import search path.
# NOTE(review): presumably this is where the local `tools` package lives — confirm.
sys.path.append('/content/drive/MyDrive/ECE_661/661_Final_Project')

In [None]:
# import necessary dependencies
import argparse
import os
import time
import tools
import datetime
from tqdm import tqdm_notebook as tqdm

from tools import opencv_functional as cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import itertools

In [None]:
# define the ResBlock to be used in ResNet;
class ResBlock(nn.Module):
    """Two-convolution residual block used to build the ResNet.

    Main path: conv -> BN -> ReLU -> conv -> BN. The shortcut is the input
    itself when its shape already matches the main-path output; otherwise the
    input goes through a 1x1 projection convolution (``resconv``) first.
    A final ReLU is applied after the addition.

    Args:
        input_dim (int): Number of input channels.
        output_dim (int): Number of output channels.
        k_size (int): Kernel size of both main-path convolutions. The padding
            is derived as ``k_size // 2``, which keeps the spatial size
            unchanged (at stride 1) for odd kernel sizes.
        stride_bool (bool): If true, the first convolution and the projection
            shortcut use stride 2, halving the spatial resolution.
    """

    def __init__(self, input_dim, output_dim, k_size=3, stride_bool=False):
        super(ResBlock, self).__init__()
        stride = 2 if stride_bool else 1
        # Derive the padding from the kernel size instead of hard-coding 1, so
        # that kernel sizes other than 3 still produce a main-path output that
        # lines up with the shortcut.
        padding = k_size // 2

        self.conv1 = nn.Conv2d(input_dim, output_dim, kernel_size=k_size,
                               stride=stride, padding=padding)
        # The projection is always registered (even when forward() ends up
        # using the identity shortcut) so that state-dict keys stay the same
        # for checkpoints saved with this layout.
        self.resconv = nn.Conv2d(input_dim, output_dim, kernel_size=1, stride=stride)
        self.bn1 = nn.BatchNorm2d(output_dim)
        self.conv2 = nn.Conv2d(output_dim, output_dim, kernel_size=k_size, padding=padding)
        self.bn2 = nn.BatchNorm2d(output_dim)

    def forward(self, x):
        """Apply the block to ``x`` and return the activated sum of main path and shortcut."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Identity shortcut when shapes agree, 1x1 projection otherwise.
        shortcut = x if x.shape == out.shape else self.resconv(x)
        return F.relu(out + shortcut)

In [None]:
class ResNet(nn.Module):
    """Nine-block residual network with two linear heads.

    A 3x3 stem convolution (3 -> 16 channels) is followed by three groups of
    three ResBlocks with 16, 32 and 64 channels; the first block of the 32- and
    64-channel groups is built with ``stride_bool=True``. After global average
    pooling, ``fc`` yields 10 outputs and ``fc2`` yields 4 outputs.
    """

    def __init__(self):
        super().__init__()
        # stem
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)

        # (in_channels, out_channels, downsample) for res1 .. res9, in order
        block_specs = (
            (16, 16, False), (16, 16, False), (16, 16, False),
            (16, 32, True), (32, 32, False), (32, 32, False),
            (32, 64, True), (64, 64, False), (64, 64, False),
        )
        for idx, (c_in, c_out, downsample) in enumerate(block_specs, start=1):
            # registered under the names res1 .. res9
            setattr(self, f"res{idx}", ResBlock(c_in, c_out, stride_bool=downsample))

        # two heads on top of the 64-dimensional pooled feature
        self.fc = nn.Linear(64, 10)
        self.fc2 = nn.Linear(64, 4)

    def forward(self, x):
        """Return the pair ``(fc output, fc2 output)`` computed from ``x``."""
        feats = F.relu(self.bn1(self.conv1(x)))

        # each ResBlock applies its own ReLU internally
        for idx in range(1, 10):
            feats = getattr(self, f"res{idx}")(feats)

        pooled = F.adaptive_avg_pool2d(feats, 1)
        pooled = pooled.view(pooled.size(0), -1)
        return self.fc(pooled), self.fc2(pooled)

In [None]:
class Cutout(object):
    """Zero out randomly placed square patches of an image tensor.

    Args:
        n_holes (int): How many patches are cut from each image.
        length (int): Side length, in pixels, of each square patch.
    """

    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        """Apply the cutout mask.

        Args:
            img (Tensor): Image tensor of size (C, H, W).
        Returns:
            Tensor: ``img`` multiplied by a mask that is 0 inside the patches
            and 1 elsewhere.
        """
        height, width = img.size(1), img.size(2)
        half = self.length // 2

        def clamp(value, upper):
            # restrict value to the closed range [0, upper]
            return min(max(value, 0), upper)

        keep = np.ones((height, width), np.float32)
        for _ in range(self.n_holes):
            # the patch centre is drawn row first, then column
            centre_row = np.random.randint(height)
            centre_col = np.random.randint(width)

            top = clamp(centre_row - half, height)
            bottom = clamp(centre_row + half, height)
            left = clamp(centre_col - half, width)
            right = clamp(centre_col + half, width)

            keep[top:bottom, left:right] = 0.

        # the same 2-D mask is shared by every channel
        return img * torch.from_numpy(keep).expand_as(img)

In [None]:
# useful libraries
import torchvision
import torchvision.transforms as transforms

#############################################
# your code here
# specify preprocessing function

# per-channel statistics shared by the training and validation pipelines
CIFAR_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: tensor conversion and normalisation first, then the
# random augmentations (padded crop, one 8-pixel cutout, rotation within
# +/-90 degrees). Horizontal flip and Gaussian blur are currently disabled.
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR_MEAN, std=CIFAR_STD),
    transforms.RandomCrop(size=(32, 32), padding=4),
    Cutout(n_holes=1, length=8),
    transforms.RandomRotation(degrees=90),
])

# Validation pipeline: tensor conversion and normalisation only.
transform_val = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR_MEAN, std=CIFAR_STD),
])
#############################################

In [None]:
# do NOT change these
#import tools
from tools.dataset import CIFAR10
from torch.utils.data import DataLoader

# a few arguments, do NOT change these
DATA_ROOT = "./data"
TRAIN_BATCH_SIZE = 128
VAL_BATCH_SIZE = 100

#############################################
# your code here
# construct dataset: each split gets its own preprocessing pipeline
train_set = CIFAR10(
    root=DATA_ROOT,
    mode='train',
    download=True,
    transform=transform_train
)
val_set = CIFAR10(
    root=DATA_ROOT,
    mode='val',
    download=True,
    transform=transform_val
)


# construct dataloader
# The batch sizes come from the constants above rather than repeated literals,
# so the validation loader actually uses VAL_BATCH_SIZE instead of 128.
train_loader = DataLoader(
    train_set,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,      # reshuffle the training data every epoch
    num_workers=4
)
val_loader = DataLoader(
    val_set,
    batch_size=VAL_BATCH_SIZE,
    shuffle=False,     # keep validation order fixed
    num_workers=4
)
#############################################

Downloading https://www.dropbox.com/s/s8orza214q45b23/cifar10_trainval_F22.zip?dl=1 to ./data/cifar10_trainval_F22.zip


0it [00:00, ?it/s]

Extracting ./data/cifar10_trainval_F22.zip to ./data
Files already downloaded and verified
Using downloaded and verified file: ./data/cifar10_trainval_F22.zip
Extracting ./data/cifar10_trainval_F22.zip to ./data
Files already downloaded and verified




In [None]:
# Select the compute device once and report which one is in use.
use_gpu = torch.cuda.is_available()
device = 'cuda' if use_gpu else 'cpu'
print("Run on GPU..." if use_gpu else "Run on CPU...")
  

Run on GPU...


## Step 4: Set up the loss function and optimizer
Loss function/objective function is used to provide "feedback" for the neural networks. Typically, we use multi-class cross-entropy as the loss function for classification models. As for the optimizer, we will use SGD with momentum. 

### Question (e)
Here, you need to:
1. Set up the cross-entropy loss as the criterion. (Hint: there are implemented functions in **torch.nn**)
2. Specify a SGD optimizer with momentum. (Hint: there are implemented functions in **torch.optim**)

In [None]:
import torch.nn as nn
import torch.optim as optim

# Build the network and initialise it from the saved baseline checkpoint.
net = ResNet().to(device)
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only runtime,
# matching the CPU fallback selected for `device`.
state_dict = torch.load("/content/drive/MyDrive/ECE_661/661_Final_Project/resnet_base_75.pth",
                        map_location=device)
net.load_state_dict(state_dict["state_dict"])

# initial learning rate
INITIAL_LR = 0.1

# momentum for optimizer
MOMENTUM = 0.9

# L2 regularization strength
REG = 1e-5

#############################################
# your code here
# create loss function
criterion = nn.CrossEntropyLoss().to(device)

# Add optimizer; the hyperparameters come from the constants above so that the
# values used by the optimizer and the ones read elsewhere cannot drift apart.
optimizer = optim.SGD(net.parameters(), lr=INITIAL_LR, momentum=MOMENTUM, weight_decay=REG)


## Step 5: Start the training process.

### Question (f)/(g)
Congratulations! You have completed all of the previous steps and it is time to train our neural network.

Here you need to:
1. Complete the training codes.
2. Actually perform the training.

Hint: Training a neural network usually repeats the following 4 steps: 

**i) Get a batch of data from the dataloader and copy it to your device (GPU).**

**ii) Do a forward pass to get the outputs from the neural network and compute the loss. Be careful about your inputs to the loss function: are they required to be logits or softmax probabilities?**

**iii) Do a backward pass (back-propagation) to compute gradients of all weights with respect to the loss.**

**iv) Update the model weights with the optimizer.**

You will also need to compute the accuracy of training/validation samples to track your model's performance over each epoch (the accuracy should be increasing as you train for more and more epochs).


In [None]:
# Placeholder cell: the initial-loss computation below is disabled; only a reminder is printed.
print('code below is stuff for initial loss, can add back in later if I feel like it, but honestly fuck it')

# NOTE(review): the disabled code indexes `inputs` with five dimensions and `targets`
# with three, which looks like an earlier data layout holding one entry per rotation —
# confirm against the current data loaders before re-enabling it.
# # Calculate initial loss 
# initial_loss = 0
# for batch_idx, (inputs, targets) in enumerate(train_loader):
#   inputs = inputs.to(device)
#   targets = targets.to(device)
#   # print(targets.shape)
#   # print(inputs.shape)

#   # print(max(targets[:,0,0])) #this is the target for the label / classification
#   # print(max(targets[:,0,1])) # this is the target for the rotation amount - i.e will be 0 for 0,1, one for 1,1 two for 2,1, three for 3,1
#   output0 = net(inputs[:,0,:,:,:]) #this is output of unrotated image - change 0 to 1 for 90, 0 to 2 for 180, 0 to 3 for 270 
#   # compute the output and loss
#   # outputs = net(inputs[:,:3,:,:])[0]
#   initial_loss = criterion(output0[0], targets[:,0,0]) #this should just be initial classification loss 
#   # initial_loss += loss
# print("Initial loss: %.4f" %(initial_loss))

code below is stuff for initial loss, can add back in later if I feel like it, but honestly fuck it


## Only data augmentation


In [None]:
# Fine-tune the network with the classification loss plus an auxiliary
# rotation-prediction loss on the second head. (The cell sits under the
# "Only data augmentation" heading, but the rotation term is trained here too.)

# weight of the auxiliary rotation loss (scaled by a further 0.25 below)
lam = 0.5
# some hyperparameters
# total number of training epochs
EPOCHS = 50

# the folder where the trained model is saved
CHECKPOINT_FOLDER = "./saved_model"


def _set_optimizer_hparams(opt, lr, momentum=None):
    """Write ``lr`` (and ``momentum`` when given) into every param group of ``opt``."""
    for param_group in opt.param_groups:
        param_group['lr'] = lr
        if momentum is not None:
            param_group['momentum'] = momentum


def _append_rotations(images):
    """Build the input batch and rotation labels for the auxiliary task.

    Args:
        images (Tensor): Batch of square images laid out as (N, C, H, W).
    Returns:
        tuple: ``(stacked, labels)`` where ``stacked`` is the concatenation of
        the original batch followed by its 0, 90, 180 and 270 degree rotations
        (5 * N images), and ``labels`` holds the rotation index 0..3 for the
        last 4 * N of them.
    """
    n = images.size(0)
    # torch.rot90 keeps the data on its current device, so no NumPy round trip
    # (and no hard-coded .cuda()) is needed.
    rotations = [torch.rot90(images, k, dims=(2, 3)) for k in range(4)]
    stacked = torch.cat([images] + rotations, 0)
    labels = torch.arange(4, device=images.device).repeat_interleave(n)
    return stacked, labels


# start the training/validation process
best_val_acc = 0
current_learning_rate = INITIAL_LR

print("==> Training starts!")
print("="*50)
for i in range(0, EPOCHS):

    # Step schedule for long runs. With EPOCHS = 50 none of these branches is
    # reached; they only take effect if EPOCHS is raised above 100.
    if 140 > i > 100  :
      current_learning_rate = 0.1
      _set_optimizer_hparams(optimizer, current_learning_rate)
      print("Current learning rate has decayed to %f" %current_learning_rate)
    if 170 > i > 140  :
      current_learning_rate = 0.001
      momentum = 0.6
      _set_optimizer_hparams(optimizer, current_learning_rate, momentum)
      print("Current learning rate has decayed to %f" %current_learning_rate)
    if i > 170  :
      current_learning_rate = 0.0005
      momentum = 0.3
      _set_optimizer_hparams(optimizer, current_learning_rate, momentum)
      print("Current learning rate has decayed to %f" %current_learning_rate)

    # switch to train mode
    net.train()

    print("Epoch %d:" %i)
    # running counters for the training accuracy
    total_examples = 0
    correct_examples = 0

    # accumulated as a Python float so no autograd history is kept alive
    train_loss = 0.0

    # Train the model for 1 epoch.
    for batch_idx, (bx, by) in enumerate(train_loader):
        bx = bx.to(device)
        by = by.to(device)
        curr_batch_size = bx.size(0)

        # First curr_batch_size rows: original images for classification.
        # Remaining rows: the four rotations, labelled 0..3 in by_prime.
        bx, by_prime = _append_rotations(bx)

        # The inputs are already normalised by the data pipeline and validation
        # feeds them to the network unchanged, so no extra rescaling is applied
        # here; training and validation now see the same input distribution.
        logits, pen = net(bx)

        # classification loss on the unrotated images + weighted rotation loss
        loss = criterion(logits[:curr_batch_size], by)
        loss = loss + lam*.25 * criterion(pen[curr_batch_size:], by_prime)

        # zero the gradient
        optimizer.zero_grad()

        # backpropagation
        loss.backward()

        # apply gradient and update the weights
        optimizer.step()

        train_loss += loss.item()

        # count the number of correctly classified (unrotated) samples
        _, predicted = torch.max(logits[:curr_batch_size], 1)
        correct = predicted.eq(by).sum()

        total_examples += by.shape[0]
        correct_examples += correct.item()

    avg_loss = train_loss / len(train_loader)
    avg_acc = correct_examples / total_examples
    print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))

    # Validate on the validation dataset
    # switch to eval mode
    net.eval()

    # running counters for the validation accuracy
    total_examples = 0
    correct_examples = 0

    val_loss = 0.0

    # disable gradient during validation, which can save GPU memory
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):
            # copy inputs to device
            inputs = inputs.to(device)
            targets = targets.to(device)
            # only the classification head is evaluated
            outputs = net(inputs)[0]
            val_loss += criterion(outputs, targets).item()

            # count the number of correctly predicted samples in the current batch
            _, predicted = torch.max(outputs, 1)
            correct = predicted.eq(targets).sum()

            total_examples += targets.shape[0]
            correct_examples += correct.item()

    avg_loss = val_loss / len(val_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))

    # save the model checkpoint whenever validation accuracy improves
    if avg_acc > best_val_acc:
        best_val_acc = avg_acc
        os.makedirs(CHECKPOINT_FOLDER, exist_ok=True)
        print("Saving ...")
        state = {'state_dict': net.state_dict(),
                'epoch': i,
                'lr': current_learning_rate}
        torch.save(state, os.path.join(CHECKPOINT_FOLDER, 'resnet_aux_cutout.pth'))
    print('')

print("="*50)
print(f"==> Optimization finished! Best validation accuracy: {best_val_acc:.4f}")

==> Training starts!
Epoch 0:
Training loss: 0.6220, Training accuracy: 0.8382
Validation loss: 0.9107, Validation accuracy: 0.7212
Saving ...

Epoch 1:
Training loss: 0.5621, Training accuracy: 0.8509
Validation loss: 0.7685, Validation accuracy: 0.7572
Saving ...

Epoch 2:
Training loss: 0.5396, Training accuracy: 0.8542
Validation loss: 0.8311, Validation accuracy: 0.7440

Epoch 3:
Training loss: 0.5232, Training accuracy: 0.8576
Validation loss: 0.8462, Validation accuracy: 0.7294

Epoch 4:
Training loss: 0.5032, Training accuracy: 0.8637
Validation loss: 0.7515, Validation accuracy: 0.7626
Saving ...

Epoch 5:
Training loss: 0.4928, Training accuracy: 0.8668
Validation loss: 0.6732, Validation accuracy: 0.7802
Saving ...

Epoch 6:
Training loss: 0.4853, Training accuracy: 0.8677
Validation loss: 1.0074, Validation accuracy: 0.6900

Epoch 7:
Training loss: 0.4771, Training accuracy: 0.8704
Validation loss: 0.9943, Validation accuracy: 0.7034

Epoch 8:
Training loss: 0.4667, Trainin

In [None]:
# Note-to-self for a follow-up experiment; the message is left unfinished.
# NOTE(review): it appears to suggest computing the rotation loss on the whole `pen`
# output rather than the `pen[curr_batch_size:]` slice — confirm the label layout first.
print('ideas are just pass it pen, not pen[curr_batch_size:] - also ')

ideas are just pass it pen, not pen[curr_batch_size:] - also 
