# Training SimpleNN on CIFAR-10
In this project, you will use the SimpleNN model to perform image classification on CIFAR-10. CIFAR-10 originally contains 60K images from 10 categories. We split it into 45K/5K/10K images to serve as the train/validation/test sets. We only release the ground-truth labels of the training/validation datasets to you.

## Step 0: Set up the SimpleNN model
Since you already practiced implementing simple neural networks in Homework 1, the implementation is provided for you.

In [None]:
# Mount Google Drive inside the Colab VM at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Add the project folder on the mounted Drive to the module search path
# (presumably so the local `tools` package imported below resolves - TODO confirm).
import sys 
sys.path.append('/content/drive/MyDrive/ECE_661/661_Final_Project')

In [None]:
# import necessary dependencies
import argparse
import os
import time
import tools
import datetime
from tqdm import tqdm_notebook as tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

In [None]:
# define the ResBlock to be used in ResNet;
class ResBlock(nn.Module):
    """Residual block: conv -> BN -> ReLU -> conv -> BN, plus a shortcut, then ReLU.

    The block input is added to the main-path output before the final ReLU.
    When the main path changes the tensor shape (different channel count
    and/or stride 2), the input is first passed through a 1x1 convolution so
    that both operands of the addition have the same shape.

    Args:
        input_dim: number of channels of the incoming feature map.
        output_dim: number of channels produced by the block.
        k_size: kernel size of the two main-path convolutions. Use an odd
            value so that the main path and the shortcut stay aligned.
        stride_bool: when true, the first convolution and the 1x1 shortcut
            use stride 2 (spatial down-sampling); otherwise stride 1.
    """

    def __init__(self, input_dim, output_dim, k_size=3, stride_bool=False):
        super(ResBlock, self).__init__()
        stride = 2 if stride_bool else 1
        # "Same" padding for odd kernels. A fixed padding of 1 only matches
        # k_size=3; any other kernel size shrinks the main path and makes the
        # residual addition in forward() fail with a shape mismatch.
        pad = k_size // 2

        self.conv1 = nn.Conv2d(input_dim, output_dim, kernel_size=k_size,
                               stride=stride, padding=pad)
        # The 1x1 projection is always registered (even when forward() ends
        # up using the identity shortcut) so parameter names and state_dict
        # keys do not depend on the constructor arguments.
        self.resconv = nn.Conv2d(input_dim, output_dim, kernel_size=1,
                                 stride=stride)
        self.bn1 = nn.BatchNorm2d(output_dim)
        # The second convolution always sees `output_dim` channels.
        self.conv2 = nn.Conv2d(output_dim, output_dim, kernel_size=k_size,
                               padding=pad)
        self.bn2 = nn.BatchNorm2d(output_dim)

    def forward(self, x):
        """Apply the block to `x` and return the activated sum of both paths."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Identity shortcut when shapes already agree, 1x1 projection otherwise.
        shortcut = x if x.shape == out.shape else self.resconv(x)
        return F.relu(out + shortcut)

In [None]:
class ResNet(nn.Module):
    """Nine-block residual network with two linear output heads.

    A 3->16 channel stem is followed by three groups of three residual
    blocks (16, 32 and 64 channels; the first block of the 32- and
    64-channel groups down-samples). The globally average-pooled 64-d
    feature vector feeds two heads: `fc` (10 outputs) and `fc2` (4 outputs).
    """

    def __init__(self):
        super(ResNet, self).__init__()
        # Stem.
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)

        # (in_channels, out_channels, down-sample?) for res1 .. res9.
        block_specs = [
            (16, 16, False), (16, 16, False), (16, 16, False),
            (16, 32, True), (32, 32, False), (32, 32, False),
            (32, 64, True), (64, 64, False), (64, 64, False),
        ]
        self._num_blocks = len(block_specs)
        for idx, (c_in, c_out, down) in enumerate(block_specs, start=1):
            setattr(self, f"res{idx}", ResBlock(c_in, c_out, stride_bool=down))

        # Output heads on the pooled 64-d features.
        self.fc = nn.Linear(64, 10)
        self.fc2 = nn.Linear(64, 4)

    def forward(self, x):
        """Return the pair `(fc logits, fc2 logits)` for the batch `x`."""
        features = F.relu(self.bn1(self.conv1(x)))

        # Each residual block applies its own final ReLU.
        for idx in range(1, self._num_blocks + 1):
            features = getattr(self, f"res{idx}")(features)

        pooled = F.adaptive_avg_pool2d(features, 1)
        flat = pooled.view(pooled.size(0), -1)
        # Raw logits are returned; no softmax is applied here.
        return self.fc(flat), self.fc2(flat)

## Step 1: Set up preprocessing functions
Preprocessing is very important as discussed in the lecture.
You will need to write preprocessing functions with the help of *torchvision.transforms* in this step.
You can find helpful tutorial/API at [here](https://pytorch.org/vision/stable/transforms.html).

### Question (b)
For the question, you need to:
1. Complete the preprocessing code below.
2. **In the PDF report**, briefly describe what preprocessing operations you used and what are the purposes of them.

Hint: 
1. Only two operations are necessary to complete the basic preprocessing here.
2. The raw input read from the dataset will be PIL images.
3. Data augmentation operations are not mandatory, but feel free to incorporate them if you want.
4. Reference value for mean/std of CIFAR-10 images (assuming the pixel values are within [0,1]): mean (RGB-format): (0.4914, 0.4822, 0.4465), std (RGB-format): (0.2023, 0.1994, 0.2010)

In [None]:
# useful libraries
import torchvision
import torchvision.transforms as transforms

#############################################
# your code here
# specify preprocessing function
# Per-channel statistics of CIFAR-10 (RGB order, pixel values scaled to [0, 1]).
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# Training and validation images must go through the same ToTensor + Normalize
# steps. Normalize used to be commented out for the training pipeline only,
# so the network was fitted on raw [0, 1] pixels but validated on
# standardised ones.
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD),
])
# Random augmentations (RandomCrop(size=(32, 32), padding=4),
# RandomHorizontalFlip(), RandomRotation(degrees=15),
# GaussianBlur(kernel_size=5, sigma=0.5)) are left disabled: the training set
# is iterated once and stored as arrays further down, so a random transform
# would be sampled a single time per image instead of once per epoch.

transform_val = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD),
])
#############################################


## Step 2: Set up dataset and dataloader

### Question (c)
Set up the train/val datasets and dataloaders that are to be used during the training. Check out the [official API](https://pytorch.org/docs/stable/data.html) for more information about **torch.utils.data.DataLoader**.

Here, you need to:
1. Complete the code below.

In [None]:
# do NOT change these
#import tools
from tools.dataset import CIFAR10
from torch.utils.data import DataLoader

# a few arguments, do NOT change these
DATA_ROOT = "./data"
TRAIN_BATCH_SIZE = 256
VAL_BATCH_SIZE = 100

#############################################
# Build the two labelled splits. They share the data root and download flag
# and differ only in the split name and the preprocessing pipeline.
_split_common = dict(root=DATA_ROOT, download=True)
train_set = CIFAR10(mode='train', transform=transform_train, **_split_common)
val_set = CIFAR10(mode='val', transform=transform_val, **_split_common)

# The DataLoaders are created further down, once the training samples have
# been converted into tensors.
#############################################

Downloading https://www.dropbox.com/s/s8orza214q45b23/cifar10_trainval_F22.zip?dl=1 to ./data/cifar10_trainval_F22.zip


0it [00:00, ?it/s]

Extracting ./data/cifar10_trainval_F22.zip to ./data
Files already downloaded and verified
Using downloaded and verified file: ./data/cifar10_trainval_F22.zip
Extracting ./data/cifar10_trainval_F22.zip to ./data
Files already downloaded and verified


In [None]:
# Materialise the training set as parallel lists:
#   inputs_changed[k]  - the k-th image as a NumPy array
#   targets_changed[k] - [class label, rotation label]
# Only the unrotated view is stored, so every rotation label is 0. To add
# rotated views, also append np.rot90(image, r, axes=(1, 2)) for r = 1, 2, 3
# together with [class label, r].
print(type(train_set))

inputs_changed, targets_changed = [], []
i = 0  # ends up equal to the number of samples visited
for i, (inputs, targets) in enumerate(train_set, start=1):
    inputs_changed.append(inputs.detach().numpy())
    targets_changed.append([targets, 0])

<class 'tools.dataset.CIFAR10'>


In [None]:
# # PERTURB OUR TRAINING DATA HERE
# print('this is trying to create the stacked ones')
# inputs_changed = []
# targets_changed = []
# i = 0
# for inputs, targets in train_set:
#   # print(inputs.shape, targets)
#   # print(i)
#   inputs = inputs.numpy()
#   input = np.concatenate((inputs, np.rot90(inputs, 1, axes=(1,2)), np.rot90(inputs, 2, axes =(1,2)), np.rot90(inputs, 3, axes = (1,2))), 0)
#   inputs_changed.append(input)
#   target = np.vstack(([targets] + [0], [targets] + [1], [targets] + [2], [targets] + [3]))
#   targets_changed.append(target)
#   i += 1
# # print(train_set)

In [None]:
# Inspect the first stored target: a [class label, rotation label] pair.
print(targets_changed[0])

[1, 0]


In [None]:
# Stack the per-sample lists into single arrays. The images are stored as
# float32 directly: the tensor built from this array is cast to float32
# anyway, so a float64 staging copy only doubles the memory footprint.
inputs_changed = np.array(inputs_changed, dtype='float32')
targets_changed = np.array(targets_changed)

# print('HEY LUIS I MADE A CHANGE HERE - THE TWO LINES ABOVE USED TO BE COMMENTED OUT, BUT THIS MORNING I HAD TO UNCOMMENT THEM')
# inputs_changed[0].shape
# print(targets_changed.shape)

## Step 3: Instantiate your SimpleNN model and deploy it to GPU devices.
### Question (d)
You may want to deploy your model to GPU device for efficient training. Please assign your model to GPU if possible. If you are training on a machine without GPUs, please deploy your model to CPUs.

Here, you need to:
1. Complete the code below.
2. **In the PDF report**, briefly describe how you verify that your model is indeed deployed on GPU. (Hint: check $\texttt{nvidia-smi}$.)

In [None]:
# Pick the computation device: CUDA when PyTorch can see a GPU, CPU otherwise.
#############################################
if torch.cuda.is_available():
    device = 'cuda'
    print("Run on GPU...")
else:
    device = 'cpu'
    print("Run on CPU...")
#############################################

Run on GPU...


In [None]:
from torch.utils.data import TensorDataset
# Wrap the stacked arrays as tensors and pair them up sample by sample.
# Images are stored as float32; labels keep the dtype of the NumPy array.
features = torch.from_numpy(inputs_changed).to(torch.float32)
labels = torch.from_numpy(targets_changed)

training_set = TensorDataset(features, labels)

# CIFAR10( --- Was trying to see if there was a way we could transform after perturbing, but doesn't seem to be the case
#     root=DATA_ROOT, 
#     mode='train', 
#     download=True,
#     transform=transform_train    # your code
# )

In [None]:
# Both loaders use batches of 256 and 4 worker processes; only the training
# loader reshuffles its samples every epoch.
train_loader = DataLoader(training_set, batch_size=256, shuffle=True, num_workers=4)

val_loader = DataLoader(val_set, batch_size=256, shuffle=False, num_workers=4)



In [None]:
# IPython shell escape: print the GPU/driver status table reported by nvidia-smi.
!nvidia-smi

Tue Dec 13 06:28:18 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   57C    P0    28W /  70W |      3MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Step 4: Set up the loss function and optimizer
Loss function/objective function is used to provide "feedback" for the neural networks. Typically, we use multi-class cross-entropy as the loss function for classification models. As for the optimizer, we will use SGD with momentum. 

### Question (e)
Here, you need to:
1. Set up the cross-entropy loss as the criterion. (Hint: there are implemented functions in **torch.nn**)
2. Specify a SGD optimizer with momentum. (Hint: there are implemented functions in **torch.optim**)

In [None]:
import torch.nn as nn
import torch.optim as optim

# hyperparameters, do NOT change right now
INITIAL_LR = 0.1   # initial learning rate
MOMENTUM = 0.9     # momentum for optimizer (value from AlexNet)
REG = 1e-5         # L2 regularization strength

# Model, placed on the selected device.
net = ResNet().to(device)

#############################################
# Cross-entropy on raw logits as the training criterion.
criterion = nn.CrossEntropyLoss().to(device)

# SGD with momentum; weight decay provides the L2 regularization.
optimizer = optim.SGD(
    net.parameters(),
    lr=INITIAL_LR,
    momentum=MOMENTUM,
    weight_decay=REG,
)
#############################################

## Step 5: Start the training process.

### Question (f)/(g)
Congratulations! You have completed all of the previous steps and it is time to train our neural network.

Here you need to:
1. Complete the training codes.
2. Actually perform the training.

Hint: Training a neural network usually repeats the following 4 steps: 

**i) Get a batch of data from the dataloader and copy it to your device (GPU).**

**ii) Do a forward pass to get the outputs from the neural network and compute the loss. Be careful about your inputs to the loss function: are they required to be logits or softmax probabilities?**

**iii) Do a backward pass (back-propagation) to compute gradients of all weights with respect to the loss.**

**iv) Update the model weights with the optimizer.**

You will also need to compute the accuracy of training/validation samples to track your model's performance over each epoch (the accuracy should be increasing as you train for more and more epochs).


## Only data augmentation


In [None]:
# Baseline run: train the classification head only.
#
# `net` returns a pair of logits. This cell optimises the first element
# against the class label, which is column 0 of every training target, and
# reports accuracy for that head alone. The second element of the output and
# `lam` belong to the auxiliary rotation loss, which is not part of the loss
# in this cell.

# total number of training epochs
EPOCHS = 200

# weight reserved for the auxiliary rotation loss; unused in this baseline
lam = .5
# the folder where the trained model is saved
CHECKPOINT_FOLDER = "./saved_model"


def _lr_momentum_for_epoch(epoch):
    """Return the (learning rate, momentum) pair to use during `epoch`.

    Epochs 0-140 keep the initial settings, epochs 141-170 use lr=0.01 with
    momentum 0.6, and epochs from 171 on use lr=0.001 with momentum 0.3.
    These are the boundaries the previous chained comparisons effectively
    produced (the settings simply carried over on epochs 140 and 170).
    """
    if epoch > 170:
        return 0.001, 0.3
    if epoch > 140:
        return 0.01, 0.6
    return INITIAL_LR, MOMENTUM


best_val_acc = 0
current_learning_rate = INITIAL_LR
current_momentum = MOMENTUM

# Sanity check: mean classification loss of the untrained network over one
# pass of the training data. Nothing is back-propagated here, so the pass
# runs under no_grad() to avoid building autograd graphs.
# NOTE(review): net.train()/net.eval() has not been called yet at this point,
# so the model runs in whatever mode it currently is in - confirm that is
# the intended mode for this measurement.
initial_loss = 0.0
with torch.no_grad():
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = net(inputs)
        # Column 0 of the training targets is the class label.
        initial_loss += criterion(outputs[0], targets[:, 0]).item()
print("Initial loss for classi: %.4f" % (initial_loss / len(train_loader)))

print("==> Training starts!")
print("="*50)
for i in range(EPOCHS):
    # Touch the optimizer (and announce it) only when the schedule changes.
    scheduled = _lr_momentum_for_epoch(i)
    if scheduled != (current_learning_rate, current_momentum):
        current_learning_rate, current_momentum = scheduled
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_learning_rate
            param_group['momentum'] = current_momentum
        print("Current learning rate has decayed to %f" % current_learning_rate)

    # switch to train mode
    net.train()

    print("Epoch %d:" % i)
    # running counters for the training accuracy / loss of this epoch
    total_examples = 0
    correct_examples = 0
    train_loss = 0.0

    # Train the model for 1 epoch.
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        # copy the batch to the device
        inputs = inputs.to(device)
        targets = targets.to(device)
        class_targets = targets[:, 0]

        # forward pass; only the first head contributes to the loss
        output = net(inputs)
        loss = criterion(output[0], class_targets)

        # zero the gradient, back-propagate, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a plain Python float. Summing the loss tensor itself
        # would keep autograd history alive for every batch of the epoch.
        train_loss += loss.item()

        # Accuracy is measured on the classification head only: the arg-max
        # of the first output is compared with the class label.
        _, predicted = torch.max(output[0], 1)
        correct = predicted.eq(class_targets).sum()

        total_examples += targets.shape[0]
        correct_examples += correct.item()

    avg_loss = train_loss / len(train_loader)
    avg_acc = correct_examples / total_examples
    print("Training loss: %.4f, Training accuracy: %.4f" % (avg_loss, avg_acc))

    # Validate on the validation dataset
    # switch to eval mode
    net.eval()

    # running counters for the validation accuracy / loss
    total_examples = 0
    correct_examples = 0
    val_loss = 0.0

    # disable gradient during validation, which can save GPU memory
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):
            # copy the batch to the device
            inputs = inputs.to(device)
            targets = targets.to(device)

            # Validation targets are used as-is (no column indexing, unlike
            # the training targets).
            outputs = net(inputs)
            val_loss += criterion(outputs[0], targets).item()

            # classification-head accuracy, as during training
            _, predicted = torch.max(outputs[0], 1)
            correct = predicted.eq(targets).sum()

            total_examples += targets.shape[0]
            correct_examples += correct.item()

    avg_loss = val_loss / len(val_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))

    # save the model checkpoint whenever validation accuracy improves
    if avg_acc > best_val_acc:
        best_val_acc = avg_acc
        os.makedirs(CHECKPOINT_FOLDER, exist_ok=True)
        print("Saving ...")
        state = {'state_dict': net.state_dict(),
                 'epoch': i,
                 'lr': current_learning_rate}
        torch.save(state, os.path.join(CHECKPOINT_FOLDER, 'resnet_auxrot.pth'))
    print('')

print("="*50)
print(f"==> Optimization finished! Best validation accuracy: {best_val_acc:.4f}")

this is now only considering the loss as a result of our rotation head - i.e we need to feed it a model that was already trained on its classification head!
so we need to load in our pre trained model for this guy
also our hyper parameter is how much we weight our loss by in this setting
initial loss from each head makes sense! Goood news
Initial loss for classi: 2.5759
==> Training starts!
Epoch 0:
Training loss: 1.7304, Training accuracy: 0.3484
Validation loss: 1.8479, Validation accuracy: 0.3942
Saving ...

Epoch 1:
Training loss: 1.3671, Training accuracy: 0.4987
Validation loss: 1.4180, Validation accuracy: 0.4922
Saving ...

Epoch 2:
Training loss: 1.1396, Training accuracy: 0.5878
Validation loss: 1.4036, Validation accuracy: 0.5392
Saving ...

Epoch 3:
Training loss: 0.9809, Training accuracy: 0.6496
Validation loss: 0.9532, Validation accuracy: 0.6626
Saving ...

Epoch 4:
Training loss: 0.8559, Training accuracy: 0.6972
Validation loss: 0.9509, Validation accuracy: 0.6768
Sav

**Now trying their way**

In [None]:
import itertools
import torchvision.transforms as trn
import cv2
import torchvision.transforms.functional as trnF
import opencv_functional as cv2f

# Candidate values for each perturbation axis (one tuple per axis).
expanded_params = ((0, -56, 56), (0, -56, 56))

# Number of candidate values on every axis.
num_params = [len(choices) for choices in expanded_params]
n_p1, n_p2 = num_params[:2]
# Cumulative start index of each axis when the per-axis choices are laid out
# one after another.
shift = np.cumsum([0] + num_params[:-1]).tolist()
output_dim = sum(num_params) + 4  # +4 due to four rotations

# Every (tx, ty) combination, in itertools.product order.
pert_configs = list(itertools.product(*expanded_params))

num_perts = len(pert_configs)

# Resize, then take a random 224-pixel crop.
# NOTE(review): 256/224 look like ImageNet-style sizes - confirm they are
# intended for the images this notebook feeds in.
resize_and_crop = trn.Compose([trn.Resize(256), trn.RandomCrop(224)])


class PerturbDataset(torch.utils.data.Dataset):

    def __init__(self, dataset, train_mode=True):
        self.dataset = dataset
        self.train_mode = train_mode

    def __getitem__(self, index):
        x, _ = self.dataset[index // num_perts]
        pert = pert_configs[index % num_perts]

        x = np.asarray(resize_and_crop(x))

        if np.random.uniform() < 0.5:
            x = x[:, ::-1]
        x = cv2f.affine(np.asarray(x), 0, (pert[0], pert[1]), 1, 0,
                        interpolation=cv2.INTER_LINEAR, mode=cv2.BORDER_REFLECT_101)

        label = [expanded_params[i].index(pert[i]) for i in range(len(expanded_params))]
        label = np.vstack((label + [0], label + [1], label + [2], label + [3]))

        x = trnF.to_tensor(x.copy()).unsqueeze(0).numpy()
        x = np.concatenate((x, np.rot90(x, 1, axes=(2, 3)),
                            np.rot90(x, 2, axes=(2, 3)), np.rot90(x, 3, axes=(2, 3))), 0)

        return torch.FloatTensor(x), label