# Assignment 3 - Deep Learning
# Names: Roy Madpis, Michael Kobiavanov
## IDs: 319091526, 206814485
## Question 1

In [3]:
! pip install prettytable



In [1]:
#import imageio
import matplotlib
import matplotlib.pyplot as plt
import torch
import torchvision
import os
import random
import numpy as np
import pandas as pd
import math
from PIL import Image 

import time
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import seaborn as sns

from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler 
from sklearn.preprocessing import StandardScaler 

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#################################################################
#The following function compute the number of parameters in each layer in a neural net model
#and stores the total number of parameters
from prettytable import PrettyTable

def count_parameters(model):
    table = PrettyTable(["Modules", "Parameters"])
    total_params = 0
    for name, parameter in model.named_parameters():
        if not parameter.requires_grad: continue
        param = parameter.numel()
        table.add_row([name, param])
        total_params+=param
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params
    
#total_params = count_parameters(model_name)

# <font color = crimson > Question 1 (70%) : </font>

CIFAR-10 is another MNIST-like repository of images.

### a. Your task is to explore performance of several neural network architectures.
**Choose combinations of:**
+ fully connected
+ convolutional
+ dropout
+ max pooling
+ and batch normalization layers.

**Then :**
- Compare rates of training and inference (time per sample)
- and accuracy on **train**, **validation**, and **test sets**.
- Present your results in informative, simple to understand and visually appealing manner (e.g. summary table or graphs). 
- Explain your conclusions (briefly). 

## Step 1: CIFAR-10 Data Loading + change to tensor type 
+ Each row of the array stores a 32x32 colour image

In [5]:
# from Mnist_Data.data_utils import load_CIFAR10
# cifar10_dir = 'Mnist_Data/cifar-10-batches-py'

# # Cleaning up variables to prevent loading data multiple times (which may cause memory issue)
# try:
#     del X_train, y_train
#     del X_test, y_test
#     print('Clear previously loaded data.')
# except:
#     pass

# X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
# #reshaping to get data in shape: KX3X32X32 #k=num of images
# X_train = X_train.reshape(-1,3,32,32)
# X_test = X_test.reshape(-1,3,32,32)

# #changing to type Tensor
# X_train_t = torch.tensor(X_train, dtype = torch.uint8)
# X_test_t = torch.tensor(X_test, dtype = torch.uint8)
# y_train_t = torch.tensor(y_train, dtype = torch.uint8)
# y_test_t = torch.tensor(y_test, dtype = torch.uint8)

# #creating validation set -  splitting to training and validation set
# X_train_t, X_validation_t, y_train_t, y_validation_t=train_test_split(X_train_t, y_train_t, test_size = 0.05,
#                                                                       shuffle=True, random_state = 1998)

# # As a sanity check, we print out the size of the training and test data.

# print('Training data shape: ', X_train_t.shape) #print('Training data shape: ', X_train.shape)
# print('Training labels shape: ', y_train_t.shape) #print('Training labels shape: ', y_train.shape)
# print("---------------------------------------------------------")
# print('Validation data shape: ', X_validation_t.shape)
# print('Validation labels shape: ', y_validation_t.shape)
# print("---------------------------------------------------------")
# print('Test data shape: ', X_test_t.shape) #print('Test data shape: ', X_test.shape)
# print('Test labels shape: ', y_test_t.shape) #print('Test labels shape: ', y_test.shape)


In [2]:
cifar_train = torchvision.datasets.CIFAR10(root="./data", train = True, download = True)
cifar_test = torchvision.datasets.CIFAR10(root="./data", train = False, download = True)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# load data - this may take some time!
print("loading the data, this may take some time!")
print()
cifar_train = torchvision.datasets.CIFAR10(root="./data", train = True, download = True)
cifar_test = torchvision.datasets.CIFAR10(root="./data", train = False, download = True)
#################################################
x_train = cifar_train.data
y_train = cifar_train.targets
x_test = cifar_test.data
y_test = cifar_test.targets

#reshaping to get data in shape: KX3X32X32 #k=num of images
X_train = x_train.reshape(-1,3,32,32)
X_test = x_test.reshape(-1,3,32,32)
#changing to type Tensor
X_train_t = torch.tensor(X_train, dtype = torch.uint8)
X_test_t = torch.tensor(X_test, dtype = torch.uint8)
y_train_t = torch.tensor(y_train, dtype = torch.uint8)
y_test_t = torch.tensor(y_test, dtype = torch.uint8)

#creating validation set -  splitting to training and validation set
X_train_t, X_validation_t, y_train_t, y_validation_t=train_test_split(X_train_t, y_train_t, test_size = 0.05,
                                                                      shuffle=True, random_state = 1998)

# As a sanity check, we print out the size of the training and test data.

print('Training data shape: ', X_train_t.shape) #print('Training data shape: ', X_train.shape)
print('Training labels shape: ', y_train_t.shape) #print('Training labels shape: ', y_train.shape)
print("---------------------------------------------------------")
print('Validation data shape: ', X_validation_t.shape)
print('Validation labels shape: ', y_validation_t.shape)
print("---------------------------------------------------------")
print('Test data shape: ', X_test_t.shape) #print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test_t.shape) #print('Test labels shape: ', y_test.shape)

loading the data, this may take some time!

Files already downloaded and verified
Files already downloaded and verified
Training data shape:  torch.Size([47500, 3, 32, 32])
Training labels shape:  torch.Size([47500])
---------------------------------------------------------
Validation data shape:  torch.Size([2500, 3, 32, 32])
Validation labels shape:  torch.Size([2500])
---------------------------------------------------------
Test data shape:  torch.Size([10000, 3, 32, 32])
Test labels shape:  torch.Size([10000])


## Step 2 - Normalizing the data

As we are dealing with **images**, then we know that each pixel can have a value between 0 to 255, thus if we divide the X vector with 255 we will get values between 0 to 1.

**Note** that before normalizing the data, we must convert it first into **torch.float32**.

In [4]:
X_train_t_normalize = X_train_t.to(dtype = torch.float32)/255
X_validation_t_normalize = X_validation_t.to(dtype = torch.float32)/255
X_test_t_normalize = X_test_t.to(dtype = torch.float32)/255

#sanity check: we need to get that the max value is 1 and that the min value is 0:
print("Max value after normalization: ", torch.max(X_train_t_normalize).item()) # the .item() will retrieve only the value
print("Min value after normalization: ", torch.min(X_validation_t_normalize).item())

Max value after normalization:  1.0
Min value after normalization:  0.0


In [5]:
#The function retrives 4 outputs regarding the shape we will get after the conv layer:
def conv_output_shape(conv_layer, H_in = 32, W_in = 32, verbose = True):
    out_channels = conv_layer.out_channels
    in_channels = conv_layer.in_channels
    kernel_size = conv_layer.kernel_size
    stride = conv_layer.stride
    padding = conv_layer.padding
    dilation = conv_layer.dilation
    
    H_out = math.floor(((H_in + 2*padding[0] - dilation[0]*(kernel_size[0] - 1) -1)/stride[0]) +1)
    W_out =  math.floor(((W_in + 2*padding[1] - dilation[1]*(kernel_size[1] - 1) -1)/stride[1]) +1)
    
    if verbose:
        print("Input Channels =", in_channels, " | Output channels =", out_channels)
        print("Output shape: num_images X", out_channels, "X", H_out, "X", W_out)
    return out_channels, H_out, W_out



def conv_maxpool_output_shape(conv_layer, maxpoolayer, H_in = 32, W_in = 32, verbose = True):
    out_channels = conv_layer.out_channels
    in_channels = conv_layer.in_channels
    kernel_size = conv_layer.kernel_size
    stride = conv_layer.stride
    padding = conv_layer.padding
    dilation = conv_layer.dilation
    H_out_conv = math.floor(((H_in + 2*padding[0] - dilation[0]*(kernel_size[0] - 1) -1)/stride[0]) +1)
    W_out_conv =  math.floor(((W_in + 2*padding[1] - dilation[1]*(kernel_size[1] - 1) -1)/stride[1]) +1)
    
    kernel_size_pool = maxpoolayer.kernel_size
    stride_pool = maxpoolayer.stride
    padding_pool = maxpoolayer.padding
    dilation_pool = maxpoolayer.dilation
    #H_out_pool = math.floor(((H_out_conv + 2*padding_pool[0] - dilation_pool[0]*(kernel_size_pool[0] - 1) -1)/stride_pool[0]) +1)
    #W_out_pool =  math.floor(((W_out_conv + 2*padding_pool[1] - dilation_pool[1]*(kernel_size_pool[1] - 1) -1)/stride_pool[1]) +1)
    H_out_pool = math.floor(((H_out_conv + 2*padding_pool - dilation_pool*(kernel_size_pool - 1) -1)/stride_pool) +1)
    W_out_pool =  math.floor(((W_out_conv + 2*padding_pool - dilation_pool*(kernel_size_pool - 1) -1)/stride_pool) +1)
    
    if verbose:
        print("Input Channels =", in_channels, " | Output channels =", out_channels)
        print("Output shape: num_images X", out_channels, "X", H_out_pool, "X", W_out_pool)
        
    return out_channels, H_out_pool, W_out_pool

In [6]:
num_images = 1
C_in = 3
H_in = 32
W_in = 32
#input_layer = [C_in*H_in*W_in]
print("Input shape:", num_images, "X", C_in, "X", H_in, "X", W_in)
#######################################3
out_channels = 50
kernel_size = (5,5) #Size of the convolving kernel
stride = (7,5) #stride (int or tuple, optional) – Stride of the convolution. Default: 1

padding = (4, 4) #Padding added to all four sides of the input. Default: 0
dilation = (1,1) # Spacing between kernel elements. Default: 1 - default = (1, 1)
conn1 = torch.nn.Conv2d(in_channels = C_in,
                out_channels = out_channels,
                kernel_size = kernel_size,
                stride = stride,
                padding = padding,
                dilation = dilation) 


maxpoolayer = torch.nn.MaxPool2d(kernel_size=2)




Input shape: 1 X 3 X 32 X 32


In [7]:
one_image = X_train_t_normalize[0:100]

print("Input was one image, the input shape:", one_image.shape)
conv_output = conn1(one_image)
maxpool_output = maxpoolayer(conv_output)
print("Conv Output shape:", conv_output.shape)
print("Conv Output shape:", maxpool_output.shape)

print("-----------------------------------------------------------------------")
conv_maxpool_output_shape(conv_layer=conn1, maxpoolayer=maxpoolayer, H_in = 32, W_in = 32)

Input was one image, the input shape: torch.Size([100, 3, 32, 32])
Conv Output shape: torch.Size([100, 50, 6, 8])
Conv Output shape: torch.Size([100, 50, 3, 4])
-----------------------------------------------------------------------
Input Channels = 3  | Output channels = 50
Output shape: num_images X 50 X 3 X 4


(50, 3, 4)

## Defining the 1st model -   <font color = purple > Net models class: </font> 

### This class enables defining a neural net model.

The class needs to get as input:
1. **input_layer** = the dimension of the input layer (insert as a list)
2. **layers** = a list containing the dimensions  of the different layers ( without The input and output layers)
3. **output_layer** =  the dimension of the output layer (insert as a list)


4. **drop_out** = if you wish to include drop out in each hidden layer, then pass a float between 0 to 1. **default** = *False*
5. **batch_norm** = if you wish to add a batch normalization layer. **default** = *False*


Note: the input data has 3 dimensions - R G B

In [8]:
input_layer = [3*32*32]
layers=[512, 256]
[input_layer[0]]+ layers

[3072, 512, 256]

In [9]:
class Net_models(torch.nn.Module): # derive your model from the basic torch.nn.Model
    def __init__(self, input_layer = [3*32*32], layers=[512, 256], output_layer = [10], drop_out=False,
                batch_norm = False):
        super(Net_models,self).__init__()
        self.flatten = torch.nn.Flatten()
        self.input_layer = input_layer[0]
        #output_layer = [10]
        layers = input_layer + layers + output_layer # default: 784x512x256x10        
        
        layers_list = []
        for Layer_i in range(1, len(layers)): #1,2,3,... according to the number of layers given
            layers_list.append(torch.nn.Linear(layers[Layer_i-1], layers[Layer_i]) )
            #self.fully_connected_1 = torch.nn.Linear(28*28, 512)

            if Layer_i < len(layers)-1: #if the layer i is not the last layer - add a ReLU 
                layers_list.append( torch.nn.ReLU() )
                if drop_out:
                    layers_list.append(torch.nn.Dropout(drop_out))
                if batch_norm:
                    layers_list.append(torch.nn.BatchNorm1d(layers[Layer_i]))

        #if the layer i is the last layer - add a sog-softmax 
        layers_list.append( torch.nn.LogSoftmax())
        
        self.layers_stack = torch.nn.Sequential(*layers_list) #this will enable getting params that can be trained
        self.layers = layers_list 
        
    def forward(self,x):        
        x = x.view(-1,self.input_layer) # flatten image input. This code will work for input as vector or image. will also work for batches. 
        #x = self.flatten(x)
        logits = self.layers_stack(x)
        
        #for layer_index, layer in enumerate(self.layers): 
        #    x = self.layer(x)
        return logits

## Defining the 2nd  model -   <font color = purple > Net models class with covolusion: </font> 

### This class enables defining a neural net model.

The class needs to get as input:
1. **input_layer** = the dimension of the input layer (insert as a list)
2. **layers** = a list containing the dimensions  of the different layers ( without The input and output layers)
3. **output_layer** =  the dimension of the output layer (insert as a list)


4. **drop_out** = if you wish to include drop out in each hidden layer, then pass a float between 0 to 1. **default** = *False*
5. **batch_norm** = if you wish to add a batch normalization layer. **default** = *False*


Note: the input data has 3 dimensions - R G B

In [10]:
class Net_models_conv(torch.nn.Module): # derive your model from the basic torch.nn.Model
    def __init__(self, C_in = 3, H_in = 32, W_in = 32, layers=[512, 256], output_layer = [10],
                 drop_out=False,
                 batch_norm = False):
        super(Net_models_conv,self).__init__()
        self.C_in = C_in
        self.H_in = H_in
        self.W_in = W_in
        
        ###################################################
        conv1 = torch.nn.Conv2d(in_channels = C_in, #3
                                     out_channels=20,
                                     kernel_size=4,
                                     padding = 1) 
        relu1 = torch.nn.ReLU() 
        
        out_channels_conv1, H_out_conv1, W_out_conv1 = conv_output_shape(conv_layer = conv1,
                                                                         H_in = self.H_in,
                                                                         W_in = self.W_in, verbose = False)
        
        ###################################################
        conv2 = torch.nn.Conv2d(in_channels= conv1.out_channels, #16
                                     out_channels=4,
                                     kernel_size=5,
                                     padding  = 2) 
        relu2 = torch.nn.ReLU()

        out_channels_conv2, H_out_conv2, W_out_conv2 = conv_output_shape(conv_layer = conv2,
                                                                         H_in = H_out_conv1,
                                                                         W_in = W_out_conv1, verbose = False)
        ###################################################
        conv_layers_list = [conv1, relu1, conv2, relu2 ]
        self.conv_layers = conv_layers_list
        self.conv_layers_stack = torch.nn.Sequential(*conv_layers_list) #this will enable getting params that can be trained

        input_layer_for_fully_connected_layer_1 = [out_channels_conv2*H_out_conv2*W_out_conv2]
        layers = input_layer_for_fully_connected_layer_1 + layers + output_layer # default: 3072x512x256x10        
        
        layers_list = []
        for Layer_i in range(1, len(layers)): #1,2,3,... according to the number of layers given
            
            layers_list.append(torch.nn.Linear(layers[Layer_i-1], layers[Layer_i]) )

            if Layer_i < len(layers)-1: #if the layer i is not the last layer - add a ReLU 
                layers_list.append( torch.nn.ReLU() )
                if drop_out:
                    layers_list.append(torch.nn.Dropout(drop_out))
                if batch_norm:
                    layers_list.append(torch.nn.BatchNorm1d(layers[Layer_i]))

        #if the layer i is the last layer - add a sog-softmax 
        layers_list.append(torch.nn.LogSoftmax())
        
        self.layers_stack = torch.nn.Sequential(*layers_list) #this will enable getting params that can be trained
        
        self.layers = layers_list 
    def forward(self,x):        
        x = x.view(-1,self.C_in,self.H_in,self.W_in) # flatten image input. This code will work for input as vector or image. will also work for batches. 
        batch_size = x.shape[0] 

        x = self.conv_layers_stack(x)
        x = x.view(batch_size, -1) # flattern the "image". 
        logits = self.layers_stack(x)
        
        #for layer_index, layer in enumerate(self.layers): 
        #    x = self.layer(x)
        return logits

## Defining the 3rd  model -   <font color = purple > Net models class with covolusion and max pool: </font> 

### This class enables defining a neural net model.

The class needs to get as input:
1. **input_layer** = the dimension of the input layer (insert as a list)
2. **layers** = a list containing the dimensions  of the different layers ( without The input and output layers)
3. **output_layer** =  the dimension of the output layer (insert as a list)


4. **drop_out** = if you wish to include drop out in each hidden layer, then pass a float between 0 to 1. **default** = *False*
5. **batch_norm** = if you wish to add a batch normalization layer. **default** = *False*


Note: the input data has 3 dimensions - R G B

In [11]:
model_conv = Net_models_conv().to(device=device)
print(model_conv)

count_parameters(model_conv)

Net_models_conv(
  (conv_layers_stack): Sequential(
    (0): Conv2d(3, 20, kernel_size=(4, 4), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(20, 4, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (3): ReLU()
  )
  (layers_stack): Sequential(
    (0): Linear(in_features=3844, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=10, bias=True)
    (5): LogSoftmax(dim=None)
  )
)
+----------------------------+------------+
|          Modules           | Parameters |
+----------------------------+------------+
| conv_layers_stack.0.weight |    960     |
|  conv_layers_stack.0.bias  |     20     |
| conv_layers_stack.2.weight |    2000    |
|  conv_layers_stack.2.bias  |     4      |
|   layers_stack.0.weight    |  1968128   |
|    layers_stack.0.bias     |    512     |
|   layers_stack.2.weight    |   131072   |
|    layers_stack.2.bias     |    256   

2105522

In [12]:
class Net_models_conv_maxpool(torch.nn.Module): # derive your model from the basic torch.nn.Model
    def __init__(self, C_in = 3, H_in = 32, W_in = 32, layers=[512, 256], output_layer = [10],
                 drop_out=False,
                 batch_norm = False,
                conv1_out_channels = 16, conv1_kernel_size = 5, conv1_padding = 2, maxpool1_kernel_size = 2,
                conv2_out_channels = 4, conv2_kernel_size = 5, conv2_padding = 2, maxpool2_kernel_size = 2,
                conv3_out_channels = 4, conv3_kernel_size = 5, conv3_padding = 2, maxpool3_kernel_size = 2):
        super(Net_models_conv_maxpool,self).__init__()
        self.C_in = C_in
        self.H_in = H_in
        self.W_in = W_in
        
        ######################## conv layer 1 - conv + relu + maxpool ###########################
        conv1 = torch.nn.Conv2d(in_channels = C_in, #3
                                     out_channels=conv1_out_channels,
                                     kernel_size=conv1_kernel_size,
                                     padding = conv1_padding) 
        batch_norm_1 = torch.nn.BatchNorm2d(conv1_out_channels)
        relu1 = torch.nn.ReLU() 
        maxpool1 =  torch.nn.MaxPool2d(kernel_size=maxpool1_kernel_size)
        out_channels_conv1, H_out_conv1, W_out_conv1 = conv_maxpool_output_shape(conv_layer = conv1,
                                                                                 maxpoolayer = maxpool1,
                                                                                 H_in = self.H_in,
                                                                                 W_in = self.W_in, verbose = False)
        ######################## conv layer 2 - conv + relu + maxpool ###########################
        conv2 = torch.nn.Conv2d(in_channels= conv1.out_channels, #16
                                     out_channels=conv2_out_channels,
                                     kernel_size=conv2_kernel_size,
                                     padding  = conv2_padding) 
        batch_norm_2 = torch.nn.BatchNorm2d(conv2_out_channels)
        relu2 = torch.nn.ReLU()
        maxpool2 =  torch.nn.MaxPool2d(kernel_size=maxpool2_kernel_size)
        out_channels_conv2, H_out_conv2, W_out_conv2 = conv_maxpool_output_shape(conv_layer = conv2,
                                                                                 maxpoolayer=maxpool2,
                                                                                 H_in = H_out_conv1,
                                                                                 W_in = W_out_conv1, verbose = False)
        ######################## conv layer 3 - conv + relu + maxpool ###########################
        conv3 = torch.nn.Conv2d(in_channels= conv2.out_channels, #16
                                     out_channels=conv3_out_channels,
                                     kernel_size=conv3_kernel_size,
                                     padding  = conv3_padding) 
        batch_norm_3 = torch.nn.BatchNorm2d(conv3_out_channels)
        relu3 = torch.nn.ReLU()
        maxpool3 =  torch.nn.MaxPool2d(kernel_size=maxpool3_kernel_size)
        out_channels_conv3, H_out_conv3, W_out_conv3 = conv_maxpool_output_shape(conv_layer = conv3,
                                                                                 maxpoolayer=maxpool3,
                                                                                 H_in = H_out_conv2,
                                                                                 W_in = W_out_conv2, verbose = False)
        ##################### combining all the con layers ##############################
        conv_layers_list = [conv1,batch_norm_1, relu1, maxpool1, conv2, batch_norm_2, relu2, maxpool2,
                           conv3, batch_norm_3, relu3, maxpool3]
        self.conv_layers = conv_layers_list
        self.conv_layers_stack = torch.nn.Sequential(*conv_layers_list) #this will enable getting params that can be trained
        
        #################################### fully connected layers (+relu) ###############
        input_layer_for_fully_connected_layer_1 = [out_channels_conv3*H_out_conv3*W_out_conv3]
        layers = input_layer_for_fully_connected_layer_1 + layers + output_layer # default: 3072x512x256x10        
        
        layers_list = []
        for Layer_i in range(1, len(layers)): #1,2,3,... according to the number of layers given
            
            layers_list.append(torch.nn.Linear(layers[Layer_i-1], layers[Layer_i]) )

            if Layer_i < len(layers)-1: #if the layer i is not the last layer - add a ReLU 
                layers_list.append( torch.nn.ReLU() )
                if drop_out:
                    layers_list.append(torch.nn.Dropout(drop_out))
                if batch_norm:
                    layers_list.append(torch.nn.BatchNorm1d(layers[Layer_i]))

        #if the layer i is the last layer - add a sog-softmax 
        layers_list.append(torch.nn.LogSoftmax())
        
        self.layers_stack = torch.nn.Sequential(*layers_list) #this will enable getting params that can be trained
        
        self.layers = layers_list 
    def forward(self,x):        
        x = x.view(-1,self.C_in,self.H_in,self.W_in) # flatten image input. This code will work for input as vector or image. will also work for batches. 
        batch_size = x.shape[0] 

        x = self.conv_layers_stack(x)
        x = x.view(batch_size, -1) # flattern the "image". 
        logits = self.layers_stack(x)
        
        #for layer_index, layer in enumerate(self.layers): 
        #    x = self.layer(x)
        return logits

In [13]:
model_conv_maxpool = Net_models_conv_maxpool().to(device=device)
print(model_conv_maxpool)

count_parameters(model_conv_maxpool)

Net_models_conv_maxpool(
  (conv_layers_stack): Sequential(
    (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(16, 4, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(4, 4, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (9): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layers_stack): Sequential(
    (0): Linear(in_features=64, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_feat

170450

## Step 5 - Define loss funtion and optimizer
In this step we need to define a loss function and an optimizer


In [14]:
criterion = torch.nn.CrossEntropyLoss() #The loss function
optimizer = torch.optim.SGD(model_conv.parameters(),lr = 0.01) #The optimizer

##  <font color = blue > Defining Model Accuracy function </font> 
This function will enable calculating a **classification model** performance - get its accuracy. It can be used on the training set, validation set and the test set.

The function gets as input:
1. **model** - The model object we wish to measure its accuracy
2. **x_data** - torch.tensor containing the X matrix
3. **y_data** - torch.tensor containing the labels



In [15]:
def model_accuracy(model, x_data, y_data):  
    device = next(model.parameters()).device # get device on which the model is loaded. 
    model.eval() # must set model to evaluation mode to disable dropout!
    with torch.no_grad():
        #number_of_matches = 0
        x_data, y_data = x_data.to(device=device), y_data.to(device=device)
        _, max_index = model(x_data).max(dim=1)
        number_of_matches = (max_index==y_data).to(dtype=torch.float32, device=torch.device("cpu")).sum() 
        return number_of_matches/len(y_data)

## <font color = blue > Define Training function </font> Fot MLP - Multilayer perceptron 
### The function gets as input:

1. neural net model, untrained
2. x_train matrix (normalized and tensor type)
3. y_train vector (normalized and tensor type)
4. **Optimizer** - (The optimizer responsible to take the parameters (weights) that needs to be optimized and updates them according to the derivative
5. **Criterion** - the loss function

------------------------------- optional : -------------------------------

6. **optional: x_validation_normalize_t** (x validation matrix - normalized and of type torch.tensor)
7. **optional: y_validation_t** (y validation vector - normalized and of type torch.tensor)

If *x_validation_normalize_t* is not provided, everything will be fine, you just won't get the validation accuracy score along the way.

--------------------------------------------------------------
8. n_epochs = the number of epochs, default = 50
9. batch_size = the size of each batch in the training loop, default = 500
10. device - use GPU  or CPU?, default = cpu

In [16]:
def train_MLP_net_images(model, x_train_normalize_t, y_train_t, criterion, optimizer,
                         x_validation_normalize_t = None, y_validation_t = None,
                         n_epochs=50, batch_size=500, device="cpu"):
    ##########################################
    train_images_norm = x_train_normalize_t.to(device=device)
    train_labels = y_train_t.to(device=device)
        
    #checking if given validation set arguements
    if x_validation_normalize_t is None or y_validation_t is None:
        validation_set_checker = False
    else:
        validation_images = x_validation_normalize_t.to(device=device)
        validation_labels = y_validation_t.to(device=device)
        validation_set_checker = True
    ##########################################
    training_time_epochs = []
    progress_log = []
    model.to(device)
    
    for epoch in range(n_epochs):
        model.train() # set model into train mode. 
        current_loss = 0
        # prepare data for the current epoch (iterate over random indeces)
        train_indeces = torch.randperm( (len(train_images_norm)//batch_size)*batch_size ).to(device=device) #  len(train_indeces) is an integer number of batches. 
        train_indeces = train_indeces.view(-1, batch_size) # shape: # batches x batch size, each *row* is a batch
        
        
        training_time_batches = []
        for current_batch_indeces in train_indeces: #for each row of indexes
            train_data = train_images_norm[current_batch_indeces].to(dtype=torch.float32)
            train_data_labels = train_labels[current_batch_indeces].to(dtype=torch.int64)
            
            optimizer.zero_grad() # clear gradients of all model parameters.
            
            start_train_time = time.time() #train time calculation
            output = model(train_data) # forward pass: compute model predictions
            batch_train_time = time.time() - start_train_time
            training_time_batches.append(batch_train_time)
            
            loss = criterion(output, train_data_labels) # calculate the current loss
            loss.backward() # backward pass: compute gradient of the loss for all model parameters. 
            optimizer.step() # use gradients to update the model parameters 

            current_loss += loss.item() * len(train_data) # weight loss by the size of the batch.
            

        training_time_epochs.append(np.mean(training_time_batches))
        # evaluate performance of the batch
        with torch.no_grad(): # disable autograd
            train_accuracy = model_accuracy(model, train_data, train_data_labels)
            if validation_set_checker:
                validate_accuracy = model_accuracy(model, validation_images, validation_labels)
            
            if validation_set_checker:
                print(f"{epoch} of {n_epochs} | Loss: {current_loss/ len(train_images_norm):0.4f} | Train accuracy: {train_accuracy:0.4f} | Validate accuracy: {validate_accuracy:0.4f}")
            else:
                print(f"{epoch} of {n_epochs} | Loss: {current_loss/ len(train_images_norm):0.4f} | Train accuracy: {train_accuracy:0.4f}")
                validate_accuracy = None

            progress_log.append({'epoch':epoch,
                                 'loss':current_loss/len(train_images_norm),
                                 'train epoch time': training_time_epochs[epoch],
                                 'train_accuracy':train_accuracy, 'validate_accuracy':validate_accuracy })

    print()
    total_train_time = np.sum(training_time_epochs)
    print("Total train time = ", total_train_time," seconds")
    print("Total train time = ", np.round(total_train_time/60,3)," minutes")
    model.eval(); # set model mode to evaluation. 
    return model, progress_log

In [17]:
def train_MLP_net_images(model, x_train_normalize_t, y_train_t, criterion, optimizer,
                         x_validation_normalize_t = None, y_validation_t = None,
                         n_epochs=50, batch_size=500, device="cpu"):
    ##########################################
    train_images_norm = x_train_normalize_t
    train_labels = y_train_t
        
    #checking if given validation set arguements
    if x_validation_normalize_t is None or y_validation_t is None:
        validation_set_checker = False
    else:
        validation_images = x_validation_normalize_t.to(device=device)
        validation_labels = y_validation_t.to(device=device)
        validation_set_checker = True
    ##########################################
    training_time_epochs = []
    progress_log = []
    model.to(device)
    
    for epoch in range(n_epochs):
        model.train() # set model into train mode. 
        current_loss = 0
        # prepare data for the current epoch (iterate over random indeces)
        train_indeces = torch.randperm( (len(train_images_norm)//batch_size)*batch_size ) #  len(train_indeces) is an integer number of batches. 
        train_indeces = train_indeces.view(-1, batch_size) # shape: # batches x batch size, each *row* is a batch
        
        
        training_time_batches = []
        for current_batch_indeces in train_indeces: #for each row of indexes
            train_data = train_images_norm[current_batch_indeces].to(dtype=torch.float32).to(device=device)
            train_data_labels = train_labels[current_batch_indeces].to(dtype=torch.int64).to(device=device)
            
            optimizer.zero_grad() # clear gradients of all model parameters.
            
            start_train_time = time.time() #train time calculation
            output = model(train_data) # forward pass: compute model predictions
            batch_train_time = time.time() - start_train_time
            training_time_batches.append(batch_train_time)
            
            loss = criterion(output, train_data_labels) # calculate the current loss
            loss.backward() # backward pass: compute gradient of the loss for all model parameters. 
            optimizer.step() # use gradients to update the model parameters 

            current_loss += loss.item() * len(train_data) # weight loss by the size of the batch.
            

        training_time_epochs.append(np.mean(training_time_batches))
        # evaluate performance of the batch
        with torch.no_grad(): # disable autograd
            train_accuracy = model_accuracy(model, train_data, train_data_labels)
            if validation_set_checker:
                validate_accuracy = model_accuracy(model, validation_images, validation_labels)
            
            if validation_set_checker:
                print(f"{epoch} of {n_epochs} | Loss: {current_loss/ len(train_images_norm):0.4f} | Train accuracy: {train_accuracy:0.4f} | Validate accuracy: {validate_accuracy:0.4f}")
            else:
                print(f"{epoch} of {n_epochs} | Loss: {current_loss/ len(train_images_norm):0.4f} | Train accuracy: {train_accuracy:0.4f}")
                validate_accuracy = None

            progress_log.append({'epoch':epoch,
                                 'loss':current_loss/len(train_images_norm),
                                 'train epoch time': training_time_epochs[epoch],
                                 'train_accuracy':train_accuracy, 'validate_accuracy':validate_accuracy })

    print()
    total_train_time = np.sum(training_time_epochs)
    print("Total train time = ", total_train_time," seconds")
    print("Total train time = ", np.round(total_train_time/60,3)," minutes")
    model.eval(); # set model mode to evaluation. 
    return model, progress_log

# **Calculating models performance**
# ---------------------------------------------------

## Model Accuracy without convolution:

In [23]:
#all together:
model = Net_models(layers=[650, 512, 256, 150], output_layer = [10],
                             drop_out=0.2,
                             batch_norm = True).to(device=device)
criterion = torch.nn.CrossEntropyLoss() #The loss function
optimizer = torch.optim.SGD(model.parameters(),lr = 0.18) #The optimizer

n_epochs = 100
batch_size = 550

model_new, progress_log = train_MLP_net_images(model = model,
                                 x_train_normalize_t = X_train_t_normalize , y_train_t = y_train_t,
                                 x_validation_normalize_t = X_validation_t_normalize, y_validation_t = y_validation_t,
                                 criterion = criterion, optimizer = optimizer,
                                 n_epochs = n_epochs, batch_size = batch_size, device = device)
    

  input = module(input)


0 of 100 | Loss: 1.9189 | Train accuracy: 0.3364 | Validate accuracy: 0.3392
1 of 100 | Loss: 1.7514 | Train accuracy: 0.3727 | Validate accuracy: 0.3660
2 of 100 | Loss: 1.7103 | Train accuracy: 0.4364 | Validate accuracy: 0.4104
3 of 100 | Loss: 1.6734 | Train accuracy: 0.4382 | Validate accuracy: 0.3996
4 of 100 | Loss: 1.6544 | Train accuracy: 0.4291 | Validate accuracy: 0.3852
5 of 100 | Loss: 1.6469 | Train accuracy: 0.4527 | Validate accuracy: 0.4160
6 of 100 | Loss: 1.6268 | Train accuracy: 0.4673 | Validate accuracy: 0.4204
7 of 100 | Loss: 1.6209 | Train accuracy: 0.4873 | Validate accuracy: 0.4324
8 of 100 | Loss: 1.5959 | Train accuracy: 0.4818 | Validate accuracy: 0.4260
9 of 100 | Loss: 1.5843 | Train accuracy: 0.4236 | Validate accuracy: 0.4284
10 of 100 | Loss: 1.5956 | Train accuracy: 0.4273 | Validate accuracy: 0.4164
11 of 100 | Loss: 1.5756 | Train accuracy: 0.4582 | Validate accuracy: 0.4392
12 of 100 | Loss: 1.5680 | Train accuracy: 0.5036 | Validate accuracy: 0.4

## Model Accuracy with convolution:

In [23]:
#all together:
model_conv = Net_models_conv(C_in = 3, H_in = 32, W_in = 32,
                             layers=[512, 256, 150, 100], output_layer = [10],
                             drop_out=0.3,
                             batch_norm = True).to(device=device)
criterion = torch.nn.CrossEntropyLoss() #The loss function
optimizer = torch.optim.SGD(model_conv.parameters(),lr = 0.13) #The optimizer

n_epochs = 60
batch_size = 500

model_new, progress_log = train_MLP_net_images(model = model_conv,
                                 x_train_normalize_t = X_train_t_normalize , y_train_t = y_train_t,
                                 x_validation_normalize_t = X_validation_t_normalize, y_validation_t = y_validation_t,
                                 criterion = criterion, optimizer = optimizer,
                                 n_epochs = n_epochs, batch_size = batch_size, device = device)
    

0 of 60 | Loss: 2.1714 | Train accuracy: 0.3320 | Validate accuracy: 0.3028
1 of 60 | Loss: 1.8748 | Train accuracy: 0.4160 | Validate accuracy: 0.3640
2 of 60 | Loss: 1.7654 | Train accuracy: 0.4060 | Validate accuracy: 0.3884
3 of 60 | Loss: 1.6947 | Train accuracy: 0.4480 | Validate accuracy: 0.4216
4 of 60 | Loss: 1.6431 | Train accuracy: 0.4420 | Validate accuracy: 0.4388
5 of 60 | Loss: 1.6048 | Train accuracy: 0.4800 | Validate accuracy: 0.4468
6 of 60 | Loss: 1.5674 | Train accuracy: 0.4400 | Validate accuracy: 0.4344
7 of 60 | Loss: 1.5396 | Train accuracy: 0.5160 | Validate accuracy: 0.4660
8 of 60 | Loss: 1.5098 | Train accuracy: 0.4820 | Validate accuracy: 0.4524
9 of 60 | Loss: 1.4822 | Train accuracy: 0.5260 | Validate accuracy: 0.4860
10 of 60 | Loss: 1.4583 | Train accuracy: 0.5700 | Validate accuracy: 0.4880
11 of 60 | Loss: 1.4354 | Train accuracy: 0.5420 | Validate accuracy: 0.5132
12 of 60 | Loss: 1.4214 | Train accuracy: 0.5720 | Validate accuracy: 0.5024
13 of 60 

## Model Accuracy with convolution and with max-pool:

In [19]:
#all together:
start_time = time.time()
model_conv_pool = Net_models_conv_maxpool(C_in = 3, H_in = 32, W_in = 32,
                layers=[600, 450, 300, 100], output_layer = [10],
                 drop_out = 0.2,
                 batch_norm = True,
                conv1_out_channels = 30, conv1_kernel_size = 3, conv1_padding = 2, maxpool1_kernel_size = 2,
                conv2_out_channels = 20, conv2_kernel_size = 5, conv2_padding = 2, maxpool2_kernel_size = 2,
                conv3_out_channels = 10, conv3_kernel_size = 5, conv3_padding = 2, maxpool3_kernel_size = 2).to(device=device)
                
criterion = torch.nn.CrossEntropyLoss() #The loss function
optimizer = torch.optim.SGD(model_conv_pool.parameters(),lr = 0.65, momentum=0.7) #The optimizer

n_epochs = 50
batch_size = 700

model_new, progress_log = train_MLP_net_images(model = model_conv_pool,
                                 x_train_normalize_t = X_train_t_normalize , y_train_t = y_train_t,
                                 x_validation_normalize_t = X_validation_t_normalize, y_validation_t = y_validation_t,
                                 criterion = criterion, optimizer = optimizer,
                                 n_epochs = n_epochs, batch_size = batch_size, device = device)

print("total time = ", round((time.time()-start_time)/60),3, "min")

0 of 50 | Loss: 1.9433 | Train accuracy: 0.2771 | Validate accuracy: 0.2440
1 of 50 | Loss: 1.7343 | Train accuracy: 0.3586 | Validate accuracy: 0.3472
2 of 50 | Loss: 1.6143 | Train accuracy: 0.3314 | Validate accuracy: 0.3224
3 of 50 | Loss: 1.5450 | Train accuracy: 0.3686 | Validate accuracy: 0.3468
4 of 50 | Loss: 1.5019 | Train accuracy: 0.4386 | Validate accuracy: 0.3884
5 of 50 | Loss: 1.4560 | Train accuracy: 0.4100 | Validate accuracy: 0.3900
6 of 50 | Loss: 1.4230 | Train accuracy: 0.4171 | Validate accuracy: 0.4008
7 of 50 | Loss: 1.3962 | Train accuracy: 0.4386 | Validate accuracy: 0.4164
8 of 50 | Loss: 1.3642 | Train accuracy: 0.3857 | Validate accuracy: 0.3756
9 of 50 | Loss: 1.3356 | Train accuracy: 0.4957 | Validate accuracy: 0.4612
10 of 50 | Loss: 1.3105 | Train accuracy: 0.4500 | Validate accuracy: 0.4204
11 of 50 | Loss: 1.2879 | Train accuracy: 0.5300 | Validate accuracy: 0.5084
12 of 50 | Loss: 1.2662 | Train accuracy: 0.5014 | Validate accuracy: 0.4740
13 of 50 

## Summarizing models performance on train, validation and test sets:


In [25]:
models = [model, model_conv, model_conv_pool]

train_accuracy_list = []
validate_accuracy_list = []
test_accuracy_list = []
testing_time_list = []

train_images_norm = X_train_t_normalize.to(device=device)
train_labels = y_train_t.to(device=device)
validation_images = X_validation_t_normalize.to(device=device)
validation_labels = y_validation_t.to(device=device)

for model0 in models:

    train_accuracy = model_accuracy(model0, train_images_norm, train_labels)
    validate_accuracy = model_accuracy(model0, validation_images, validation_labels)  

    start_testing_time = time.time() #measure the testing time
    test_accuracy = model_accuracy(model0, X_test_t_normalize, y_test_t)  
    testing_time_i = time.time() - start_testing_time #measure the testing time

    train_accuracy_list.append(train_accuracy)
    validate_accuracy_list.append(validate_accuracy)
    test_accuracy_list.append(test_accuracy)
    testing_time_list.append(testing_time_i)
    
    
    

In [26]:
total_params_list = []
model_names_list = []
for i in range(len(models)):
    total_params_list.append(sum(p.numel() for p in models[i].parameters()))
    model_name_i = "model " + str(i+1)
    model_names_list.append(model_name_i)

df_summary_all = pd.DataFrame({"number of paramaters" : total_params_list,
                               "Train Accuracy" : np.array(train_accuracy_list),
                               "Validation Accuracy" : np.array(validate_accuracy_list),
                               "Test Accuracy" : np.array(test_accuracy_list),
                               "Test time" : testing_time_list
                               },
                              index = [model_names_list])
df_summary_all

Unnamed: 0,number of paramaters,Train Accuracy,Validation Accuracy,Test Accuracy,Test time
model 1,2505286,0.596126,0.5048,0.4966,1.103254
model 2,2159648,0.903411,0.51,0.5059,7.373261
model 3,540960,0.728147,0.54,0.5372,7.039357


# Conclusion

### We can see that the best performing model is the third one (the model that uses convolusion and maxpooling). It also has the lowest amount of parameters. This makes sense as we know that CNN or ConvNet, is a class of neural networks that specializes in processing data that has a grid-like topology, such as an image. We also know that a digital image is a binary representation of visual data that contains a series of pixels arranged in a grid-like fashion that contains pixel values to denote how bright and what color each pixel should be. The low number of parameters is due to the max pooling that decreased the parameters. Max-pooling's objective is to down-sample the image's representation, reducing its dimensionality and allowing for assumptions to be made about features contained in sub-regions of the image. Accordingly, these methods allow to prevent overfitting.


# <font color = crimson > Question 1-b : </font> Apply **data augmentation techniques**

It is possible that the relatively small size of the dataset limits performance of your algorithm and/or measurements of its accuracy (due to small size of the test set).

* Use **data augmentation** to generate **variations of images** thus **increasing the size of the available data**.
* Use few augmenting transforms e.g. :
    - ColorJitter
    - crop and resize
    - flip
    - and other random transforms, along, perhaps with RandomChoice
    
to alter images. Take a look at **Image Augmentation.ipynb notebook** attached to this assignment for demonstration.

* How models you have developed in part a) of this assignment perform with augmented images? Compare accuracy in two scenarios: with and without re-training the models.

## <font color = blue >  Fucntion image_transform: transform the input image into 9 different augmented images </font>

The function gets as input:
1. **img** - the image must be of type torch.tensor and dtype - int between 0-255 or float between 0 and 1.
2. **transform** - A torchvision.transforms object
3. **random_state** - if you want the transformation to be replicatable - use a random state

The function returns 9 images made by the tranformation on the initial image

In [18]:
def image_transform(img, transform, random_state = None):
    
    if random_state:
        torch.manual_seed(random_state)

    #dealing with shape issue - we need to save it so we can later reshpae the transformed image
    initial_shape = img.shape # 3 X H X W
    image_c_shape = initial_shape[0] # c = 3
    image_H_shape = initial_shape[1]
    image_W_shape = initial_shape[2]
    
    %matplotlib inline
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(3,3, figsize=(12,9))
    for i in range(3):
        for j in range(3):
            imgae_transformed = transform(img) #shape = 3 X H X W
            image_transformed_reshape = imgae_transformed.reshape(image_H_shape, image_W_shape, image_c_shape)
            
            ax[i][j].imshow(image_transformed_reshape, interpolation='nearest',cmap='gray')
            ax[i][j].axis('off')
    return fig

## <font color = blue > Fucntion create_augemnted_data : </font>  
This function gets as input Images and labels (in torch tensor int32 type) and returns the same data + transformation

+ The function gets as **input**:
1. **images** - images must be of type torch.tensor and dtype - int32 (between 0-255) or float (between 0 and 1).
2. **labels** - The given images labels (so the function will be able to create a augmented tensor of the labels). The labels needs to already be in torch tensor format.
3. **transform** - A torchvision.transforms object, according to it the transformation will be done
4. **num_of_replicates** - The number of transformed replication to make for each given image. By default = **10**
5. **random_state** - if you want the transformation to be replicable - use a random state. By default = **None**
6. **verbose** - if True, the function will print the number of **new** images that were created and the **total** number of images that are returned by the function. By default = **False**
7. **return_the_initial_images** - by default = **True** - this will make the function to return the given data + the augmented transformed data. If **False** then the function won't return the initial given data, and only return the augmented transformed data. This option can be beneficial if you wish to use a couple of transformation on the given data and at the end concatenate all the augmented transformed data (so you can't have the initial given data repeated in each one of them).

+ The function **returns** 2 things:
1. **The augmented data**
2. **The augmented data labels**

In [19]:
def create_augemnted_data(images, labels, tranform, num_of_replicates = 10, random_state = None, verbose = False,
                         return_the_initial_images = True):
    if random_state:
        torch.manual_seed(random_state)
        
    num_given_images = images.shape[0] #the umber of images inserted as input
    augmented_data = images
    augmented_labels = labels
    
    for i in range(num_of_replicates):
        new_images = tranform(images)
        augmented_data =  torch.cat((augmented_data, new_images), dim = 0) # add the new transformed images
        augmented_labels = torch.cat((augmented_labels, labels), dim = 0) #add the labels of the new transformed images
    
    if return_the_initial_images is False:
        augmented_data = augmented_data[images.shape[0]:]
        augmented_labels = augmented_labels[labels.shape[0]:]
        print("Not returning the initial given images")
        print("------------------------------------------------------")
        num_given_images = 0
        
    if verbose:
        print("Number of new images created = ", augmented_data.shape[0] - num_given_images)
        print("Total Number of images in output = ", augmented_data.shape[0])
        
    return augmented_data, augmented_labels

## Creating different transfomations to choose from randomly in the following sections:

In [20]:
#### optional transformations

#ColorJitter = Randomly change the brightness, contrast, saturation and hue of an image.
tr_ColorJitter_1 = torchvision.transforms.Compose([
        torchvision.transforms.ColorJitter(brightness=10, contrast=0, saturation=0, hue=0)
    ])

tr_ColorJitter_2 = torchvision.transforms.Compose([
        torchvision.transforms.ColorJitter(brightness=0, contrast=10, saturation=0, hue=0)
    ])

tr_ColorJitter_3 = torchvision.transforms.Compose([
        torchvision.transforms.ColorJitter(brightness=0, contrast=0, saturation=10, hue=0)
    ])

tr_ColorJitter_4 = torchvision.transforms.Compose([
        torchvision.transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0.5)
    ])

tr_ColorJitter_5 = torchvision.transforms.Compose([
        torchvision.transforms.ColorJitter(brightness=5, contrast=5, saturation=5, hue=0.5)
    ])


#Random crop
#FiveCrop

#Rotate / Shift / Scale - Random affine transformation of the image keeping center invariant
tr_RandomAffine_1 = torchvision.transforms.Compose([
                                torchvision.transforms.RandomAffine(degrees=(-45,45))
        ])

#Resize / scale / ratio - Crop a random portion of image and resize it to a given size.
### size = (32,32) as this is the dimention of the images in the data
tr_RandomResizedCrop_1 = torchvision.transforms.RandomResizedCrop(size=(32,32), scale =(0.1,2), ratio =(0.75, 1.4))

#### horizontal flip + vertical flip + GaussianBlur
tr_flip_blur_1 = torchvision.transforms.Compose(
    [
        torchvision.transforms.RandomHorizontalFlip(p=0.5),
        torchvision.transforms.RandomVerticalFlip(p=0.5),
        torchvision.transforms.RandomApply([ 
            torchvision.transforms.GaussianBlur(kernel_size=5, sigma=(0.1, 2.0))     ],
                p=0.5)
    ])
        
#image_transform(one_image_t, tr_flip_blur_1, random_state = 1998) ;

In [21]:
tr = torchvision.transforms.Compose([
    torchvision.transforms.RandomChoice(
    [torchvision.transforms.ColorJitter(brightness=10, contrast=0, saturation=0, hue=0),
     torchvision.transforms.ColorJitter(brightness=0, contrast=10, saturation=0, hue=0),
     torchvision.transforms.ColorJitter(brightness=0, contrast=0, saturation=10, hue=0),
     torchvision.transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0.5),
     torchvision.transforms.ColorJitter(brightness=5, contrast=5, saturation=5, hue=0.5),
     torchvision.transforms.RandomAffine(degrees=(-45,45)),
     torchvision.transforms.RandomResizedCrop(size=(32,32), scale =(0.1,2), ratio =(0.75, 1.4)),
     tr_flip_blur_1

])
])

In [64]:
#cifar_train_augmented = torchvision.datasets.CIFAR10(root="./data", train = True, download = True, transform = tr)
#cifar_test = torchvision.datasets.CIFAR10(root="./data", train = False, download = True)

#cifar_train_augmented.data.shape

Files already downloaded and verified


### Checking the function create_augemnted_data:

In [63]:
### check the function create_augemnted_data
some_images_t = X_train_t[0:100]
some_y_train_t = y_train_t[0:100]
num_of_replicates = 5
new_images, new_labels = create_augemnted_data(some_images_t, some_y_train_t, tr, num_of_replicates=50, random_state=1998, verbose=True,
                                              return_the_initial_images=False)
print()
print(new_images.shape)
print(new_labels.shape)

print(new_labels[0] ,"|", new_labels[100] )
print(new_labels[1] ,"|", new_labels[101] )
print(new_labels[2] ,"|", new_labels[102] )
print(new_labels[3] ,"|", new_labels[103] )
print(new_labels[99] ,"|", new_labels[199] )


Not returning the initial given images
------------------------------------------------------
Number of new images created =  5000
Total Number of images in output =  5000

torch.Size([5000, 3, 32, 32])
torch.Size([5000])
tensor(6, dtype=torch.uint8) | tensor(6, dtype=torch.uint8)
tensor(8, dtype=torch.uint8) | tensor(8, dtype=torch.uint8)
tensor(4, dtype=torch.uint8) | tensor(4, dtype=torch.uint8)
tensor(2, dtype=torch.uint8) | tensor(2, dtype=torch.uint8)
tensor(7, dtype=torch.uint8) | tensor(7, dtype=torch.uint8)


###  Creating augmented data from random transformations:

In [14]:
### for loop to create the augmented data
################### input for us to change: ###################
imgaes_to_augment = X_train_t[0:100]
labels_to_augment = y_train_t[0:100]

transformations_list = [tr_ColorJitter_1, tr_ColorJitter_2, tr_ColorJitter_3, tr_ColorJitter_4, tr_ColorJitter_5,
                       tr_RandomAffine_1,
                       tr_RandomResizedCrop_1,
                       tr_flip_blur_1]

num_of_replicates = 10
#########################################
augmented_data = imgaes_to_augment
augmented_labels = labels_to_augment
######################################### The for loop: ###################

for i, transform_1 in enumerate(transformations_list):
    new_images, new_labels = create_augemnted_data(images = imgaes_to_augment, labels = labels_to_augment,
                                                   tranform = transform_1,
                                                   num_of_replicates = num_of_replicates,
                                                   random_state = 1998,
                                                   verbose = False,
                                                   return_the_initial_images = False)

    augmented_data =  torch.cat((augmented_data, new_images), dim = 0) # add the new transformed images
    augmented_labels = torch.cat((augmented_labels, new_labels), dim = 0) #add the labels of the new transformed images
    print("Total amount of images after transformation ", i, " =", augmented_data.shape[0])
    
print()
print("Total amount of images = ", augmented_data.shape[0])

Not returning the initial given images
------------------------------------------------------
Total amount of images after transformation  0  = 1100
Not returning the initial given images
------------------------------------------------------
Total amount of images after transformation  1  = 2100
Not returning the initial given images
------------------------------------------------------
Total amount of images after transformation  2  = 3100
Not returning the initial given images
------------------------------------------------------
Total amount of images after transformation  3  = 4100
Not returning the initial given images
------------------------------------------------------
Total amount of images after transformation  4  = 5100
Not returning the initial given images
------------------------------------------------------
Total amount of images after transformation  5  = 6100
Not returning the initial given images
------------------------------------------------------
Total amount



Not returning the initial given images
------------------------------------------------------
Total amount of images after transformation  7  = 8100

Total amount of images =  8100


# Creating augemnted data and modeling

In [22]:
### for loop to create the augmented data
################### input for us to change: ###################
imgaes_to_augment = X_train_t_normalize #[0:100]
labels_to_augment = y_train_t #[0:100]
print("input data shape:", imgaes_to_augment.shape)
print()

transformations_list = [tr]

#transformations_list = [tr_ColorJitter_1, tr_ColorJitter_2,tr_flip_blur_1]

#transformations_list = [tr_ColorJitter_1, tr_ColorJitter_2,  tr_ColorJitter_5,
#                       tr_RandomAffine_1, tr_RandomResizedCrop_1, tr_flip_blur_1]

#transformations_list = [tr_ColorJitter_1, tr_ColorJitter_2, tr_ColorJitter_3, tr_ColorJitter_4, tr_ColorJitter_5,
#                       tr_RandomAffine_1,
#                       tr_RandomResizedCrop_1,
#                       tr_flip_blur_1]

num_of_replicates = 3
#########################################
augmented_data = imgaes_to_augment
augmented_labels = labels_to_augment
######################################### The for loop: ###################

for i, transform_1 in enumerate(transformations_list):
    new_images, new_labels = create_augemnted_data(images = imgaes_to_augment, labels = labels_to_augment,
                                                   tranform = transform_1,
                                                   num_of_replicates = num_of_replicates,
                                                   random_state = 1998,
                                                   verbose = False,
                                                   return_the_initial_images = False)

    augmented_data =  torch.cat((augmented_data, new_images), dim = 0) # add the new transformed images
    augmented_labels = torch.cat((augmented_labels, new_labels), dim = 0) #add the labels of the new transformed images
    print("Total amount of images after transformation ", i, " =", augmented_data.shape[0])
    
print()
print("Total amount of images = ", augmented_data.shape[0])

X_train_t_normalize_augmented = augmented_data
y_train_t_augmented = augmented_labels

input data shape: torch.Size([47500, 3, 32, 32])

Not returning the initial given images
------------------------------------------------------
Total amount of images after transformation  0  = 190000

Total amount of images =  190000


## Modeling with the augmented data

## Model 1 - without convolution:

In [31]:
#all together:
model_augmented = Net_models(layers=[650, 512, 256, 150], output_layer = [10],
                             drop_out=0.2,
                             batch_norm = True).to(device=device)
criterion = torch.nn.CrossEntropyLoss() #The loss function
optimizer = torch.optim.SGD(model_augmented.parameters(),lr = 0.4,  momentum=0.7) #The optimizer

n_epochs = 50
batch_size = 500

model_augmented_new, progress_log = train_MLP_net_images(model = model_augmented,
                                 x_train_normalize_t = X_train_t_normalize_augmented , y_train_t = y_train_t_augmented,
                                 x_validation_normalize_t = X_validation_t_normalize, y_validation_t = y_validation_t,
                                 criterion = criterion, optimizer = optimizer,
                                 n_epochs = n_epochs, batch_size = batch_size, device = device)
    

  input = module(input)


0 of 40 | Loss: 1.9597 | Train accuracy: 0.3800 | Validate accuracy: 0.4204
1 of 40 | Loss: 1.8010 | Train accuracy: 0.3680 | Validate accuracy: 0.4692
2 of 40 | Loss: 1.7364 | Train accuracy: 0.4300 | Validate accuracy: 0.4504
3 of 40 | Loss: 1.6907 | Train accuracy: 0.4320 | Validate accuracy: 0.4572
4 of 40 | Loss: 1.6579 | Train accuracy: 0.4600 | Validate accuracy: 0.4996
5 of 40 | Loss: 1.6292 | Train accuracy: 0.4440 | Validate accuracy: 0.5016
6 of 40 | Loss: 1.6048 | Train accuracy: 0.4540 | Validate accuracy: 0.5000
7 of 40 | Loss: 1.5815 | Train accuracy: 0.4360 | Validate accuracy: 0.5052
8 of 40 | Loss: 1.5624 | Train accuracy: 0.4620 | Validate accuracy: 0.5188
9 of 40 | Loss: 1.5478 | Train accuracy: 0.4680 | Validate accuracy: 0.5208
10 of 40 | Loss: 1.5303 | Train accuracy: 0.5380 | Validate accuracy: 0.5220
11 of 40 | Loss: 1.5178 | Train accuracy: 0.5280 | Validate accuracy: 0.5092
12 of 40 | Loss: 1.5035 | Train accuracy: 0.4900 | Validate accuracy: 0.5344
13 of 40 

## Model 2  - with convolution and without maxpool:

In [33]:
#all together:
model_conv_augmented = Net_models_conv(C_in = 3, H_in = 32, W_in = 32,
                             layers=[512, 256, 150, 100], output_layer = [10],
                             drop_out=0.1,
                             batch_norm = True).to(device=device)
criterion = torch.nn.CrossEntropyLoss() #The loss function
optimizer = torch.optim.SGD(model_conv_augmented.parameters(),lr = 0.47, momentum=0.7) #The optimizer

n_epochs = 50
batch_size = 500

model_conv_augmented_new, progress_log = train_MLP_net_images(model = model_conv_augmented,
                                 x_train_normalize_t = X_train_t_normalize_augmented , y_train_t = y_train_t_augmented,
                                 x_validation_normalize_t = X_validation_t_normalize, y_validation_t = y_validation_t,
                                 criterion = criterion, optimizer = optimizer,
                                 n_epochs = n_epochs, batch_size = batch_size, device = device)
    

0 of 60 | Loss: 2.0819 | Train accuracy: 0.3000 | Validate accuracy: 0.3316
1 of 60 | Loss: 1.8932 | Train accuracy: 0.2920 | Validate accuracy: 0.3808
2 of 60 | Loss: 1.7832 | Train accuracy: 0.3960 | Validate accuracy: 0.4600
3 of 60 | Loss: 1.6933 | Train accuracy: 0.4020 | Validate accuracy: 0.4764
4 of 60 | Loss: 1.6269 | Train accuracy: 0.4520 | Validate accuracy: 0.4972
5 of 60 | Loss: 1.5795 | Train accuracy: 0.4660 | Validate accuracy: 0.4992
6 of 60 | Loss: 1.5251 | Train accuracy: 0.4740 | Validate accuracy: 0.5084
7 of 60 | Loss: 1.4765 | Train accuracy: 0.5480 | Validate accuracy: 0.5244
8 of 60 | Loss: 1.4298 | Train accuracy: 0.5440 | Validate accuracy: 0.5132
9 of 60 | Loss: 1.4053 | Train accuracy: 0.5440 | Validate accuracy: 0.5188
10 of 60 | Loss: 1.3554 | Train accuracy: 0.5880 | Validate accuracy: 0.5092
11 of 60 | Loss: 1.3174 | Train accuracy: 0.5820 | Validate accuracy: 0.5216
12 of 60 | Loss: 1.3160 | Train accuracy: 0.5940 | Validate accuracy: 0.5140
13 of 60 

## Model 3  - with convolution and  maxpool:

In [34]:
#all together:
start_time = time.time()
model_conv_pool_augmented = Net_models_conv_maxpool(C_in = 3, H_in = 32, W_in = 32,
                layers=[600, 450, 300, 100], output_layer = [10],
                 drop_out = 0.20,
                 batch_norm = True,
                conv1_out_channels = 35, conv1_kernel_size = 4, conv1_padding = 3, maxpool1_kernel_size = 2,
                conv2_out_channels = 20, conv2_kernel_size = 5, conv2_padding = 2, maxpool2_kernel_size = 4,
                conv3_out_channels = 10, conv3_kernel_size = 5, conv3_padding = 2, maxpool3_kernel_size = 4).to(device=device)
                
criterion = torch.nn.CrossEntropyLoss() #The loss function
optimizer = torch.optim.SGD(model_conv_pool_augmented.parameters(),lr = 0.55, momentum=0.7) #The optimizer

n_epochs = 50
batch_size = 600

model_conv_pool_augmented_new, progress_log = train_MLP_net_images(model = model_conv_pool_augmented,
                                 x_train_normalize_t = X_train_t_normalize_augmented , y_train_t = y_train_t_augmented,
                                 x_validation_normalize_t = X_validation_t_normalize, y_validation_t = y_validation_t,
                                 criterion = criterion, optimizer = optimizer,
                                 n_epochs = n_epochs, batch_size = batch_size, device = device)

print("total time = ", round((time.time()-start_time)/60),3, "min")

0 of 50 | Loss: 1.9700 | Train accuracy: 0.3367 | Validate accuracy: 0.3680
1 of 50 | Loss: 1.7692 | Train accuracy: 0.3917 | Validate accuracy: 0.4096
2 of 50 | Loss: 1.6796 | Train accuracy: 0.3900 | Validate accuracy: 0.4636
3 of 50 | Loss: 1.6269 | Train accuracy: 0.3850 | Validate accuracy: 0.4576
4 of 50 | Loss: 1.5876 | Train accuracy: 0.3833 | Validate accuracy: 0.4356
5 of 50 | Loss: 1.5597 | Train accuracy: 0.4517 | Validate accuracy: 0.5224
6 of 50 | Loss: 1.5370 | Train accuracy: 0.4267 | Validate accuracy: 0.4784
7 of 50 | Loss: 1.5141 | Train accuracy: 0.4650 | Validate accuracy: 0.5420
8 of 50 | Loss: 1.4987 | Train accuracy: 0.4300 | Validate accuracy: 0.5144
9 of 50 | Loss: 1.4842 | Train accuracy: 0.4517 | Validate accuracy: 0.5156
10 of 50 | Loss: 1.4727 | Train accuracy: 0.3633 | Validate accuracy: 0.4564
11 of 50 | Loss: 1.4616 | Train accuracy: 0.4367 | Validate accuracy: 0.5116
12 of 50 | Loss: 1.4526 | Train accuracy: 0.4850 | Validate accuracy: 0.5276
13 of 50 

# Summarizing the models performance on train, validation and test sets:

In [35]:
models = [model_augmented, model_conv_augmented, model_conv_pool_augmented]

train_accuracy_list = []
validate_accuracy_list = []
test_accuracy_list = []
testing_time_list = []

train_images_norm = X_train_t_normalize.to(device="cpu")
train_labels = y_train_t.to(device="cpu")
validation_images = X_validation_t_normalize.to(device="cpu")
validation_labels = y_validation_t.to(device="cpu")
i=0
for model0 in models:
    i+=1
    print("evaluating model ", i)
    model0.to(device="cpu")
    train_accuracy = model_accuracy(model0, train_images_norm, train_labels)
    validate_accuracy = model_accuracy(model0, validation_images, validation_labels)  

    start_testing_time = time.time() #measure the testing time
    test_accuracy = model_accuracy(model0, X_test_t_normalize, y_test_t)  
    testing_time_i = time.time() - start_testing_time #measure the testing time

    train_accuracy_list.append(train_accuracy)
    validate_accuracy_list.append(validate_accuracy)
    test_accuracy_list.append(test_accuracy)
    testing_time_list.append(testing_time_i)

evaluating model  1
evaluating model  2
evaluating model  3


In [36]:
total_params_list = []
model_names_list = []
for i in range(len(models)):
    total_params_list.append(sum(p.numel() for p in models[i].parameters()))
    model_name_i = "model " + str(i+1)
    model_names_list.append(model_name_i)

df_summary_all_augmented = pd.DataFrame({"number of paramaters" : total_params_list,
                               "Train Accuracy" : np.array(train_accuracy_list),
                               "Validation Accuracy" : np.array(validate_accuracy_list),
                               "Test Accuracy" : np.array(test_accuracy_list),
                               "Test time" : testing_time_list
                               },
                              index = [model_names_list])
df_summary_all_augmented

Unnamed: 0,number of paramaters,Train Accuracy,Validation Accuracy,Test Accuracy,Test time
model 1,2505286,0.713305,0.5392,0.5428,0.317125
model 2,2159648,0.979537,0.496,0.4895,3.05233
model 3,470735,0.649811,0.5684,0.5657,6.480661


In [None]:
#### lets compare to the model's performance with the original data (not augmented)
df_summary_all

### Conclusions PART  B - We can see that the models that are trained with the *augmented data are better performing* than the same models that were trained on the regular data. This make sense as the models had more training points. We know that increasing the training data (as with augmentation) decreases a model's variance and ultimately decreases regularization error. Moreover, the benefit of data augmentation generally comes down to reducing overfitting. For example, a classification model trained on three images will be limited to recognizing and classifying those exact images. Even adding slight variations to data will improve the generalizability, as we saw in real life with CIFAR 10 data.



In [1]:
import gc
del optimizer
gc.collect()
torch.cuda.empty_cache()