# **HW5:**

# Question 1: Implement CNN on CIFAR-10 dataset (5 Points)

* Use the following network architecture
 * Convolutional layer (with bias) with 32 5x5 filters, with zero-padding of 2
 * ReLU
 * Convolutional layer (with bias) with 16 3x3 filters, with zero-padding of 1
 * ReLU
 * Fully-connected layer (with bias) to compute scores for 10 classes

 Optimize model using stochastic gradient descent with nesterov momentum of 0.9
 Use learning rate of 1e-2
 Initialize weight matrices of the model using the Kaiming normal initialization method

In [2]:
# Importing the necessary libraries
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F


import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import random

from datetime import datetime
from pathlib import Path
import plotly.io as pio
pio.renderers.default = 'colab'

In [3]:
# Import random function
import random

# Fix seed value
SEED = 2345
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
data_folder = Path('/content/drive/MyDrive/Data/DL')

In [6]:
lecture_folder = Path('/content/drive/MyDrive/Data/Models/HW5')

In [7]:
# Install wandb and update it to the latest version
%%capture
!pip install wandb --upgrade

In [8]:
# Import wandb
import wandb

# Login to W&B
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [9]:
# Load dataset and necessary data loaders and transformations
# Transform to convert images to pytorch tensors and normalize the data
trans = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.49139968,0.48215827, 0.44653124), (0.24703233, 0.24348505, 0.26158768))])

train_full = torchvision.datasets.CIFAR10(root=data_folder,
                                              train=True, 
                                              transform=trans,
                                              download=True)
trainset, validset = torch.utils.data.random_split(train_full, [40000, 10000], generator=torch.Generator().manual_seed(42) )
testset  = torchvision.datasets.CIFAR10(root=data_folder,
                                              train=False, 
                                              transform=trans,
                                              download=True)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Check length of original train and valid datasets
len(trainset),len(validset)

(40000, 10000)

In [10]:
# Create subset of 50 images
# n sample points
train_sample_size = 50
valid_sample_size = 10

# Getting n random indices
train_subset_indices = random.sample(range(0, len(trainset)), train_sample_size)
valid_subset_indices = random.sample(range(0, len(testset)), valid_sample_size)

# Getting subset of dataset
train_subset = torch.utils.data.Subset(trainset, train_subset_indices)
valid_subset = torch.utils.data.Subset(validset, valid_subset_indices)

In [None]:
# Shape of training data
train_full.data.shape

(50000, 32, 32, 3)

In [None]:
len(trainset.dataset)

50000

In [None]:
# Shape of testing data
testset.data.shape

(10000, 32, 32, 3)

In [None]:
# check the max value of inputs
train_full.data.max()

255

In [None]:
# check the min value of inputs
train_full.data.min()

0

In [None]:
# check the min value of inputs
train_full.data.mean()/255

0.4733630004850899

In [None]:
# check the min value of inputs
train_full.data.std()/255

0.2515689250632208

In [None]:
# calculations for transformations above

imgs = [item[0] for item in train_full]
imgs = torch.stack(imgs, dim=0).numpy()

#calc mean over the 3 channels
mean_0 = imgs[:,0,:,:].mean()
mean_1 = imgs[:,1,:,:].mean()
mean_2 = imgs[:,2,:,:].mean()
print(mean_0, mean_1, mean_2)

#calc std for each of 3 channels
std_0 = imgs[:,0,:,:].std()
std_1 = imgs[:,1,:,:].std()
std_2 = imgs[:,2,:,:].std()
print(std_0, std_1, std_2)

0.49139968 0.48215827 0.44653124
0.24703233 0.24348505 0.26158768


In [None]:
# Check smaller subset
len(train_subset)

50

In [None]:
check_loader = torch.utils.data.DataLoader(trainset, batch_size = 32, shuffle = True)

In [None]:
len(check_loader)

1250

In [None]:
1250*32

40000

In [None]:
# check imputs and outputs for transformations
for input, target in check_loader:
  print(f'shape of inputs is :{input.shape}')
  print(f'\nmax input value  :{input.max()}')
  print(f'\nmin input value  :{input.min()}')
  print(f'\nmean input value  :{input.mean()}')
  print(f'\nstd input value  :{input.std()}')
  print(f'\nshape of targets is :{target.shape}')
   
  break

shape of inputs is :torch.Size([32, 3, 32, 32])

max input value  :2.0934102535247803

min input value  :-1.881643533706665

mean input value  :-0.0411696620285511

std input value  :1.0160225629806519

shape of targets is :torch.Size([32])


In [None]:
def get_cifar10_labels(labels):  
    """ 
    Function to generate labels.
    Input: numerical labels
    Output: actual string labels
    """

    # Create a list of labels
    text_labels = ['plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

    # Return text_labels according to numerical values
    return [text_labels[int(i)] for i in labels]

In [None]:
# Define custom model using nn.Module()
class CustomDeepNetwork(nn.Module):
  def __init__(self,  output_dim, h_sizes, dprob, non_linearity, batch_norm):
        
    super().__init__()

    self.conv_layer = nn.Sequential(
        nn.Conv2d(3, 32, kernel_size=(5,5), padding=2),
        nn.ReLU(),
        nn.Conv2d(32, 16, kernel_size=(3,3),padding=1),
        nn.ReLU()
    )

    self.flatten = nn.Flatten()

    self.fullyconnected = nn.Linear(16 * 32 * 32, 10)

  def forward(self,x):

    #conv layer
    out = self.conv_layer(x)

    out = self.flatten(out)

    #fully connected layers
    x = F.relu(self.fullyconnected(out))

    # we are not using softmax function in the forward passs
    # nn.crossentropy loss (which we will use to define our loss) combines  nn.LogSoftmax() and nn.NLLLoss() in one single class
    return x  

In [11]:
# Training data epochs
def train(train_loader, model, optimizer, loss_function, log_batch, log_interval):

  """ 
  Function for training the model in each epoch
  Input: iterator for train dataset, initial weights and bias, epochs, learning rate.
  Output: final weights, bias, train loss, train accuracy
  """
  # initilalize variables as global
  # these counts will be updated every epoch
  global example_ct_train
  global batch_ct_train

  # Training Loop loop
  # Initialize train_loss at the he start of the epoch
  running_train_loss = 0
  running_train_correct = 0
  
  # put the model in training mode
  model.train()

  # Iterate on batches from the dataset using train_loader
  for input, targets in train_loader:
    
    # move inputs and outputs to GPUs
    input = input.to(device)
    targets = targets.to(device)

    # Forward pass
    output = model(input)
    loss = loss_function(output, targets)

    # Correct prediction
    y_pred = torch.argmax(output, dim = 1)
    correct = torch.sum(y_pred == targets)

    example_ct_train +=  len(targets)
    batch_ct_train += 1

    # set gradients to zero 
    optimizer.zero_grad()

    # Backward pass
    loss.backward()

    # Update parameters using their gradient
    optimizer.step()
          
    # Add train loss of a batch 
    running_train_loss += loss.item()

    # Add Corect counts of a batch
    running_train_correct += correct

    # log batch loss and accuracy
    if log_batch:
      if ((batch_ct_train + 1) % log_interval) == 0:
        wandb.log({f"Train Batch Loss  :": loss})
        wandb.log({f"Train Batch Acc :": correct/len(targets)})

  
  # Calculate mean train loss for the whole dataset for a particular epoch
  train_loss = running_train_loss/len(train_loader)

  # Calculate accuracy for the whole dataset for a particular epoch
  train_acc = running_train_correct/len(train_loader.dataset)

  return train_loss, train_acc

In [12]:
# Validation data epochs
def valid(loader, model, optimizer, loss_function, log_batch, log_interval):

  """ 
  Function for training the model and plotting the graph for train & valid loss vs epoch.
  Input: iterator for train dataset, initial weights and bias, epochs, learning rate, batch size.
  Output: final weights, bias and train loss and valid loss for each epoch.
  """

  # initilalize variables as global
  # these counts will be updated every epoch
  global example_ct_valid
  global batch_ct_valid

  # Validation loop
  running_valid_loss = 0
  running_valid_correct = 0
  
  # put the model in evaluation mode
  model.eval()

  with torch.no_grad():
    for input,targets in loader:

      # move inputs and outputs to GPUs
      input = input.to(device)
      targets = targets.to(device)

      # Forward pass
      output = model(input)
      loss = loss_function(output,targets)

      # Correct Predictions
      y_pred = torch.argmax(output, dim = 1)
      correct = torch.sum(y_pred == targets)

      # count of images and batches
      example_ct_valid +=  len(targets)
      batch_ct_valid += 1

      # Add valid loss of a batch 
      running_valid_loss += loss.item()

      # Add correct count for each batch
      running_valid_correct += correct

      # log batch loss and accuracy
      if log_batch:
        if ((batch_ct_valid + 1) % log_interval) == 0:
          wandb.log({f"Valid Batch Loss  :": loss})
          wandb.log({f"Valid Batch Accuracy :": correct/len(targets)})


    # Calculate mean valid loss for the whole dataset for a particular epoch
    valid_loss = running_valid_loss/len(valid_loader)

    # Calculate accuracy for the whole dataset for a particular epoch
    valid_acc = running_valid_correct/len(valid_loader.dataset)
    
  return valid_loss, valid_acc

In [13]:
# Model Training
def train_loop(train_loader, valid_loader, model, loss_function, optimizer, epochs, device,
               file_model):

  '''
  model: specify your model for training
  criterion: loss function 
  optimizer: optimizer like SGD , ADAM etc.
  train loader: function to carete batches for training data
  loader : function to create batches for valid data set
  file_model : specify file name for saving your model. This way we can upload the model weights from file. We will not to run model again.
  

  '''
  # Create lists to store train and valid loss at each epoch

  train_loss_history = []
  valid_loss_history = []
  train_acc_history = []
  valid_acc_history = []
  delta = 0
  best_score = None
  valid_loss_min = np.Inf


  # Iterate for the given number of epochs
  for epoch in range(epochs):
    t0 = datetime.now()
    # Get train loss and accuracy for one epoch

    train_loss, train_acc = train(train_loader, model, optimizer, loss_function, 
                                  wandb.config.log_batch, wandb.config.log_interval)
    valid_loss, valid_acc = valid(valid_loader, model, optimizer, loss_function,
                                    wandb.config.log_batch, wandb.config.log_interval)

    dt = datetime.now() - t0

    # Save history of the Losses and accuracy
    train_loss_history.append(train_loss)
    train_acc_history.append(train_acc)
    valid_loss_history.append(valid_loss)
    valid_acc_history.append(valid_acc)

    score = -valid_loss
    if best_score is None:
      best_score=score
      print(f'Validation loss has decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Saving Model...')
      torch.save(model.state_dict(), file_model)
      valid_loss_min = valid_loss

    elif score < best_score + delta:
      print(f'Validation loss has not decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Not Saving Model...')
    
    else:
      best_score = score
      print(f'Validation loss has decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Saving model...')
      torch.save(model.state_dict(), file_model)
      valid_loss_min = valid_loss

    # Log the train and valid loss to W&B
    wandb.log({f"Train epoch Loss :": train_loss, f"Valid epoch Loss :": valid_loss })
    wandb.log({f"Train epoch Acc :": train_acc, f"Valid epoch Acc :": valid_acc})



    # Print the train loss and accuracy for given number of epochs, batch size and number of samples
    print(f'Epoch : {epoch+1} / {epochs}')
    print(f'Time to complete {epoch+1} is {dt}')
    print(f'Train Loss: {train_loss : .4f} | Train Accuracy: {train_acc * 100 : .4f}%')
    print(f'Valid Loss: {valid_loss : .4f} | Valid Accuracy: {valid_acc * 100 : .4f}%')
    print()

  return train_loss_history, train_acc_history, valid_loss_history, valid_acc_history


In [14]:
# Metadata for model
hyperparameters = dict(
    epochs = 10,
    output_dim = 10,
    h_sizes = [16 * 32 * 32],
    batch_norm = False,
    dprob = [0],
    batch_size = 25,
    learning_rate = 1e-2,
    dataset="CIFAR10",
    architecture="CustomDeepNetwork",
    log_interval = 1,
    log_batch = False,
    file_model = lecture_folder/'Q1.pt',
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
    non_linearity=F.relu
   )

non_linearity = F.relu 
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


In [15]:
# Initilialize wandb
wandb.init(name = 'Q1', project = 'DL_Course_HW5', config = hyperparameters)

[34m[1mwandb[0m: Currently logged in as: [33mymegan[0m (use `wandb login --relogin` to force relogin)


In [16]:
# Dataloader, loss function, model, optimizer, and weight initializer for overfitting
# Fix seed value
SEED = 2345
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Data Loader
train_loader = torch.utils.data.DataLoader(train_subset, batch_size=wandb.config.batch_size, shuffle = True)
valid_loader = torch.utils.data.DataLoader(validset, batch_size=wandb.config.batch_size, shuffle = False)
# test_loader = torch.utils.data.DataLoader(testset, batch_size=wandb.config.batch_size,   shuffle = False)

# cross entropy loss function
loss_function = nn.CrossEntropyLoss()

# device 
model = CustomDeepNetwork(wandb.config.output_dim, wandb.config.h_sizes, wandb.config.dprob, non_linearity, wandb.config.batch_norm)

def init_weights(m):
  if type(m) == nn.Linear:
        torch.nn.init.kaiming_normal_(m.weight)
        torch.nn.init.zeros_(m.bias)

#model.apply(init_weights) # No special initialization defaults to lecun's

# put model to GPUs
model.to(wandb.config.device)

# Intialize stochiastic gradient descent optimizer
optimizer = torch.optim.SGD(model.parameters(), lr = wandb.config.learning_rate, momentum=0.9)

NameError: ignored

In [None]:
wandb.watch(model, log = 'all', log_freq=1, log_graph=True)

[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


[<wandb.wandb_torch.TorchGraph at 0x7f6943a93250>]

In [None]:
example_ct_train, batch_ct_train, example_ct_valid, batch_ct_valid = 0, 0, 0, 0
train_loss_history, train_acc_history, valid_loss_history, valid_acc_history = train_loop(train_loader, valid_loader, model, loss_function, optimizer, 
                                                                                          wandb.config.epochs, wandb.config.device,
                                                                                          wandb.config.file_model)

Validation loss has decreased (inf --> 2.299154). Saving Model...
Epoch : 1 / 10
Time to complete 1 is 0:00:07.204882
Train Loss:  2.3076 | Train Accuracy:  8.0000%
Valid Loss:  2.2992 | Valid Accuracy:  10.1500%

Validation loss has not decreased (2.299154 --> 2.327588). Not Saving Model...
Epoch : 2 / 10
Time to complete 2 is 0:00:07.093627
Train Loss:  2.2670 | Train Accuracy:  16.0000%
Valid Loss:  2.3276 | Valid Accuracy:  10.1000%

Validation loss has not decreased (2.299154 --> 2.328787). Not Saving Model...
Epoch : 3 / 10
Time to complete 3 is 0:00:07.031868
Train Loss:  2.2053 | Train Accuracy:  16.0000%
Valid Loss:  2.3288 | Valid Accuracy:  10.1000%

Validation loss has not decreased (2.299154 --> 2.309868). Not Saving Model...
Epoch : 4 / 10
Time to complete 4 is 0:00:07.301112
Train Loss:  2.0587 | Train Accuracy:  22.0000%
Valid Loss:  2.3099 | Valid Accuracy:  14.9000%

Validation loss has not decreased (2.299154 --> 2.405445). Not Saving Model...
Epoch : 5 / 10
Time to 

# Question 2 (15 Points): 
In this question, you can experiment with different convent architectures
on CIFAR-10. You can experiment with different architectures, hyper-parameters, loss functions,
and optimizers to train a model that achieves close to 80% accuracy on the CIFAR-
10 validation set within 10 epochs.

In [29]:
# Define custom model using nn.Module()
class CustomDeepNetwork(nn.Module):
  def __init__(self,  output_dim, h_sizes, dprob, non_linearity, batch_norm):
        
    super().__init__()

    self.conv_layer = nn.Sequential(
        nn.Conv2d(3, 32, kernel_size=(3,3), padding=1),
        nn.ReLU(),
        nn.BatchNorm2d(32),
        nn.Conv2d(32, 64, kernel_size=(3,3), padding=1),
        nn.ReLU(),
        # nn.MaxPool2d(2, 2),
        nn.BatchNorm2d(64),

        nn.Conv2d(64, 128, kernel_size =(3,3), padding=1),
        nn.ReLU(),
        nn.BatchNorm2d(128),
        nn.Conv2d(128, 128, kernel_size=(3,3), padding=1),
        nn.ReLU(),
        nn.BatchNorm2d(128)
    )

    self.flatten = nn.Flatten()

    self.fullyconnected = nn.Linear(128 * 32 * 32, 512)

    self.fullyconnected2 = nn.Linear(512, 128)

    self.fullyconnected3 = nn.Linear(128, 64)

    self.fullyconnected4 = nn.Linear(64, 32)

    self.fullyconnected5 = nn.Linear(32, 10)

  def forward(self,x):

    #conv layer
    out = self.conv_layer(x)

    out = self.flatten(out)

    #fully connected layers
    x = F.relu(self.fullyconnected(out))

    x = F.relu(self.fullyconnected2(x))

    x = F.relu(self.fullyconnected3(x))

    x = F.relu(self.fullyconnected4(x))

    x = F.relu(self.fullyconnected5(x))

    # we are not using softmax function in the forward passs
    # nn.crossentropy loss (which we will use to define our loss) combines  nn.LogSoftmax() and nn.NLLLoss() in one single class
    return x  

In [30]:
# Metadata for model
hyperparameters = dict(
    epochs = 10,
    output_dim = 10,
    h_sizes = [64 * 32 * 32],
    batch_norm = True,
    dprob = [0],
    batch_size = 25,
    learning_rate = 1e-2,
    dataset="CIFAR10",
    architecture="CustomDeepNetwork",
    log_interval = 1,
    log_batch = False,
    file_model = lecture_folder/'Q2_exp13.pt',
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
    non_linearity=F.relu
   )

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
non_linearity = F.relu 

In [31]:
# Initilialize wandb
wandb.init(name = 'Q2', project = 'DL_Course_HW5', config = hyperparameters)

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Train epoch Acc :,█▁▁▁▁▁▁▁▁▁
Train epoch Loss :,█▁▁▁▁▁▁▁▁▁
Valid epoch Acc :,▁▁▁▁▁▁▁▁▁▁
Valid epoch Loss :,▁▁▁▁▁▁▁▁▁▁

0,1
Train epoch Acc :,0.10067
Train epoch Loss :,2.30259
Valid epoch Acc :,0.0973
Valid epoch Loss :,2.30259


In [34]:
# Dataloader, loss function, model, optimizer, and weight initializer for overfitting
# Fix seed value
SEED = 2345
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Data Loader
train_loader = torch.utils.data.DataLoader(trainset, batch_size=wandb.config.batch_size, shuffle = True)
valid_loader = torch.utils.data.DataLoader(validset, batch_size=wandb.config.batch_size, shuffle = False)
# test_loader = torch.utils.data.DataLoader(testset, batch_size=wandb.config.batch_size,   shuffle = False)

# cross entropy loss function
loss_function = nn.CrossEntropyLoss()

# device 
model = CustomDeepNetwork(wandb.config.output_dim, wandb.config.h_sizes, wandb.config.dprob, 
                              non_linearity, wandb.config.batch_norm)

def init_weights(m):
  if type(m) == nn.Linear:
        torch.nn.init.xavier_normal_(m.weight)
        torch.nn.init.zeros_(m.bias)

model.apply(init_weights) 

# put model to GPUs
model.to(wandb.config.device)

# Intialize stochiastic gradient descent optimizer
optimizer = torch.optim.SGD(model.parameters(), lr = wandb.config.learning_rate)

In [35]:
wandb.watch(model, log = 'all', log_freq=1, log_graph=True)

[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


[<wandb.wandb_torch.TorchGraph at 0x7fa1ce24f050>]

In [None]:
example_ct_train, batch_ct_train, example_ct_valid, batch_ct_valid = 0, 0, 0, 0
train_loss_history, train_acc_history, valid_loss_history, valid_acc_history = train_loop(train_loader, valid_loader, model, loss_function, optimizer, 
                                                                                          wandb.config.epochs, wandb.config.device,
                                                                                          wandb.config.file_model)

Validation loss has decreased (inf --> 1.108236). Saving Model...
Epoch : 1 / 10
Time to complete 1 is 1:07:43.504608
Train Loss:  1.4774 | Train Accuracy:  50.1825%
Valid Loss:  1.1082 | Valid Accuracy:  62.2400%

Validation loss has decreased (1.108236 --> 0.906543). Saving model...
Epoch : 2 / 10
Time to complete 2 is 1:07:43.579365
Train Loss:  0.8840 | Train Accuracy:  69.6300%
Valid Loss:  0.9065 | Valid Accuracy:  68.0200%

Validation loss has not decreased (0.906543 --> 0.914927). Not Saving Model...
Epoch : 3 / 10
Time to complete 3 is 1:07:46.913750
Train Loss:  0.5157 | Train Accuracy:  82.5325%
Valid Loss:  0.9149 | Valid Accuracy:  69.8000%

Validation loss has not decreased (0.906543 --> 1.046267). Not Saving Model...
Epoch : 4 / 10
Time to complete 4 is 1:08:51.232808
Train Loss:  0.2458 | Train Accuracy:  91.8125%
Valid Loss:  1.0463 | Valid Accuracy:  69.8900%

Validation loss has not decreased (0.906543 --> 1.200690). Not Saving Model...
Epoch : 5 / 10
Time to complet