# **HW 6B - Custom Data**






# Import libraries

In [2]:
# Install wandb and update it to the latest version
%%capture
!pip install wandb --upgrade

In [3]:
# Importing the necessary libraries
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from torchsummary import summary
from torch.utils.data import Dataset, random_split, DataLoader

from torch.optim.lr_scheduler import ReduceLROnPlateau, ExponentialLR, CyclicLR, OneCycleLR, StepLR

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import random

from datetime import datetime
from pathlib import Path
import pathlib
import plotly.io as pio
pio.renderers.default = 'colab'
import cv2 as cv
from torchvision import models

In [4]:
# Import random function
import random

# Fix seed value
SEED = 2345
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
data_folder = Path('/content/drive/MyDrive/Data/DL')

In [7]:
folder = Path('/content/drive/MyDrive/Data/DL/HW6B')

In [7]:
lecture_folder = Path('/content/drive/MyDrive/teaching_fall_2021/dl_fall_2021/Lecture_9_CNN_part_2')

We will be using W&B for visualization.

In [8]:
# Import wandb
import wandb

# Login to W&B
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

# **Custom Dataset**

Now download the dataset using wget command from the link : https://s3.amazonaws.com/fast-ai-imageclas/imagenette2.tgz.
https://s3.amazonaws.com/fast-ai-imageclas/imagewoof.tgz
We will save our file as imagenette.tgz in our local directory.

In [9]:
# Download the dataset
!wget 'https://s3.amazonaws.com/fast-ai-imageclas/imagenette2.tgz' -O 'imagenette.tgz'

--2021-12-15 15:49:01--  https://s3.amazonaws.com/fast-ai-imageclas/imagenette2.tgz
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.204.232
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.204.232|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1557161267 (1.5G) [application/x-tar]
Saving to: ‘imagenette.tgz’


2021-12-15 15:49:38 (40.2 MB/s) - ‘imagenette.tgz’ saved [1557161267/1557161267]



In [10]:
# Extract all the content from tar file
!tar -xzf /content/imagenette.tgz -C /content

Now, we can observe our dataset is present in our local directory and we can start working on it.

Let us first observe how our dataset looks like.

In [11]:
# We will use glob to get files present in our dataset
path = Path('imagenette2/').glob('*')

for x in path:
  print(x)

imagenette2/val
imagenette2/noisy_imagenette.csv
imagenette2/train


We have 3 files present in our dataset folder. We will be working on train and val folder in this module. Train and val folder contains 10 folders each containing images for 10 different classes.

Let us create a dictionary to map the 10 classes with intergers and also create an array which will map integral indices with the actual class names.

In [12]:
# Dictionary to map the 10 classes with intergers
label_dict = {
    'n01440764' : 0,
    'n02102040' : 1,
    'n02979186' : 2,
    'n03000684' : 3,
    'n03028079' : 4,
    'n03394916' : 5,
    'n03417042' : 6,
    'n03425413' : 7,
    'n03445777' : 8,
    'n03888257' : 9
}

# Array to map integral indices with the actual class names
actual_label_dict = [
    'tench',
    'English springer',
    'cassette player',
    'chain saw',
    'church',
    'French horn',
    'garbage truck',
    'gas pump',
    'golf ball',
    'parachute'
]

## **Custom Dataset Class**

Now we will create our custom dataset class which will help to access our data easily.

In [13]:
# Create a custom class for the dataset which inherits Dataset
class ImagenetteDataset(Dataset):
  
  """
  Init Function
  Input: - path (string): Path to the dataset folder.
         - transform (optional): Optional transform to be applied on a sample.
  """
  def __init__(self, path, transform = None):

    # Initialize private member path to the path of the dataset
    self._path = path

    # Initialize private member img_names to store all the path of images in the dataset
    self._img_names = list(map(str, pathlib.Path(self._path).glob('*/*')))

    # Initilize private member len to calculate number of images in the dataset
    self._len = len(self._img_names)

    # Initialize private transform
    self._transform = transform
   
  """
  Len function: It will return the number of elements in the dataset
  """
  def __len__(self):
    return self._len

  """
  Get Item function
  Input: Index
  Output: It will return the image and label at the index.
  """
  def __getitem__(self, index):

    # Get the image at the index and store it in "im" as an array
    im = cv.imread(self._img_names[index])
    im = cv.cvtColor(im, cv.COLOR_BGR2RGB)

    # If transform is not None, apply it to the image
    if self._transform:
      im = self._transform(im)

    # Get label from the base directory of image path at the given index
    label_dir = str(pathlib.Path(self._img_names[index]).resolve().parent).split('/')[-1] 

    # Get the integral label from the dictionary created
    label = label_dict.get(label_dir)

    # Return image and its label
    return im, label

In [107]:
# Create a composition of all the tranforms
trans = transforms.Compose([transforms.ToTensor(),
                            transforms.Resize((224, 224)),
                            # transforms.RandomCrop(160, padding=4), 
                            transforms.RandomHorizontalFlip(),
                            transforms.RandomRotation(30),
                            transforms.ColorJitter(hue=0.2),
                            transforms.ColorJitter(brightness=2),
                            # transforms.Grayscale(), 
                            # transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225) # Use for transforming for inception v3 model
                            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010), inplace=True) # Use for transforming for resnet and vgg16
])

trans_v = transforms.Compose([transforms.ToTensor(),
                            transforms.Resize((224, 224)), 
                            # transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225) # Use for transforming for inception v3 model
                            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) # Use for transforming for resnet and vgg16
])

# Initilize object for train set
train_full = ImagenetteDataset('/content/imagenette2/train', trans) 

# Initilize object for test set
testset = ImagenetteDataset('/content/imagenette2/val', trans_v) 

In [17]:
# print length of train and test set
len(train_full), len(testset)

(9469, 3925)

## Train and Test Dataset

In [108]:
# Transform to convert images to pytorch tensors and normalize the data

trainset, validset = torch.utils.data.random_split(train_full, [6469, 3000], generator=torch.Generator().manual_seed(42) )


In [109]:
# n sample points
train_sample_size = 1000
valid_sample_size = 200

# Getting n random indices
train_subset_indices = random.sample(range(0, len(trainset)), train_sample_size)
valid_subset_indices = random.sample(range(0, len(testset)), valid_sample_size)

# Getting subset of dataset
train_subset = torch.utils.data.Subset(trainset, train_subset_indices)
valid_subset = torch.utils.data.Subset(validset, valid_subset_indices)

In [20]:
len(train_subset )

1000

In [21]:
len(valid_subset )

200

# Model

In [17]:
# load model with random weights
vgg16_model = models.vgg16(pretrained=True)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

In [18]:
print(vgg16_model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [19]:
vgg16_mod = models.vgg16(pretrained=True)

count = 0
for child in vgg16_mod.children():
    count+=1
    print(count)
    print()
    print(child)


1

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel

In [20]:
# load model with random weights
model_resnet50 = models.resnet50(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [21]:
print(model_resnet50)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [22]:
resnet50 = models.resnet50(pretrained=True)

count = 0
for child in resnet50.children():
    count+=1
    print(count)
    print()
    print(child)


1

Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
2

BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
3

ReLU(inplace=True)
4

MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
5

Sequential(
  (0): Bottleneck(
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (downsample): Sequential(
      (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1,

# Training Functions

## Training Epoch 

In [23]:
def train(train_loader, model, optimizer, loss_function, log_batch, log_interval, grad_clipping, max_norm):

  """ 
  Function for training the model in each epoch
  Input: iterator for train dataset, initial weights and bias, epochs, learning rate.
  Output: final weights, bias, train loss, train accuracy
  """
  # initilalize variables as global
  # these counts will be updated every epoch
  global example_ct_train
  global batch_ct_train

  # Training Loop loop
  # Initialize train_loss at the he start of the epoch
  running_train_loss = 0
  running_train_correct = 0
  
  # put the model in training mode
  model.train()

  # Iterate on batches from the dataset using train_loader
  for input, targets in train_loader:
    
    # move inputs and outputs to GPUs
    input = input.to(device)
    targets = targets.to(device)

    # Forward pass
    output = model(input)
    loss = loss_function(output, targets)

    # Correct prediction
    y_pred = torch.argmax(output, dim = 1)
    correct = torch.sum(y_pred == targets)

    example_ct_train +=  len(targets)
    batch_ct_train += 1

    # set gradients to zero 
    optimizer.zero_grad()

    # Backward pass
    loss.backward()

    # Gradient Clipping
    if grad_clipping:
      nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_norm, norm_type=2)

    # Update parameters using their gradient
    optimizer.step()

    #scheduler.step()
          
    # Add train loss of a batch 
    running_train_loss += loss.item()

    # Add Corect counts of a batch
    running_train_correct += correct

    # log batch loss and accuracy
    if log_batch:
      if ((batch_ct_train + 1) % log_interval) == 0:
        wandb.log({f"Train Batch Loss  :": loss})
        wandb.log({f"Train Batch Acc :": correct/len(targets)})
        #print(f'Learning rate: {scheduler.get_last_lr()}')

    
    
  # Calculate mean train loss for the whole dataset for a particular epoch
  train_loss = running_train_loss/len(train_loader)



  # Calculate accuracy for the whole dataset for a particular epoch
  train_acc = running_train_correct/len(train_loader.dataset)

  return train_loss, train_acc

## Validation/Test Epoch

In [24]:
def valid(loader, model, optimizer, loss_function, log_batch, log_interval):

  """ 
  Function for training the model and plotting the graph for train & valid loss vs epoch.
  Input: iterator for train dataset, initial weights and bias, epochs, learning rate, batch size.
  Output: final weights, bias and train loss and valid loss for each epoch.
  """

  # initilalize variables as global
  # these counts will be updated every epoch
  global example_ct_valid
  global batch_ct_valid

  # Validation loop
  # Initialize train_loss at the he strat of the epoch
  running_valid_loss = 0
  running_valid_correct = 0
  
  # put the model in evaluation mode
  model.eval()

  with torch.no_grad():
    for input,targets in loader:

      # move inputs and outputs to GPUs
      input = input.to(device)
      targets = targets.to(device)

      # Forward pass
      output = model(input)
      loss = loss_function(output,targets)

      # Correct Predictions
      y_pred = torch.argmax(output, dim = 1)
      correct = torch.sum(y_pred == targets)

      # count of images and batches
      example_ct_valid +=  len(targets)
      batch_ct_valid += 1

      # Add valid loss of a batch 
      running_valid_loss += loss.item()

      # Add correct count for each batch
      running_valid_correct += correct

      # log batch loss and accuracy
      if log_batch:
        if ((batch_ct_valid + 1) % log_interval) == 0:
          wandb.log({f"Valid Batch Loss  :": loss})
          wandb.log({f"Valid Batch Accuracy :": correct/len(targets)})


    # Calculate mean valid loss for the whole dataset for a particular epoch
    valid_loss = running_valid_loss/len(valid_loader)

    # scheduler step
    # scheduler.step(valid_loss)
    # scheduler.step()

    # Calculate accuracy for the whole dataset for a particular epoch
    valid_acc = running_valid_correct/len(valid_loader.dataset)
    
  return valid_loss, valid_acc

##  Model Training Loop


In [25]:
def train_loop(train_loader, valid_loader, model, loss_function, optimizer, epochs, device, patience, early_stopping,
               file_model):

  '''
  model: specify your model for training
  criterion: loss function 
  optimizer: optimizer like SGD , ADAM etc.
  train loader: function to carete batches for training data
  loader : function to create batches for valid data set
  file_model : specify file name for saving your model. This way we can upload the model weights from file. We will not to run model again.
  

  '''
  # Create lists to store train and valid loss at each epoch

  train_loss_history = []
  valid_loss_history = []
  train_acc_history = []
  valid_acc_history = []
  delta = 0
  best_score = None
  valid_loss_min = np.Inf
  counter_early_stop=0
  early_stop=False


  # Iterate for the given number of epochs
  for epoch in range(epochs):
    t0 = datetime.now()
    # Get train loss and accuracy for one epoch

    train_loss, train_acc = train(train_loader, model, optimizer, loss_function, 
                                  wandb.config.log_batch, wandb.config.log_interval,
                                  wandb.config.grad_clipping, wandb.config.max_norm)
    valid_loss, valid_acc = valid(valid_loader, model, optimizer, loss_function,
                                    wandb.config.log_batch, wandb.config.log_interval)

    dt = datetime.now() - t0

    # Save history of the Losses and accuracy
    train_loss_history.append(train_loss)
    train_acc_history.append(train_acc)
    valid_loss_history.append(valid_loss)
    valid_acc_history.append(valid_acc)

    if early_stopping:
      score = -valid_loss
      if best_score is None:
        best_score=score
        print(f'Validation loss has decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Saving Model...')
        torch.save(model.state_dict(), file_model)
        valid_loss_min = valid_loss

      elif score < best_score + delta:
        counter_early_stop += 1
        print(f'Early stoping counter: {counter_early_stop} out of {patience}')
        if counter_early_stop > patience:
          early_stop = True

      
      else:
        best_score = score
        print(f'Validation loss has decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Saving model...')
        torch.save(model.state_dict(), file_model)
        counter_early_stop=0
        valid_loss_min = valid_loss

      if early_stop:
        print('Early Stopping')
        break

    else:

      score = -valid_loss
      if best_score is None:
        best_score=score
        print(f'Validation loss has decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Saving Model...')
        torch.save(model.state_dict(), file_model)
        valid_loss_min = valid_loss

      elif score < best_score + delta:
        print(f'Validation loss has not decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Not Saving Model...')
      
      else:
        best_score = score
        print(f'Validation loss has decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Saving model...')
        torch.save(model.state_dict(), file_model)
        valid_loss_min = valid_loss



    # Log the train and valid loss to W&B
    wandb.log({f"Train epoch Loss :": train_loss, f"Valid epoch Loss :": valid_loss })
    wandb.log({f"Train epoch Acc :": train_acc, f"Valid epoch Acc :": valid_acc})



    # Print the train loss and accuracy for given number of epochs, batch size and number of samples
    print(f'Epoch : {epoch+1} / {epochs}')
    print(f'Time to complete {epoch+1} is {dt}')
    # print(f'Learning rate: {scheduler.get_last_lr()}')
    # print(f'Learning rate: {scheduler._last_lr[0]}')
    print(f'Train Loss: {train_loss : .4f} | Train Accuracy: {train_acc * 100 : .4f}%')
    print(f'Valid Loss: {valid_loss : .4f} | Valid Accuracy: {valid_acc * 100 : .4f}%')
    print()
    torch.cuda.empty_cache()

  return train_loss_history, train_acc_history, valid_loss_history, valid_acc_history


#Model Training

## **Meta data**



In [110]:
hyperparameters = dict(
    epochs = 10,
    output_dim = 10, 
    
    batch_size = 64,
    learning_rate = 0.01,
    dataset="CustomDataset",
    architecture="CNN",
    log_interval = 100,
    log_batch = True,
    file_model = folder/'HW6b_resnet50_5.pt',
    grad_clipping = True,
    max_norm = 1,
    patience = 5,
    early_stopping = True,
    weight_decay = 0,
    scheduler_factor = 0.5,
    scheduler_patience = 0,
   )

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [85]:
device

device(type='cuda', index=0)

## Initialize wandb

In [111]:
wandb.init(name = 'HW6b_resnet50_5', project = 'HW6b_imagnet', config = hyperparameters)

VBox(children=(Label(value=' 0.02MB of 0.02MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Train Batch Acc :,▁▆▆▅████▆█
Train Batch Loss :,█▄▂▄▁▁▁▁▂▁
Train epoch Acc :,▁▇▇███████
Train epoch Loss :,█▂▂▁▁▁▁▁▁▁
Valid Batch Accuracy :,█▅▁▅
Valid Batch Loss :,▁▄█▇
Valid epoch Acc :,▁▇▆▄▇▄█▆▄▆
Valid epoch Loss :,█▂▂▃▄▄▁▂▄▂

0,1
Train Batch Acc :,1.0
Train Batch Loss :,0.00502
Train epoch Acc :,0.99675
Train epoch Loss :,0.0105
Valid Batch Accuracy :,0.98438
Valid Batch Loss :,0.05527
Valid epoch Acc :,0.982
Valid epoch Loss :,0.05525


All results here: https://wandb.ai/ymegan/HW6b_imagnet?workspace=user-ymegan

In [112]:
wandb.config.device = device
print(wandb.config.device )

cuda:0


## Specify Dataloader, Loss_function, Model, Optimizer, Weight Initialization

In [113]:
# Fix seed value
SEED = 2345
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Data Loader
train_loader = torch.utils.data.DataLoader(trainset, batch_size=wandb.config.batch_size, shuffle = True)
valid_loader = torch.utils.data.DataLoader(validset, batch_size=wandb.config.batch_size, shuffle = False)
test_loader = torch.utils.data.DataLoader(testset, batch_size=wandb.config.batch_size,   shuffle = False)

# cross entropy loss function
loss_function = nn.CrossEntropyLoss()

# # model 
# vgg16_mod = models.vgg16(pretrained=True)
# vgg16_mod.classifier[6].out_features = wandb.config.output_dim

# # freeze convolution weights
# for param in vgg16_mod.features.parameters():
#     param.requires_grad = False

# for param in vgg16_mod.features[:17].parameters():
#    param.requires_grad = False

# model = vgg16_mod 


resnet50_mod2 = models.resnet50(pretrained=True)
resnet50_mod2.fc = nn.Sequential(
    nn.Linear(2048, wandb.config.output_dim)
)
# resnet50_task4.Linear.out_features = wandb.config.output_dim

count = 0
for child in resnet50_mod2.children():
  count+=1
  if count < 6:
    for param in child.parameters():
        param.requires_grad = False

model = resnet50_mod2

def init_weights(m):
  if type(m) == nn.Conv2d:
        torch.nn.init.kaiming_normal_(m.weight)
        torch.nn.init.zeros_(m.bias)

  if type(m) == nn.Conv2d:
        torch.nn.init.kaiming_normal_(m.weight)
        torch.nn.init.zeros_(m.bias)

        
# apply initialization recursively  to all modules
# model.apply(init_weights)

# wandb.config.init_weights = init_weights

# put model to GPUs
model.to(wandb.config.device)

# Intialize stochiastic gradient descent optimizer
optimizer = torch.optim.SGD(model.parameters(), lr = wandb.config.learning_rate, weight_decay=wandb.config.weight_decay, momentum = 0.9)
# optimizer = torch.optim.Adam(model.parameters(), lr = wandb.config.learning_rate, weight_decay=wandb.config.weight_decay)

wandb.config.optimizer = optimizer

# scheduler = ReduceLROnPlateau(optimizer, mode='min', factor= wandb.config.scheduler_factor, 
#                              patience=wandb.config.scheduler_patience, verbose=True)

# scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.2, total_steps=len(train_loader) * 20 , 
#                                                epochs=20, three_phase=True, pct_start = 0.3)

# scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.2, total_steps=len(train_loader) * 30 , 
#                                                epochs=25, three_phase=True, pct_start = 0.2)

# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 50, eta_min=0, last_epoch=- 1, verbose=True)

scheduler = StepLR(optimizer, gamma=0.1,step_size=7, verbose=True)



Adjusting learning rate of group 0 to 1.0000e-02.


## Sanity Check
- Check the loss without any training. For Cross entropy the expected value will be log(number of classes)

In [37]:
# Fix seed value

SEED = 2345
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

for input, targets in train_loader:
  
  # move inputs and outputs to GPUs
  input = input.to(device)
  targets = targets.to(device)
  model.eval()
  # Forward pass
  output = model(input)
  loss = loss_function(output, targets)
  print(f'Actual loss: {loss}')
  break

print(f'Expected Theoretical loss: {np.log(10)}')



Actual loss: 20.355043411254883
Expected Theoretical loss: 2.302585092994046


## Train Model and Save best model

In [114]:
wandb.watch(model, log = 'all', log_freq=25, log_graph=True)

[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


[<wandb.wandb_torch.TorchGraph at 0x7f319c5441d0>]

*VGG-16*: 
- learning_rate = 0.01
-  OneCycleLR rate scheduler
- Unfreeze classifier layer
- Batch size 64
- Data augmentation random flip

Full run

In [42]:
example_ct_train, batch_ct_train, example_ct_valid, batch_ct_valid = 0, 0, 0, 0
train_loss_history, train_acc_history, valid_loss_history, valid_acc_history = train_loop(train_loader, valid_loader, model, loss_function, optimizer, 
                                                                                          wandb.config.epochs, wandb.config.device,
                                                                                          wandb.config.patience, wandb.config.early_stopping,
                                                                                          wandb.config.file_model)

Validation loss has decreased (inf --> 0.117227). Saving Model...
Epoch : 1 / 10
Time to complete 1 is 0:02:06.156868
Train Loss:  4.4156 | Train Accuracy:  60.5503%
Valid Loss:  0.1172 | Valid Accuracy:  96.8667%

Validation loss has decreased (0.117227 --> 0.059937). Saving model...
Epoch : 2 / 10
Time to complete 2 is 0:02:07.770483
Train Loss:  0.1184 | Train Accuracy:  96.6455%
Valid Loss:  0.0599 | Valid Accuracy:  98.1333%

Validation loss has decreased (0.059937 --> 0.048894). Saving model...
Epoch : 3 / 10
Time to complete 3 is 0:02:08.336156
Train Loss:  0.0533 | Train Accuracy:  98.4232%
Valid Loss:  0.0489 | Valid Accuracy:  98.4333%

Validation loss has decreased (0.048894 --> 0.047965). Saving model...
Epoch : 4 / 10
Time to complete 4 is 0:02:08.994368
Train Loss:  0.0254 | Train Accuracy:  99.2889%
Valid Loss:  0.0480 | Valid Accuracy:  98.4000%

Early stoping counter: 1 out of 5
Epoch : 5 / 10
Time to complete 5 is 0:02:08.492006
Train Loss:  0.0150 | Train Accuracy:  

Run 2:
- StepLR Scheduler

Doesn't converge as quickly as the OneCycleLR scheduler so will stick with OneCycleLR for rest of runs.

In [31]:
example_ct_train, batch_ct_train, example_ct_valid, batch_ct_valid = 0, 0, 0, 0
train_loss_history, train_acc_history, valid_loss_history, valid_acc_history = train_loop(train_loader, valid_loader, model, loss_function, optimizer, 
                                                                                          wandb.config.epochs, wandb.config.device,
                                                                                          wandb.config.patience, wandb.config.early_stopping,
                                                                                          wandb.config.file_model)

Validation loss has decreased (inf --> 0.143946). Saving Model...
Epoch : 1 / 10
Time to complete 1 is 0:02:04.927579
Train Loss:  4.5546 | Train Accuracy:  57.6132%
Valid Loss:  0.1439 | Valid Accuracy:  96.4000%

Validation loss has decreased (0.143946 --> 0.069969). Saving model...
Epoch : 2 / 10
Time to complete 2 is 0:02:06.595926
Train Loss:  0.1312 | Train Accuracy:  96.3209%
Valid Loss:  0.0700 | Valid Accuracy:  97.9333%

Validation loss has decreased (0.069969 --> 0.053808). Saving model...
Epoch : 3 / 10
Time to complete 3 is 0:02:05.925093
Train Loss:  0.0704 | Train Accuracy:  97.8977%
Valid Loss:  0.0538 | Valid Accuracy:  98.2333%

Validation loss has decreased (0.053808 --> 0.047901). Saving model...
Epoch : 4 / 10
Time to complete 4 is 0:02:06.531270
Train Loss:  0.0396 | Train Accuracy:  98.7170%
Valid Loss:  0.0479 | Valid Accuracy:  98.3667%

Early stoping counter: 1 out of 5
Epoch : 5 / 10
Time to complete 5 is 0:02:06.870442
Train Loss:  0.0204 | Train Accuracy:  

Run 3:
- Unfreeze last convolution layer

Gives better train accuracy upon first epoch and better validation accuracy initially than previous runs

In [37]:
example_ct_train, batch_ct_train, example_ct_valid, batch_ct_valid = 0, 0, 0, 0
train_loss_history, train_acc_history, valid_loss_history, valid_acc_history = train_loop(train_loader, valid_loader, model, loss_function, optimizer, 
                                                                                          wandb.config.epochs, wandb.config.device,
                                                                                          wandb.config.patience, wandb.config.early_stopping,
                                                                                          wandb.config.file_model)

Validation loss has decreased (inf --> 0.117227). Saving Model...
Epoch : 1 / 10
Time to complete 1 is 0:02:03.245754
Train Loss:  4.4156 | Train Accuracy:  60.5503%
Valid Loss:  0.1172 | Valid Accuracy:  96.8667%

Validation loss has decreased (0.117227 --> 0.059937). Saving model...
Epoch : 2 / 10
Time to complete 2 is 0:02:05.998283
Train Loss:  0.1184 | Train Accuracy:  96.6455%
Valid Loss:  0.0599 | Valid Accuracy:  98.1333%

Validation loss has decreased (0.059937 --> 0.048894). Saving model...
Epoch : 3 / 10
Time to complete 3 is 0:02:06.194072
Train Loss:  0.0533 | Train Accuracy:  98.4232%
Valid Loss:  0.0489 | Valid Accuracy:  98.4333%

Validation loss has decreased (0.048894 --> 0.047965). Saving model...
Epoch : 4 / 10
Time to complete 4 is 0:02:06.155237
Train Loss:  0.0254 | Train Accuracy:  99.2889%
Valid Loss:  0.0480 | Valid Accuracy:  98.4000%

Early stoping counter: 1 out of 5
Epoch : 5 / 10
Time to complete 5 is 0:02:07.071741
Train Loss:  0.0150 | Train Accuracy:  

Run 4:
- Unfreeze more convolution layers

In [45]:
example_ct_train, batch_ct_train, example_ct_valid, batch_ct_valid = 0, 0, 0, 0
train_loss_history, train_acc_history, valid_loss_history, valid_acc_history = train_loop(train_loader, valid_loader, model, loss_function, optimizer, 
                                                                                          wandb.config.epochs, wandb.config.device,
                                                                                          wandb.config.patience, wandb.config.early_stopping,
                                                                                          wandb.config.file_model)

Validation loss has decreased (inf --> 0.117227). Saving Model...
Epoch : 1 / 10
Time to complete 1 is 0:02:02.869743
Train Loss:  4.4156 | Train Accuracy:  60.5503%
Valid Loss:  0.1172 | Valid Accuracy:  96.8667%

Validation loss has decreased (0.117227 --> 0.059937). Saving model...
Epoch : 2 / 10
Time to complete 2 is 0:02:06.056197
Train Loss:  0.1184 | Train Accuracy:  96.6455%
Valid Loss:  0.0599 | Valid Accuracy:  98.1333%

Validation loss has decreased (0.059937 --> 0.048894). Saving model...
Epoch : 3 / 10
Time to complete 3 is 0:02:05.413762
Train Loss:  0.0533 | Train Accuracy:  98.4232%
Valid Loss:  0.0489 | Valid Accuracy:  98.4333%

Validation loss has decreased (0.048894 --> 0.047965). Saving model...
Epoch : 4 / 10
Time to complete 4 is 0:02:05.629445
Train Loss:  0.0254 | Train Accuracy:  99.2889%
Valid Loss:  0.0480 | Valid Accuracy:  98.4000%

Early stoping counter: 1 out of 5
Epoch : 5 / 10
Time to complete 5 is 0:02:05.930584
Train Loss:  0.0150 | Train Accuracy:  

Run 5:
- Try more different data augmentations to see effects

Decreased accuracy by ~5%

In [55]:
example_ct_train, batch_ct_train, example_ct_valid, batch_ct_valid = 0, 0, 0, 0
train_loss_history, train_acc_history, valid_loss_history, valid_acc_history = train_loop(train_loader, valid_loader, model, loss_function, optimizer, 
                                                                                          wandb.config.epochs, wandb.config.device,
                                                                                          wandb.config.patience, wandb.config.early_stopping,
                                                                                          wandb.config.file_model)

Validation loss has decreased (inf --> 0.397062). Saving Model...
Epoch : 1 / 10
Time to complete 1 is 0:03:09.142722
Train Loss:  3.6680 | Train Accuracy:  50.0541%
Valid Loss:  0.3971 | Valid Accuracy:  87.1333%

Validation loss has decreased (0.397062 --> 0.286883). Saving model...
Epoch : 2 / 10
Time to complete 2 is 0:03:15.383267
Train Loss:  0.4314 | Train Accuracy:  86.6749%
Valid Loss:  0.2869 | Valid Accuracy:  91.0667%

Validation loss has decreased (0.286883 --> 0.252781). Saving model...
Epoch : 3 / 10
Time to complete 3 is 0:03:15.028458
Train Loss:  0.3086 | Train Accuracy:  90.0448%
Valid Loss:  0.2528 | Valid Accuracy:  91.9000%

Validation loss has decreased (0.252781 --> 0.235593). Saving model...
Epoch : 4 / 10
Time to complete 4 is 0:03:13.689657
Train Loss:  0.2742 | Train Accuracy:  91.3124%
Valid Loss:  0.2356 | Valid Accuracy:  92.2000%

Validation loss has decreased (0.235593 --> 0.208803). Saving model...
Epoch : 5 / 10
Time to complete 5 is 0:03:13.305140
Tr

Resnet 50:
- LR: 0.01
- OneCycle Scheduler
- batch size 64
- Keep previous data augmentations from vgg16 model (will change up later)
- Unfreeze multiple layers

In [66]:
example_ct_train, batch_ct_train, example_ct_valid, batch_ct_valid = 0, 0, 0, 0
train_loss_history, train_acc_history, valid_loss_history, valid_acc_history = train_loop(train_loader, valid_loader, model, loss_function, optimizer, 
                                                                                          wandb.config.epochs, wandb.config.device,
                                                                                          wandb.config.patience, wandb.config.early_stopping,
                                                                                          wandb.config.file_model)

Validation loss has decreased (inf --> 0.106111). Saving Model...
Epoch : 1 / 10
Time to complete 1 is 0:02:36.035811
Train Loss:  0.5487 | Train Accuracy:  84.6808%
Valid Loss:  0.1061 | Valid Accuracy:  96.3667%

Validation loss has decreased (0.106111 --> 0.085336). Saving model...
Epoch : 2 / 10
Time to complete 2 is 0:02:35.725800
Train Loss:  0.1172 | Train Accuracy:  96.4137%
Valid Loss:  0.0853 | Valid Accuracy:  97.0667%

Validation loss has decreased (0.085336 --> 0.079990). Saving model...
Epoch : 3 / 10
Time to complete 3 is 0:02:36.182165
Train Loss:  0.0775 | Train Accuracy:  97.6194%
Valid Loss:  0.0800 | Valid Accuracy:  96.9667%

Early stoping counter: 1 out of 5
Epoch : 4 / 10
Time to complete 4 is 0:02:36.379399
Train Loss:  0.0621 | Train Accuracy:  98.1450%
Valid Loss:  0.0818 | Valid Accuracy:  97.4667%

Early stoping counter: 2 out of 5
Epoch : 5 / 10
Time to complete 5 is 0:02:35.923250
Train Loss:  0.0415 | Train Accuracy:  98.7015%
Valid Loss:  0.1038 | Valid 

Run 2:
- StepLR Scheduler

Increased accuracy on test dataset so will keep StepLR scheduler for rest of runs

In [79]:
example_ct_train, batch_ct_train, example_ct_valid, batch_ct_valid = 0, 0, 0, 0
train_loss_history, train_acc_history, valid_loss_history, valid_acc_history = train_loop(train_loader, valid_loader, model, loss_function, optimizer, 
                                                                                          wandb.config.epochs, wandb.config.device,
                                                                                          wandb.config.patience, wandb.config.early_stopping,
                                                                                          wandb.config.file_model)

Validation loss has decreased (inf --> 0.105932). Saving Model...
Epoch : 1 / 10
Time to complete 1 is 0:02:34.543860
Train Loss:  0.5603 | Train Accuracy:  85.3300%
Valid Loss:  0.1059 | Valid Accuracy:  96.8667%

Validation loss has decreased (0.105932 --> 0.077564). Saving model...
Epoch : 2 / 10
Time to complete 2 is 0:02:35.229491
Train Loss:  0.1244 | Train Accuracy:  96.3055%
Valid Loss:  0.0776 | Valid Accuracy:  97.6333%

Early stoping counter: 1 out of 5
Epoch : 3 / 10
Time to complete 3 is 0:02:35.537040
Train Loss:  0.0822 | Train Accuracy:  97.6658%
Valid Loss:  0.0791 | Valid Accuracy:  97.3333%

Early stoping counter: 2 out of 5
Epoch : 4 / 10
Time to complete 4 is 0:02:36.491942
Train Loss:  0.0622 | Train Accuracy:  98.1605%
Valid Loss:  0.0814 | Valid Accuracy:  97.4333%

Early stoping counter: 3 out of 5
Epoch : 5 / 10
Time to complete 5 is 0:02:35.715657
Train Loss:  0.0503 | Train Accuracy:  98.4696%
Valid Loss:  0.0800 | Valid Accuracy:  97.5000%

Validation loss 

Run 3:
- Unfreeze more layers

Converges faster and increased accuracy

In [90]:
example_ct_train, batch_ct_train, example_ct_valid, batch_ct_valid = 0, 0, 0, 0
train_loss_history, train_acc_history, valid_loss_history, valid_acc_history = train_loop(train_loader, valid_loader, model, loss_function, optimizer, 
                                                                                          wandb.config.epochs, wandb.config.device,
                                                                                          wandb.config.patience, wandb.config.early_stopping,
                                                                                          wandb.config.file_model)

Validation loss has decreased (inf --> 0.081354). Saving Model...
Epoch : 1 / 10
Time to complete 1 is 0:02:38.292701
Train Loss:  0.5392 | Train Accuracy:  86.3967%
Valid Loss:  0.0814 | Valid Accuracy:  97.4333%

Validation loss has decreased (0.081354 --> 0.057273). Saving model...
Epoch : 2 / 10
Time to complete 2 is 0:02:39.265615
Train Loss:  0.0979 | Train Accuracy:  97.2948%
Valid Loss:  0.0573 | Valid Accuracy:  98.2667%

Early stoping counter: 1 out of 5
Epoch : 3 / 10
Time to complete 3 is 0:02:38.721955
Train Loss:  0.0608 | Train Accuracy:  98.1605%
Valid Loss:  0.0601 | Valid Accuracy:  98.1333%

Early stoping counter: 2 out of 5
Epoch : 4 / 10
Time to complete 4 is 0:02:39.299684
Train Loss:  0.0432 | Train Accuracy:  98.9334%
Valid Loss:  0.0609 | Valid Accuracy:  98.0333%

Early stoping counter: 3 out of 5
Epoch : 5 / 10
Time to complete 5 is 0:02:40.399036
Train Loss:  0.0336 | Train Accuracy:  98.9952%
Valid Loss:  0.0691 | Valid Accuracy:  98.0000%

Early stoping co

Run 4:
- Unfreeze more layers

In [102]:
example_ct_train, batch_ct_train, example_ct_valid, batch_ct_valid = 0, 0, 0, 0
train_loss_history, train_acc_history, valid_loss_history, valid_acc_history = train_loop(train_loader, valid_loader, model, loss_function, optimizer, 
                                                                                          wandb.config.epochs, wandb.config.device,
                                                                                          wandb.config.patience, wandb.config.early_stopping,
                                                                                          wandb.config.file_model)

Validation loss has decreased (inf --> 0.077966). Saving Model...
Epoch : 1 / 10
Time to complete 1 is 0:02:45.435584
Train Loss:  0.5391 | Train Accuracy:  86.2421%
Valid Loss:  0.0780 | Valid Accuracy:  97.6333%

Validation loss has decreased (0.077966 --> 0.054258). Saving model...
Epoch : 2 / 10
Time to complete 2 is 0:02:44.902152
Train Loss:  0.0953 | Train Accuracy:  97.3721%
Valid Loss:  0.0543 | Valid Accuracy:  98.4333%

Early stoping counter: 1 out of 5
Epoch : 3 / 10
Time to complete 3 is 0:02:45.238432
Train Loss:  0.0578 | Train Accuracy:  98.3769%
Valid Loss:  0.0544 | Valid Accuracy:  98.2333%

Early stoping counter: 2 out of 5
Epoch : 4 / 10
Time to complete 4 is 0:02:44.536908
Train Loss:  0.0411 | Train Accuracy:  98.9334%
Valid Loss:  0.0599 | Valid Accuracy:  98.0000%

Early stoping counter: 3 out of 5
Epoch : 5 / 10
Time to complete 5 is 0:02:44.677057
Train Loss:  0.0277 | Train Accuracy:  99.1343%
Valid Loss:  0.0613 | Valid Accuracy:  98.3667%

Early stoping co

Run 5:
- Different data augmentations to see effects

In [116]:
example_ct_train, batch_ct_train, example_ct_valid, batch_ct_valid = 0, 0, 0, 0
train_loss_history, train_acc_history, valid_loss_history, valid_acc_history = train_loop(train_loader, valid_loader, model, loss_function, optimizer, 
                                                                                          wandb.config.epochs, wandb.config.device,
                                                                                          wandb.config.patience, wandb.config.early_stopping,
                                                                                          wandb.config.file_model)

Validation loss has decreased (inf --> 0.173655). Saving Model...
Epoch : 1 / 10
Time to complete 1 is 0:02:49.420748
Train Loss:  0.7036 | Train Accuracy:  81.0326%
Valid Loss:  0.1737 | Valid Accuracy:  94.4667%

Validation loss has decreased (0.173655 --> 0.158583). Saving model...
Epoch : 2 / 10
Time to complete 2 is 0:02:49.986210
Train Loss:  0.1772 | Train Accuracy:  94.8988%
Valid Loss:  0.1586 | Valid Accuracy:  95.4000%

Validation loss has decreased (0.158583 --> 0.136391). Saving model...
Epoch : 3 / 10
Time to complete 3 is 0:02:50.675918
Train Loss:  0.1345 | Train Accuracy:  96.0581%
Valid Loss:  0.1364 | Valid Accuracy:  95.6667%

Validation loss has decreased (0.136391 --> 0.120892). Saving model...
Epoch : 4 / 10
Time to complete 4 is 0:02:51.599424
Train Loss:  0.1110 | Train Accuracy:  96.7537%
Valid Loss:  0.1209 | Valid Accuracy:  96.0333%

Validation loss has decreased (0.120892 --> 0.114929). Saving model...
Epoch : 5 / 10
Time to complete 5 is 0:02:50.611669
Tr

In [None]:
wandb.finish()

# **Accuracy and Predictions**

Now we have final values for weights and bias after training the model. We will use these values to make predictions on the test dataset.

## Function to get predictions

In [56]:
def get_acc_pred(data_loader, model):
  """ 
  Function to get predictions for a given test set and calculate accuracy.
  Input: Iterator to the test set.
  Output: Prections and Accuracy for test set.
  """
  model.eval()
  with torch.no_grad():
    # Array to store predicted labels
    predictions = torch.Tensor()
    predictions = predictions.to(device)

    # Array to store actual labels
    y = torch.Tensor()
    y = y.to(device)
    # Iterate over batches from test set
    for input, targets in data_loader:
      
      # move inputs and outputs to GPUs
      input = input.to(device)
      targets = targets.to(device)

      # Calculated the predicted labels
      output = model(input)

      # Choose the label with maximum probability
      indices = torch.argmax(output, dim = 1)

      # Add the predicted labels to the array
      predictions = torch.cat((predictions, indices)) 

      # Add the actual labels to the array
      y = torch.cat((y, targets)) 

    # Check for complete dataset if actual and predicted labels are same or not
    # Calculate accuracy
    acc = (predictions == y).float().mean()

  # Return array containing predictions and accuracy
  return predictions, acc
  

## Load saved model from file 

VGG16 Model

In [57]:
# For vgg16 model run
model_nn =  vgg16_mod
model_nn.to(device)
model_nn.load_state_dict(torch.load(wandb.config.file_model))

<All keys matched successfully>

In [58]:
print(wandb.config.file_model)

/content/drive/MyDrive/Data/DL/HW6B/HW6b_vgg16_5.pt


In [59]:
# Get the prediction and accuracy for the test dataset
predictions, acc_test = get_acc_pred(test_loader, model_nn)

In [60]:
# Print Accuracy for test dataset
print(acc_test * 100)
wandb.config.test_accuracy = acc_test

tensor(97.8344, device='cuda:0')


Achieved 97.8% test accuracy

RESNET 50

In [70]:
# For resnet50 model run
model_nn =  resnet50_mod2
model_nn.to(device)
model_nn.load_state_dict(torch.load(wandb.config.file_model))

<All keys matched successfully>

In [71]:
print(wandb.config.file_model)

/content/drive/MyDrive/Data/DL/HW6B/HW6b_resnet50_1.pt


In [72]:
# Get the prediction and accuracy for the test dataset
predictions, acc_test = get_acc_pred(test_loader, model_nn)

In [73]:
# Print Accuracy for test dataset
print(acc_test * 100)
wandb.config.test_accuracy = acc_test

tensor(98.2675, device='cuda:0')


Achieved 98.27% test accuracy

In [80]:
# For resnet50 model run
model_nn =  resnet50_mod2
model_nn.to(device)
model_nn.load_state_dict(torch.load(wandb.config.file_model))

<All keys matched successfully>

In [81]:
print(wandb.config.file_model)

/content/drive/MyDrive/Data/DL/HW6B/HW6b_resnet50_2.pt


In [82]:
# Get the prediction and accuracy for the test dataset
predictions, acc_test = get_acc_pred(test_loader, model_nn)

In [83]:
# Print Accuracy for test dataset
print(acc_test * 100)
wandb.config.test_accuracy = acc_test

tensor(98.5223, device='cuda:0')


Run 2 achieved 98.5% test accuracy

In [91]:
# For resnet50 model run
model_nn =  resnet50_mod2
model_nn.to(device)
model_nn.load_state_dict(torch.load(wandb.config.file_model))

<All keys matched successfully>

In [92]:
print(wandb.config.file_model)

/content/drive/MyDrive/Data/DL/HW6B/HW6b_resnet50_3.pt


In [93]:
# Get the prediction and accuracy for the test dataset
predictions, acc_test = get_acc_pred(test_loader, model_nn)

In [94]:
# Print Accuracy for test dataset
print(acc_test * 100)
wandb.config.test_accuracy = acc_test

tensor(98.9554, device='cuda:0')


Run 3 achieved 98.95% accuracy

In [103]:
# For resnet50 model run
model_nn =  resnet50_mod2
model_nn.to(device)
model_nn.load_state_dict(torch.load(wandb.config.file_model))

<All keys matched successfully>

In [104]:
print(wandb.config.file_model)

/content/drive/MyDrive/Data/DL/HW6B/HW6b_resnet50_4.pt


In [105]:
# Get the prediction and accuracy for the test dataset
predictions, acc_test = get_acc_pred(test_loader, model_nn)

In [106]:
# Print Accuracy for test dataset
print(acc_test * 100)
wandb.config.test_accuracy = acc_test

tensor(98.9809, device='cuda:0')


Run 4 achieved 98.98% test accuracy

In [117]:
# For resnet50 model run
model_nn =  resnet50_mod2
model_nn.to(device)
model_nn.load_state_dict(torch.load(wandb.config.file_model))

<All keys matched successfully>

In [118]:
print(wandb.config.file_model)

/content/drive/MyDrive/Data/DL/HW6B/HW6b_resnet50_5.pt


In [119]:
# Get the prediction and accuracy for the test dataset
predictions, acc_test = get_acc_pred(test_loader, model_nn)

In [120]:
# Print Accuracy for test dataset
print(acc_test * 100)
wandb.config.test_accuracy = acc_test

tensor(98.6242, device='cuda:0')


Run 5 achieved 98.62% test accuracy