### **Importing Libraries**

In [None]:
!pip3 install wandb -qq

[K     |████████████████████████████████| 1.9 MB 38.2 MB/s 
[K     |████████████████████████████████| 182 kB 63.2 MB/s 
[K     |████████████████████████████████| 168 kB 62.7 MB/s 
[K     |████████████████████████████████| 63 kB 2.0 MB/s 
[K     |████████████████████████████████| 166 kB 7.9 MB/s 
[K     |████████████████████████████████| 166 kB 54.8 MB/s 
[K     |████████████████████████████████| 162 kB 53.8 MB/s 
[K     |████████████████████████████████| 162 kB 47.1 MB/s 
[K     |████████████████████████████████| 158 kB 52.5 MB/s 
[K     |████████████████████████████████| 157 kB 53.2 MB/s 
[K     |████████████████████████████████| 157 kB 57.5 MB/s 
[K     |████████████████████████████████| 157 kB 59.3 MB/s 
[K     |████████████████████████████████| 157 kB 70.9 MB/s 
[K     |████████████████████████████████| 157 kB 68.3 MB/s 
[K     |████████████████████████████████| 157 kB 53.8 MB/s 
[K     |████████████████████████████████| 157 kB 53.7 MB/s 
[K     |██████████████████

In [None]:
# Importing the necessary libraries
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

import numpy as np
import random

from datetime import datetime
from pathlib import Path
import sys
from types import SimpleNamespace

from IPython.display import Image
import matplotlib.pyplot as plt
%matplotlib inline

import wandb

### **Set seed**

In [None]:
SEED = 2345
# Seed every random number generator the notebook imports so that a
# Restart & Run All reproduces the same split, shuffling and augmentation.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# manual_seed_all seeds every CUDA device rather than only the current one
torch.cuda.manual_seed_all(SEED)
# Ask cuDNN for deterministic kernels and switch off its autotuner, which
# may otherwise pick different algorithms from one run to the next.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

### **Wandb Login**

In [None]:
# Login to W&B
wandb.login()

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 

··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

### **Data Folder declaration**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
base_folder_drive = Path("/content/drive/MyDrive/Colab Notebooks/Deep Learning/Datasets/")

In [None]:
# base_folder = Path('u:\\OneDrive - The University of Texas at Dallas\\6382\\Datasets')

In [None]:
# Sub-directories of the Drive base folder used by this notebook
data_folder = base_folder_drive/'cifar10/'  # root directory passed to the CIFAR10 dataset objects
archive_folder = base_folder_drive/'archive'  # NOTE(review): not referenced again in the visible part of the notebook
model_folder = base_folder_drive/'models/cifar10'  # parent folder of the checkpoint files (file_model)
custom_functions = base_folder_drive/'custom-functions'  # NOTE(review): not referenced again in the visible part of the notebook

In [None]:
# Create the checkpoint and dataset directories (and any missing parents) if needed
model_folder.mkdir(exist_ok=True, parents=True)
data_folder.mkdir(exist_ok=True, parents=True)
# NOTE(review): this puts the checkpoint folder on the import path; `custom_functions`
# looks like the folder that was meant to be importable — confirm.
# Re-running this cell appends the same entry again.
sys.path.append(str(model_folder))
# Show the resulting import search path
sys.path

['/content',
 '/env/python',
 '/usr/lib/python37.zip',
 '/usr/lib/python3.7',
 '/usr/lib/python3.7/lib-dynload',
 '',
 '/usr/local/lib/python3.7/dist-packages',
 '/usr/lib/python3/dist-packages',
 '/usr/local/lib/python3.7/dist-packages/IPython/extensions',
 '/root/.ipython',
 '/content/drive/MyDrive/Colab Notebooks/Deep Learning/Datasets/models/cifar10']

### **Transformations and downloading dataset**

In [None]:
# Channel-wise mean / std handed to Normalize by both pipelines
norm_mean = (0.4914, 0.4822, 0.4465)
norm_std = (0.2023, 0.1994, 0.2010)

# Training pipeline: resize to 224x224, random horizontal flip, tensor conversion, normalisation
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# Evaluation pipeline: the same steps without the random flip
valid_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# Arguments common to all three dataset objects
cifar_args = dict(root=data_folder, download=True)

# The training split is loaded twice, once per transform pipeline; the
# train and validation subsets are created from these two objects.
train_set = torchvision.datasets.CIFAR10(train=True, transform=train_transform, **cifar_args)
valid_set = torchvision.datasets.CIFAR10(train=True, transform=valid_transform, **cifar_args)

# Test split, using the evaluation pipeline
testset = torchvision.datasets.CIFAR10(train=False, transform=valid_transform, **cifar_args)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [None]:
def split_dataset(base_dataset, fraction, seed):
    """
    Randomly partition `base_dataset` into two subsets.

    The first subset holds int(fraction * len(base_dataset)) items and the
    second holds the remainder. A fresh generator seeded with `seed` drives
    the shuffle, so the same (length, fraction, seed) gives the same partition.
    """
    total = len(base_dataset)
    first_len = int(fraction * total)
    lengths = [first_len, total - first_len]

    rng = torch.Generator()
    rng.manual_seed(seed)

    return torch.utils.data.random_split(base_dataset, lengths, generator=rng)

In [None]:
# Both calls use the same fraction and seed on equally sized datasets, so they
# produce the same index partition: the first part (with the training
# transform) becomes the training subset, and the second part (with the
# evaluation transform) becomes the validation subset.
trainset = split_dataset(train_set, 0.8, 42)[0]
validset = split_dataset(valid_set, 0.8, 42)[1]

In [None]:
def train(train_loader, loss_function, model, optimizer, device, grad_clipping, max_norm, log_batch, log_interval):
  """
  Run one training epoch over `train_loader`, updating `model` in place.

  Args:
    train_loader: iterable of (inputs, targets) batches; must support len()
      and expose a `.dataset` attribute whose length is the sample count.
    loss_function: callable returning a scalar loss from (output, targets).
    model: network to train; it is put in training mode here.
    optimizer: optimizer used to update the model parameters.
    device: device the batches are moved to before the forward pass.
    grad_clipping: when truthy, clip the gradient L2 norm to `max_norm`.
    max_norm: clipping threshold used when `grad_clipping` is set.
    log_batch: when truthy, log batch loss and accuracy to W&B.
    log_interval: a batch is logged when (batch_ct_train + 1) is a multiple of this.

  Returns:
    (train_loss, train_acc) as plain Python floats: the mean of the per-batch
    losses, and the number of correct predictions divided by the dataset size.

  Side effects:
    Increments the module-level counter `batch_ct_train` once per batch.
  """
  # Global batch counter; it keeps counting across epochs
  global batch_ct_train

  # Running totals for this epoch, kept as Python numbers so that nothing
  # device-resident is returned to (and stored by) the caller
  running_train_loss = 0.0
  running_train_correct = 0

  # put the model in training mode
  model.train()

  for input_, targets in train_loader:

    # move inputs and targets to the requested device
    input_ = input_.to(device)
    targets = targets.to(device)

    # Step 1: forward pass
    output = model(input_)

    # Step 2: compute loss
    loss = loss_function(output, targets)

    # Number of correct predictions in this batch, as a Python int
    y_pred = torch.argmax(output, dim = 1)
    correct = torch.sum(y_pred == targets).item()

    batch_ct_train += 1

    # Step 3: backward pass - clear old gradients, then compute new ones
    optimizer.zero_grad()
    loss.backward()

    # Optional gradient clipping (L2 norm)
    if grad_clipping:
      nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_norm, norm_type=2)

    # Step 4: update the parameters
    optimizer.step()

    # Detach the batch loss from the graph before accumulating / logging it
    batch_loss = loss.item()
    running_train_loss += batch_loss
    running_train_correct += correct

    # log batch loss and accuracy
    if log_batch:
      if ((batch_ct_train + 1) % log_interval) == 0:
        wandb.log({"Train Batch Loss  :": batch_loss})
        wandb.log({"Train Batch Acc :": correct/len(targets)})

  # Mean of the per-batch losses for this epoch
  train_loss = running_train_loss/len(train_loader)

  # Accuracy over the whole dataset for this epoch
  train_acc = running_train_correct/len(train_loader.dataset)

  return train_loss, train_acc

In [None]:
def validate(valid_loader, loss_function, model, device, log_batch, log_interval):
  """
  Evaluate `model` on every batch of `valid_loader` without computing gradients.

  Args:
    valid_loader: iterable of (inputs, targets) batches; must support len()
      and expose a `.dataset` attribute whose length is the sample count.
    loss_function: callable returning a scalar loss from (output, targets).
    model: network to evaluate; it is put in evaluation mode here.
    device: device the batches are moved to before the forward pass.
    log_batch: when truthy, log batch loss and accuracy to W&B.
    log_interval: a batch is logged when (batch_ct_valid + 1) is a multiple of this.

  Returns:
    (val_loss, val_acc) as plain Python floats: the mean of the per-batch
    losses, and the number of correct predictions divided by the dataset size.

  Side effects:
    Increments the module-level counter `batch_ct_valid` once per batch.
  """
  # Global batch counter; it keeps counting across epochs
  global batch_ct_valid

  # Running totals, kept as Python numbers so that nothing device-resident
  # is returned to (and stored by) the caller
  running_val_loss = 0.0
  running_val_correct = 0

  # put the model in evaluation mode
  model.eval()

  with torch.no_grad():
    for input_, targets in valid_loader:

      # move inputs and targets to the requested device
      input_ = input_.to(device)
      targets = targets.to(device)

      # Step 1: forward pass
      output = model(input_)

      # Step 2: compute loss
      batch_loss = loss_function(output, targets).item()

      # Number of correct predictions in this batch, as a Python int
      y_pred = torch.argmax(output, dim = 1)
      correct = torch.sum(y_pred == targets).item()

      batch_ct_valid += 1

      running_val_loss += batch_loss
      running_val_correct += correct

      # log batch loss and accuracy
      if log_batch:
        if ((batch_ct_valid + 1) % log_interval) == 0:
          wandb.log({"Valid Batch Loss  :": batch_loss})
          wandb.log({"Valid Batch Accuracy :": correct/len(targets)})

  # Mean of the per-batch losses
  val_loss = running_val_loss/len(valid_loader)

  # Accuracy over the whole dataset
  val_acc = running_val_correct/len(valid_loader.dataset)

  return val_loss, val_acc

In [None]:
def train_loop(train_loader, valid_loader, model, optimizer, loss_function, epochs, device, patience, early_stopping,
               file_model, save_best_model):

  """
  Train and validate `model` for up to `epochs` epochs, checkpointing to `file_model`.

  Every epoch runs train() followed by validate(), appends the four metrics
  to the history lists, logs them to W&B and prints a summary. The gradient
  clipping and batch-logging settings are read from wandb.config.

  Checkpoint policy (the first matching rule applies):
    * early_stopping: save whenever the validation loss improves; stop once
      the number of consecutive non-improving epochs exceeds `patience`.
    * save_best_model: save whenever the validation loss improves.
    * otherwise: save after every epoch.

  Args:
    train_loader, valid_loader: batch iterables passed to train() / validate().
    model: network to train.
    optimizer: optimizer passed to train().
    loss_function: loss passed to train() and validate().
    epochs: maximum number of epochs.
    device: device passed to train() and validate().
    patience: early-stopping tolerance (used only when `early_stopping` is set).
    early_stopping: enable early stopping.
    file_model: path the model state_dict is saved to.
    save_best_model: save only on improvement (used when `early_stopping` is off).

  Returns:
    (train_loss_history, train_acc_history, valid_loss_history, valid_acc_history),
    each holding one entry per epoch that was run.
  """

  # Per-epoch metric histories
  train_loss_history = []
  valid_loss_history = []
  train_acc_history = []
  valid_acc_history = []

  # State for best-model tracking and early stopping
  delta = 0
  best_score = None
  # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0
  valid_loss_min = np.inf
  counter_early_stop=0
  early_stop=False

  for epoch in range(epochs):

    t0 = datetime.now()

    # One pass over the training data, then one over the validation data.
    # The `device` argument is honoured here instead of re-reading wandb.config.
    train_loss, train_acc = train(train_loader, loss_function, model, optimizer,
                                  device, wandb.config.grad_clipping,
                                  wandb.config.max_norm, wandb.config.log_batch, wandb.config.log_interval)
    valid_loss, valid_acc   = validate(valid_loader, loss_function, model, device, wandb.config.log_batch, wandb.config.log_interval)

    dt = datetime.now() - t0

    # Save history of the losses and accuracies
    train_loss_history.append(train_loss)
    train_acc_history.append(train_acc)

    valid_loss_history.append(valid_loss)
    valid_acc_history.append(valid_acc)

    # Log the epoch-level metrics to wandb
    wandb.log({f"Train Loss :": train_loss, "epoch": epoch})
    wandb.log({f"Train Acc :": train_acc, "epoch": epoch})

    wandb.log({f"Valid Loss :": valid_loss, "epoch": epoch})
    wandb.log({f"Valid Acc :": valid_acc, "epoch": epoch})

    if early_stopping:
      # Higher score is better, hence the negated loss
      score = -valid_loss
      if best_score is None:
        # First epoch: always counts as an improvement
        best_score=score
        print(f'Validation loss has decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Saving Model...')
        torch.save(model.state_dict(), file_model)
        valid_loss_min = valid_loss

      elif score < best_score + delta:
        # No improvement: count it and stop once the count exceeds patience
        counter_early_stop += 1
        print(f'Early stoping counter: {counter_early_stop} out of {patience}')
        if counter_early_stop > patience:
          early_stop = True

      else:
        # Improvement: checkpoint and reset the counter
        best_score = score
        print(f'Validation loss has decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Saving model...')
        torch.save(model.state_dict(), file_model)
        counter_early_stop=0
        valid_loss_min = valid_loss

      if early_stop:
        print('Early Stopping')
        break

    elif save_best_model:

      score = -valid_loss
      if best_score is None:
        # First epoch: always counts as an improvement
        best_score=score
        print(f'Validation loss has decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Saving Model...')
        torch.save(model.state_dict(), file_model)
        valid_loss_min = valid_loss

      elif score < best_score + delta:
        print(f'Validation loss has not decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Not Saving Model...')

      else:
        best_score = score
        print(f'Validation loss has decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Saving model...')
        torch.save(model.state_dict(), file_model)
        valid_loss_min = valid_loss

    else:
      # No selection policy: overwrite the checkpoint every epoch
      torch.save(model.state_dict(), file_model)

    # Epoch summary
    print(f'Epoch : {epoch+1} / {epochs}')
    print(f'Time to complete {epoch+1} is {dt}')
    print(f'Train Loss: {train_loss : .4f} | Train Accuracy: {train_acc * 100 : .4f}%')
    print(f'Valid Loss: {valid_loss : .4f} | Valid Accuracy: {valid_acc * 100 : .4f}%')
    print()
    torch.cuda.empty_cache()

  return train_loss_history, train_acc_history, valid_loss_history, valid_acc_history

In [None]:
def get_acc_pred(data_loader, model, device):

  """
  Compute the predicted labels and the accuracy of `model` on a dataset.

  Args:
    data_loader: iterable of (inputs, targets) batches.
    model: network to evaluate; it is put in evaluation mode here.
    device: device the batches are moved to before the forward pass.

  Returns:
    (predictions, acc): `predictions` is a 1-D tensor with the argmax class
    index of every sample in iteration order (integer dtype, as produced by
    torch.argmax); `acc` is a 0-dim float tensor with the fraction of
    predictions equal to the targets. For an empty loader an empty tensor and
    NaN are returned.
  """

  # Per-batch results are collected and concatenated once at the end;
  # concatenating inside the loop re-copies all earlier results every batch.
  batch_predictions = []
  batch_targets = []

  # put the model in evaluation mode
  model.eval()

  with torch.no_grad():
    for input_, targets in data_loader:

      # move inputs and targets to the requested device
      input_ = input_.to(device)
      targets = targets.to(device)

      # Forward pass
      output = model(input_)

      # Choose the label with the largest score
      batch_predictions.append(torch.argmax(output, dim = 1))
      batch_targets.append(targets)

  if not batch_predictions:
    # torch.cat rejects an empty list, so return the empty / NaN result
    # that an empty loader would otherwise produce
    empty = torch.empty(0, dtype=torch.long, device=device)
    return empty, torch.tensor(float('nan'), device=device)

  predictions = torch.cat(batch_predictions)
  y = torch.cat(batch_targets)

  # Fraction of samples whose predicted label equals the actual label
  acc = (predictions == y).float().mean()

  # Return tuple containing predictions and accuracy
  return predictions, acc

In [None]:
def count_parameters(model):
  """
  Count the trainable parameters of a model.

  Input: model
  Output: total number of elements over all parameters with requires_grad set
  """
  trainable_total = 0
  for param in model.parameters():
    if param.requires_grad:
      trainable_total += param.numel()
  return trainable_total


In [None]:
# `pretrained=True` is deprecated since torchvision 0.13; the `weights`
# argument selects the same ImageNet checkpoint explicitly.
model_vgg = torchvision.models.vgg16(weights="IMAGENET1K_V1")

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

In [None]:
# Display the model structure
model_vgg

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
# Print the number of parameters in the model
print(count_parameters(model_vgg))

138357544


In [None]:
# Task 1
vgg16_task1 = torchvision.models.vgg16(weights="IMAGENET1K_V1")

# Replace the last classifier layer with a new 10-class head. Assigning to
# `out_features` only changes an attribute: the weight matrix keeps its
# original shape, so the network would still emit the original number of logits.
vgg16_task1.classifier[6] = nn.Linear(vgg16_task1.classifier[6].in_features, 10)

# freeze convolution weights
for param in vgg16_task1.features.parameters():
    param.requires_grad = False

### Hyperparameters

In [None]:
# Run configuration for task 1; handed to wandb.init as the run config
hyperparameters = SimpleNamespace(
    epochs = 2,
    batch_size=256,
    learning_rate=0.05,
    dataset="CIFAR10",
    architecture="Pretrained-VGG",
    log_interval = 100,  # batch metrics are logged when (batch counter + 1) is a multiple of this
    log_batch = True,
    file_model = model_folder/'pre_vgg_cifar10_task1.pt',  # checkpoint path given to train_loop
    grad_clipping = True,
    max_norm = 1,  # gradient-norm threshold used when grad_clipping is on
    patience = 5,  # only consulted by train_loop when early_stopping is True
    early_stopping = False,
    weight_decay = 0,
    save_best_model = True,  # checkpoint only when the validation loss improves
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    )

In [None]:
wandb.init(name = "exp1", project = 'VGG_HW7_Copy' , config = hyperparameters)

[34m[1mwandb[0m: Currently logged in as: [33mpiyushrs[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
wandb.config

{'epochs': 2, 'batch_size': 256, 'learning_rate': 0.05, 'dataset': 'CIFAR10', 'architecture': 'Pretrained-VGG', 'log_interval': 100, 'log_batch': True, 'file_model': '/content/drive/MyDrive/Colab Notebooks/Deep Learning/Datasets/models/cifar10/pre_vgg_cifar10_task1.pt', 'grad_clipping': True, 'max_norm': 1, 'patience': 5, 'early_stopping': False, 'weight_decay': 0, 'save_best_model': True, 'device': 'cuda:0'}

In [None]:
# Only the training loader reshuffles; validation and test keep a fixed order
loader_batch_size = wandb.config.batch_size
train_loader = torch.utils.data.DataLoader(trainset, batch_size=loader_batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(validset, batch_size=loader_batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(testset, batch_size=loader_batch_size, shuffle=False)

In [None]:
# Model for task 1, moved to the configured device
model = vgg16_task1
model.to(wandb.config.device)
# model.apply(init_weights)

# loss_function
loss_function = nn.CrossEntropyLoss()
loss_function.to(wandb.config.device)
# optimizer = torch.optim.Adam(model.parameters(), lr = wandb.config.learning_rate)
# optimizer = torch.optim.Adagrad(model.parameters(), lr = wandb.config.learning_rate)
# SGD with momentum; all model parameters are passed in, including those
# whose requires_grad was set to False
optimizer = torch.optim.SGD(model.parameters(), lr =wandb.config.learning_rate, 
                            weight_decay=wandb.config.weight_decay, momentum = 0.9)
# schedular = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, eta_min = 0.0001, T_max=10)

In [None]:
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
wandb.watch(model)

[]

In [None]:
# Reset the global batch counters that train() and validate() increment
batch_ct_train, batch_ct_valid = 0, 0

# Train the task 1 model; every setting comes from the active W&B run config
(train_loss_history, train_acc_history,
 valid_loss_history, valid_acc_history) = train_loop(
    train_loader,
    valid_loader,
    model,
    optimizer,
    loss_function,
    wandb.config.epochs,
    wandb.config.device,
    wandb.config.patience,
    wandb.config.early_stopping,
    wandb.config.file_model,
    wandb.config.save_best_model,
)

Validation loss has decreased (inf --> 0.414534). Saving Model...
Epoch : 1 / 2
Time to complete 1 is 0:08:00.855478
Train Loss:  1.2072 | Train Accuracy:  70.9600%
Valid Loss:  0.4145 | Valid Accuracy:  85.8800%

Validation loss has decreased (0.414534 --> 0.350146). Saving model...
Epoch : 2 / 2
Time to complete 2 is 0:07:59.609661
Train Loss:  0.4104 | Train Accuracy:  85.7075%
Valid Loss:  0.3501 | Valid Accuracy:  87.5200%



In [None]:
# Get the predictions and accuracy for the test dataset
# NOTE(review): `model` holds the weights from the last epoch that ran; the
# checkpoint saved to wandb.config.file_model is not reloaded here — confirm
# that this is the model meant by 'Best_test_Acc'.
predictions_test, acc_test = get_acc_pred(test_loader, model, wandb.config.device)

print('Test accuracy', acc_test * 100)

wandb.log({'Best_test_Acc': acc_test})

Test accuracy tensor(86.9200, device='cuda:0')


In [None]:
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Best_test_Acc,▁
Train Acc :,▁█
Train Batch Acc :,▁▄█
Train Batch Loss :,█▅▁
Train Loss :,█▁
Valid Acc :,▁█
Valid Loss :,█▁
epoch,▁▁▁▁████

0,1
Best_test_Acc,0.8692
Train Acc :,0.85707
Train Batch Acc :,0.86719
Train Batch Loss :,0.33893
Train Loss :,0.41043
Valid Acc :,0.8752
Valid Loss :,0.35015
epoch,1.0


In [None]:
# Task 2
vgg16_task2 = torchvision.models.vgg16(weights="IMAGENET1K_V1")

# Replace the last classifier layer with a new 10-class head. Assigning to
# `out_features` only changes an attribute: the weight matrix keeps its
# original shape, so the network would still emit the original number of logits.
vgg16_task2.classifier[6] = nn.Linear(vgg16_task2.classifier[6].in_features, 10)

# freeze the first 24 modules of the feature extractor; the rest stay trainable
for param in vgg16_task2.features[:24].parameters():
    param.requires_grad = False

In [None]:
# Run configuration for task 2; same values as task 1 except the checkpoint file
hyperparameters = SimpleNamespace(
    epochs = 2,
    batch_size=256,
    learning_rate=0.05,
    dataset="CIFAR10",
    architecture="Pretrained-VGG",
    log_interval = 100,
    log_batch = True,
    file_model = model_folder/'pre_vgg_cifar10_task2.pt',  # checkpoint path given to train_loop
    grad_clipping = True,
    max_norm = 1,
    patience = 5,  # only consulted by train_loop when early_stopping is True
    early_stopping = False,
    weight_decay = 0,
    save_best_model = True,
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    )

In [None]:
wandb.init(name = "exp2", project = 'VGG_HW7_Copy' , config = hyperparameters)

In [None]:
wandb.config

{'epochs': 2, 'batch_size': 256, 'learning_rate': 0.05, 'dataset': 'CIFAR10', 'architecture': 'Pretrained-VGG', 'log_interval': 100, 'log_batch': True, 'file_model': '/content/drive/MyDrive/Colab Notebooks/Deep Learning/Datasets/models/cifar10/pre_vgg_cifar10_task2.pt', 'grad_clipping': True, 'max_norm': 1, 'patience': 5, 'early_stopping': False, 'weight_decay': 0, 'save_best_model': True, 'device': 'cuda:0'}

In [None]:
# Data loaders for task 2; only the training loader shuffles
train_loader = torch.utils.data.DataLoader(trainset, batch_size=wandb.config.batch_size, shuffle = True)
valid_loader = torch.utils.data.DataLoader(validset, batch_size=wandb.config.batch_size, shuffle = False)
test_loader = torch.utils.data.DataLoader(testset, batch_size=wandb.config.batch_size,   shuffle = False)

# Model for task 2, moved to the configured device
model = vgg16_task2
model.to(wandb.config.device)
# model.apply(init_weights)

# loss_function
loss_function = nn.CrossEntropyLoss()
loss_function.to(wandb.config.device)
# optimizer = torch.optim.Adam(model.parameters(), lr = wandb.config.learning_rate)
optimizer = torch.optim.SGD(model.parameters(), lr =wandb.config.learning_rate, 
                            weight_decay=wandb.config.weight_decay, momentum = 0.9)
# NOTE(review): this scheduler is created but train_loop, as defined in this
# notebook, never calls .step() on it, so the learning rate is not annealed.
# The other VGG tasks have this line commented out — confirm the intent.
schedular = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, eta_min = 0.0001, T_max=10)

In [None]:
wandb.watch(model)

[]

In [None]:
# Reset the global batch counters that train() and validate() increment
batch_ct_train, batch_ct_valid = 0, 0

# Train the task 2 model; every setting comes from the active W&B run config
(train_loss_history, train_acc_history,
 valid_loss_history, valid_acc_history) = train_loop(
    train_loader,
    valid_loader,
    model,
    optimizer,
    loss_function,
    wandb.config.epochs,
    wandb.config.device,
    wandb.config.patience,
    wandb.config.early_stopping,
    wandb.config.file_model,
    wandb.config.save_best_model,
)

Validation loss has decreased (inf --> 0.391150). Saving Model...
Epoch : 1 / 2
Time to complete 1 is 0:08:54.779366
Train Loss:  1.1711 | Train Accuracy:  70.2775%
Valid Loss:  0.3912 | Valid Accuracy:  86.7200%

Validation loss has decreased (0.391150 --> 0.313125). Saving model...
Epoch : 2 / 2
Time to complete 2 is 0:08:51.494638
Train Loss:  0.3483 | Train Accuracy:  88.2325%
Valid Loss:  0.3131 | Valid Accuracy:  89.4000%



In [None]:
# Get the predictions and accuracy for the test dataset
# NOTE(review): `model` holds the weights from the last epoch that ran; the
# checkpoint saved to wandb.config.file_model is not reloaded here — confirm
# that this is the model meant by 'Best_test_Acc'.
predictions_test, acc_test = get_acc_pred(test_loader, model, wandb.config.device)

print('Test accuracy', acc_test * 100)

wandb.log({'Best_test_Acc': acc_test})

Test accuracy tensor(88.8400, device='cuda:0')


In [None]:
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Best_test_Acc,▁
Train Acc :,▁█
Train Batch Acc :,▁▆█
Train Batch Loss :,█▃▁
Train Loss :,█▁
Valid Acc :,▁█
Valid Loss :,█▁
epoch,▁▁▁▁████

0,1
Best_test_Acc,0.8884
Train Acc :,0.88232
Train Batch Acc :,0.91406
Train Batch Loss :,0.24868
Train Loss :,0.34826
Valid Acc :,0.894
Valid Loss :,0.31312
epoch,1.0


In [None]:
# Task 3
vgg16_task3 = torchvision.models.vgg16(weights="IMAGENET1K_V1")

# Replace the last classifier layer with a new 10-class head. Assigning to
# `out_features` only changes an attribute: the weight matrix keeps its
# original shape, so the network would still emit the original number of logits.
vgg16_task3.classifier[6] = nn.Linear(vgg16_task3.classifier[6].in_features, 10)

# freeze the first 17 modules of the feature extractor; the rest stay trainable
for param in vgg16_task3.features[:17].parameters():
    param.requires_grad = False

In [None]:
# Run configuration for task 3; same values as task 1 except the checkpoint file
hyperparameters = SimpleNamespace(
    epochs = 2,
    batch_size=256,
    learning_rate=0.05,
    dataset="CIFAR10",
    architecture="Pretrained-VGG",
    log_interval = 100,
    log_batch = True,
    file_model = model_folder/'pre_vgg_cifar10_task3.pt',  # checkpoint path given to train_loop
    grad_clipping = True,
    max_norm = 1,
    patience = 5,  # only consulted by train_loop when early_stopping is True
    early_stopping = False,
    weight_decay = 0,
    save_best_model = True,
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    )

In [None]:
wandb.init(name = "exp3", project = 'VGG_HW7_Copy' , config = hyperparameters)

In [None]:
wandb.config

{'epochs': 2, 'batch_size': 256, 'learning_rate': 0.05, 'dataset': 'CIFAR10', 'architecture': 'Pretrained-VGG', 'log_interval': 100, 'log_batch': True, 'file_model': '/content/drive/MyDrive/Colab Notebooks/Deep Learning/Datasets/models/cifar10/pre_vgg_cifar10_task3.pt', 'grad_clipping': True, 'max_norm': 1, 'patience': 5, 'early_stopping': False, 'weight_decay': 0, 'save_best_model': True, 'device': 'cuda:0'}

In [None]:
# Data loaders for task 3; only the training loader shuffles
train_loader = torch.utils.data.DataLoader(trainset, batch_size=wandb.config.batch_size, shuffle = True)
valid_loader = torch.utils.data.DataLoader(validset, batch_size=wandb.config.batch_size, shuffle = False)
test_loader = torch.utils.data.DataLoader(testset, batch_size=wandb.config.batch_size,   shuffle = False)

# Model for task 3, moved to the configured device
model = vgg16_task3
model.to(wandb.config.device)
# model.apply(init_weights)

# loss_function
loss_function = nn.CrossEntropyLoss()
loss_function.to(wandb.config.device)
# optimizer = torch.optim.Adam(model.parameters(), lr = wandb.config.learning_rate)
# SGD with momentum; all model parameters are passed in, including those
# whose requires_grad was set to False
optimizer = torch.optim.SGD(model.parameters(), lr =wandb.config.learning_rate, 
                            weight_decay=wandb.config.weight_decay, momentum = 0.9)
# schedular = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, eta_min = 0.0001, T_max=10)
# Register the model with the active W&B run
wandb.watch(model)

[]

In [None]:
# Release cached GPU memory before starting the next training run
torch.cuda.empty_cache()

# Reset the global batch counters that train() and validate() increment
batch_ct_train, batch_ct_valid = 0, 0

# Train the task 3 model; every setting comes from the active W&B run config
(train_loss_history, train_acc_history,
 valid_loss_history, valid_acc_history) = train_loop(
    train_loader,
    valid_loader,
    model,
    optimizer,
    loss_function,
    wandb.config.epochs,
    wandb.config.device,
    wandb.config.patience,
    wandb.config.early_stopping,
    wandb.config.file_model,
    wandb.config.save_best_model,
)

Validation loss has decreased (inf --> 0.435898). Saving Model...
Epoch : 1 / 2
Time to complete 1 is 0:12:19.291782
Train Loss:  0.4081 | Train Accuracy:  86.2075%
Valid Loss:  0.4359 | Valid Accuracy:  85.1800%

Validation loss has decreased (0.435898 --> 0.267120). Saving model...
Epoch : 2 / 2
Time to complete 2 is 0:12:19.456733
Train Loss:  0.2838 | Train Accuracy:  90.2950%
Valid Loss:  0.2671 | Valid Accuracy:  91.2900%



In [None]:
# Get the predictions and accuracy for the test dataset
# NOTE(review): `model` holds the weights from the last epoch that ran; the
# checkpoint saved to wandb.config.file_model is not reloaded here — confirm
# that this is the model meant by 'Best_test_Acc'.
predictions_test, acc_test = get_acc_pred(test_loader, model, wandb.config.device)

print('Test accuracy', acc_test * 100)

wandb.log({'Best_test_Acc': acc_test})

Test accuracy tensor(90.6900, device='cuda:0')


## Best test accuracy is from task 3

In [None]:
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Best_test_Acc,▁
Train Acc :,▁█
Train Batch Acc :,▁▇█
Train Batch Loss :,█▄▁
Train Loss :,█▁
Valid Acc :,▁█
Valid Loss :,█▁
epoch,▁▁▁▁████

0,1
Best_test_Acc,0.9069
Train Acc :,0.90295
Train Batch Acc :,0.92578
Train Batch Loss :,0.20842
Train Loss :,0.28384
Valid Acc :,0.9129
Valid Loss :,0.26712
epoch,1.0


**Wandb Link: https://wandb.ai/piyushrs/VGG_HW7_Copy**

In [None]:
resnet = torchvision.models.resnet34(weights="IMAGENET1K_V1")

# Replace the final fully connected layer with a new 10-class head. Assigning
# to `out_features` only changes an attribute: the weight matrix keeps its
# original shape, so the network would still emit the original number of logits.
resnet.fc = nn.Linear(resnet.fc.in_features, 10)

# Display the number of trainable parameters
count_parameters(resnet)

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth


  0%|          | 0.00/83.3M [00:00<?, ?B/s]

21797672

In [None]:
# Freeze the four residual stages; parameters outside layer1-layer4 are left as they are
for stage in (resnet.layer1, resnet.layer2, resnet.layer3, resnet.layer4):
    for weight in stage.parameters():
        weight.requires_grad = False

# Display the number of parameters that remain trainable
count_parameters(resnet)

522536

In [None]:
# Configuration for ResNet experiment 1; handed to wandb.init as the run config.
hyperparameters = SimpleNamespace(
    epochs = 5,
    batch_size = 256,
    learning_rate = 0.1,
    dataset = "CIFAR10",
    architecture = "Pretrained-Resnet",
    log_interval = 100,
    log_batch = True,
    # ResNet- and experiment-specific checkpoint path, so saving the best model does not
    # overwrite a checkpoint written by another architecture or another experiment.
    file_model = model_folder/'pre_resnet34_exp1_cifar10.pt',
    grad_clipping = True,
    max_norm = 1,
    patience = 5,
    early_stopping = False,
    weight_decay = 0,
    save_best_model = True,
    # Use the first GPU when CUDA is available, otherwise fall back to the CPU.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    )

In [None]:
# Start the W&B run for this experiment and attach the hyperparameters as its config.
wandb.init(
    project = 'Resnet_HW7',
    name = "exp1",
    config = hyperparameters,
)

In [None]:
# Data loaders: training batches are reshuffled, validation/test keep dataset order.
train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=wandb.config.batch_size, shuffle=True
)
valid_loader = torch.utils.data.DataLoader(
    validset, batch_size=wandb.config.batch_size, shuffle=False
)
test_loader = torch.utils.data.DataLoader(
    testset, batch_size=wandb.config.batch_size, shuffle=False
)

# Train the ResNet prepared above; `.to()` returns the same module after moving it.
model = resnet.to(wandb.config.device)

# Cross-entropy loss, placed on the same device as the model.
loss_function = nn.CrossEntropyLoss().to(wandb.config.device)

# SGD with momentum over the model's parameters.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=wandb.config.learning_rate,
    momentum=0.9,
    weight_decay=wandb.config.weight_decay,
)

# Cosine annealing of the learning rate towards eta_min over T_max scheduler steps.
# NOTE(review): `scheduler` is not among the arguments passed to train_loop in this
# notebook; confirm train_loop actually steps it, otherwise the learning rate never changes.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, eta_min=0.01, T_max=10)

# Let W&B hook into the model for logging.
wandb.watch(model)

[]

In [None]:
# Reset the batch counters before the run (presumably read as globals by the
# training helpers -- confirm in their definitions).
batch_ct_train = 0
batch_ct_valid = 0

# Train and validate the model, keeping the loss/accuracy histories that train_loop returns.
(train_loss_history,
 train_acc_history,
 valid_loss_history,
 valid_acc_history) = train_loop(
    train_loader,
    valid_loader,
    model,
    optimizer,
    loss_function,
    wandb.config.epochs,
    wandb.config.device,
    wandb.config.patience,
    wandb.config.early_stopping,
    wandb.config.file_model,
    wandb.config.save_best_model,
)

Validation loss has decreased (inf --> 0.944730). Saving Model...
Epoch : 1 / 5
Time to complete 1 is 0:05:23.497324
Train Loss:  1.3987 | Train Accuracy:  60.5975%
Valid Loss:  0.9447 | Valid Accuracy:  69.0700%

Validation loss has decreased (0.944730 --> 0.859663). Saving model...
Epoch : 2 / 5
Time to complete 2 is 0:05:27.801040
Train Loss:  0.7615 | Train Accuracy:  74.7875%
Valid Loss:  0.8597 | Valid Accuracy:  71.7000%

Validation loss has not decreased (0.859663 --> 0.914757). Not Saving Model...
Epoch : 3 / 5
Time to complete 3 is 0:05:25.501295
Train Loss:  0.7215 | Train Accuracy:  76.2900%
Valid Loss:  0.9148 | Valid Accuracy:  71.6500%

Validation loss has not decreased (0.859663 --> 1.153191). Not Saving Model...
Epoch : 4 / 5
Time to complete 4 is 0:05:25.161420
Train Loss:  0.6627 | Train Accuracy:  78.3000%
Valid Loss:  1.1532 | Valid Accuracy:  67.6200%

Validation loss has decreased (0.859663 --> 0.822112). Saving model...
Epoch : 5 / 5
Time to complete 5 is 0:05:2

In [None]:
# Finish the active W&B run and upload any remaining logged data.
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.072722…

0,1
Train Acc :,▁▆▇██
Train Batch Acc :,▁▄▅▂▅▂█
Train Batch Loss :,▅▄▃█▂█▁
Train Loss :,█▂▂▁▁
Valid Acc :,▂▅▅▁█
Valid Batch Accuracy :,█▁
Valid Batch Loss :,▁█
Valid Loss :,▄▂▃█▁
epoch,▁▁▁▁▃▃▃▃▅▅▅▅▆▆▆▆████

0,1
Train Acc :,0.7956
Train Batch Acc :,0.84766
Train Batch Loss :,0.53838
Train Loss :,0.63096
Valid Acc :,0.7491
Valid Batch Accuracy :,0.69531
Valid Batch Loss :,0.99346
Valid Loss :,0.82211
epoch,4.0


In [None]:
# Load ResNet-34 with its ImageNet-pretrained weights. `weights=` replaces the deprecated
# `pretrained=True` flag; IMAGENET1K_V1 is the checkpoint that flag used to select.
resnet = torchvision.models.resnet34(weights = torchvision.models.ResNet34_Weights.IMAGENET1K_V1)

# Swap the 1000-way ImageNet classifier for a fresh 10-way layer.
# Setting `resnet.fc.out_features = 10` would only overwrite an attribute: the weight
# matrix would keep its 1000 rows, so the layer has to be rebuilt instead.
resnet.fc = nn.Linear(resnet.fc.in_features, 10)

# Freeze the first three residual stages; everything else (including layer4 and the
# new fc head) stays trainable.
for stage in (resnet.layer1, resnet.layer2, resnet.layer3):
    for param in stage.parameters():
        param.requires_grad = False

# Display the model's parameter count.
count_parameters(resnet)

13636904

In [None]:
# Configuration for ResNet experiment 2; handed to wandb.init as the run config.
hyperparameters = SimpleNamespace(
    epochs = 5,
    batch_size = 256,
    learning_rate = 0.1,
    dataset = "CIFAR10",
    architecture = "Pretrained-Resnet",
    log_interval = 100,
    log_batch = True,
    # ResNet- and experiment-specific checkpoint path, so saving the best model does not
    # overwrite a checkpoint written by another architecture or another experiment.
    file_model = model_folder/'pre_resnet34_exp2_cifar10.pt',
    grad_clipping = True,
    max_norm = 1,
    patience = 5,
    early_stopping = False,
    weight_decay = 0,
    save_best_model = True,
    # Use the first GPU when CUDA is available, otherwise fall back to the CPU.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    )

In [None]:
# Start the W&B run for this experiment and attach the hyperparameters as its config.
wandb.init(
    project = 'Resnet_HW7',
    name = "exp2",
    config = hyperparameters,
)

[34m[1mwandb[0m: Currently logged in as: [33mpiyushrs[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Data loaders: training batches are reshuffled, validation/test keep dataset order.
train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=wandb.config.batch_size, shuffle=True
)
valid_loader = torch.utils.data.DataLoader(
    validset, batch_size=wandb.config.batch_size, shuffle=False
)
test_loader = torch.utils.data.DataLoader(
    testset, batch_size=wandb.config.batch_size, shuffle=False
)

# Train the ResNet prepared above; `.to()` returns the same module after moving it.
model = resnet.to(wandb.config.device)

# Cross-entropy loss, placed on the same device as the model.
loss_function = nn.CrossEntropyLoss().to(wandb.config.device)

# SGD with momentum over the model's parameters.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=wandb.config.learning_rate,
    momentum=0.9,
    weight_decay=wandb.config.weight_decay,
)

# Cosine annealing of the learning rate towards eta_min over T_max scheduler steps.
# NOTE(review): `scheduler` is not among the arguments passed to train_loop in this
# notebook; confirm train_loop actually steps it, otherwise the learning rate never changes.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, eta_min=0.01, T_max=10)

# Let W&B hook into the model for logging.
wandb.watch(model)

[]

In [None]:
# Release unoccupied cached GPU memory held by PyTorch's allocator before this run.
torch.cuda.empty_cache()

# Reset the batch counters before the run (presumably read as globals by the
# training helpers -- confirm in their definitions).
batch_ct_train = 0
batch_ct_valid = 0

# Train and validate the model, keeping the loss/accuracy histories that train_loop returns.
(train_loss_history,
 train_acc_history,
 valid_loss_history,
 valid_acc_history) = train_loop(
    train_loader,
    valid_loader,
    model,
    optimizer,
    loss_function,
    wandb.config.epochs,
    wandb.config.device,
    wandb.config.patience,
    wandb.config.early_stopping,
    wandb.config.file_model,
    wandb.config.save_best_model,
)

Validation loss has decreased (inf --> 0.553969). Saving Model...
Epoch : 1 / 5
Time to complete 1 is 0:05:19.222575
Train Loss:  0.8892 | Train Accuracy:  77.3200%
Valid Loss:  0.5540 | Valid Accuracy:  80.6100%

Validation loss has decreased (0.553969 --> 0.345280). Saving model...
Epoch : 2 / 5
Time to complete 2 is 0:05:14.241381
Train Loss:  0.3412 | Train Accuracy:  88.5600%
Valid Loss:  0.3453 | Valid Accuracy:  88.2700%

Validation loss has not decreased (0.345280 --> 0.349029). Not Saving Model...
Epoch : 3 / 5
Time to complete 3 is 0:05:12.670823
Train Loss:  0.2528 | Train Accuracy:  91.3500%
Valid Loss:  0.3490 | Valid Accuracy:  89.0700%

Validation loss has not decreased (0.345280 --> 0.389586). Not Saving Model...
Epoch : 4 / 5
Time to complete 4 is 0:05:13.121605
Train Loss:  0.2004 | Train Accuracy:  93.2075%
Valid Loss:  0.3896 | Valid Accuracy:  87.9600%

Validation loss has not decreased (0.345280 --> 0.358907). Not Saving Model...
Epoch : 5 / 5
Time to complete 5 i

In [None]:
# Finish the active W&B run and upload any remaining logged data.
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train Acc :,▁▆▇▇█
Train Batch Acc :,▁▅▅▇██▇
Train Batch Loss :,█▄▄▂▁▂▁
Train Loss :,█▃▂▁▁
Valid Acc :,▁▇█▇█
Valid Batch Accuracy :,█▁
Valid Batch Loss :,▁█
Valid Loss :,█▁▁▂▁
epoch,▁▁▁▁▃▃▃▃▅▅▅▅▆▆▆▆████

0,1
Train Acc :,0.9456
Train Batch Acc :,0.92969
Train Batch Loss :,0.16713
Train Loss :,0.1594
Valid Acc :,0.8899
Valid Batch Accuracy :,0.87891
Valid Batch Loss :,0.3918
Valid Loss :,0.35891
epoch,4.0


In [None]:
# Load ResNet-34 with its ImageNet-pretrained weights. `weights=` replaces the deprecated
# `pretrained=True` flag; IMAGENET1K_V1 is the checkpoint that flag used to select.
resnet = torchvision.models.resnet34(weights = torchvision.models.ResNet34_Weights.IMAGENET1K_V1)

# Swap the 1000-way ImageNet classifier for a fresh 10-way layer.
# Setting `resnet.fc.out_features = 10` would only overwrite an attribute: the weight
# matrix would keep its 1000 rows, so the layer has to be rebuilt instead.
resnet.fc = nn.Linear(resnet.fc.in_features, 10)

# Freeze the first two residual stages; everything else (including layer3, layer4 and
# the new fc head) stays trainable.
for stage in (resnet.layer1, resnet.layer2):
    for param in stage.parameters():
        param.requires_grad = False

# Display the model's parameter count.
count_parameters(resnet)

20459304

In [None]:
# Configuration for the ResNet run logged as "exp4"; handed to wandb.init as the run config.
hyperparameters = SimpleNamespace(
    epochs = 5,
    batch_size = 256,
    learning_rate = 0.01,
    dataset = "CIFAR10",
    architecture = "Pretrained-Resnet",
    log_interval = 100,
    log_batch = True,
    # ResNet- and experiment-specific checkpoint path, so saving the best model does not
    # overwrite a checkpoint written by another architecture or another experiment.
    file_model = model_folder/'pre_resnet34_exp4_cifar10.pt',
    grad_clipping = True,
    max_norm = 1,
    patience = 5,
    early_stopping = False,
    weight_decay = 0,
    save_best_model = True,
    # Use the first GPU when CUDA is available, otherwise fall back to the CPU.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    )

In [None]:
# Start the W&B run for this experiment and attach the hyperparameters as its config.
wandb.init(
    project = 'Resnet_HW7',
    name = "exp4",
    config = hyperparameters,
)

In [None]:
# Data loaders: training batches are reshuffled, validation/test keep dataset order.
train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=wandb.config.batch_size, shuffle=True
)
valid_loader = torch.utils.data.DataLoader(
    validset, batch_size=wandb.config.batch_size, shuffle=False
)
test_loader = torch.utils.data.DataLoader(
    testset, batch_size=wandb.config.batch_size, shuffle=False
)

# Train the ResNet prepared above; `.to()` returns the same module after moving it.
model = resnet.to(wandb.config.device)

# Cross-entropy loss, placed on the same device as the model.
loss_function = nn.CrossEntropyLoss().to(wandb.config.device)

# SGD with momentum over the model's parameters.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=wandb.config.learning_rate,
    momentum=0.9,
    weight_decay=wandb.config.weight_decay,
)

# Cosine annealing of the learning rate towards eta_min over T_max scheduler steps.
# NOTE(review): `scheduler` is not among the arguments passed to train_loop in this
# notebook; confirm train_loop actually steps it, otherwise the learning rate never changes.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, eta_min=0.001, T_max=5)

# Let W&B hook into the model for logging.
wandb.watch(model)

[]

In [None]:
# Release unoccupied cached GPU memory held by PyTorch's allocator before this run.
torch.cuda.empty_cache()

# Reset the batch counters before the run (presumably read as globals by the
# training helpers -- confirm in their definitions).
batch_ct_train = 0
batch_ct_valid = 0

# Train and validate the model, keeping the loss/accuracy histories that train_loop returns.
(train_loss_history,
 train_acc_history,
 valid_loss_history,
 valid_acc_history) = train_loop(
    train_loader,
    valid_loader,
    model,
    optimizer,
    loss_function,
    wandb.config.epochs,
    wandb.config.device,
    wandb.config.patience,
    wandb.config.early_stopping,
    wandb.config.file_model,
    wandb.config.save_best_model,
)

Validation loss has decreased (inf --> 0.355662). Saving Model...
Epoch : 1 / 5
Time to complete 1 is 0:05:51.019145
Train Loss:  1.8006 | Train Accuracy:  65.0075%
Valid Loss:  0.3557 | Valid Accuracy:  87.8100%

Validation loss has decreased (0.355662 --> 0.252315). Saving model...
Epoch : 2 / 5
Time to complete 2 is 0:05:43.938371
Train Loss:  0.2547 | Train Accuracy:  91.5775%
Valid Loss:  0.2523 | Valid Accuracy:  91.0000%

Validation loss has decreased (0.252315 --> 0.186996). Saving model...
Epoch : 3 / 5
Time to complete 3 is 0:05:44.897572
Train Loss:  0.1578 | Train Accuracy:  94.7550%
Valid Loss:  0.1870 | Valid Accuracy:  93.6000%

Validation loss has decreased (0.186996 --> 0.171614). Saving model...
Epoch : 4 / 5
Time to complete 4 is 0:05:44.071149
Train Loss:  0.0998 | Train Accuracy:  96.6750%
Valid Loss:  0.1716 | Valid Accuracy:  94.2100%

Validation loss has decreased (0.171614 --> 0.165217). Saving model...
Epoch : 5 / 5
Time to complete 5 is 0:05:44.903250
Train L

In [None]:
# Finish the active W&B run and upload any remaining logged data.
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train Acc :,▁▇▇██
Train Batch Acc :,▁▆▆▆█▇█
Train Batch Loss :,█▃▃▃▂▂▁
Train Loss :,█▂▁▁▁
Valid Acc :,▁▄▇██
Valid Batch Accuracy :,▁█
Valid Batch Loss :,█▁
Valid Loss :,█▄▂▁▁
epoch,▁▁▁▁▃▃▃▃▅▅▅▅▆▆▆▆████

0,1
Train Acc :,0.9777
Train Batch Acc :,0.98047
Train Batch Loss :,0.04563
Train Loss :,0.06725
Valid Acc :,0.9465
Valid Batch Accuracy :,0.94531
Valid Batch Loss :,0.21814
Valid Loss :,0.16522
epoch,4.0


**Wandb Link: https://wandb.ai/piyushrs/Resnet_HW7**