**Install requirements**

In [0]:

!pip3 install 'torch==1.4.0'
!pip3 install 'torchvision==0.5.0'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'


**Import libraries**

In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet

from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

**Set Arguments**

In [0]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 101 # 101 + 1: There is am extra Background class that should be removed 

BATCH_SIZE = 256     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 3e-2           # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 30      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 20       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10

BASE_FILE_PATH = "LR3e-2ADAM"


**Define Data Preprocessing**

In [0]:
# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # Normalizes tensor with mean and standard deviation
])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))                                    
])

**Prepare Dataset**

In [24]:
# Clone github repository with data
if not os.path.isdir('./Caltech101'):
  !git clone https://github.com/lore-lml/Caltech101-ML-HW2.git
  !mv 'Caltech101-ML-HW2' 'Caltech101'

DATA_DIR = 'Caltech101/101_ObjectCategories'
from Caltech101.caltech_dataset import Caltech
from Caltech101.pytorchtools import EarlyStopping
from torch.utils.data import random_split
from sklearn.model_selection import train_test_split
import numpy as np

# Prepare Pytorch train/test Datasets
trainVal_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

"""train_len = int(train_dataset.__len__() / 2)
val_len = train_dataset.__len__() - train_len
train_dataset, val_dataset = random_split(train_dataset, [train_len, val_len])"""

y_true = trainVal_dataset.get_labels()
train_idx, val_idx = trainVal_dataset.get_train_val_idxs()

train_dataset = Subset(trainVal_dataset, train_idx)
val_dataset = Subset(trainVal_dataset, val_idx)

# Check dataset sizes
print(f"# classes train_set: {len(set(y_true[train_idx]))}")
print(f"# classes val_set: {len(set(y_true[val_idx]))}")
print(f"# classes test_set: {len(set(test_dataset.get_labels()))}")
print('Train Dataset: {}'.format(len(train_dataset)))
print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))


# classes train_set: 101
# classes val_set: 101
# classes test_set: 101
Train Dataset: 2892
Valid Dataset: 2892
Test Dataset: 2893


**Prepare Dataloaders**

In [0]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

**Prepare Network**

In [0]:
net = alexnet() # Loading AlexNet model

# AlexNet has 1000 output neurons, corresponding to the 1000 ImageNet's classes
# We need 101 outputs for Caltech-101
net.classifier[6] = nn.Linear(4096, NUM_CLASSES) # nn.Linear in pytorch is a fully connected layer
                                                 # The convolutional layer is nn.Conv2d

# We just changed the last layer of AlexNet with a new fully connected layer with 101 outputs
# It is strongly suggested to study torchvision.models.alexnet source code

**Prepare Training**

In [0]:
# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

# Choose parameters to optimize
# To access a different set of parameters, you have to access submodules of AlexNet
# (nn.Module objects, like AlexNet, implement the Composite Pattern)
# e.g.: parameters of the fully connected layers: net.classifier.parameters()
# e.g.: parameters of the convolutional layers: look at alexnet's source code ;) 
parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet

# Define optimizer
# An optimizer updates the weights based on loss
# We use SGD with momentum
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
# optimizer = optim.Adam(parameters_to_optimize, lr=LR, weight_decay=WEIGHT_DECAY)

# Define scheduler
# A scheduler dynamically changes learning rate
# The most common schedule is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

In [0]:
def evaluate(data_loader, model, is_validation=False):
  model.train(False) # Set Network to evaluation mode
  losses = []
  cumulative_loss = .0
  running_corrects = 0
  for images, labels in tqdm(data_loader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass
    outputs = model(images)

    # Loss
    if is_validation:
      loss = criterion(outputs, labels)
      cumulative_loss += loss.item()

    # Get predictions
    _, preds = torch.max(outputs.data, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels.data).data.item()

  # Calculate Accuracy
  accuracy = running_corrects / float(len(val_dataset))

  print('Validation Accuracy: {}'.format(accuracy))
  
  return accuracy, cumulative_loss / len(data_loader)

**Train**

In [29]:
# List to store every loss of each step.
train_losses = []
val_losses = []
accuracies = []
loss_min = -1
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

cudnn.benchmark # Calling this optimizes runtime

current_step = 0
# Start iterating over the epochs
for epoch in range(NUM_EPOCHS):
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr()))

  cumulative_loss = .0
  # Iterate over the dataset
  for images, labels in train_dataloader:
  
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    net.train() # Sets module in training mode

    # PyTorch, by default, accumulates gradients after each backward pass
    # We need to manually set the gradients to zero before starting a new iteration
    optimizer.zero_grad() # Zero-ing the gradients

    # Forward pass to the network
    outputs = net(images)

    # Compute loss based on output and ground truth
    loss = criterion(outputs, labels)
    cumulative_loss += loss.item()

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss_train {}'.format(current_step, loss.item()))

    # Compute gradients for each layer and update weights
    loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on accumulated gradients

    current_step += 1

  # Evaluate Validation set and get accuracy and losses for the current epoch
  curr_accuracy, curr_val_loss = evaluate(val_dataloader, net, is_validation=True)
  print('Step {}, Loss_val {}'.format(current_step, curr_val_loss))
  # Avg of cumulative loss
  train_losses.append(cumulative_loss / len(train_dataloader))
  val_losses.append(curr_val_loss)
  accuracies.append(curr_accuracy)
  # If the accuracy reached an higher value than before, save model state
  if loss_min == -1 or loss_min > curr_val_loss:
    loss_min = curr_val_loss
    torch.save(net, f"{BASE_FILE_PATH}_best_model.pth")
  # Step the scheduler
  scheduler.step() 

print("\n************ END OF TRAINING ***************")

Starting epoch 1/30, LR = [0.03]




Step 0, Loss_train 4.615142345428467
Step 10, Loss_train 61471.31640625


100%|██████████| 12/12 [00:07<00:00,  1.59it/s]


Validation Accuracy: 0.005878284923928077
Step 11, Loss_val 299362.5286458333
Starting epoch 2/30, LR = [0.03]
Step 20, Loss_train 110148.296875


100%|██████████| 12/12 [00:07<00:00,  1.61it/s]


Validation Accuracy: 0.004495159059474412
Step 22, Loss_val 15010.596272786459
Starting epoch 3/30, LR = [0.03]
Step 30, Loss_train 3861.4423828125


100%|██████████| 12/12 [00:07<00:00,  1.62it/s]


Validation Accuracy: 0.009681881051175657
Step 33, Loss_val 4.748828490575154
Starting epoch 4/30, LR = [0.03]
Step 40, Loss_train 10824.6474609375


100%|██████████| 12/12 [00:07<00:00,  1.56it/s]


Validation Accuracy: 0.009681881051175657
Step 44, Loss_val 4.707338293393453
Starting epoch 5/30, LR = [0.03]
Step 50, Loss_train 727.183349609375


100%|██████████| 12/12 [00:07<00:00,  1.62it/s]


Validation Accuracy: 0.01313969571230982
Step 55, Loss_val 4.638735334078471
Starting epoch 6/30, LR = [0.03]
Step 60, Loss_train 891.0399780273438


100%|██████████| 12/12 [00:07<00:00,  1.61it/s]


Validation Accuracy: 0.023167358229598894
Step 66, Loss_val 4.570297837257385
Starting epoch 7/30, LR = [0.03]
Step 70, Loss_train 375.8700256347656


100%|██████████| 12/12 [00:07<00:00,  1.62it/s]


Validation Accuracy: 0.023167358229598894
Step 77, Loss_val 4.50904377301534
Starting epoch 8/30, LR = [0.03]
Step 80, Loss_train 3751.5869140625


100%|██████████| 12/12 [00:07<00:00,  1.61it/s]


Validation Accuracy: 0.023167358229598894
Step 88, Loss_val 4.455222407976787
Starting epoch 9/30, LR = [0.03]
Step 90, Loss_train 31.401046752929688


100%|██████████| 12/12 [00:07<00:00,  1.60it/s]


Validation Accuracy: 0.023167358229598894
Step 99, Loss_val 4.406830708185832
Starting epoch 10/30, LR = [0.03]
Step 100, Loss_train 4.432236671447754


KeyboardInterrupt: ignored

In [0]:
def save_results(file_name):
  with open(file_name, "w", encoding="utf-8") as f:
    f.write("train_loss,val_loss,accuracy\n")
    for tl,vl,accuracy in zip(train_losses, val_losses, accuracies):
      f.write(f"{tl},{vl},{accuracy}\n")

save_results(f"{BASE_FILE_PATH}_data.csv")
print("*************** DATA SAVED *************")

In [0]:

epochs = np.arange(1, NUM_EPOCHS+1)
plt.figure()
# plt.scatter(epochs, train_losses, c="darkorange", s=20)
plt.plot(epochs, train_losses, zorder=-1)
plt.plot(epochs, val_losses)
plt.xlabel('Epoch')
plt.ylabel('Train Loss')

plt.figure()
plt.scatter(epochs, accuracies, c="darkorange", s=20)
plt.plot(epochs, accuracies, zorder=-1)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')

plt.show()

**Test**

In [0]:
best_model = torch.load(f"{BASE_FILE_PATH}_best_model.pth").to(DEVICE)
best_model.eval()
accuracy,_ = evaluate(test_dataloader, best_model, is_validation=False)

with open(f"{BASE_FILE_PATH}_accuracy.txt", "w", encoding="utf-8") as f:
  f.write(f"LR = {LR}\nAccuracy on test = {accuracy}\nLoss_min = {loss_min}")
