**Import libraries**

In [None]:
import os

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.backends import cudnn
from torch.utils.data import Subset, DataLoader
from torchvision import transforms
from torchvision.models import alexnet
from torchvision.models import resnet34
from tqdm import tqdm
%matplotlib inline

**Set Arguments**

In [None]:
# Use the GPU when one is visible; otherwise fall back to the CPU so that every
# `.to(DEVICE)` call in the notebook still works on a machine without CUDA.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Number of outputs of the classification head (101 object categories).
# The original note mentions an extra Background class (101 + 1) that has to be removed.
# NOTE(review): presumably the Caltech dataset class filters that class out -- confirm.
NUM_CLASSES = 101

BATCH_SIZE = 256     # Larger batch sizes allow larger learning rates. An empirical heuristic suggests that, when
                     # changing the batch size, the learning rate should change by the same factor to get comparable results

LR = 5e-3            # The initial learning rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization strength handed to the optimizer

NUM_EPOCHS = 15      # Total number of training epochs (iterations over the dataset)
STEP_SIZE = 10       # How many epochs before decreasing the learning rate (step-down policy)
GAMMA = 0.1          # Multiplicative factor for the learning rate step-down

LOG_FREQUENCY = 5    # The training loop prints the batch loss every LOG_FREQUENCY optimisation steps

# Prefix of every file written by this notebook (checkpoints, CSV curves, summary).
# NOTE(review): the label says LR3e-5 / ADAMW / SS15 while the values above are LR=5e-3 and
# STEP_SIZE=10 and the optimizer built in loss_optmizer_scheduler is SGD -- confirm the label.
BASE_FILE_PATH = "DA_RUN18_LR3e-5_ADAMW_EP15_SS15_G01_ALL_TRANSF"


**Define Data Preprocessing**

In [None]:
# Training-phase preprocessing. No augmentation is active here: the pipeline
# resizes, centre-crops, converts to a tensor and normalises.
_train_steps = [
    transforms.Resize(256),      # short side of the PIL image becomes 256
    transforms.CenterCrop(224),  # central square patch; torchvision's AlexNet needs a 224x224 input,
                                 # so any alternative pipeline must still end up at 224x224
    transforms.ToTensor(),       # PIL Image -> torch.Tensor
    # per-channel mean and standard deviation used for normalisation
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
]
train_transform = transforms.Compose(_train_steps)

# The two string literals below are inert expressions: they only keep the
# augmentation pipelines that were tried ("DATA AUG 1" and "DATA AUG 2").
"""
# DATA AUG 1
train_transform = transforms.Compose([
                                      transforms.RandomApply([transforms.ColorJitter()], p=0.3),
                                      transforms.RandomApply([transforms.Grayscale(3)], p=0.3),
                                      transforms.Resize(256),
                                      transforms.RandomCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
"""
#DATA AUG 2
"""
train_transform = transforms.Compose([
                                      transforms.RandomChoice([
                                                               transforms.RandomHorizontalFlip(p=.5),
                                                               transforms.RandomRotation(45)
                                                               ]),
                                      transforms.Resize(256),
                                      transforms.RandomCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
"""

# Evaluation-phase preprocessing: deterministic resize + centre crop, same normalisation.
_eval_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
]
eval_transform = transforms.Compose(_eval_steps)

**Prepare Dataset**

In [None]:
# Clone github repository with data
# The clone is skipped when ./Caltech101 already exists, so re-running the cell is cheap.
# Only the Caltech101 folder is kept; the rest of the cloned repository is deleted.
if not os.path.isdir('./Caltech101'):
  !git clone https://github.com/lore-lml/machine-learning2020-hw2.git
  !mv 'machine-learning2020-hw2/Caltech101' '.'
  !rm -rf 'machine-learning2020-hw2'

DATA_DIR = 'Caltech101/101_ObjectCategories'
# This import has to come after the clone above: the Caltech101 package only
# exists on disk once the repository has been fetched.
from Caltech101.caltech_dataset import Caltech
import numpy as np

# Prepare Pytorch train/test Datasets
# NOTE(review): Caltech is a project-local class; presumably `src` selects the split and
# `eval_transform` is the transform applied to the validation part -- confirm in caltech_dataset.py.
trainVal_dataset = Caltech(DATA_DIR, src='train',  transform=train_transform, eval_transform=eval_transform)
test_dataset = Caltech(DATA_DIR, src='test', transform=eval_transform)

# Inert string literal: an earlier 50/50 random_split of the training data, kept for reference only.
"""train_len = int(train_dataset.__len__() / 2)
val_len = train_dataset.__len__() - train_len
train_dataset, val_dataset = random_split(train_dataset, [train_len, val_len])"""

# The train/validation split is delegated to the dataset object itself.
train_dataset, val_dataset = trainVal_dataset.get_train_validation_set()

# Check dataset sizes
# The first three lines print the number of distinct labels per split, the last three the sample counts.
print(f"# classes train_set: {len(set(train_dataset.get_labels()))}")
print(f"# classes val_set: {len(set(val_dataset.get_labels()))}")
print(f"# classes test_set: {len(set(test_dataset.get_labels()))}")
print('Train Dataset: {}'.format(len(train_dataset)))
print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))


**Prepare Dataloaders**

In [None]:
# One DataLoader per split. Only the training loader shuffles and drops the last
# incomplete batch; the two evaluation loaders serve every sample in order.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    drop_last=True,
)
val_dataloader, test_dataloader = (
    DataLoader(split, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
    for split in (val_dataset, test_dataset)
)

**Prepare Network**

In [None]:
def get_alexnet(pretrained=False):
  """Build a torchvision AlexNet whose last classifier layer outputs NUM_CLASSES scores.

  Args:
    pretrained: forwarded unchanged to torchvision's `alexnet` constructor.

  Returns:
    The AlexNet module with `classifier[6]` replaced by a new `nn.Linear`.
  """
  net = alexnet(pretrained=pretrained)
  # The stock network ends in a 1000-way layer (one output per ImageNet class);
  # Caltech-101 needs NUM_CLASSES outputs instead.
  # The input width is read from the layer being replaced rather than hard-coded,
  # the same way get_resnet sizes its new head.
  head_inputs = net.classifier[6].in_features
  net.classifier[6] = nn.Linear(head_inputs, NUM_CLASSES)
  return net

def get_resnet(pretrained=False):
    """Build a torchvision ResNet-34 whose final `fc` layer outputs NUM_CLASSES scores.

    Args:
        pretrained: forwarded unchanged to torchvision's `resnet34` constructor.

    Returns:
        The ResNet-34 module with `fc` replaced by a new `nn.Linear` of the same input width.
    """
    model = resnet34(pretrained=pretrained)
    in_dim = model.fc.in_features  # keep the input width of the layer being replaced
    model.fc = nn.Linear(in_dim, NUM_CLASSES)
    return model

def loss_optmizer_scheduler(model):
  """Create the loss function, optimizer and learning-rate scheduler for `model`.

  Args:
    model: the nn.Module whose parameters are optimized.

  Returns:
    A `(criterion, optimizer, scheduler)` tuple: cross-entropy loss, SGD configured
    from LR / MOMENTUM / WEIGHT_DECAY, and a StepLR configured from STEP_SIZE / GAMMA.
  """
  # Classification task -> cross entropy
  loss_fn = nn.CrossEntropyLoss()

  # All parameters of the model are handed to the optimizer. A subset can be
  # selected through the submodules instead (e.g. model.classifier.parameters()).
  sgd = optim.SGD(
      model.parameters(),
      lr=LR,
      momentum=MOMENTUM,
      weight_decay=WEIGHT_DECAY,
  )
  # Alternatives that were tried in place of SGD:
  #   optim.Adam(model.parameters(), lr=LR, amsgrad=True)
  #   optim.AdamW(model.parameters(), lr=LR, amsgrad=True, weight_decay=WEIGHT_DECAY)

  # Step-down schedule: the learning rate is multiplied by GAMMA every STEP_SIZE epochs
  step_down = optim.lr_scheduler.StepLR(sgd, step_size=STEP_SIZE, gamma=GAMMA)

  return loss_fn, sgd, step_down

**Prepare Training**

In [None]:
def validate(net, val_loader, criterion, device=DEVICE):
    """Evaluate `net` on `val_loader` and return its mean loss and accuracy.

    Args:
        net: the model to evaluate; it is switched to eval mode.
        val_loader: DataLoader yielding `(images, labels)` batches.
        criterion: loss function called as `criterion(outputs, labels)`.
        device: device the batches are moved to before the forward pass.

    Returns:
        `(mean_loss, accuracy)`: the sum of the per-batch losses divided by the number
        of batches, and the number of correct predictions divided by the number of
        samples in `val_loader.dataset`.
    """
    net.eval()
    cumulative_loss = .0
    running_corrects = 0
    # No gradients are needed for evaluation; disabling autograd avoids building
    # a graph for every batch.
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = net(images)

            # Predicted class = index of the largest score along dim 1
            _, preds = torch.max(outputs, 1)
            running_corrects += torch.sum(preds == labels).item()

            cumulative_loss += criterion(outputs, labels).item()

    # Divide by the size of the dataset behind the loader that was passed in, so the
    # accuracy is also right for loaders other than the notebook's validation loader.
    return cumulative_loss / len(val_loader), running_corrects / float(len(val_loader.dataset))

def test(net, test_loader, device=DEVICE):
    """Compute the accuracy of `net` on `test_loader`.

    Args:
        net: the model to evaluate; it is switched to eval mode.
        test_loader: DataLoader yielding `(images, labels)` batches.
        device: device the batches are moved to before the forward pass.

    Returns:
        The number of correct predictions divided by the number of samples in
        `test_loader.dataset`.
    """
    net = net.eval()  # eval mode is set once; it does not change between batches
    running_corrects = 0
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for images, labels in tqdm(test_loader):
            images = images.to(device)
            labels = labels.to(device)

            outputs = net(images)
            # Predicted class = index of the largest score along dim 1
            _, preds = torch.max(outputs, 1)
            running_corrects += torch.sum(preds == labels).item()

    # Use the dataset behind the loader that was passed in instead of a notebook global.
    return running_corrects / float(len(test_loader.dataset))


def train_batch(net, train_loader, criterion, optimizer, current_step, device=DEVICE):
    """Train `net` for one full pass over `train_loader`.

    Args:
        net: the model to train; it is switched to train mode.
        train_loader: DataLoader yielding `(images, labels)` batches.
        criterion: loss function called as `criterion(outputs, labels)`.
        optimizer: optimizer stepped once per batch.
        current_step: number of optimisation steps done before this pass.
        device: device the batches are moved to.

    Returns:
        `(mean_loss, current_step)`: the sum of the per-batch losses divided by the
        number of batches, and the step counter after this pass.
    """
    net.train()
    loss_sum = .0
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Clear the gradients left over from the previous step before the forward pass
        optimizer.zero_grad()
        batch_loss = criterion(net(batch_images), batch_labels)
        loss_sum += batch_loss.item()

        # Log every LOG_FREQUENCY steps, but never on the very first step
        should_log = current_step != 0 and current_step % LOG_FREQUENCY == 0
        if should_log:
            print('\t\tTrain step - Step {}, Loss {}'.format(current_step, batch_loss.item()))

        batch_loss.backward()
        optimizer.step()
        current_step += 1

    return loss_sum / len(train_loader), current_step

In [None]:
def train_model(net, criterion, optmizer, scheduler, max_epoch=NUM_EPOCHS, device=DEVICE, file_path=BASE_FILE_PATH):
    """Train `net`, checkpoint the epoch with the lowest validation loss and test it.

    The training, validation and test batches come from the notebook-level
    `train_dataloader`, `val_dataloader` and `test_dataloader`.

    Args:
        net: the model to train; it is moved to `device`.
        criterion: loss function used for training and validation.
        optmizer: optimizer stepped during training (the parameter keeps its
            original spelling so existing keyword callers still work).
        scheduler: learning-rate scheduler, stepped once per epoch.
        max_epoch: maximum number of epochs.
        device: device used for training and evaluation.
        file_path: prefix of the checkpoint file `<file_path>_best_model_finetuning.pth`.

    Returns:
        `(train_losses, val_losses, val_accuracies, test_acc)`: three lists with one
        entry per completed epoch, and the test accuracy of the reloaded checkpoint.
    """
    import math
    import time

    net = net.to(device)
    checkpoint_path = f"{file_path}_best_model_finetuning.pth"

    # Remaining NaN validation losses tolerated before training is stopped.
    # NOTE(review): starts at 3 but is reset to 10 after any non-NaN loss -- confirm which is intended.
    tolerance = 3

    train_losses = []
    val_losses = []
    val_accuracies = []

    min_val_loss = -1  # sentinel meaning "no checkpoint written yet"
    current_step = 0
    start_time = time.time()
    for epoch in range(max_epoch):
        print(f"STARTING EPOCH {epoch+1} - LR={scheduler.get_last_lr()}...")
        # Step the optimizer supplied by the caller, not whatever `optimizer`
        # happens to be bound at notebook level.
        curr_train_loss, current_step = train_batch(
            net, train_dataloader, criterion, optmizer, current_step, device)

        train_losses.append(curr_train_loss)
        scheduler.step()

        curr_val_loss, curr_val_accuracy = validate(net, val_dataloader, criterion, device)
        val_losses.append(curr_val_loss)
        val_accuracies.append(curr_val_accuracy)

        print(f"\tRESULT EPOCH {epoch+1}:")
        print(f"\t\tTrain Loss: {curr_train_loss}")
        print(f"\t\tVal Loss: {curr_val_loss} - Val Accuracy: {curr_val_accuracy}\n")

        # A NaN validation loss uses up one unit of tolerance; any other value resets it.
        if math.isnan(curr_val_loss):
            tolerance -= 1
        else:
            tolerance = 10

        if tolerance == 0:
            print(f"EARLY STOPPING\n")
            break

        # Checkpoint on the first epoch and whenever the validation loss improves.
        if min_val_loss == -1 or min_val_loss > curr_val_loss:
            min_val_loss = curr_val_loss
            torch.save(net, checkpoint_path)

    # Evaluate the best checkpoint, not the weights of the last epoch.
    # NOTE(review): the whole module is pickled, so loading relies on torch.load being allowed
    # to unpickle arbitrary objects; recent PyTorch releases default to weights_only=True --
    # confirm against the installed version.
    net = torch.load(checkpoint_path).to(device)
    test_acc = test(net, test_dataloader, device)
    print(f"TEST ACCURACY: {test_acc}")

    total_time = int(time.time() - start_time)
    # Named so that the builtin `min` is not shadowed inside this function.
    minutes, seconds = divmod(total_time, 60)
    print(f"\nTotal time: {minutes} min {seconds} sec\n")

    return train_losses, val_losses, val_accuracies, test_acc


**Train**

In [None]:
# First run: every layer of an AlexNet created with pretrained=True is trained.
net = get_alexnet(pretrained=True)
criterion, optimizer, scheduler = loss_optmizer_scheduler(net)

# The "_scratch" names hold the per-epoch curves and the test accuracy of this run.
(
    train_losses_scratch,
    val_losses_scratch,
    accuracies_scratch,
    test_scratch_acc,
) = train_model(net, criterion, optimizer, scheduler, file_path=BASE_FILE_PATH)

In [None]:
def save_results(file_name, train_losses, val_losses, accuracies):
  """Write per-epoch metrics to `file_name` as a three-column CSV.

  The file is overwritten. It starts with the header `train_loss,val_loss,accuracy`
  followed by one row per epoch; rows stop at the shortest of the three sequences.

  Args:
    file_name: path of the CSV file to write.
    train_losses: training loss per epoch.
    val_losses: validation loss per epoch.
    accuracies: validation accuracy per epoch.
  """
  with open(file_name, "w", encoding="utf-8") as out:
    out.write("train_loss,val_loss,accuracy\n")
    per_epoch = zip(train_losses, val_losses, accuracies)
    out.writelines(f"{tl},{vl},{acc}\n" for tl, vl, acc in per_epoch)

# Persist the curves of the first run next to the other artefacts of this notebook.
save_results(
    f"{BASE_FILE_PATH}_data.csv",
    train_losses_scratch,
    val_losses_scratch,
    accuracies_scratch,
)
print("*************** DATA SAVED *************")

In [None]:

# One x value per recorded epoch. train_model can stop before NUM_EPOCHS, and
# plotting against a fixed-length axis would then fail on mismatched lengths.
epochs = np.arange(1, len(train_losses_scratch) + 1)

# Training vs. validation loss
plt.figure()
plt.plot(epochs, train_losses_scratch, label="train")
plt.plot(epochs, val_losses_scratch, label="val")
plt.legend(loc="upper right")
plt.xlabel('Epoch')
plt.ylabel('Loss')

# Validation accuracy (markers drawn on top of the line)
plt.figure()
plt.scatter(epochs, accuracies_scratch, c="darkorange", s=20)
plt.plot(epochs, accuracies_scratch, zorder=-1)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')

plt.show()

**Test**

In [None]:
# Start the summary file from scratch ("w" truncates) with the outcome of the first run.
# NOTE(review): the header says "FROM SCRATCH" although the network of this run is created
# with pretrained=True -- confirm the wording.
with open(f"{BASE_FILE_PATH}_final_result.txt", "w", encoding="utf-8") as f:
  f.writelines((
      "*************** TRAINING FROM SCRATCH ***************\n",
      f"LR = {LR}\nAccuracy on test = {test_scratch_acc}\nLoss_min = {min(val_losses_scratch)}\n\n",
  ))


**Freezing the convolutional layers**

In [None]:
def is_layers_unfreezed(model):
  """Print the `requires_grad` flag of every parameter of `model`, one per line.

  Args:
    model: the nn.Module to inspect.
  """
  flags = (p.requires_grad for p in model.parameters())
  for flag in flags:
    print(flag)

In [None]:
# Second run: AlexNet created with pretrained=True, convolutional part frozen.
net_conv_freezed = get_alexnet(True)

# Turn off gradients for every parameter of the `features` (convolutional) submodule
conv_layers = net_conv_freezed.features.parameters()
for par in conv_layers:
  par.requires_grad = False

criterion, optimizer, scheduler = loss_optmizer_scheduler(net_conv_freezed)

# Print the requires_grad flag of each parameter to check the freeze
is_layers_unfreezed(net_conv_freezed)

# File prefix for the artefacts of this run
path = f"{BASE_FILE_PATH}_freezing_conv"


In [None]:
# Train with the convolutional layers frozen. The checkpoint is written under this
# run's own prefix (`path`, set when the network was prepared), so it does not
# overwrite the best-model file of the first run.
(
    train_losses_conv_freezed,
    val_losses_conv_freezed,
    accuracies_conv_freezed,
    test_acc_conv_freezed,
) = train_model(net_conv_freezed, criterion, optimizer, scheduler, file_path=path)

In [None]:
# One x value per recorded epoch. train_model can stop before NUM_EPOCHS, and
# plotting against a fixed-length axis would then fail on mismatched lengths.
epochs = np.arange(1, len(train_losses_conv_freezed) + 1)

# Training vs. validation loss
plt.figure()
plt.plot(epochs, train_losses_conv_freezed, label="train")
plt.plot(epochs, val_losses_conv_freezed, label="val")
plt.legend(loc="upper right")
plt.xlabel('Epoch')
plt.ylabel('Loss')

# Validation accuracy (markers drawn on top of the line)
plt.figure()
plt.scatter(epochs, accuracies_conv_freezed, c="darkorange", s=20)
plt.plot(epochs, accuracies_conv_freezed, zorder=-1)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')

plt.show()

In [None]:
# Per-epoch curves of the conv-frozen run, stored under that run's own prefix
save_results(
    f"{path}_data.csv",
    train_losses_conv_freezed,
    val_losses_conv_freezed,
    accuracies_conv_freezed,
)

# Append ("a") this run's outcome to the shared summary file
with open(f"{BASE_FILE_PATH}_final_result.txt", "a", encoding="utf-8") as f:
  f.writelines((
      "*************** TRAINING ONLY FULL ***************\n",
      f"LR = {LR}\nAccuracy on test = {test_acc_conv_freezed}\nLoss_min = {min(val_losses_conv_freezed)}\n\n",
  ))

print("*************** DATA SAVED *************")

**Freezing the fully connected layers**

In [None]:
# Third run: AlexNet created with pretrained=True, classifier part frozen.
net_fc_freezed = get_alexnet(True)

# Turn off gradients for every parameter of the `classifier` (fully connected) submodule.
# NOTE(review): this also freezes classifier[6], the new NUM_CLASSES-way layer that
# get_alexnet has just created -- confirm that the output layer is meant to stay untrained.
fc = net_fc_freezed.classifier.parameters()
for par in fc:
  par.requires_grad = False

criterion, optimizer, scheduler = loss_optmizer_scheduler(net_fc_freezed)

# Print the requires_grad flag of each parameter to check the freeze
is_layers_unfreezed(net_fc_freezed)

# File prefix for the artefacts of this run
path = f"{BASE_FILE_PATH}_freezing_fc"

In [None]:
# Train with the fully connected layers frozen. The checkpoint is written under this
# run's own prefix (`path`, set when the network was prepared), so it does not
# overwrite the best-model file of another run.
(
    train_losses_fc_freezed,
    val_losses_fc_freezed,
    accuracies_fc_freezed,
    test_acc_fc_freezed,
) = train_model(net_fc_freezed, criterion, optimizer, scheduler, file_path=path)

In [None]:
# One x value per recorded epoch. train_model can stop before NUM_EPOCHS, and
# plotting against a fixed-length axis would then fail on mismatched lengths.
epochs = np.arange(1, len(train_losses_fc_freezed) + 1)

# Training vs. validation loss
plt.figure()
plt.plot(epochs, train_losses_fc_freezed, label="train")
plt.plot(epochs, val_losses_fc_freezed, label="val")
plt.legend(loc="upper right")
plt.xlabel('Epoch')
plt.ylabel('Loss')

# Validation accuracy (markers drawn on top of the line)
plt.figure()
plt.scatter(epochs, accuracies_fc_freezed, c="darkorange", s=20)
plt.plot(epochs, accuracies_fc_freezed, zorder=-1)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')

plt.show()

In [None]:
# Per-epoch curves of the fc-frozen run, stored under that run's own prefix
save_results(f"{path}_data.csv", train_losses_fc_freezed, val_losses_fc_freezed, accuracies_fc_freezed)

# Append ("a") this run's outcome to the shared summary file.
# In this run the classifier is frozen and only the convolutional layers are trained,
# so the section header must differ from the "ONLY FULL" section of the conv-frozen run.
with open(f"{BASE_FILE_PATH}_final_result.txt", "a", encoding="utf-8") as f:
  f.write("*************** TRAINING ONLY CONV ***************\n")
  f.write(f"LR = {LR}\nAccuracy on test = {test_acc_fc_freezed}\nLoss_min = {min(val_losses_fc_freezed)}\n\n")

print("*************** DATA SAVED *************")

# Compare validation losses and accuracies of the unfrozen, conv-frozen and fc-frozen networks

In [None]:
# Compare the three runs. Each curve gets its own x axis (one value per recorded
# epoch): any run may have stopped before NUM_EPOCHS, and a shared fixed-length
# axis would then fail on mismatched lengths.

# Validation loss
plt.figure()
for series, label in (
    (val_losses_scratch, "unfreezed"),
    (val_losses_conv_freezed, "only linear"),
    (val_losses_fc_freezed, "only conv"),
):
    plt.plot(np.arange(1, len(series) + 1), series, label=label)
plt.legend(loc="upper right")
plt.xlabel('Epoch')
plt.ylabel('Loss')

# Validation accuracy
plt.figure()
for series, label in (
    (accuracies_scratch, "unfreezed"),
    (accuracies_conv_freezed, "conv freezed"),
    (accuracies_fc_freezed, "fc freezed"),
):
    plt.plot(np.arange(1, len(series) + 1), series, label=label)
plt.legend(loc="lower right")
plt.xlabel('Epoch')
plt.ylabel('Accuracy')

plt.show()