**Install Requirements**

In [12]:
"""!pip3 install 'torch==1.4.0'
!pip3 install 'torchvision==0.5.0'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'"""

"!pip3 install 'torch==1.4.0'\n!pip3 install 'torchvision==0.5.0'\n!pip3 install 'Pillow-SIMD'\n!pip3 install 'tqdm'"

In [0]:
import os
if not os.path.isdir('./Pacs'):
  !git clone https://github.com/lore-lml/machine-learning2020-hw3.git
  !mv 'machine-learning2020-hw3' 'Pacs'
  !rm './Pacs/hw3.ipynb'
  !rm './Pacs/README.md'

import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from Pacs.pacs_dataset import Pacs
from Pacs.dann import alexdann
from Pacs.dann import train_src, test_target, dann_train_src_target

from PIL import Image
from tqdm import tqdm

import matplotlib.pyplot as plt
%matplotlib inline

**Set Arguments**

In [0]:
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

NUM_CLASSES = 7

BATCH_SIZE = 128     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 1e-3        # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 10      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 3       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.5          # Multiplicative factor for learning rate step-down

ALPHA = 'dynamic'
BASE_FILE_PATH = "RUN_4_LR1e-3_DynAlpha_SGD_SS2_GAMMA03"

**Define Data Preprocessing**

In [0]:
transforms = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) # Normalizes tensor with mean and standard deviation
])

**Prepare Dataset**

In [16]:
ROOT = 'Pacs/PACS'

source_data = Pacs(ROOT, transform=transforms, source='photo')
target_data = Pacs(ROOT, transform=transforms, source='art_painting')

_, source_labels = source_data.get_img_with_labels()
_, target_labels = target_data.get_img_with_labels()

print(f"# classes source_data: {len(set(source_labels))}")
print(f"# classes val_set: {len(set(target_labels))}")
print(f"source_data: {len(source_data)} elements")
print(f"target_data: {len(target_data)} elements")

# classes source_data: 7
# classes val_set: 7
source_data: 1670 elements
target_data: 2048 elements


**Prepare Dataloaders**

In [0]:
source_dataloader = DataLoader(source_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
target_dataloader = DataLoader(target_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
test_dataloader = DataLoader(target_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, drop_last=False)

**Prepare Network**

In [18]:
def init_cnn_objects(model):
  
  # Define loss function
  criterion_1 = nn.CrossEntropyLoss() # for classification, we use Cross Entropy
  criterion_2 = nn.CrossEntropyLoss()
  parameters_to_optimize = model.parameters() # In this case we optimize over all the parameters of AlexNet
  
  optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
  #optimizer = optim.Adam(parameters_to_optimize, lr=LR,amsgrad=True)
  #optimizer = optim.AdamW(parameters_to_optimize, lr=LR,amsgrad=True, weight_decay=WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

  return criterion_1, criterion_2, optimizer, scheduler

dann = alexdann(pretrained=True)
criterion, _, optimizer, scheduler = init_cnn_objects(dann)
print("******* NET CREATED *******")

******* NET CREATED *******


**Training**

In [0]:
def simple_train_test(model, source_dataloader, test_dataloader, file_path=BASE_FILE_PATH):
    train_losses = []
    loss_min = -1
    
    model = model.to(DEVICE)
    cudnn.benchmark
    
    current_step = 0
    for epoch in range(NUM_EPOCHS):
        print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr()))
        cumulative_loss, current_step = train_src(model, source_dataloader, optimizer, criterion, current_step, DEVICE)
        curr_loss = cumulative_loss / len(source_dataloader)
        train_losses.append(curr_loss)
        if loss_min == -1 or loss_min > curr_loss:
            loss_min = curr_loss
            torch.save(model, f"{file_path}_best_model.pth")
        scheduler.step()
        
    model = torch.load(f"{file_path}_best_model.pth").to(DEVICE)
    accuracy = test_target(model, test_dataloader, criterion, DEVICE) / float(len(target_data))
    print(f"Accuracy on test set: {accuracy}%")
    return train_losses

In [20]:
train_losses = simple_train_test(dann, source_dataloader, test_dataloader)

Starting epoch 1/10, LR = [0.001]




Step 0, Loss_train 1.980535626411438
Step 10, Loss_train 0.3002309501171112
Starting epoch 2/10, LR = [0.001]
Step 20, Loss_train 0.16946443915367126
Starting epoch 3/10, LR = [0.001]
Step 30, Loss_train 0.09739388525485992
Starting epoch 4/10, LR = [0.00025]
Step 40, Loss_train 0.07032044231891632
Step 50, Loss_train 0.07836572825908661
Starting epoch 5/10, LR = [0.0005]
Step 60, Loss_train 0.05495484545826912
Starting epoch 6/10, LR = [0.0005]
Step 70, Loss_train 0.05710098147392273
Starting epoch 7/10, LR = [0.000125]
Step 80, Loss_train 0.04965071752667427
Step 90, Loss_train 0.05204298719763756
Starting epoch 8/10, LR = [0.00025]
Step 100, Loss_train 0.028516365215182304
Starting epoch 9/10, LR = [0.00025]
Step 110, Loss_train 0.03686026483774185
Starting epoch 10/10, LR = [6.25e-05]
Step 120, Loss_train 0.06164136528968811


100%|██████████| 16/16 [00:04<00:00,  3.84it/s]

Accuracy on test set: 0.478515625%





**Training with DANN**

In [0]:
import math
def dann_train_test(model, source_dataloader, target_dataloader, test_dataloader, 
                    class_criterion, domain_criterion, file_path=BASE_FILE_PATH):
    class_losses_y = []
    domain_losses_d = []
    accuracies = []
    loss_min = -1
    accuracy_max = 0
    count_diverge = 0
    
    model = model.to(DEVICE)
    cudnn.benchmark
    
    current_step = 0
    for epoch in range(NUM_EPOCHS):
        print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr()))
        class_loss, domain_loss, current_step = dann_train_src_target(model, source_dataloader, 
                                          target_dataloader, optimizer, class_criterion, domain_criterion, current_step,
                                          epoch, NUM_EPOCHS, alpha=ALPHA, device=DEVICE)
        
        if math.isnan(class_loss) or math.isnan(domain_loss):
          count_diverge += 1
          if count_diverge >= 3:
            print("EARLY STOPPING")
            break;
        class_losses_y.append(class_loss)
        domain_losses_d.append(domain_loss)

        """if loss_min == -1 or loss_min > class_loss:
            loss_min = class_loss
            torch.save(model, f"{file_path}_best_model_dann.pth")"""
        accuracy = test_target(model, test_dataloader, criterion, DEVICE) / float(len(target_data))
        accuracies.append(accuracy)
        print(f"Accuracy on test set: {accuracy}%")
        if accuracy_max < accuracy:
          accuracy_max = accuracy
          torch.save(model, f"{file_path}_best_model_dann.pth")
        scheduler.step()
        
    return class_losses_y, domain_losses_d, accuracies

In [22]:
dann = alexdann(pretrained=True)
class_criterion, domain_criterion, optimizer, scheduler = init_cnn_objects(dann)
class_losses_y, domain_losses_d, accuracies = dann_train_test(dann, source_dataloader, 
                          target_dataloader, test_dataloader, class_criterion, domain_criterion)

print(f"Best Accuracy on test set: {max(accuracies)}%")

Starting epoch 1/10, LR = [0.001]




Step 0
Class Loss 2.2343971729278564, Domain Loss 4.629210472106934
Step 10
Class Loss 0.3250335454940796, Domain Loss 0.5741926431655884


100%|██████████| 16/16 [00:04<00:00,  3.88it/s]


Accuracy on test set: 0.5009765625%
Starting epoch 2/10, LR = [0.001]
Step 20
Class Loss 0.1578495055437088, Domain Loss 0.2912323474884033


100%|██████████| 16/16 [00:04<00:00,  3.91it/s]

Accuracy on test set: 0.4912109375%
Starting epoch 3/10, LR = [0.001]





Step 30
Class Loss 0.1834297776222229, Domain Loss 0.47317901253700256


100%|██████████| 16/16 [00:04<00:00,  3.91it/s]

Accuracy on test set: 0.4970703125%
Starting epoch 4/10, LR = [0.00025]





Step 40
Class Loss 0.15741756558418274, Domain Loss 0.6116553544998169
Step 50
Class Loss 0.11580789089202881, Domain Loss 0.58474200963974


100%|██████████| 16/16 [00:04<00:00,  3.97it/s]

Accuracy on test set: 0.470703125%
Starting epoch 5/10, LR = [0.0005]





Step 60
Class Loss 0.1122283935546875, Domain Loss 0.611627995967865


100%|██████████| 16/16 [00:04<00:00,  3.92it/s]

Accuracy on test set: 0.46044921875%
Starting epoch 6/10, LR = [0.0005]





Step 70
Class Loss 0.2260771244764328, Domain Loss 0.8443145751953125


100%|██████████| 16/16 [00:04<00:00,  3.94it/s]

Accuracy on test set: 0.40380859375%
Starting epoch 7/10, LR = [0.000125]





Step 80
Class Loss 0.4436452388763428, Domain Loss 1.48305082321167
Step 90
Class Loss 11.512096405029297, Domain Loss 58.236915588378906


100%|██████████| 16/16 [00:04<00:00,  3.97it/s]

Accuracy on test set: 0.09814453125%
Starting epoch 8/10, LR = [0.00025]





Step 100
Class Loss 2.0667271614074707, Domain Loss 1.3512712717056274


100%|██████████| 16/16 [00:04<00:00,  3.90it/s]

Accuracy on test set: 0.2451171875%
Starting epoch 9/10, LR = [0.00025]





Step 110
Class Loss 3.6764869689941406, Domain Loss 6.883295059204102


100%|██████████| 16/16 [00:04<00:00,  3.92it/s]

Accuracy on test set: 0.13916015625%
Starting epoch 10/10, LR = [6.25e-05]





Step 120
Class Loss 4.162588119506836, Domain Loss 3.1556973457336426


100%|██████████| 16/16 [00:04<00:00,  3.93it/s]

Accuracy on test set: 0.1943359375%
Best Accuracy on test set: 0.5009765625%



