
**Install requirements**

In [22]:
# Install the dependencies with pip via IPython shell escapes.
# torch and torchvision are pinned to exact versions; Pillow-SIMD and tqdm are not pinned.
!pip3 install 'torch==1.3.1'
!pip3 install 'torchvision==0.4.2'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'



**Import libraries**

In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet

from PIL import Image
from tqdm import tqdm

import torch.hub
from torch.hub import load_state_dict_from_url
from torch.autograd import Function

import copy

**Set Arguments**

In [0]:
DEVICE = 'cuda' # Device the model and batches are moved to: 'cuda' or 'cpu'

NUM_CLASSES = 7 # Number of object categories; matches the 7-way classifier head built in dann()

BATCH_SIZE = 256     # Higher batch sizes allow for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, the learning rate should change by the same factor to have comparable results

LR = 1e-3            # The initial learning rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # L2 regularization strength passed to the optimizer

GAMMA = 0.1          # Multiplicative factor for the learning rate step-down

LOG_FREQUENCY = 5    # The training loop prints the losses every LOG_FREQUENCY steps

**Define Data Preprocessing**

In [0]:
# Preprocessing pipeline applied to every image of every domain.
# The steps run in order; the final tensor is what the network consumes.
transf = transforms.Compose([
    # Resize so that the shorter side of the PIL image is 256 pixels
    transforms.Resize(256),
    # Keep the central 224x224 patch: torchvision's AlexNet layout expects 224x224 inputs,
    # so any alternative transformation must still end up at this size
    transforms.CenterCrop(224),
    # PIL Image -> torch.Tensor
    transforms.ToTensor(),
    # Per-channel normalization (the usual ImageNet channel means / standard deviations)
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])


**Prepare Dataset**

In [26]:
# Clone the GitHub repository that holds the data, unless the folder is already present
if not os.path.isdir('./Homework3-PACS'):
  !git clone https://github.com/MachineLearning2020/Homework3-PACS

# One image folder per domain, relative to the cloned repository
PHOTO_DIR = 'Homework3-PACS/PACS/photo'
ART_DIR = 'Homework3-PACS/PACS/art_painting'
CARTOON_DIR = 'Homework3-PACS/PACS/cartoon'
SKETCH_DIR = 'Homework3-PACS/PACS/sketch'

# Build one ImageFolder dataset per domain; all of them share the same preprocessing (transf).
# ImageFolder derives the class labels from the sub-directory names of each domain folder.
photo_dataset = torchvision.datasets.ImageFolder(PHOTO_DIR, transform=transf)
art_dataset = torchvision.datasets.ImageFolder(ART_DIR, transform=transf)
cartoon_dataset = torchvision.datasets.ImageFolder(CARTOON_DIR, transform=transf)
sketch_dataset = torchvision.datasets.ImageFolder(SKETCH_DIR, transform=transf)

# Sanity check: print the number of images found in each domain
print('Photo Dataset: {}'.format(len(photo_dataset)))
print('Art Dataset: {}'.format(len(art_dataset)))
print('Cartoon Dataset: {}'.format(len(cartoon_dataset)))
print('Sketch Dataset: {}'.format(len(sketch_dataset)))

Photo Dataset: 1670
Art Dataset: 2048
Cartoon Dataset: 2344
Sketch Dataset: 3929


**Prepare Dataloaders**

In [0]:
# Every domain gets an identically configured DataLoader:
#   - batches of BATCH_SIZE images, reshuffled on every pass
#   - 4 worker processes loading data in parallel
#   - drop_last=True, so a trailing incomplete batch is discarded and all batches have BATCH_SIZE items
_LOADER_KWARGS = dict(batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)

photo_dataloader = DataLoader(photo_dataset, **_LOADER_KWARGS)
art_dataloader = DataLoader(art_dataset, **_LOADER_KWARGS)
cartoon_dataloader = DataLoader(cartoon_dataset, **_LOADER_KWARGS)
sketch_dataloader = DataLoader(sketch_dataset, **_LOADER_KWARGS)

**Prepare Network**

In [0]:
class ReverseLayerF(Function):
    """Gradient reversal layer.

    The forward pass returns its input unchanged. The backward pass negates the
    incoming gradient and scales it by ``alpha``. Use it through
    ``ReverseLayerF.apply(x, alpha)``.
    """

    @staticmethod
    def forward(ctx, x, alpha):
        # Remember the scaling factor; it is only needed when gradients flow back
        ctx.alpha = alpha
        # view_as(x) yields a tensor with the same values and shape as x
        identity = x.view_as(x)
        return identity

    @staticmethod
    def backward(ctx, grad_output):
        # Scale the gradient by alpha and flip its sign
        reversed_grad = grad_output.mul(ctx.alpha).neg()
        # One return value per forward() input: alpha receives no gradient
        return reversed_grad, None

class DANN(nn.Module):
    """AlexNet-style convolutional network with two heads on shared features.

    * ``classifier`` - the label predictor, producing ``num_classes`` scores.
    * ``GD`` - the domain discriminator, producing 2 scores. It is fed through
      a gradient reversal layer, so the gradients it sends back into
      ``features`` are negated and scaled by ``alpha``.

    Both heads take the flattened feature map, which must contain
    ``256 * 6 * 6`` values per image (the case for 224x224 inputs).

    Args:
        num_classes (int): number of outputs of the label predictor.
            Defaults to 1000.
    """

    def __init__(self, num_classes=1000):
        super(DANN, self).__init__()
        # Shared convolutional feature extractor
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Label predictor. The last layer honours num_classes (it used to be
        # hard-coded to 1000, which silently ignored the constructor argument).
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )
        # Domain discriminator: same layout as the classifier, but with a 2-way output
        self.GD = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 2),
        )

    def forward(self, x, alpha=None):
        """Run one of the two heads on the shared features.

        Args:
            x: batch of input images.
            alpha: when ``None`` the label predictor is used; otherwise the
                domain discriminator is used and ``alpha`` is the scaling
                factor of the gradient reversal layer.

        Returns:
            Class scores (``alpha is None``) or domain scores (``alpha`` given),
            one row per input image.
        """
        features = self.features(x)
        # Flatten the features: one row per image
        features = features.view(features.size(0), -1)
        # If we pass alpha, we can assume we are training the discriminator
        if alpha is not None:
            # Gradient reversal layer (backward gradients will be reversed)
            reverse_feature = ReverseLayerF.apply(features, alpha)
            return self.GD(reverse_feature)
        # If we don't pass alpha, we assume we are training with supervision
        return self.classifier(features)


def dann(pretrained=False, progress=True, num_classes=7, **kwargs):
    r"""Build a :class:`DANN` model, optionally initialised from ImageNet AlexNet weights.

    The backbone follows the AlexNet architecture from the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.

    Args:
        pretrained (bool): If True, loads the torchvision AlexNet checkpoint into
            the feature extractor and the classifier, and initialises the first
            two linear layers of the domain discriminator from the classifier.
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int): number of outputs of the label predictor. Defaults to 7.
            The final classifier layer is always replaced by a freshly initialised
            layer of this size, with or without pretrained weights.
        **kwargs: forwarded to the :class:`DANN` constructor.

    Returns:
        The constructed model.
    """
    # num_classes is deliberately NOT forwarded: the model must be built with its
    # default classifier size so that the checkpoint tensors match in shape.
    model = DANN(**kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url('https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
                                              progress=progress)
        # strict=False: the checkpoint has no entries for the discriminator (GD)
        model.load_state_dict(state_dict, strict=False)

        # Start the discriminator's hidden layers from the pretrained classifier weights.
        # load_state_dict copies the values, so the two heads do not share storage.
        for layer_idx in (1, 4):
            model.GD[layer_idx].load_state_dict(model.classifier[layer_idx].state_dict())

    # Swap in a new output layer with the requested number of classes
    model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)
    return model


**Prepare training**

In [64]:
# Enable the cuDNN auto-tuner. A bare `cudnn.benchmark` expression only reads the flag
# and has no effect; it has to be assigned.
cudnn.benchmark = True

# Hyperparameters for grid search
NUM_EPOCHS = 15      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 10      # How many epochs before decreasing learning rate (if using a step-down policy)
ALPHA = 0.8         # Scaling factor passed to the gradient reversal layer

transfer_set = 'sketch' # can be 'cartoon' or 'sketch'
DA_ENABLED = True       # When False, only the classifier step is run during training

source_dataloader = photo_dataloader
test_dataloader = art_dataloader

# Resolve the target-domain loader, failing early with a clear message on a bad name
target_dataloaders = {'cartoon': cartoon_dataloader, 'sketch': sketch_dataloader}
if transfer_set not in target_dataloaders:
  raise ValueError("transfer_set must be one of {}, got {!r}".format(sorted(target_dataloaders), transfer_set))
target_dataloader = target_dataloaders[transfer_set]

net = dann(pretrained=True).to(DEVICE) # Loading model
criterion = nn.CrossEntropyLoss()
parameters_to_optimize = net.parameters() # All parameters are optimized: features, classifier and discriminator
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

# Each training step consumes one source and one target batch, so the shorter loader bounds an epoch
nr_batches = min(len(source_dataloader), len(target_dataloader))
print("Number of batches: %d" % nr_batches)
            

Number of batches: 6


**Train**

In [65]:
# Start iterating over the epochs
for epoch in range(NUM_EPOCHS):

  net.train(True)

  # Read the learning rate(s) actually in use straight from the optimizer
  current_lr = [group['lr'] for group in optimizer.param_groups]
  print('\nStarting epoch {}/{}, LR = {}  ALPHA = [{}] TARGET DATASET = [{}]'.format(epoch+1,
                                NUM_EPOCHS, current_lr, ALPHA, transfer_set))

  # Walk both loaders in lock-step. zip() stops at the shorter one, and each loader is
  # iterated once per epoch. Calling next(iter(loader)) inside the loop would instead
  # rebuild the iterator (and its worker processes) at every step and only ever return
  # the first batch of a fresh shuffle.
  paired_batches = zip(source_dataloader, target_dataloader)
  for i, ((source_images, source_labels), (target_images, _)) in enumerate(paired_batches):

    optimizer.zero_grad() # Zero-ing the gradients

    source_images = source_images.to(DEVICE)
    source_labels = source_labels.to(DEVICE)
    target_images = target_images.to(DEVICE) # target labels are never used for training

    log_step = (i % LOG_FREQUENCY == 0)

    # STEP 1: train the classifier on labelled SOURCE data
    outputs = net(source_images)
    loss_class = criterion(outputs, source_labels)
    if log_step:
      print('Step {}, Loss Classifier {}'.format(i+1, loss_class.item()))
    loss_class.backward()  # backward pass: computes gradients

    if DA_ENABLED:
      # STEP 2: train the discriminator: forward SOURCE data to Gd
      outputs = net.forward(source_images, alpha=ALPHA)
      # source's label is 0; sized from the actual batch rather than BATCH_SIZE
      labels_discr_source = torch.zeros(source_images.size(0), dtype=torch.int64, device=DEVICE)
      loss_discr_source = criterion(outputs, labels_discr_source)
      if log_step:
        print('Step {}, Loss Distriminator Source {}'.format(i+1, loss_discr_source.item()))
      loss_discr_source.backward()

      # STEP 3: train the discriminator: forward TARGET data to Gd
      outputs = net.forward(target_images, alpha=ALPHA)
      # target's label is 1
      labels_discr_target = torch.ones(target_images.size(0), dtype=torch.int64, device=DEVICE)
      loss_discr_target = criterion(outputs, labels_discr_target)
      if log_step:
        print('Step {}, Loss Discriminator Target {}'.format(i+1, loss_discr_target.item()))
      loss_discr_target.backward()    # gradients accumulate on top of the previous steps

    optimizer.step() # update weights based on accumulated gradients

  # Step the scheduler
  scheduler.step()

# now train is finished, evaluate the model on the target dataset
net.train(False) # Set Network to evaluation mode

running_corrects = 0
num_evaluated = 0  # images actually seen (the loader may drop a trailing partial batch)
with torch.no_grad():  # no gradients are needed for evaluation
  for images, labels in target_dataloader:
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    outputs = net(images)
    _, preds = torch.max(outputs, 1)
    running_corrects += torch.sum(preds == labels).item()
    num_evaluated += labels.size(0)

# Calculate Accuracy over the evaluated images
accuracy = running_corrects / float(num_evaluated) if num_evaluated else 0.0
print("Accuracy: %f (%d / %d )" % (accuracy, running_corrects, num_evaluated))


Starting epoch 1/15, LR = [0.001]  ALPHA = [0.8] TARGET DATASET = [sketch]
Step 1, Loss Classifier 2.2314772605895996
Step 1, Loss Distriminator Source 0.5516728162765503
Step 1, Loss Discriminator Target 1.2292377948760986
Step 6, Loss Classifier 0.8010777831077576
Step 6, Loss Distriminator Source 0.1531529277563095
Step 6, Loss Discriminator Target 8.23289155960083e-07

Starting epoch 2/15, LR = [0.001]  ALPHA = [0.8] TARGET DATASET = [sketch]
Step 1, Loss Classifier 0.723768949508667
Step 1, Loss Distriminator Source 0.12396044284105301
Step 1, Loss Discriminator Target 7.264316082000732e-08
Step 6, Loss Classifier 0.308158814907074
Step 6, Loss Distriminator Source 0.23632672429084778
Step 6, Loss Discriminator Target 0.0

Starting epoch 3/15, LR = [0.001]  ALPHA = [0.8] TARGET DATASET = [sketch]
Step 1, Loss Classifier 0.298658162355423
Step 1, Loss Distriminator Source 0.17480769753456116
Step 1, Loss Discriminator Target 1.6763806343078613e-08
Step 6, Loss Classifier 0.3388418

**Test**

In [66]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
num_evaluated = 0  # images actually seen, so the denominator always matches what test_dataloader yields
with torch.no_grad():  # no gradients are needed for evaluation
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass
    outputs = net(images)

    # Get predictions
    _, preds = torch.max(outputs, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels).item()
    num_evaluated += labels.size(0)

# Calculate Accuracy
accuracy = running_corrects / float(num_evaluated) if num_evaluated else 0.0

print('\nTest Accuracy: {} ({} / {})'.format(accuracy, running_corrects, num_evaluated))

100%|██████████| 8/8 [00:05<00:00,  1.80it/s]


Test Accuracy: 0.18505859375 (379 / 2048)



