
**Install requirements**

In [None]:
!pip3 install 'torch==1.3.1'
!pip3 install 'torchvision==0.5.0'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'

**Import libraries**

In [None]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet

from PIL import Image
from tqdm import tqdm

**Set Arguments**

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without CUDA.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

NUM_CLASSES = 7      # Number of outputs of the replaced final classifier layer

BATCH_SIZE = 256     # Higher batch sizes allow for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, the learning rate should change by the same factor to have comparable results

LR = 1e-3            # The initial learning rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 30      # Total number of training epochs (iterations over the dataset)
STEP_SIZE = 20       # How many epochs before decreasing the learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for the learning rate step-down

LOG_FREQUENCY = 10   # Print the training loss every LOG_FREQUENCY optimisation steps

**Define Data Preprocessing**

In [None]:
# Preprocessing applied to images used for training.
# The pipeline is deterministic (no augmentation): resize, centre crop, tensor conversion, normalisation.
train_transform = transforms.Compose([
    transforms.Resize(256),      # shorter side of the PIL image becomes 256 px
    transforms.CenterCrop(224),  # central 224x224 patch; keep this size when swapping in other
                                 # transformations, torchvision's AlexNet expects 224x224 inputs
    transforms.ToTensor(),       # PIL Image -> torch.Tensor
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),  # per-channel mean / std normalisation
])

# Preprocessing applied to images used for evaluation (same steps as above).
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

**Prepare Dataset**

In [None]:
from torchvision.datasets import ImageFolder

# Clone github repository 
if not os.path.isdir('./Homework3-PACS'):
  !git clone https://github.com/MachineLearning2020/Homework3-PACS.git

DATA_DIR = 'Homework3-PACS/PACS'

# Read each domain with Imagefolder
P = ImageFolder(DATA_DIR+"/photo", transform=train_transform) 
A = ImageFolder(DATA_DIR+"/art_painting", transform=eval_transform) 
C = ImageFolder(DATA_DIR+"/cartoon", transform=train_transform) 
S = ImageFolder(DATA_DIR+"/sketch", transform=train_transform) 


**Prepare Dataloaders**

In [None]:
# Training loaders: photo (source) and art painting (target) are shuffled, and the
# last incomplete batch is dropped so every training batch has BATCH_SIZE samples
source_dataloader, target_dataloader = (
  DataLoader(domain, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
  for domain in (P, A)
)

# Evaluation loader: art painting in fixed order, keeping every sample
test_dataloader = DataLoader(
  A,
  batch_size=BATCH_SIZE,
  shuffle=False,
  num_workers=4,
)

**Implementing the model**

In [None]:
# Adapted from the torchvision AlexNet source code
import torch
import torch.nn as nn
from torch.hub import load_state_dict_from_url
from torch.autograd import Function

# Names exported by `from <module> import *` if this cell is ever moved into a module.
# NOTE(review): 'alexnet' is not defined in this cell (the factory below is `dann`) — confirm this list is still wanted.
__all__ = ['AlexNet', 'alexnet']


# Download URL of the pre-trained AlexNet checkpoint, keyed by architecture name
model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}

class ReverseLayerF(Function):
    """Gradient reversal layer.

    The forward pass returns the input unchanged; the backward pass multiplies
    the incoming gradient by ``-alpha``. No gradient is produced for ``alpha``.
    """

    @staticmethod
    def forward(ctx, x, alpha):
        # Remember the scaling factor for the backward pass
        ctx.alpha = alpha
        # view_as hands back a new tensor object over the same values (identity)
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # Flip the sign, then scale by alpha; the second slot is alpha's (absent) gradient
        reversed_grad = -grad_output * ctx.alpha
        return reversed_grad, None

class AlexNet(nn.Module):
    """AlexNet with an additional domain-discriminator head.

    Sub-modules:
        features:      convolutional feature extractor shared by both heads.
        avgpool:       adaptive average pooling to a 6x6 spatial map.
        classifier:    label predictor with ``num_classes`` outputs.
        gd_classifier: domain discriminator with 2 outputs; it has the same
                       hidden layout as ``classifier``.

    Args:
        num_classes (int): number of outputs of the label predictor.
    """

    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )
        # Second branch: the domain discriminator
        self.gd_classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 2),  # only 2 classes: source vs. target domain
        )

    def forward(self, x, alpha=None):
        """Run one of the two heads on top of the shared features.

        Args:
            x: batch of input images.
            alpha: when ``None`` the label predictor is used. Otherwise the
                domain discriminator is used, and gradients flowing back into
                the feature extractor are reversed and scaled by ``alpha``.

        Returns:
            Logits of shape (batch, num_classes) when ``alpha`` is ``None``,
            otherwise logits of shape (batch, 2).
        """
        features = self.features(x)
        # Pool and flatten once for both heads. When the feature map is already
        # 6x6 the pooling is an identity, and for any other size it keeps the
        # batch dimension intact instead of relying on a hard-coded reshape.
        pooled = torch.flatten(self.avgpool(features), 1)

        # If alpha is passed, we are training the discriminator
        if alpha is not None:
            # Gradient reversal layer: identity forward, reversed gradients backward
            reversed_features = ReverseLayerF.apply(pooled, alpha)
            return self.gd_classifier(reversed_features)

        # Without alpha, we are training (or evaluating) with label supervision
        return self.classifier(pooled)


def dann(pretrained=False, progress=True, **kwargs):
    r"""Build the two-headed AlexNet (label predictor + domain discriminator).

    The backbone follows the AlexNet architecture from the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.

    Args:
        pretrained (bool): If True, loads the ImageNet pre-trained AlexNet weights
            and initialises the discriminator's hidden layers from the classifier's
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: forwarded to the ``AlexNet`` constructor

    Returns:
        The constructed ``AlexNet`` model.
    """
    model = AlexNet(**kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['alexnet'],
                                              progress=progress)
        # strict=False: the checkpoint has no entries for the gd_classifier branch
        model.load_state_dict(state_dict, strict=False)
        # Copy the pre-trained hidden layers into the discriminator. copy_() writes
        # the values into the discriminator's own tensors; assigning `.data` would
        # make both heads share one storage, so every in-place optimizer update on
        # one head would also modify the other.
        with torch.no_grad():
            for layer_idx in (1, 4):  # the two 4096-unit Linear layers; the output layers differ in size
                model.gd_classifier[layer_idx].weight.copy_(model.classifier[layer_idx].weight)
                model.gd_classifier[layer_idx].bias.copy_(model.classifier[layer_idx].bias)
    return model

**Prepare Network**

In [None]:
# Baseline: torchvision's AlexNet with its pre-trained weights
alex_net = torchvision.models.alexnet(pretrained=True)
# Domain-adversarial variant defined in the previous cell, also pre-trained
dann_net = dann(pretrained=True)

# The pre-trained label predictors end in a 1000-way layer; swap that last fully
# connected layer (index 6 of `classifier`) for a fresh NUM_CLASSES-way one.
# nn.Linear is PyTorch's fully connected layer (convolutions are nn.Conv2d).
for backbone in (alex_net, dann_net):
  backbone.classifier[6] = nn.Linear(4096, NUM_CLASSES)

**Prepare Training**

In [None]:
# Cross entropy is the loss used for every classification output in this notebook
criterion = nn.CrossEntropyLoss()

# --- Baseline AlexNet ---------------------------------------------------------
# All parameters are optimised. To train only part of the network, pass the
# parameters of a sub-module instead (e.g. alex_net.classifier.parameters()).
parameters_to_optimize = alex_net.parameters()
# SGD with momentum updates the weights from the gradients of the loss
optimizer = optim.SGD(
  parameters_to_optimize,
  lr=LR,
  momentum=MOMENTUM,
  weight_decay=WEIGHT_DECAY,
)
# Step-down schedule: the learning rate is multiplied by GAMMA every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

# --- DANN ---------------------------------------------------------------------
# Same optimiser and schedule settings, applied to all parameters of the DANN model
parameters_to_optimize_dann = dann_net.parameters()
optimizer_dann = optim.SGD(
  parameters_to_optimize_dann,
  lr=LR,
  momentum=MOMENTUM,
  weight_decay=WEIGHT_DECAY,
)
scheduler_dann = optim.lr_scheduler.StepLR(optimizer_dann, step_size=STEP_SIZE, gamma=GAMMA)

**Training without DANN**

In [None]:
# Baseline training: fine-tune AlexNet on the source domain only (no domain adaptation)
net = alex_net.to(DEVICE)

# Let cuDNN auto-tune its convolution algorithms. The flag has to be assigned:
# merely referencing the attribute has no effect.
cudnn.benchmark = True

net.train() # Training mode (enables dropout); nothing inside the loop switches it off

current_step = 0
for epoch in range(NUM_EPOCHS):
  # Read the learning rate from the optimizer itself: that is the value the next
  # updates will use, whatever the scheduler API of the installed torch reports
  current_lr = [group['lr'] for group in optimizer.param_groups]
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lr))

  # Iterate over the source dataset
  for images, labels in source_dataloader:
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # PyTorch accumulates gradients across backward passes,
    # so they are reset at the start of every iteration
    optimizer.zero_grad()

    # Forward pass and loss against the ground truth
    outputs = net(images)
    loss = criterion(outputs, labels)

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on the gradients

    current_step += 1

  # Step the scheduler once per epoch
  scheduler.step()

**Test**

In [None]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
# Inference only: without no_grad every forward pass would record an autograd
# graph and keep its activations in memory for a backward pass that never happens
with torch.no_grad():
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass
    outputs = net(images)

    # Predicted class = index of the largest logit
    _, preds = torch.max(outputs, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels).item()

# Accuracy over every sample served by the test loader
accuracy = running_corrects / float(len(test_dataloader.dataset))

print('Test Accuracy: {}'.format(accuracy))

**Training with DANN**

In [None]:
# Training with DANN
ALPHA = 0.04 # Scaling factor of the reversed gradients coming from the discriminator
# By default, everything is loaded to cpu
net = dann_net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

# Let cuDNN auto-tune its convolution algorithms. The flag has to be assigned:
# merely referencing the attribute has no effect.
cudnn.benchmark = True

net.train() # Training mode (enables dropout); nothing inside the loop switches it off

# One long-lived iterator over the target domain. Building a new iterator for
# every step would restart the loader's worker processes each time and never
# advance past the first batch of a pass.
target_iter = iter(target_dataloader)

current_step = 0
# Start iterating over the epochs
for epoch in range(NUM_EPOCHS):
  # Read the learning rate from the optimizer itself: that is the value the next
  # updates will use, whatever the scheduler API of the installed torch reports
  current_lr = [group['lr'] for group in optimizer_dann.param_groups]
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lr))

  # Iterate over the source dataset, pairing each batch with a target batch
  for s_images, s_labels in tqdm(source_dataloader):
    try:
      t_images, _ = next(target_iter)
    except StopIteration:
      # Target loader exhausted: start a new pass over it
      target_iter = iter(target_dataloader)
      t_images, _ = next(target_iter)

    s_images = s_images.to(DEVICE)
    t_images = t_images.to(DEVICE)
    s_labels = s_labels.to(DEVICE)

    # Domain labels: 0 for every source image, 1 for every target image
    zeros = torch.zeros(s_images.size(0), dtype=torch.long, device=DEVICE)
    ones = torch.ones(t_images.size(0), dtype=torch.long, device=DEVICE)

    optimizer_dann.zero_grad()

    # Step 1: label prediction loss on the source batch
    outputs = net(s_images)
    loss1 = criterion(outputs,s_labels)
    loss1.backward()

    # Step 2: discriminator loss on the source batch (domain label 0)
    outputs = net(s_images,ALPHA)
    loss2 = criterion(outputs,zeros)
    loss2.backward()

    # Step 3: discriminator loss on the target batch (domain label 1)
    outputs = net(t_images,ALPHA)
    loss3 = criterion(outputs,ones)
    loss3.backward()

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss1 {}, Loss2 {}, Loss3 {}'.format(current_step, loss1.item(),loss2.item(),loss3.item()))

    # Single update from the gradients accumulated over the three backward passes
    optimizer_dann.step()
    current_step += 1

  # Step the scheduler once per epoch
  scheduler_dann.step()

**Test**

In [None]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
# Inference only: without no_grad every forward pass would record an autograd
# graph and keep its activations in memory for a backward pass that never happens
with torch.no_grad():
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass (no alpha: the label predictor head is used)
    outputs = net(images)

    # Predicted class = index of the largest logit
    _, preds = torch.max(outputs, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels).item()

# Accuracy over every sample served by the test loader
accuracy = running_corrects / float(len(test_dataloader.dataset))

print('Test Accuracy: {}'.format(accuracy))

**Cross Domain Validation without DANN**

In [None]:
# Grid search over learning rate and batch size for the baseline AlexNet.
# Each configuration is trained on photo (P) and scored by the mean accuracy
# on cartoon (C) and sketch (S).
lrs = [0.001,0.01,0.02,0.1]
batches = [256,512]
best_accuracy = 0
best_params = None # stays None only if no configuration scores above 0
NUM_EPOCHS = 10 #30 epochs takes too much

# Let cuDNN auto-tune its convolution algorithms. The flag has to be assigned:
# merely referencing the attribute has no effect.
cudnn.benchmark = True

for LR in lrs:
  for BATCH_SIZE in batches:
    print(f"{LR},{BATCH_SIZE}\n")
    source_dataloader = DataLoader(P, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
    val_C_dataloader = DataLoader(C, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
    val_S_dataloader = DataLoader(S, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    # Fresh pre-trained network, optimizer and schedule for every configuration
    alex_net = torchvision.models.alexnet(pretrained=True)
    alex_net.classifier[6] = nn.Linear(4096, NUM_CLASSES)
    criterion = nn.CrossEntropyLoss()
    parameters_to_optimize = alex_net.parameters()
    optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

    #training without dann
    net = alex_net.to(DEVICE)
    net.train() # Training mode (enables dropout)

    current_step = 0
    for epoch in range(NUM_EPOCHS):
      # Read the learning rate from the optimizer itself: that is the value the
      # next updates will use, whatever the installed scheduler API reports
      current_lr = [group['lr'] for group in optimizer.param_groups]
      print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lr))

      # Iterate over the source dataset
      for images, labels in source_dataloader:
        # Bring data over the device of choice
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        # Gradients accumulate across backward passes, so reset them first
        optimizer.zero_grad()

        # Forward pass and loss against the ground truth
        outputs = net(images)
        loss = criterion(outputs, labels)

        # Log loss
        if current_step % LOG_FREQUENCY == 0:
          print('Step {}, Loss {}'.format(current_step, loss.item()))

        loss.backward()  # backward pass: computes gradients
        optimizer.step() # update weights based on the gradients

        current_step += 1

      # Step the scheduler once per epoch
      scheduler.step()

    # Validation on both held-out domains with the same evaluation loop
    net.train(False) # Set Network to evaluation mode
    val_accuracies = {}
    # Inference only: no_grad avoids recording autograd graphs that are never used
    with torch.no_grad():
      for domain_name, val_dataloader in (('Cartoon', val_C_dataloader), ('Sketch', val_S_dataloader)):
        running_corrects = 0
        for images, labels in tqdm(val_dataloader):
          images = images.to(DEVICE)
          labels = labels.to(DEVICE)

          # Forward pass; predicted class = index of the largest logit
          outputs = net(images)
          _, preds = torch.max(outputs, 1)

          # Update Corrects
          running_corrects += torch.sum(preds == labels).item()

        val_accuracies[domain_name] = running_corrects / float(len(val_dataloader.dataset))
        print('\nTest Accuracy {}: {}'.format(domain_name, val_accuracies[domain_name]))

    accuracy_cartoon = val_accuracies['Cartoon']
    accuracy_sketch = val_accuracies['Sketch']

    # Keep the configuration with the best mean accuracy over the two domains
    accuracy_avg = (accuracy_sketch + accuracy_cartoon)/2
    if accuracy_avg > best_accuracy:
      best_accuracy = accuracy_avg
      best_params = [LR,BATCH_SIZE]



In [None]:
# Display the best [LR, BATCH_SIZE] pair selected by the grid search in the previous cell
best_params

In [None]:
# Re-run the baseline (point 3A) with the best hyperparameters and test on art painting.
# best_params is [LR, BATCH_SIZE] as stored by the grid search.
NUM_EPOCHS = 30
source_dataloader = DataLoader(P, batch_size=best_params[1], shuffle=True, num_workers=4, drop_last=True)
target_dataloader = DataLoader(A, batch_size=best_params[1], shuffle=False, num_workers=4)
alex_net = torchvision.models.alexnet(pretrained=True)
alex_net.classifier[6] = nn.Linear(4096, NUM_CLASSES)
criterion = nn.CrossEntropyLoss()
parameters_to_optimize = alex_net.parameters()
optimizer = optim.SGD(parameters_to_optimize, lr=best_params[0], momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

#training without dann
net = alex_net.to(DEVICE)

# Let cuDNN auto-tune its convolution algorithms. The flag has to be assigned:
# merely referencing the attribute has no effect.
cudnn.benchmark = True

net.train() # Training mode (enables dropout); nothing inside the loop switches it off

current_step = 0
for epoch in range(NUM_EPOCHS):
  # Read the learning rate from the optimizer itself: that is the value the next
  # updates will use, whatever the scheduler API of the installed torch reports
  current_lr = [group['lr'] for group in optimizer.param_groups]
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lr))

  # Iterate over the source dataset
  for images, labels in source_dataloader:
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Gradients accumulate across backward passes, so reset them first
    optimizer.zero_grad()

    # Forward pass and loss against the ground truth
    outputs = net(images)
    loss = criterion(outputs, labels)

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on the gradients

    current_step += 1

  # Step the scheduler once per epoch
  scheduler.step()

#test
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
# Inference only: no_grad avoids recording autograd graphs that are never used
with torch.no_grad():
  for images, labels in tqdm(target_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward pass; predicted class = index of the largest logit
    outputs = net(images)
    _, preds = torch.max(outputs, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels).item()

# Accuracy over every sample served by the evaluation loader
accuracy = running_corrects / float(len(target_dataloader.dataset))

print('\nTest Accuracy: {}'.format(accuracy))

**Cross Domain Validation with DANN**

In [None]:
# Grid search over alpha, learning rate and batch size for DANN.
# For every configuration two independent models are trained, photo -> cartoon
# and photo -> sketch, and the configuration is scored by the mean accuracy of
# the two on their respective target domain.
alpha = [0.01,0.03,0.05,0.1]
lrs = [0.001,0.01,0.02,0.1]
batches = [256,512]
best_accuracy = 0
best_params = None # stays None only if no configuration scores above 0
NUM_EPOCHS = 10 #30 epochs takes too much

# Let cuDNN auto-tune its convolution algorithms. The flag has to be assigned:
# merely referencing the attribute has no effect.
cudnn.benchmark = True

for ALPHA in alpha:
  for LR in lrs:
    for BATCH_SIZE in batches:
      print(f"{LR},{BATCH_SIZE},{ALPHA}\n")

      val_accuracies = {}
      for domain_name, target_dataset in (('Cartoon', C), ('Sketch', S)):
        # Train photo -> current target domain
        source_dataloader = DataLoader(P, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
        # Shuffled like the source loader, so the discriminator sees varied target batches
        target_dataloader = DataLoader(target_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
        test_dataloader = DataLoader(target_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

        # Fresh pre-trained network, optimizer and schedule for every run
        dann_net = dann(pretrained=True)
        dann_net.classifier[6] = nn.Linear(4096, NUM_CLASSES)
        criterion = nn.CrossEntropyLoss()
        parameters_to_optimize_dann = dann_net.parameters()
        optimizer_dann = optim.SGD(parameters_to_optimize_dann, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
        scheduler_dann = optim.lr_scheduler.StepLR(optimizer_dann, step_size=STEP_SIZE, gamma=GAMMA)

        net = dann_net.to(DEVICE)
        net.train() # Training mode (enables dropout)

        # One long-lived iterator over the target domain. Building a new iterator
        # for every step would restart the loader's workers each time and never
        # advance past the first batch of a pass.
        target_iter = iter(target_dataloader)

        current_step = 0
        for epoch in range(NUM_EPOCHS):
          # Read the learning rate from the optimizer itself: that is the value
          # the next updates will use, whatever the installed scheduler API reports
          current_lr = [group['lr'] for group in optimizer_dann.param_groups]
          print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lr))

          # Iterate over the source dataset, pairing each batch with a target batch
          for s_images, s_labels in source_dataloader:
            try:
              t_images, _ = next(target_iter)
            except StopIteration:
              # Target loader exhausted: start a new pass over it
              target_iter = iter(target_dataloader)
              t_images, _ = next(target_iter)

            s_images = s_images.to(DEVICE)
            t_images = t_images.to(DEVICE)
            s_labels = s_labels.to(DEVICE)

            # Domain labels: 0 for every source image, 1 for every target image
            zeros = torch.zeros(s_images.size(0), dtype=torch.long, device=DEVICE)
            ones = torch.ones(t_images.size(0), dtype=torch.long, device=DEVICE)

            optimizer_dann.zero_grad()

            # Step 1: label prediction loss on the source batch
            outputs = net(s_images)
            loss1 = criterion(outputs,s_labels)
            loss1.backward()

            # Step 2: discriminator loss on the source batch (domain label 0)
            outputs = net(s_images,ALPHA)
            loss2 = criterion(outputs,zeros)
            loss2.backward()

            # Step 3: discriminator loss on the target batch (domain label 1)
            outputs = net(t_images,ALPHA)
            loss3 = criterion(outputs,ones)
            loss3.backward()

            # Log loss
            if current_step % LOG_FREQUENCY == 0:
              print('Step {}, Loss1 {}, Loss2 {}, Loss3 {}'.format(current_step, loss1.item(),loss2.item(),loss3.item()))

            # Single update from the gradients accumulated over the three backward passes
            optimizer_dann.step()
            current_step += 1

          # Step the scheduler once per epoch
          scheduler_dann.step()

        # Test on the current target domain
        net.train(False) # Set Network to evaluation mode

        running_corrects = 0
        # Inference only: no_grad avoids recording autograd graphs that are never used
        with torch.no_grad():
          for images, labels in tqdm(test_dataloader):
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            # Forward pass; predicted class = index of the largest logit
            outputs = net(images)
            _, preds = torch.max(outputs, 1)

            # Update Corrects
            running_corrects += torch.sum(preds == labels).item()

        val_accuracies[domain_name] = running_corrects / float(len(target_dataset))
        print('\nTest Accuracy {}: {}'.format(domain_name, val_accuracies[domain_name]))

      accuracy_cartoon = val_accuracies['Cartoon']
      accuracy_sketch = val_accuracies['Sketch']

      # Keep the configuration with the best mean accuracy over the two domains
      accuracy_avg = (accuracy_sketch + accuracy_cartoon)/2
      if accuracy_avg > best_accuracy:
        best_accuracy = accuracy_avg
        best_params = [LR,BATCH_SIZE,ALPHA]
        
        

In [None]:
# Train DANN (photo -> art painting) with the best hyperparameters and test on art painting.
# best_params is [LR, BATCH_SIZE, ALPHA] as stored by the DANN grid search.
NUM_EPOCHS = 30
source_dataloader = DataLoader(P, batch_size=best_params[1], shuffle=True, num_workers=4, drop_last=True)
target_dataloader = DataLoader(A, batch_size=best_params[1], shuffle=True, num_workers=4, drop_last=True)
test_dataloader = DataLoader(A, batch_size=best_params[1], shuffle=False, num_workers=4)
dann_net = dann(pretrained=True)
dann_net.classifier[6] = nn.Linear(4096, NUM_CLASSES)
criterion = nn.CrossEntropyLoss()
parameters_to_optimize_dann = dann_net.parameters()
optimizer_dann = optim.SGD(parameters_to_optimize_dann, lr=best_params[0], momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
scheduler_dann = optim.lr_scheduler.StepLR(optimizer_dann, step_size=STEP_SIZE, gamma=GAMMA)
#training with dann
net = dann_net.to(DEVICE)

# Let cuDNN auto-tune its convolution algorithms. The flag has to be assigned:
# merely referencing the attribute has no effect.
cudnn.benchmark = True

net.train() # Training mode (enables dropout); nothing inside the loop switches it off

# One long-lived iterator over the target domain. Building a new iterator for
# every step would restart the loader's worker processes each time and never
# advance past the first batch of a pass.
target_iter = iter(target_dataloader)

current_step = 0

# Per-epoch mean of each loss, plotted in the next cell
losses1 = []
losses2 = []
losses3 = []

for epoch in range(NUM_EPOCHS):
  # Read the learning rate from the optimizer itself: that is the value the next
  # updates will use, whatever the scheduler API of the installed torch reports
  current_lr = [group['lr'] for group in optimizer_dann.param_groups]
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lr))

  # Running sums for this epoch. They are reset once per epoch, not once per
  # batch, so dividing by the batch count below gives a true mean.
  loss1_sum = 0.0
  loss2_sum = 0.0
  loss3_sum = 0.0
  num_batches = 0

  # Iterate over the source dataset, pairing each batch with a target batch
  for s_images, s_labels in source_dataloader:
    try:
      t_images, _ = next(target_iter)
    except StopIteration:
      # Target loader exhausted: start a new pass over it
      target_iter = iter(target_dataloader)
      t_images, _ = next(target_iter)

    s_images = s_images.to(DEVICE)
    t_images = t_images.to(DEVICE)
    s_labels = s_labels.to(DEVICE)

    # Domain labels: 0 for every source image, 1 for every target image
    zeros = torch.zeros(s_images.size(0), dtype=torch.long, device=DEVICE)
    ones = torch.ones(t_images.size(0), dtype=torch.long, device=DEVICE)

    optimizer_dann.zero_grad()

    # Step 1: label prediction loss on the source batch
    outputs = net(s_images)
    loss1 = criterion(outputs,s_labels)
    loss1_sum += loss1.item()
    loss1.backward()

    # Step 2: discriminator loss on the source batch (domain label 0)
    outputs = net(s_images,best_params[2])
    loss2 = criterion(outputs,zeros)
    loss2_sum += loss2.item()
    loss2.backward()

    # Step 3: discriminator loss on the target batch (domain label 1)
    outputs = net(t_images,best_params[2])
    loss3 = criterion(outputs,ones)
    loss3_sum += loss3.item()
    loss3.backward()

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss1 {}, Loss2 {}, Loss3 {}'.format(current_step, loss1.item(),loss2.item(),loss3.item()))

    # Single update from the gradients accumulated over the three backward passes
    optimizer_dann.step()
    current_step += 1
    num_batches += 1

  # Record the epoch means (the max() guards against an epoch with no batches)
  losses1.append(loss1_sum/float(max(num_batches, 1)))
  losses2.append(loss2_sum/float(max(num_batches, 1)))
  losses3.append(loss3_sum/float(max(num_batches, 1)))

  # Step the scheduler once per epoch
  scheduler_dann.step()

net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
# Inference only: no_grad avoids recording autograd graphs that are never used
with torch.no_grad():
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward pass; predicted class = index of the largest logit
    outputs = net(images)
    _, preds = torch.max(outputs, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels).item()

# Accuracy over every sample served by the test loader
accuracy = running_corrects / float(len(test_dataloader.dataset))

print('\nTest Accuracy: {}'.format(accuracy))


In [None]:
from matplotlib import pyplot as plt

# Draw the three loss histories recorded by the training cell above on one set of axes
fig, ax = plt.subplots()
for history, curve_label in (
  (losses1, 'classifier loss'),
  (losses2, 'discriminator loss (source)'),
  (losses3, 'discriminator loss (target)'),
):
  ax.plot(history, label=curve_label)
ax.grid()
ax.legend()