In [1]:
import copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.datasets.vision import StandardTransform

from PIL import Image
from tqdm import tqdm

from DAlexnet import randomNetworkWithReverseGrad
# from torchvision.models import alexnet


**Set Arguments**

In [2]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 101 # 101 + 1: There is am extra Background class that should be removed
NUM_DOMAINS = 2   # 2 domains each time

ALFA = 10      #Alfa value for the GD branch of the DANN

BATCH_SIZE = 64     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 1e-2            # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 25  #30    # Total number of training epochs (iterations over dataset)
STEP_SIZE = 12  #30    # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10

**Define Data Preprocessing**

In [3]:
# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      #transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # Normalizes tensor with mean and standard deviation
                                      transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))  # Mean and Std are provided by the ImageNet documentation
                                    ])

# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                                      transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
                                    ])
                                  #torchvision.transforms.RandomHorizontalFlip(p=0.5),
                                      #torchvision.transforms.RandomGrayscale(p=0.1),                             # 224 because torchvision's AlexNet needs a 224x224 input!
                                      #transforms.TenCrop(224, vertical_flip=False),

**Prepare Dataset**

In [4]:
DATA_DIR_PHOTO = 'PACS/photo'
DATA_DIR_CARTOON = 'PACS/cartoon'
DATA_DIR_SKETCH = 'PACS/sketch'
DATA_DIR_ARTPAINTING = 'PACS/art_painting'

# Prepare Pytorch train/test Datasets
train_dataset = torchvision.datasets.ImageFolder(DATA_DIR_PHOTO, transform=train_transform)
test_dataset = torchvision.datasets.ImageFolder(DATA_DIR_ARTPAINTING, transform=eval_transform)
val1_dataset =torchvision.datasets.ImageFolder(DATA_DIR_CARTOON, transform=eval_transform)
val2_dataset =torchvision.datasets.ImageFolder(DATA_DIR_SKETCH, transform=eval_transform)
#train_indexes = [idx for idx in range(len(train_dataset)) if idx % 5]
#test_indexes = [idx for idx in range(len(test_dataset)) if not idx % 5]

# train_dataset = Subset(dataset, train_indexes)
# val_dataset = Subset(dataset, val_indexes)

# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
print('Valid Dataset: {}'.format(len(val1_dataset)))
print('Valid Dataset: {}'.format(len(val2_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

Train Dataset: 1670
Valid Dataset: 2344
Valid Dataset: 3929
Test Dataset: 2048


**Prepare Dataloaders**

In [5]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val1_dataloader = DataLoader(val1_dataset, batch_size=int(BATCH_SIZE), shuffle=False, num_workers=4)
val2_dataloader = DataLoader(val2_dataset, batch_size=int(BATCH_SIZE), shuffle=False, num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=int(BATCH_SIZE/2), shuffle=False, num_workers=4) #Batch_size decrease for GPU ram problems

**Prepare Network**

In [6]:
net = randomNetworkWithReverseGrad(pretrained=True)
best_net = randomNetworkWithReverseGrad()

net.classifier[6] = nn.Linear(4096, NUM_CLASSES)
net.dann_classifier[6]= nn.Linear(4096, NUM_DOMAINS)

# for i, (name, param) in enumerate(net.named_parameters()):
#   print(i, name)
#   if i < 10:  param.requires_grad = False
# We just changed the last layer of AlexNet with a new fully connected layer with 101 outputs
# It is strongly suggested to study torchvision.models.alexnet source code

RuntimeError: Error(s) in loading state_dict for RandomNetworkWithReverseGrad:
	Missing key(s) in state_dict: "dann_classifier.1.weight", "dann_classifier.1.bias", "dann_classifier.4.weight", "dann_classifier.4.bias", "dann_classifier.6.weight", "dann_classifier.6.bias". 

**Prepare Training**

In [None]:
# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

# Choose parameters to optimize
# To access a different set of parameters, you have to access submodules of AlexNet
# (nn.Module objects, like AlexNet, implement the Composite Pattern)
# e.g.: parameters of the fully connected layers: net.classifier.parameters()
# e.g.: parameters of the convolutional layers: look at alexnet's source code ;) 
parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet

# Define optimizer
# An optimizer updates the weights based on loss
# We use SGD with momentum

optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
# optimizer = optim.Adam(parameters_to_optimize, lr=LR, weight_decay=WEIGHT_DECAY)

# Define scheduler
# A scheduler dynamically changes learning rate
# The most common schedule is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)


  #TODO add the alpha parameter setting


**Train with Validation**

In [None]:
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
cudnn.benchmark = True # Calling this optimizes runtime

current_step = 0
best_accuracy = 0
loss = 0
loss_vector = []
loss_val = []
acc_train_class = []
acc_train_domain = []
tot_accuracy = []
# Start iterating over the epochs
for epoch in range(NUM_EPOCHS):
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr()))
  running_correct_train_class=0
  running_correct_train_domain=0

  # Iterate over the dataset
  for images, labels in train_dataloader:

    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    net.train().to(DEVICE) # Sets module in training mode

    # PyTorch, by default, accumulates gradients after each backward pass
    # We need to manually set the gradients to zero before starting a new iteration
    optimizer.zero_grad() # Zero-ing the gradients

    # Forward pass to the network
    class_outputs = net(images)

    #Calc the correct for the graph
    _, preds = torch.max(class_outputs.data, 1)
    running_correct_train_class += torch.sum(preds == labels.data).data.item()

    # Compute loss based on output and ground truth
    class_loss = criterion(class_outputs, labels)
    loss = class_loss
    class_loss.backward()

    #Train the GD with the domain 0
    domains = torch.zeros(len(images), dtype=torch.long)
    domain_outputs = net(images, ALFA)

    #Calc the correct for the graph
    _, preds = torch.max(domain_outputs.data, 1)
    running_correct_train_domain += torch.sum(preds == domains.data).data.item()

    domain_loss = criterion(domain_outputs, domains)
    loss += domain_loss
    domain_loss.backward()



  #TRAIN THE GD ON TARGET DOMAIN

  running_corrects = 0
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)
    domains = np.ones(len(images), dtype=torch.long)

    # Forward Pass
    target_outputs = net(images, ALFA)

    # Get predictions
    _, preds = torch.max(target_outputs.data, 1)
    running_corrects += torch.sum(preds == domains.data).data.item()

    domain_target_loss = criterion(target_outputs, domains)
    loss += domain_target_loss
    domain_target_loss.backward()

  # Compute gradients for each layer and update weights
  #   loss = class_loss + domain_loss + domain_target_loss
  #   loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on accumulated gradients

    current_step += 1

  # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

  loss_vector.append(loss.item())
  acc_train_class.append(running_correct_train_class / float(len(train_dataset)))
  acc_train_domain.append(running_correct_train_domain / float(len(train_dataset)))

  # Step the scheduler
  scheduler.step()

  # Calculate Accuracy
  accuracy = running_corrects / float(len(test_dataset))
  tot_accuracy.append(accuracy)
  print('Test Accuracy: {}'.format(accuracy))

  if accuracy>best_accuracy:
    best_net = copy.deepcopy(net)
    best_accuracy=accuracy

**Plots**

In [None]:
csv_name = 'DANN - BS= %d LR= %e  EPOCHS= %d  STEP= %d' % (BATCH_SIZE, LR, NUM_EPOCHS, STEP_SIZE)
pd.DataFrame(tot_accuracy, loss_vector).to_csv('./Results/ %s.csv' % csv_name)

title = 'LossFunction - BATCH_SIZE= %d LR= %f  EPOCHS= %d  STEP_SIZE= %d GAMMA= %f' % (BATCH_SIZE, LR, NUM_EPOCHS, STEP_SIZE,GAMMA)
title2='Accuracy classes - BATCH_SIZE= %d LR= %f  EPOCHS= %d  STEP_SIZE= %d GAMMA= %f' %(BATCH_SIZE, LR, NUM_EPOCHS, STEP_SIZE,GAMMA)

x = np.linspace(1, NUM_EPOCHS, NUM_EPOCHS)

plt.plot(x, loss_vector,color='mediumseagreen')
plt.plot(x, loss_val,color='darkseagreen')
plt.title(title)
plt.xticks(np.arange(1, NUM_EPOCHS, 4))
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['Loss_train','Loss_val'], loc='best')
plt.show()

plt.plot(acc_train_class,color='mediumseagreen')
plt.plot(tot_accuracy,color='darkseagreen')
plt.legend(['accuracy_train','accuracy_val'],loc='best')
plt.title(title2)
plt.xlabel('epoch')
plt.ylabel('accuracy_score')

print('Accuracy classes', acc_train_class)
print('Accuracy domains', acc_train_domain)
print(best_accuracy)

**Test**

In [None]:
net = best_net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects_class = 0
running_corrects_domain = 0

for images, labels in tqdm(test_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  class_outputs = net(images)
  domain_outputs = net(images, ALFA)

  # Get predictions
  _, preds_class = torch.max(class_outputs.data, 1)
  _, preds_domain = torch.max(domain_outputs.data, 1)

  # Update Corrects
  running_corrects_class += torch.sum(preds == labels.data).data.item()
  running_corrects_domain += torch.sum(preds_domain == 1).data.item()

# Calculate Accuracy
accuracy_class = running_corrects_class / float(len(test_dataset))
accuracy_domain = running_corrects_domain / float(len(test_dataset))

print('Test Accuracy classes: {}'.format(accuracy_class))
print('Test Accuracy domains: {}'.format(accuracy_domain))