# Homework 3: Deep Domain Adaptation

In [1]:
import torch.nn as nn
import torch
import os
import torchvision
import torch.optim as optim
from gradient_reversal import mygradientreversalnet
from torch.utils.data import DataLoader
from torch.backends import cudnn
from torchvision import transforms
from tqdm import tqdm
from sklearn.metrics import  confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
# Run on the GPU when one is visible, otherwise fall back to the CPU so that
# every `.to(DEVICE)` call in the notebook still works on a CPU-only machine.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

NUM_CLASSES = 7   # output size of the label classifier head (net.classifier[6])
NUM_DOMAINS = 2   # output size of the domain classifier head (net.dann_classifier[6])

BATCH_SIZE = 32       # batch size used by both DataLoaders
LR = 0.005            # initial learning rate passed to SGD
MOMENTUM = 0.9        # SGD momentum
WEIGHT_DECAY = 5e-5   # SGD weight decay (L2 regularisation)
NUM_EPOCHS = 20       # number of passes over the source training set
STEP_SIZE = 10        # StepLR: epochs between two learning-rate decays
GAMMA = 0.1           # StepLR: multiplicative decay factor
LOG_FREQUENCY = 10    # print the loss every LOG_FREQUENCY optimisation steps

ALPHA = 0.1   # value forwarded as `alpha` to the network on the domain-classifier passes

#### Define transformations

In [4]:
# Per-channel statistics that torchvision's pretrained ImageNet models were trained with.
# NOTE(review): assumes mygradientreversalnet(pretrained=True) loads torchvision
# ImageNet weights -- confirm against gradient_reversal.py.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)


def _make_transform():
    """Build the preprocessing pipeline: resize, 224x224 centre crop, tensor conversion, normalisation."""
    return transforms.Compose([transforms.Resize(256),       # shorter side -> 256 px, aspect ratio kept
                               transforms.CenterCrop(224),   # 224x224 central crop
                               transforms.ToTensor(),        # PIL image -> float tensor in [0, 1]
                               transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
                               ])


# Training and evaluation use the same deterministic pipeline (no augmentation);
# they are kept as two separate objects so either can be changed independently.
train_transform = _make_transform()

eval_transform = _make_transform()

#### Data preparation

In [6]:
root = "PACS"


def _load_domain(folder_name):
    """Open one PACS domain folder as an ImageFolder dataset using train_transform."""
    domain_dir = os.path.join(root, folder_name)
    return torchvision.datasets.ImageFolder(domain_dir, transform=train_transform)


# Source domain (labelled, used for training) and target domain (used for
# adaptation and for the final evaluation).
photos = _load_domain("photo")
art_paintings = _load_domain("art_painting")

# drop_last=True keeps every source batch at exactly BATCH_SIZE samples.
train_dataloader = DataLoader(
    photos,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=1,
    drop_last=True,
)
# The target loader is shuffled as well: besides evaluation it also supplies
# the target-domain batches during domain-adaptation training.
test_dataloader = DataLoader(
    art_paintings,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=1,
)

#### Model implementation

In [8]:
net = mygradientreversalnet(pretrained=True)


# Initialise the domain-classifier branch from the label classifier's weights.
# The tensors are cloned on purpose: assigning `dst.weight.data = src.weight.data`
# would make both branches share the same storage, so every in-place optimizer
# update on one branch would silently modify the other one too.
for src_layer, dst_layer in zip(net.classifier, net.dann_classifier):
    if isinstance(src_layer, (nn.ReLU, nn.Dropout)):
        continue  # parameter-free layers: nothing to copy
    dst_layer.weight.data = src_layer.weight.data.clone()
    dst_layer.bias.data = src_layer.bias.data.clone()

# Replace the last layer of each branch so that its output size matches the task:
# NUM_CLASSES object categories for the label head, NUM_DOMAINS for the domain head.
# These fresh layers are randomly initialised (the copy above is discarded for index 6).
net.classifier[6] = nn.Linear(4096, NUM_CLASSES)
net.dann_classifier[6] = nn.Linear(4096, NUM_DOMAINS)


# The same cross-entropy criterion is used for the class loss and the domain losses.
criterion = nn.CrossEntropyLoss()
# Every parameter of the network (features + both heads) is trained.
parameters_to_optimize = net.parameters()
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
# Multiply the learning rate by GAMMA every STEP_SIZE epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

## Training without domain adaptation

In [10]:
# Source-only baseline: train the label classifier on the source domain.
net = net.to(DEVICE)
# The flag has to be assigned; the bare expression `cudnn.benchmark` is a no-op.
cudnn.benchmark = True
current_step = 0

for epoch in range(NUM_EPOCHS):
  # Read the learning rate from the optimizer itself: scheduler.get_lr() is
  # deprecated outside of step() and can report a wrong value on decay epochs.
  current_lr = [group['lr'] for group in optimizer.param_groups]
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, current_lr))

  # Training mode (enables dropout); set once per epoch instead of once per batch
  net.train()

  # Iterate over the dataset
  for images, labels in train_dataloader:
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Clear the gradients accumulated during the previous step
    optimizer.zero_grad()

    # Forward pass through the label-classifier branch
    outputs = net(images)
    # Compute loss based on output and ground truth
    loss = criterion(outputs, labels)

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    # Compute gradients for each layer and update weights
    loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on accumulated gradients

    current_step += 1

  # Step the scheduler (once per epoch)
  scheduler.step()

Starting epoch 1/20, LR = [0.005]
Step 0, Loss 1.8622608184814453
Step 10, Loss 0.5696645975112915
Step 20, Loss 0.2044483721256256
Step 30, Loss 0.3768826127052307
Step 40, Loss 0.29052311182022095
Step 50, Loss 0.14955906569957733
Starting epoch 2/20, LR = [0.005]
Step 60, Loss 0.5817769169807434
Step 70, Loss 0.15583392977714539
Step 80, Loss 0.2816635072231293
Step 90, Loss 0.26235324144363403


KeyboardInterrupt: 

Traceback (most recent call last):
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


## Training with domain adaptation

In [None]:
# Domain-adversarial training: each optimisation step accumulates the gradients of
#   1) the class loss on a labelled source batch,
#   2) the domain loss on the same source batch (domain label 0),
#   3) the domain loss on a target batch (domain label 1),
# and then performs a single optimizer update.
#
# NOTE(review): this cell reuses the `net`, `optimizer` and `scheduler` objects created
# in the model cell. If the source-only training cell was executed before, training
# resumes from that state (trained weights, decayed LR); re-run the model cell first
# to start from scratch.
net = net.to(DEVICE)
# The flag has to be assigned; the bare expression `cudnn.benchmark` is a no-op.
cudnn.benchmark = True
current_step = 0

# Keep one iterator over the target domain alive across steps instead of rebuilding
# the DataLoader iterator (and its worker process) on every single batch.
target_iterator = iter(test_dataloader)

for epoch in range(NUM_EPOCHS):
    # Read the learning rate from the optimizer itself: scheduler.get_lr() is
    # deprecated outside of step() and can report a wrong value on decay epochs.
    current_lr = [group['lr'] for group in optimizer.param_groups]
    print('Starting epoch {}/{}, LR = {}'.format(epoch + 1, NUM_EPOCHS, current_lr))

    # Training mode (enables dropout); set once per epoch instead of once per batch
    net.train()

    # Iterate over the dataset
    for images, labels in train_dataloader:
        # Bring data over the device of choice
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        # Next target-domain batch (its class labels are ignored); restart the
        # iterator when the target set has been fully consumed.
        try:
            target_images, _ = next(target_iterator)
        except StopIteration:
            target_iterator = iter(test_dataloader)
            target_images, _ = next(target_iterator)
        target_images = target_images.to(DEVICE)

        optimizer.zero_grad()
        should_log = current_step % LOG_FREQUENCY == 0

        # 1) class loss on the source batch (label-classifier branch)
        outputs = net(images)  # Forward pass to the network
        loss = criterion(outputs, labels)  # Compute loss based on output and ground truth
        if should_log:  # Log loss
            print('Step {}, Loss {}'.format(current_step, loss.item()))
        loss.backward()  # backward pass: accumulates gradients

        # 2) domain loss on the source batch: every sample has domain label 0
        domain_labels = torch.zeros(images.size(0), dtype=torch.int64, device=DEVICE)
        outputs = net(images, alpha=ALPHA)  # Forward pass through the domain branch
        loss2 = criterion(outputs, domain_labels)
        if should_log:  # Log loss
            print('Step {}, Loss {}'.format(current_step, loss2.item()))
        loss2.backward()  # backward pass: accumulates gradients

        # 3) domain loss on the target batch: every sample has domain label 1.
        # The labels are sized from the target batch itself, which may be smaller
        # than the source batch (the target loader does not drop its last batch).
        domain_labels = torch.ones(target_images.size(0), dtype=torch.int64, device=DEVICE)
        outputs = net(target_images, alpha=ALPHA)  # Forward pass through the domain branch
        loss3 = criterion(outputs, domain_labels)
        if should_log:  # Log loss
            print('Step {}, Loss {}'.format(current_step, loss3.item()))
        loss3.backward()  # backward pass: accumulates gradients

        optimizer.step()  # update weights based on the three accumulated gradients

        current_step += 1

    # Step the scheduler (once per epoch)
    scheduler.step()


## Evaluate model on the test set

In [None]:
# Evaluate the label classifier on the target domain and collect the
# ground-truth / predicted labels for the confusion matrix.
net = net.to(DEVICE)  # this will bring the network to GPU if DEVICE is cuda
net.train(False)  # Set Network to evaluation mode

running_corrects = 0
classes_list = []
predictions_list = []

# No gradients are needed for inference: skipping the autograd graph saves memory and time.
with torch.no_grad():
    for images, labels in tqdm(test_dataloader):
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        # Forward Pass (label-classifier branch)
        outputs = net(images)

        # Get predictions: index of the highest score for each sample
        _, preds = torch.max(outputs, 1)

        # extend() appends in place instead of rebuilding the whole list at every batch
        classes_list.extend(labels.tolist())
        predictions_list.extend(preds.tolist())

        # Update Corrects
        running_corrects += torch.sum(preds == labels).item()

# Calculate Accuracy over the dataset that was actually iterated
accuracy = running_corrects / float(len(test_dataloader.dataset))

print('Test Accuracy: {}'.format(accuracy))

### Plot confusion matrix

In [None]:
# Class names of the evaluated (target) dataset, in label-index order.
class_names = art_paintings.classes

# Pin the label set so the matrix always has one row/column per class, even when a
# class never shows up in the labels or predictions; otherwise the matrix would
# shrink and the tick labels below would no longer line up with it.
confusion = confusion_matrix(classes_list, predictions_list, labels=list(range(len(class_names))))

f, ax = plt.subplots(figsize=(7, 7))
sns.heatmap(confusion, annot=True, square=True, cbar=False, fmt="d", annot_kws={"size": 14}, cmap='GnBu', ax=ax)
ax.set_xticklabels(class_names)
ax.set_yticklabels(class_names, rotation=0)
# confusion_matrix puts the true labels on the rows and the predictions on the columns
ax.set_xlabel('Predicted class')
ax.set_ylabel('True class')
ax.set_title('Confusion matrix on the target domain')
plt.show()