<a href="https://colab.research.google.com/github/luciainnocenti/Homework3-PACS/blob/master/MLDL_Homework3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import libraries**

In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms

from PIL import Image
from tqdm import tqdm


from numpy import random 

# Seed every RNG the notebook relies on. NumPy alone is not enough: DataLoader
# shuffling and PyTorch weight initialization draw from the torch generator.
random.seed(33)
torch.manual_seed(33)

# **Set Arguments**

In [0]:
# Use the GPU when one is visible and fall back to the CPU otherwise, so the
# notebook still runs end-to-end on a CPU-only runtime instead of crashing on
# the first `.to(DEVICE)` call.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

BATCH_SIZE = 128     # Samples per mini-batch. An empirical heuristic suggests that, when changing
                     # the batch size, the learning rate should change by the same factor to get comparable results

MOMENTUM = 0.9       # Momentum for SGD (only referenced by the commented-out SGD optimizer)
WEIGHT_DECAY = 5e-5  # Weight decay for SGD (only referenced by the commented-out SGD optimizer)

NUM_EPOCHS = 20      # Total number of training epochs (full passes over the dataset)
STEP_SIZE = 20       # Number of epochs between learning-rate step-downs (StepLR)
GAMMA = 0.1          # Multiplicative factor applied to the learning rate at each step-down

LOG_FREQUENCY = 10   # Print the training loss every LOG_FREQUENCY optimization steps

alfa = 1             # Second argument passed to net(...) for the domain forward passes
                     # (presumably the gradient-reversal weight -- confirm in HW_PACS.gradient_reversal_example)
LR = 1e-6            # The initial learning rate

# **Define Data Preprocessing**

In [0]:
def _make_preprocessing():
  """Build one preprocessing pipeline: resize, center-crop, to-tensor, normalize."""
  steps = [
      transforms.Resize(256),
      transforms.CenterCrop(224),
      transforms.ToTensor(),  # PIL Image -> torch.Tensor
      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # per-channel mean / std
  ]
  return transforms.Compose(steps)


# Training and test phases apply the same steps; each phase gets its own
# Compose object so one can be changed later without affecting the other.
train_transform = _make_preprocessing()
test_transform = _make_preprocessing()

# **Prepare Dataset**

In [24]:
# Clone github repository with data
!git clone https://github.com/luciainnocenti/Homework3-PACS.git
!mv 'Homework3-PACS' 'HW_PACS'

from HW_PACS.dataset import PACS_Dataset 

rootPhoto = "HW_PACS/PACS/photo"
photos = PACS_Dataset(root = rootPhoto, transform = train_transform)

rootArt_painting = "HW_PACS/PACS/art_painting"
art_painting = PACS_Dataset(root = rootArt_painting, transform = test_transform)

# Check dataset sizes
print('Train Dataset: {}'.format(len(photos)))
print('Test Dataset: {}'.format(len(art_painting)))

Cloning into 'Homework3-PACS'...
remote: Enumerating objects: 6, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 10108 (delta 2), reused 0 (delta 0), pack-reused 10102[K
Receiving objects: 100% (10108/10108), 174.20 MiB | 38.51 MiB/s, done.
Resolving deltas: 100% (45/45), done.
Checking out files: 100% (9995/9995), done.
Train Dataset: 1670
Test Dataset: 2048


# **Prepare Dataloaders**

In [0]:
# Both loaders share the batch size and worker count; only the shuffling differs:
# photos are reshuffled on every pass, art paintings are read in a fixed order.
_loader_settings = dict(batch_size=BATCH_SIZE, num_workers=4)
photos_dataloader = DataLoader(photos, shuffle=True, **_loader_settings)
art_painting_dataloader = DataLoader(art_painting, shuffle=False, **_loader_settings)

# **Model without DANN**

## **Prepare Network**

In [0]:
from HW_PACS.gradient_reversal_example import alexNetDA 

# Instantiate the network imported from HW_PACS.gradient_reversal_example with 7 output classes
net = alexNetDA(num_classes = 7)


## **Prepare Training**

In [0]:
# Classification loss
criterion = nn.CrossEntropyLoss()

# Optimize every parameter of the network. A narrower set can be selected
# through the network's submodules (nn.Module objects are composites),
# e.g. net.classifier.parameters() for the fully connected layers.
parameters_to_optimize = net.parameters()

# Adam with initial learning rate LR. To go back to SGD with momentum use
# optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY).
optimizer = optim.Adam(params=parameters_to_optimize, lr=LR)

# Step-down schedule: the learning rate is multiplied by GAMMA every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=STEP_SIZE,
    gamma=GAMMA,
)

In [28]:
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
# The flag must be *assigned*: a bare `cudnn.benchmark` expression only reads it
# and leaves the cuDNN auto-tuner disabled.
cudnn.benchmark = True

current_step = 0
# Iterate over the epochs; each epoch is one full pass over the photo dataset
for epoch in range(NUM_EPOCHS):
  net.train() # Training mode, set once per epoch instead of once per batch
  running_corrects = 0

  for images, labels in tqdm(photos_dataloader):
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # PyTorch accumulates gradients across backward passes, so clear them first
    optimizer.zero_grad()

    # Forward pass and loss against the ground truth
    outputs = net(images)
    loss = criterion(outputs, labels)

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on the computed gradients
    current_step += 1

    # Count correct predictions (these outputs were produced in training mode)
    preds = outputs.argmax(dim=1)
    running_corrects += (preds == labels).sum().item()

  # Calculate Accuracy
  accuracy = running_corrects / float(len(photos))
  print("Accuracy on training set = "  + str(accuracy))

  # Advance the schedule only after this epoch's optimizer steps. Stepping at
  # the top of the epoch (before any optimizer.step()) skips the first value of
  # the schedule and applies the step-down one epoch too early.
  scheduler.step()
    

 14%|█▍        | 2/14 [00:02<00:17,  1.47s/it]

Step 0, Loss 2.0112955570220947


 79%|███████▊  | 11/14 [00:05<00:01,  2.04it/s]

Step 10, Loss 1.9544504880905151


100%|██████████| 14/14 [00:06<00:00,  2.29it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.181437125748503


 57%|█████▋    | 8/14 [00:04<00:02,  2.41it/s]

Step 20, Loss 1.8786988258361816


100%|██████████| 14/14 [00:06<00:00,  2.30it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.29101796407185626


 21%|██▏       | 3/14 [00:02<00:11,  1.03s/it]

Step 30, Loss 1.818904161453247


100%|██████████| 14/14 [00:06<00:00,  2.30it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Step 40, Loss 1.6520856618881226
Accuracy on training set = 0.36766467065868264


 64%|██████▍   | 9/14 [00:05<00:03,  1.62it/s]

Step 50, Loss 1.6478114128112793


100%|██████████| 14/14 [00:06<00:00,  2.29it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.45269461077844314


 43%|████▎     | 6/14 [00:03<00:05,  1.48it/s]

Step 60, Loss 1.4276676177978516


100%|██████████| 14/14 [00:06<00:00,  2.29it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.5329341317365269


 14%|█▍        | 2/14 [00:02<00:17,  1.44s/it]

Step 70, Loss 1.5461167097091675


 86%|████████▌ | 12/14 [00:05<00:00,  2.55it/s]

Step 80, Loss 1.402630090713501


100%|██████████| 14/14 [00:06<00:00,  2.22it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.5544910179640719


 57%|█████▋    | 8/14 [00:04<00:02,  2.37it/s]

Step 90, Loss 1.3871232271194458


100%|██████████| 14/14 [00:06<00:00,  2.25it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.6053892215568862


 29%|██▊       | 4/14 [00:02<00:07,  1.32it/s]

Step 100, Loss 1.2586095333099365


100%|██████████| 14/14 [00:06<00:00,  2.26it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Step 110, Loss 1.2893433570861816
Accuracy on training set = 0.6173652694610778


 64%|██████▍   | 9/14 [00:05<00:03,  1.62it/s]

Step 120, Loss 1.2737518548965454


100%|██████████| 14/14 [00:06<00:00,  2.27it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.6383233532934132


 36%|███▌      | 5/14 [00:03<00:07,  1.16it/s]

Step 130, Loss 1.2010056972503662


100%|██████████| 14/14 [00:06<00:00,  2.29it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.655688622754491


 14%|█▍        | 2/14 [00:02<00:16,  1.40s/it]

Step 140, Loss 1.116728663444519


 79%|███████▊  | 11/14 [00:05<00:01,  2.06it/s]

Step 150, Loss 0.9816076755523682


100%|██████████| 14/14 [00:06<00:00,  2.32it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.6868263473053893


 57%|█████▋    | 8/14 [00:04<00:02,  2.34it/s]

Step 160, Loss 0.9293521046638489


100%|██████████| 14/14 [00:06<00:00,  2.26it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.6958083832335329


 29%|██▊       | 4/14 [00:02<00:08,  1.13it/s]

Step 170, Loss 0.972048819065094


100%|██████████| 14/14 [00:06<00:00,  2.30it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Step 180, Loss 0.9433964490890503
Accuracy on training set = 0.7149700598802395


 71%|███████▏  | 10/14 [00:05<00:01,  2.01it/s]

Step 190, Loss 0.9736971259117126


100%|██████████| 14/14 [00:06<00:00,  2.28it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.729940119760479


 43%|████▎     | 6/14 [00:03<00:05,  1.43it/s]

Step 200, Loss 1.0001200437545776


100%|██████████| 14/14 [00:06<00:00,  2.26it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.7502994011976047


  7%|▋         | 1/14 [00:01<00:24,  1.86s/it]

Step 210, Loss 0.8775646090507507


 79%|███████▊  | 11/14 [00:05<00:01,  2.72it/s]

Step 220, Loss 0.9194539785385132


100%|██████████| 14/14 [00:06<00:00,  2.29it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.7592814371257485


 57%|█████▋    | 8/14 [00:04<00:02,  2.38it/s]

Step 230, Loss 0.7458876371383667


100%|██████████| 14/14 [00:06<00:00,  2.31it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.7580838323353294


 29%|██▊       | 4/14 [00:02<00:07,  1.30it/s]

Step 240, Loss 0.8007180690765381


100%|██████████| 14/14 [00:06<00:00,  2.29it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Step 250, Loss 0.732064425945282
Accuracy on training set = 0.781437125748503


 64%|██████▍   | 9/14 [00:05<00:03,  1.60it/s]

Step 260, Loss 0.7487094402313232


100%|██████████| 14/14 [00:06<00:00,  2.31it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.7916167664670659


 36%|███▌      | 5/14 [00:03<00:08,  1.08it/s]

Step 270, Loss 0.7033335566520691


100%|██████████| 14/14 [00:06<00:00,  2.30it/s]

Accuracy on training set = 0.7988023952095809





## **Test**

In [29]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
# Evaluation needs no gradients: disabling autograd avoids building a graph
# for every batch and lowers memory use.
with torch.no_grad():
  for images, labels in tqdm(art_painting_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass
    outputs = net(images)

    # Overwritten on every iteration, so after the loop `loss` holds the value
    # of the LAST batch only (the following cell displays it).
    loss = criterion(outputs, labels)

    # Get predictions and update the number of correct ones
    preds = outputs.argmax(dim=1)
    running_corrects += (preds == labels).sum().item()

# Calculate Accuracy
accuracy = running_corrects / float(len(art_painting))

print('Test Accuracy: {}'.format(accuracy))

100%|██████████| 16/16 [00:07<00:00,  2.22it/s]

Test Accuracy: 0.27734375





In [30]:
# Loss of the LAST test batch only: `loss` is reassigned on every iteration of the test loop
loss.item()

2.931746244430542

# Model with DANN

## Network

In [0]:
# Fresh 7-class network for the DANN experiment, created and moved to DEVICE in one step
net = alexNetDA(num_classes=7).to(DEVICE)

## Loss, Optim and Scheduler

In [0]:
# Separate cross-entropy criteria for the class predictions and the domain predictions
criterion_class, criterion_domain = nn.CrossEntropyLoss(), nn.CrossEntropyLoss()

# Every parameter of the network is optimized
parameters_to_optimize = net.parameters()

# Adam with initial learning rate LR. To use SGD with momentum instead:
# optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
optimizer = optim.Adam(params=parameters_to_optimize, lr=LR)

# The learning rate is multiplied by GAMMA every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=STEP_SIZE,
    gamma=GAMMA,
)

## Test function

In [0]:
def testFunction(datasetName, epoch):  
  net = torch.load('model_epoch_' + str(epoch) + '.pt')
  net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
  net.train(False) # Set Network to evaluation mode
  if (datasetName == 'photo'):
    dataLoader = DataLoader(photos, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
  elif( datasetName == 'artPainting'):
    dataLoader = DataLoader(art_painting, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

  lenLoader = len(dataLoader)
  iterator = iter(dataLoader)
  totalLen = 0
  running_corrects = 0
  criterion = nn.CrossEntropyLoss() 
  for i in range(lenLoader):
    t_img, t_label = next(iterator)

    t_img = t_img.to(DEVICE)
    t_label = t_label.to(DEVICE)

    # Forward Pass
    classes_output = net(t_img)

    loss = criterion(classes_output, t_label)

    # Get predictions
    _, preds = torch.max(classes_output.data, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == t_label.data).data.item()
    totalLen += len(t_img)

  # Calculate Accuracy
  accuracy = running_corrects / float(totalLen)

  print(f'Accuracy on  {datasetName}' f' during epoch {epoch}' f' is {accuracy}' f' loss is {loss}')

## Train 

In [0]:
#photos_dataloader = DataLoader(photos, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
#art_painting_dataloader = DataLoader(art_painting, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

# Number of batches per pass for each loader (photos first, art paintings second)
_batches_per_loader = (len(photos_dataloader), len(art_painting_dataloader))

# Length of the longer and of the shorter loader, in batches
max_batches = max(_batches_per_loader)
min_batches = min(_batches_per_loader)

In [35]:
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

# The flag must be *assigned*: a bare `cudnn.benchmark` expression only reads it
cudnn.benchmark = True


def _next_or_restart(iterator, loader):
  """Return (batch, iterator), restarting `loader` when `iterator` is exhausted."""
  try:
    return next(iterator), iterator
  except StopIteration:
    iterator = iter(loader)
    return next(iterator), iterator


running_corrects = 0
current_step = 0
# Each epoch runs max_batches steps, i.e. one full pass over the longer loader
for epoch in range(NUM_EPOCHS):
  net.train() # Training mode, set once per epoch
  iterPh = iter(photos_dataloader)
  iterAP = iter(art_painting_dataloader)
  for batch in range(max_batches):
    optimizer.zero_grad() # Zero-ing the gradients

    # Whichever loader runs out first is restarted. This no longer assumes the
    # photo loader is the shorter one, so a shorter target loader cannot raise
    # StopIteration in the middle of an epoch.
    (images_source, labels_source), iterPh = _next_or_restart(iterPh, photos_dataloader)

    # Source samples carry domain label 0
    labels_domain = torch.zeros(len(images_source), dtype=torch.long)

    # Bring data over the device of choice
    images_source = images_source.to(DEVICE)
    labels_source = labels_source.to(DEVICE)
    labels_domain = labels_domain.to(DEVICE)

    # Class predictions on the source batch and their loss
    classes_output = net(images_source)
    loss_s_label = criterion_class(classes_output, labels_source)

    # Domain predictions on the source batch (net called with alfa) and their loss
    domain_output = net(images_source, alfa)
    loss_s_domain = criterion_domain(domain_output, labels_domain)

    # Target batch: its class labels are not used; samples carry domain label 1
    (targets, _target_labels), iterAP = _next_or_restart(iterAP, art_painting_dataloader)
    target_domain = torch.ones(len(targets), dtype=torch.long)

    # Bring data over the device of choice
    targets = targets.to(DEVICE)
    target_domain = target_domain.to(DEVICE)

    # Domain predictions on the target batch and their loss
    target_output = net(targets, alfa)
    loss_t_domain = criterion_domain(target_output,target_domain)

    # One backward pass over the sum of the three losses
    loss = loss_s_label + loss_s_domain + loss_t_domain
    loss.backward()  # backward pass: computes gradients

    optimizer.step() # update weights based on accumulated gradients

    current_step += 1

    print(f'[{batch+1}/{max_batches}] '
          f'class_loss: {loss_s_label.item():.4f} ' f's_domain_loss: {loss_s_domain.item():.4f} '
          f't_domain_loss: {loss_t_domain.item():.4f} '
          )  

  # Advance the schedule only after this epoch's optimizer steps; stepping at
  # the top of the epoch applies the step-down one epoch too early.
  scheduler.step()

  # Save the whole model for this epoch; testFunction reloads it from this file
  torch.save(net, 'model_epoch_{0}.pt'.format(epoch))
  testFunction('photo', epoch)
  testFunction('artPainting', epoch)
  



[1/16] class_loss: 2.1084 s_domain_loss: 0.9644 t_domain_loss: 0.5036 
[2/16] class_loss: 2.0468 s_domain_loss: 0.9527 t_domain_loss: 0.5049 
[3/16] class_loss: 2.1378 s_domain_loss: 0.9430 t_domain_loss: 0.5195 
[4/16] class_loss: 2.1097 s_domain_loss: 0.9499 t_domain_loss: 0.5179 
[5/16] class_loss: 2.0293 s_domain_loss: 0.9573 t_domain_loss: 0.5178 
[6/16] class_loss: 2.1722 s_domain_loss: 0.9413 t_domain_loss: 0.5178 
[7/16] class_loss: 2.1671 s_domain_loss: 0.9443 t_domain_loss: 0.5220 
[8/16] class_loss: 2.0296 s_domain_loss: 0.9461 t_domain_loss: 0.5275 
[9/16] class_loss: 1.9898 s_domain_loss: 0.9376 t_domain_loss: 0.5275 
[10/16] class_loss: 2.1053 s_domain_loss: 0.9369 t_domain_loss: 0.5261 
[11/16] class_loss: 2.0317 s_domain_loss: 0.9337 t_domain_loss: 0.5269 
[12/16] class_loss: 1.9593 s_domain_loss: 0.9427 t_domain_loss: 0.5261 
[13/16] class_loss: 2.0361 s_domain_loss: 0.9339 t_domain_loss: 0.5225 
[14/16] class_loss: 1.8322 s_domain_loss: 0.9710 t_domain_loss: 0.5290 
[

In [36]:
# Display the learning rate that was passed to the Adam optimizer
LR

1e-06

In [37]:
# Display the value passed as the second argument of net(...) in the domain forward passes
alfa

1