<a href="https://colab.research.google.com/github/luciainnocenti/Homework3-PACS/blob/master/MLDL_Homework3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import libraries**

In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms

from PIL import Image
from tqdm import tqdm


from numpy import random

# Seed every random number generator the notebook relies on. NumPy alone is
# not enough: DataLoader shuffling and layer initialisation draw from
# PyTorch's generator, so it is seeded with the same value.
SEED = 33
random.seed(SEED)
torch.manual_seed(SEED)

# **Set Arguments**

In [0]:
# ---- Hardware ---------------------------------------------------------------
DEVICE = 'cuda'  # either 'cuda' or 'cpu'

# ---- Optimisation -----------------------------------------------------------
# Larger batches allow larger learning rates: an empirical heuristic is to
# scale the learning rate by the same factor as the batch size in order to
# obtain comparable results.
BATCH_SIZE = 128
LR = 1e-4            # initial learning rate
MOMENTUM = 0.9       # SGD momentum (only relevant for the SGD alternative)
WEIGHT_DECAY = 5e-5  # L2 regularisation (only relevant for the SGD alternative)

# ---- Schedule ---------------------------------------------------------------
NUM_EPOCHS = 20      # full passes over the training data
STEP_SIZE = 20       # epochs between two learning-rate step-downs
GAMMA = 0.1          # factor applied to the learning rate at each step-down

# ---- Logging ----------------------------------------------------------------
LOG_FREQUENCY = 10   # the training loss is printed every LOG_FREQUENCY steps

# ---- Domain adaptation ------------------------------------------------------
# Forwarded as the second argument of net(...) when the domain output is
# requested; presumably the gradient-reversal strength -- confirm against
# alexNetDA.
alfa = 1

# **Define Data Preprocessing**

In [0]:
# Preprocessing applied to training images: resize, take the central
# 224x224 crop, convert to a tensor and normalise each channel with
# mean 0.5 and standard deviation 0.5.
train_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Preprocessing applied to test images: the same deterministic steps as the
# training pipeline.
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# **Prepare Dataset**

In [18]:
# Clone github repository with data
!git clone https://github.com/luciainnocenti/Homework3-PACS.git
!mv 'Homework3-PACS' 'HW_PACS'

from HW_PACS.dataset import PACS_Dataset 

rootPhoto = "HW_PACS/PACS/photo"
photos = PACS_Dataset(root = rootPhoto, transform = train_transform)

rootArt_painting = "HW_PACS/PACS/art_painting"
art_painting = PACS_Dataset(root = rootArt_painting, transform = test_transform)

# Check dataset sizes
print('Train Dataset: {}'.format(len(photos)))
print('Test Dataset: {}'.format(len(art_painting)))

Cloning into 'Homework3-PACS'...
remote: Enumerating objects: 3, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 10105 (delta 0), reused 0 (delta 0), pack-reused 10102[K
Receiving objects: 100% (10105/10105), 174.19 MiB | 30.55 MiB/s, done.
Resolving deltas: 100% (43/43), done.
Checking out files: 100% (9995/9995), done.
Train Dataset: 1670
Test Dataset: 2048


# **Prepare Dataloaders**

In [0]:
# DataLoaders wrap a dataset and take care of batching, shuffling and
# loading samples in parallel worker processes.

# Photo batches are reshuffled at every epoch.
photos_dataloader = DataLoader(
    photos,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)

# Art-painting batches keep the dataset order.
art_painting_dataloader = DataLoader(
    art_painting,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

# **Model without DANN**

## **Prepare Network**

In [0]:
# alexNetDA is provided by the cloned HW_PACS repository
# (module gradient_reversal_example).
from HW_PACS.gradient_reversal_example import alexNetDA 

# Instantiate the network with num_classes=7 -- presumably one output per
# PACS category; confirm against the alexNetDA definition.
net = alexNetDA(num_classes = 7)


## **Prepare Training**

In [0]:
# Loss: cross-entropy between the network outputs and the class labels.
criterion = nn.CrossEntropyLoss()

# Parameters handed to the optimizer. Here every parameter of the network is
# trained; a subset can be selected by taking the parameters of a submodule
# instead (nn.Module objects implement the Composite Pattern), e.g.
# net.classifier.parameters() for the fully connected layers.
parameters_to_optimize = net.parameters()

# Optimizer: Adam with the configured initial learning rate.
# SGD alternative:
#   optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
optimizer = optim.Adam(parameters_to_optimize, lr=LR)

# Scheduler: step-down policy, multiplying the learning rate by GAMMA every
# STEP_SIZE epochs.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=STEP_SIZE,
    gamma=GAMMA,
)

In [22]:
# Supervised training on the photo domain only (no domain adaptation).

# By default, everything is loaded to cpu
net = net.to(DEVICE)  # moves the network to the GPU when DEVICE is 'cuda'

# The flag has to be assigned: merely referencing the attribute has no
# effect. When enabled, cuDNN benchmarks its algorithms and picks the
# fastest one for the input sizes it sees.
cudnn.benchmark = True

running_corrects = 0
current_step = 0

for epoch in range(NUM_EPOCHS):
  # Training mode is set once per epoch; nothing inside the batch loop
  # switches the network to evaluation mode.
  net.train()

  for images, labels in tqdm(photos_dataloader):
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # PyTorch accumulates gradients across backward passes, so they are
    # cleared before each new iteration.
    optimizer.zero_grad()

    # Forward pass and loss against the ground truth
    outputs = net(images)
    loss = criterion(outputs, labels)

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    loss.backward()   # backward pass: computes gradients
    optimizer.step()  # update weights based on the computed gradients
    current_step += 1

    # Predicted class = index of the highest score; detach() keeps the
    # bookkeeping out of the autograd graph.
    _, preds = torch.max(outputs.detach(), 1)
    running_corrects += torch.sum(preds == labels).item()

  # Accuracy accumulated over the batches of this epoch
  accuracy = running_corrects / float(len(photos))
  print("Accuracy on training set = "  + str(accuracy))
  running_corrects = 0

  # Advance the learning-rate schedule once per epoch, after the optimizer
  # updates. Stepping before the first optimizer.step() shifts the schedule
  # by one epoch on PyTorch >= 1.1 and emits a warning.
  scheduler.step()
    

  7%|▋         | 1/14 [00:02<00:38,  2.94s/it]

Step 0, Loss 2.0280373096466064


 79%|███████▊  | 11/14 [00:08<00:01,  1.83it/s]

Step 10, Loss 0.39569905400276184


100%|██████████| 14/14 [00:08<00:00,  1.58it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.7766467065868263


 50%|█████     | 7/14 [00:05<00:05,  1.35it/s]

Step 20, Loss 0.17666594684123993


100%|██████████| 14/14 [00:08<00:00,  1.62it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9365269461077844


 21%|██▏       | 3/14 [00:03<00:16,  1.47s/it]

Step 30, Loss 0.16246397793293


 93%|█████████▎| 13/14 [00:08<00:00,  2.35it/s]

Step 40, Loss 0.039291128516197205


100%|██████████| 14/14 [00:08<00:00,  1.61it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9670658682634731


 64%|██████▍   | 9/14 [00:07<00:04,  1.14it/s]

Step 50, Loss 0.04401931166648865


100%|██████████| 14/14 [00:08<00:00,  1.60it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9892215568862276


 36%|███▌      | 5/14 [00:05<00:11,  1.30s/it]

Step 60, Loss 0.01564890705049038


100%|██████████| 14/14 [00:08<00:00,  1.58it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9970059880239521


  7%|▋         | 1/14 [00:02<00:38,  2.94s/it]

Step 70, Loss 0.015791703015565872


 79%|███████▊  | 11/14 [00:08<00:01,  1.87it/s]

Step 80, Loss 0.0037064626812934875


100%|██████████| 14/14 [00:08<00:00,  1.61it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9994011976047904


 50%|█████     | 7/14 [00:05<00:05,  1.31it/s]

Step 90, Loss 0.00273846834897995


100%|██████████| 14/14 [00:08<00:00,  1.61it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9988023952095808


 21%|██▏       | 3/14 [00:03<00:16,  1.46s/it]

Step 100, Loss 0.0031649209558963776


 93%|█████████▎| 13/14 [00:08<00:00,  2.33it/s]

Step 110, Loss 0.0023523159325122833


100%|██████████| 14/14 [00:08<00:00,  1.59it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 1.0


 64%|██████▍   | 9/14 [00:07<00:04,  1.14it/s]

Step 120, Loss 0.0006505213677883148


100%|██████████| 14/14 [00:08<00:00,  1.61it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 1.0


 36%|███▌      | 5/14 [00:05<00:11,  1.24s/it]

Step 130, Loss 0.0003470107913017273


100%|██████████| 14/14 [00:08<00:00,  1.60it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9994011976047904


  7%|▋         | 1/14 [00:02<00:37,  2.88s/it]

Step 140, Loss 0.0053692832589149475


 79%|███████▊  | 11/14 [00:07<00:01,  1.80it/s]

Step 150, Loss 0.0004733428359031677


100%|██████████| 14/14 [00:08<00:00,  1.61it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 1.0


 50%|█████     | 7/14 [00:05<00:05,  1.31it/s]

Step 160, Loss 0.0026524774730205536


100%|██████████| 14/14 [00:08<00:00,  1.60it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 1.0


 21%|██▏       | 3/14 [00:03<00:15,  1.45s/it]

Step 170, Loss 0.0006532184779644012


 93%|█████████▎| 13/14 [00:08<00:00,  2.27it/s]

Step 180, Loss 0.0007004812359809875


100%|██████████| 14/14 [00:08<00:00,  1.61it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 1.0


 64%|██████▍   | 9/14 [00:07<00:04,  1.14it/s]

Step 190, Loss 0.0001503974199295044


100%|██████████| 14/14 [00:08<00:00,  1.60it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 1.0


 36%|███▌      | 5/14 [00:04<00:10,  1.19s/it]

Step 200, Loss 0.00011843070387840271


100%|██████████| 14/14 [00:08<00:00,  1.62it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 1.0


  7%|▋         | 1/14 [00:02<00:35,  2.77s/it]

Step 210, Loss 0.0005457401275634766


 79%|███████▊  | 11/14 [00:08<00:01,  1.75it/s]

Step 220, Loss 0.006246618926525116


100%|██████████| 14/14 [00:08<00:00,  1.59it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9988023952095808


 50%|█████     | 7/14 [00:05<00:05,  1.28it/s]

Step 230, Loss 0.0003522410988807678


100%|██████████| 14/14 [00:08<00:00,  1.58it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 1.0


 21%|██▏       | 3/14 [00:03<00:16,  1.48s/it]

Step 240, Loss 0.0005271323025226593


 93%|█████████▎| 13/14 [00:08<00:00,  2.29it/s]

Step 250, Loss 0.0001693516969680786


100%|██████████| 14/14 [00:08<00:00,  1.59it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 1.0


 64%|██████▍   | 9/14 [00:07<00:04,  1.15it/s]

Step 260, Loss 4.484504461288452e-05


100%|██████████| 14/14 [00:08<00:00,  1.58it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 1.0


 36%|███▌      | 5/14 [00:05<00:11,  1.22s/it]

Step 270, Loss 0.000999700278043747


100%|██████████| 14/14 [00:08<00:00,  1.60it/s]

Accuracy on training set = 1.0





## **Test**

In [23]:
# Evaluate the trained network on the art-painting domain.
net = net.to(DEVICE)  # moves the network to the GPU when DEVICE is 'cuda'
net.eval()            # evaluation mode (same as net.train(False))

running_corrects = 0
# No gradients are needed for evaluation; disabling them avoids building the
# autograd graph for every batch and saves memory.
with torch.no_grad():
  for images, labels in tqdm(art_painting_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Forward Pass
    outputs = net(images)

    # `loss` is overwritten at every iteration, so after the loop it holds
    # the value of the last batch only.
    loss = criterion(outputs, labels)

    # Predicted class = index of the highest score
    _, preds = torch.max(outputs, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels).item()

# Calculate Accuracy
accuracy = running_corrects / float(len(art_painting))

print('Test Accuracy: {}'.format(accuracy))

100%|██████████| 16/16 [00:09<00:00,  1.62it/s]

Test Accuracy: 0.478515625





In [24]:
# Cross-entropy of the last test batch only: `loss` is reassigned on every
# iteration of the test loop, so this is not an average over the test set.
loss.item()

10.876704216003418

# Model with DANN

## Network

In [0]:
# Re-instantiate the network so the DANN run does not continue from the
# weights trained in the previous section, and place it on the chosen device.
net = alexNetDA(num_classes=7).to(DEVICE)

## Loss, Optim and Scheduler

In [0]:
# One cross-entropy criterion per output: class labels and domain labels.
criterion_class = nn.CrossEntropyLoss()
criterion_domain = nn.CrossEntropyLoss()

# All parameters of the network are optimised.
parameters_to_optimize = net.parameters()

# Adam with the configured initial learning rate.
# SGD alternative:
#   optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
optimizer = optim.Adam(parameters_to_optimize, lr=LR)

# Step-down schedule: learning rate multiplied by GAMMA every STEP_SIZE epochs.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=STEP_SIZE,
    gamma=GAMMA,
)

## Test function

In [0]:
def testFunction(datasetName, epoch):  
  net = torch.load('model_epoch_' + str(epoch) + '.pt')
  net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
  net.train(False) # Set Network to evaluation mode
  if (datasetName == 'photo'):
    dataLoader = DataLoader(photos, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
  elif( datasetName == 'artPainting'):
    dataLoader = DataLoader(art_painting, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

  lenLoader = len(dataLoader)
  iterator = iter(dataLoader)
  totalLen = 0
  running_corrects = 0
  criterion = nn.CrossEntropyLoss() 
  for i in range(lenLoader):
    t_img, t_label = next(iterator)

    t_img = t_img.to(DEVICE)
    t_label = t_label.to(DEVICE)

    # Forward Pass
    classes_output = net(t_img)

    loss = criterion(classes_output, t_label)

    # Get predictions
    _, preds = torch.max(classes_output.data, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == t_label.data).data.item()
    totalLen += len(t_img)

  # Calculate Accuracy
  accuracy = running_corrects / float(totalLen)

  print(f'Accuracy on  {datasetName}' f' during epoch {epoch}' f' is {accuracy}' f' loss is {loss}')

## Train 

In [0]:
#photos_dataloader = DataLoader(photos, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
#art_painting_dataloader = DataLoader(art_painting, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

# Batches produced per epoch by each domain's loader; the longer one sets
# the number of iterations of a DANN epoch.
_batch_counts = (len(photos_dataloader), len(art_painting_dataloader))
max_batches = max(_batch_counts)
min_batches = min(_batch_counts)

In [29]:
# DANN training: every step combines the class loss on a source (photo)
# batch with the domain loss on that same source batch and on a target
# (art painting) batch.

# By default, everything is loaded to cpu
net = net.to(DEVICE)  # moves the network to the GPU when DEVICE is 'cuda'

# The flag has to be assigned: merely referencing the attribute has no
# effect. When enabled, cuDNN benchmarks its algorithms and picks the
# fastest one for the input sizes it sees.
cudnn.benchmark = True

running_corrects = 0
current_step = 0

for epoch in range(NUM_EPOCHS):
  net.train()  # training mode, set once per epoch
  iterPh = iter(photos_dataloader)
  iterAP = iter(art_painting_dataloader)

  for batch in range(max_batches):
    optimizer.zero_grad()  # clear gradients left by the previous step

    # ---- Source batch ----
    # Whichever loader is shorter is restarted when it runs out, so the
    # loop works regardless of which domain has fewer batches and of how
    # many times the shorter one must be repeated.
    try:
      images_source, labels_source = next(iterPh)
    except StopIteration:
      iterPh = iter(photos_dataloader)
      images_source, labels_source = next(iterPh)

    # Domain label 0 marks source samples
    labels_domain = torch.zeros(len(images_source), dtype=torch.long)

    # Bring data over the device of choice
    images_source = images_source.to(DEVICE)
    labels_source = labels_source.to(DEVICE)
    labels_domain = labels_domain.to(DEVICE)

    # Class prediction and class loss on the source domain
    classes_output = net(images_source)
    loss_s_label = criterion_class(classes_output, labels_source)

    # Passing alfa makes the network return the domain prediction
    # (presumably through the gradient-reversal branch -- confirm against
    # alexNetDA). Domain loss on the source domain.
    domain_output = net(images_source, alfa)
    loss_s_domain = criterion_domain(domain_output, labels_domain)

    # ---- Target batch (its class labels are not used) ----
    try:
      targets, _ = next(iterAP)
    except StopIteration:
      iterAP = iter(art_painting_dataloader)
      targets, _ = next(iterAP)

    # Domain label 1 marks target samples
    target_domain = torch.ones(len(targets), dtype=torch.long)

    # Bring data over the device of choice
    targets = targets.to(DEVICE)
    target_domain = target_domain.to(DEVICE)

    # Domain loss on the target domain
    target_output = net(targets, alfa)
    loss_t_domain = criterion_domain(target_output, target_domain)

    # One backward pass over the sum of the three losses
    loss = loss_s_label + loss_s_domain + loss_t_domain
    loss.backward()   # backward pass: computes gradients
    optimizer.step()  # update weights based on the computed gradients

    current_step += 1

    print(f'[{batch+1}/{max_batches}] '
          f'class_loss: {loss_s_label.item():.4f} ' f's_domain_loss: {loss_s_domain.item():.4f} '
          f't_domain_loss: {loss_t_domain.item():.4f} '
          )

  # Advance the learning-rate schedule once per epoch, after the optimizer
  # updates. Stepping before the first optimizer.step() shifts the schedule
  # by one epoch on PyTorch >= 1.1 and emits a warning.
  scheduler.step()

  # Checkpoint the model, then evaluate that checkpoint on both domains
  torch.save(net, 'model_epoch_{0}.pt'.format(epoch))
  testFunction('photo', epoch)
  testFunction('artPainting', epoch)
  



[1/16] class_loss: 1.9096 s_domain_loss: 0.5665 t_domain_loss: 0.8743 
[2/16] class_loss: 1.3034 s_domain_loss: 0.6973 t_domain_loss: 0.6983 
[3/16] class_loss: 1.0588 s_domain_loss: 0.7857 t_domain_loss: 0.6153 
[4/16] class_loss: 0.7645 s_domain_loss: 0.7648 t_domain_loss: 0.5997 
[5/16] class_loss: 0.6219 s_domain_loss: 0.7296 t_domain_loss: 0.6371 
[6/16] class_loss: 0.4314 s_domain_loss: 0.6431 t_domain_loss: 0.6884 
[7/16] class_loss: 0.2951 s_domain_loss: 0.6099 t_domain_loss: 0.7337 
[8/16] class_loss: 0.3522 s_domain_loss: 0.6059 t_domain_loss: 0.7399 
[9/16] class_loss: 0.3453 s_domain_loss: 0.5721 t_domain_loss: 0.7321 
[10/16] class_loss: 0.3839 s_domain_loss: 0.5945 t_domain_loss: 0.7043 
[11/16] class_loss: 0.2979 s_domain_loss: 0.6344 t_domain_loss: 0.6490 
[12/16] class_loss: 0.2380 s_domain_loss: 0.6542 t_domain_loss: 0.6149 
[13/16] class_loss: 0.2660 s_domain_loss: 0.6797 t_domain_loss: 0.6032 
[14/16] class_loss: 0.1433 s_domain_loss: 0.7670 t_domain_loss: 0.5964 
[

In [30]:
LR

0.0001

In [31]:
alfa

1