<a href="https://colab.research.google.com/github/luciainnocenti/Homework3-PACS/blob/master/MLDL_Homework3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**Import libraries**

In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms

from PIL import Image
from tqdm import tqdm


from numpy import random 

random.seed(33)

#**Set Arguments**

In [0]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

BATCH_SIZE = 128     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 20      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 20       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10

alfa = 0.01
LR = 1e-4          # The initial Learning Rate

#**Define Data Preprocessing**

In [0]:
# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      #transforms.RandomCrop( 64 , padding =2) ,
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # Normalizes tensor with mean and standard deviation
])
# Define transforms for the test phase
test_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))                                    
])

#**Prepare Dataset**

In [17]:
# Clone github repository with data
!git clone https://github.com/luciainnocenti/Homework3-PACS.git
!mv 'Homework3-PACS' 'HW_PACS'

from HW_PACS.dataset import PACS_Dataset 

rootPhoto = "HW_PACS/PACS/photo"
photos = PACS_Dataset(root = rootPhoto, transform = train_transform)

rootArt_painting = "HW_PACS/PACS/art_painting"
art_painting = PACS_Dataset(root = rootArt_painting, transform = test_transform)

# Check dataset sizes
print('Train Dataset: {}'.format(len(photos)))
print('Test Dataset: {}'.format(len(art_painting)))

Cloning into 'Homework3-PACS'...
remote: Enumerating objects: 10102, done.[K
remote: Total 10102 (delta 0), reused 0 (delta 0), pack-reused 10102[K
Receiving objects: 100% (10102/10102), 174.18 MiB | 29.97 MiB/s, done.
Resolving deltas: 100% (43/43), done.
Checking out files: 100% (9995/9995), done.
Train Dataset: 1670
Test Dataset: 2048


#**Prepare Dataloaders**

In [0]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
photos_dataloader = DataLoader(photos, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
art_painting_dataloader = DataLoader(art_painting, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

#**Model without DANN**

##**Prepare Network**

In [0]:
from HW_PACS.gradient_reversal_example import alexNetDA 

net = alexNetDA(num_classes = 7)


##**Prepare Training**

In [0]:
# Define loss function
criterion = nn.CrossEntropyLoss() 

# Choose parameters to optimize
# To access a different set of parameters, you have to access submodules of AlexNet
# (nn.Module objects, like AlexNet, implement the Composite Pattern)
# e.g.: parameters of the fully connected layers: net.classifier.parameters()
# e.g.: parameters of the convolutional layers: look at alexnet's source code ;) 
parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet

# Define optimizer
# An optimizer updates the weights based on loss
# We use SGD with momentum

optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

#optimizer = optim.Adam(parameters_to_optimize, LR)

# Define scheduler
# A scheduler dynamically changes learning rate
# The most common schedule is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

In [21]:
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

cudnn.benchmark # Calling this optimizes runtime
running_corrects = 0
current_step = 0
# Start iterating over the epochs
# Iterate over the dataset
for epoch in range(NUM_EPOCHS):
  scheduler.step() 
  
  for images, labels in  tqdm(photos_dataloader):

    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    net.train() # Sets module in training mode

    # PyTorch, by default, accumulates gradients after each backward pass
    # We need to manually set the gradients to zero before starting a new iteration
    optimizer.zero_grad() # Zero-ing the gradients

    # Forward pass to the network
    outputs = net(images)

    # Compute loss based on output and ground truth
    loss = criterion(outputs, labels)

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    # Compute gradients for each layer and update weights

    loss.backward()  # backward pass: computes gradients

    optimizer.step() # update weights based on accumulated gradients

    current_step += 1
    # Get predictions
    _, preds = torch.max(outputs.data, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == labels.data).data.item()

  # Calculate Accuracy
  accuracy = running_corrects / float(len(photos))
  print("Accuracy on training set = "  + str(accuracy))
  running_corrects = 0
    

 14%|█▍        | 2/14 [00:02<00:17,  1.44s/it]

Step 0, Loss 2.1148531436920166


 79%|███████▊  | 11/14 [00:05<00:01,  2.63it/s]

Step 10, Loss 1.4435231685638428


100%|██████████| 14/14 [00:06<00:00,  2.30it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.36946107784431137


 57%|█████▋    | 8/14 [00:04<00:02,  2.49it/s]

Step 20, Loss 0.9774367213249207


100%|██████████| 14/14 [00:06<00:00,  2.31it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.6970059880239521


 29%|██▊       | 4/14 [00:02<00:07,  1.32it/s]

Step 30, Loss 0.7604193091392517


100%|██████████| 14/14 [00:06<00:00,  2.31it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Step 40, Loss 0.4694751799106598
Accuracy on training set = 0.8311377245508982


 71%|███████▏  | 10/14 [00:05<00:01,  2.04it/s]

Step 50, Loss 0.38208815455436707


100%|██████████| 14/14 [00:06<00:00,  2.31it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.8784431137724551


 43%|████▎     | 6/14 [00:03<00:05,  1.49it/s]

Step 60, Loss 0.40540677309036255


100%|██████████| 14/14 [00:06<00:00,  2.32it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9005988023952096


 14%|█▍        | 2/14 [00:01<00:15,  1.33s/it]

Step 70, Loss 0.3361206352710724


 79%|███████▊  | 11/14 [00:05<00:01,  2.53it/s]

Step 80, Loss 0.2659076154232025


100%|██████████| 14/14 [00:06<00:00,  2.30it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9149700598802395


 57%|█████▋    | 8/14 [00:04<00:02,  2.29it/s]

Step 90, Loss 0.2513381242752075


100%|██████████| 14/14 [00:06<00:00,  2.29it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9173652694610779


 29%|██▊       | 4/14 [00:02<00:07,  1.33it/s]

Step 100, Loss 0.21343590319156647


100%|██████████| 14/14 [00:06<00:00,  2.29it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Step 110, Loss 0.2759970724582672
Accuracy on training set = 0.9275449101796407


 71%|███████▏  | 10/14 [00:05<00:01,  2.06it/s]

Step 120, Loss 0.19524644315242767


100%|██████████| 14/14 [00:06<00:00,  2.31it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9413173652694611


 36%|███▌      | 5/14 [00:03<00:08,  1.09it/s]

Step 130, Loss 0.1941853165626526


100%|██████████| 14/14 [00:06<00:00,  2.29it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9437125748502994


 14%|█▍        | 2/14 [00:02<00:17,  1.45s/it]

Step 140, Loss 0.21364907920360565


 79%|███████▊  | 11/14 [00:05<00:01,  2.07it/s]

Step 150, Loss 0.22004646062850952


100%|██████████| 14/14 [00:06<00:00,  2.29it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.937125748502994


 57%|█████▋    | 8/14 [00:04<00:02,  2.44it/s]

Step 160, Loss 0.1822589933872223


100%|██████████| 14/14 [00:06<00:00,  2.31it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.948502994011976


 29%|██▊       | 4/14 [00:02<00:07,  1.30it/s]

Step 170, Loss 0.19476532936096191


100%|██████████| 14/14 [00:06<00:00,  2.29it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Step 180, Loss 0.1456691175699234
Accuracy on training set = 0.9508982035928144


 64%|██████▍   | 9/14 [00:05<00:03,  1.55it/s]

Step 190, Loss 0.11495558172464371


100%|██████████| 14/14 [00:06<00:00,  2.29it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9556886227544911


 43%|████▎     | 6/14 [00:03<00:05,  1.45it/s]

Step 200, Loss 0.1887865513563156


100%|██████████| 14/14 [00:06<00:00,  2.31it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9532934131736527


 14%|█▍        | 2/14 [00:02<00:17,  1.43s/it]

Step 210, Loss 0.13302458822727203


 86%|████████▌ | 12/14 [00:05<00:00,  2.75it/s]

Step 220, Loss 0.08059795200824738


100%|██████████| 14/14 [00:06<00:00,  2.29it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9526946107784431


 57%|█████▋    | 8/14 [00:04<00:02,  2.34it/s]

Step 230, Loss 0.19394399225711823


100%|██████████| 14/14 [00:06<00:00,  2.30it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9574850299401197


 29%|██▊       | 4/14 [00:02<00:08,  1.21it/s]

Step 240, Loss 0.09139358997344971


100%|██████████| 14/14 [00:06<00:00,  2.30it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Step 250, Loss 0.16974523663520813
Accuracy on training set = 0.9580838323353293


 64%|██████▍   | 9/14 [00:05<00:03,  1.57it/s]

Step 260, Loss 0.12675343453884125


100%|██████████| 14/14 [00:06<00:00,  2.31it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

Accuracy on training set = 0.9580838323353293


 36%|███▌      | 5/14 [00:03<00:08,  1.10it/s]

Step 270, Loss 0.10453487187623978


100%|██████████| 14/14 [00:06<00:00,  2.30it/s]

Accuracy on training set = 0.9640718562874252





##**Test**

In [22]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(art_painting_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = net(images)

  loss = criterion(outputs, labels)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(art_painting))

print('Test Accuracy: {}'.format(accuracy))

100%|██████████| 16/16 [00:07<00:00,  2.24it/s]

Test Accuracy: 0.453125





In [23]:
loss.item()

5.220218181610107

# Model with DANN

## Network

In [0]:
net = alexNetDA(num_classes = 7)
net = net.to(DEVICE)

## Loss, Optim and Scheduler

In [0]:
criterion_class = nn.CrossEntropyLoss() 
criterion_domain = nn.CrossEntropyLoss()

parameters_to_optimize = net.parameters() 

optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

## Test function

In [0]:
def testFunction(datasetName, epoch):  
  net = torch.load('model_epoch_' + str(epoch) + '.pt')
  net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
  net.train(False) # Set Network to evaluation mode
  if (datasetName == 'photo'):
    dataLoader = DataLoader(photos, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
  elif( datasetName == 'artPainting'):
    dataLoader = DataLoader(art_painting, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

  lenLoader = len(dataLoader)
  iterator = iter(dataLoader)
  totalLen = 0
  running_corrects = 0
  criterion = nn.CrossEntropyLoss() 
  for i in range(lenLoader):
    t_img, t_label = next(iterator)

    t_img = t_img.to(DEVICE)
    t_label = t_label.to(DEVICE)

    # Forward Pass
    classes_output = net(t_img)

    loss = criterion(classes_output, t_label)

    # Get predictions
    _, preds = torch.max(classes_output.data, 1)

    # Update Corrects
    running_corrects += torch.sum(preds == t_label.data).data.item()
    totalLen += len(t_img)

  # Calculate Accuracy
  accuracy = running_corrects / float(totalLen)

  print(f'Accuracy on  {datasetName}' f' during epoch {epoch}' f' is {accuracy}' f' loss is {loss}')

## Train 

In [0]:
#photos_dataloader = DataLoader(photos, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
#art_painting_dataloader = DataLoader(art_painting, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

max_batches = max(len(photos_dataloader), len(art_painting_dataloader))
min_batches = min(len(photos_dataloader), len(art_painting_dataloader))

In [28]:
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

cudnn.benchmark # Calling this optimizes runtime
running_corrects = 0
current_step = 0
# Start iterating over the epochs
# Iterate over the dataset
for epoch in range(NUM_EPOCHS):
  scheduler.step() 
  iterPh = iter(photos_dataloader)
  iterAP = iter(art_painting_dataloader)
  for batch in range(max_batches):
    net.train() # Sets module in training mode
    optimizer.zero_grad() # Zero-ing the gradients
    if( batch == min_batches):
      iterPh = iter(photos_dataloader)
    images_source, labels_source = next(iterPh)
    labels_domain = torch.zeros(len(images_source), dtype=torch.long)
    
    # Bring data over the device of choice
    images_source = images_source.to(DEVICE)
    labels_source = labels_source.to(DEVICE)
    labels_domain = labels_domain.to(DEVICE)

  
    # Get the output for classes and domains; class_pred, domain_pred
    classes_output = net(images_source)
    # Compute the loss on the source domain
    loss_s_label = criterion_class(classes_output, labels_source)

    domain_output = net(images_source, alfa)
    # Compute the loss on the source domain
    loss_s_domain = criterion_domain(domain_output, labels_domain)

    # Get the output for targets
    targets, _ = next(iterAP)
    target_domain = torch.ones(len(targets), dtype=torch.long)

    # Bring data over the device of choice
    targets = targets.to(DEVICE)
    target_domain = target_domain.to(DEVICE)

    target_output = net(targets, alfa)

    # Compute the loss on the source domain
    loss_t_domain = criterion_domain(target_output,target_domain)

    loss = loss_s_label + loss_s_domain + loss_t_domain
    loss.backward()  # backward pass: computes gradients

    optimizer.step() # update weights based on accumulated gradients

    current_step += 1

    print(f'[{batch+1}/{max_batches}] '
          f'class_loss: {loss_s_label.item():.4f} ' f's_domain_loss: {loss_s_domain.item():.4f} '
          f't_domain_loss: {loss_t_domain.item():.4f} '
          )  
  torch.save(net, 'model_epoch_{0}.pt'.format(epoch))
  testFunction('photo', epoch)
  testFunction('artPainting', epoch)
  



[1/16] class_loss: 1.8094 s_domain_loss: 0.5647 t_domain_loss: 0.8890 
[2/16] class_loss: 1.9195 s_domain_loss: 0.5704 t_domain_loss: 0.8728 
[3/16] class_loss: 1.9845 s_domain_loss: 0.5858 t_domain_loss: 0.8663 
[4/16] class_loss: 1.7855 s_domain_loss: 0.6130 t_domain_loss: 0.8223 
[5/16] class_loss: 1.7782 s_domain_loss: 0.6305 t_domain_loss: 0.8063 
[6/16] class_loss: 1.6556 s_domain_loss: 0.6680 t_domain_loss: 0.7700 
[7/16] class_loss: 1.7188 s_domain_loss: 0.6935 t_domain_loss: 0.7398 
[8/16] class_loss: 1.3688 s_domain_loss: 0.7160 t_domain_loss: 0.7185 
[9/16] class_loss: 1.4695 s_domain_loss: 0.7437 t_domain_loss: 0.6835 
[10/16] class_loss: 1.3398 s_domain_loss: 0.7589 t_domain_loss: 0.6617 
[11/16] class_loss: 1.2121 s_domain_loss: 0.7829 t_domain_loss: 0.6489 
[12/16] class_loss: 1.2303 s_domain_loss: 0.7834 t_domain_loss: 0.6538 
[13/16] class_loss: 1.2330 s_domain_loss: 0.7910 t_domain_loss: 0.6392 
[14/16] class_loss: 0.9629 s_domain_loss: 0.7986 t_domain_loss: 0.6404 
[