# Setup

## Imports 

In [0]:
# Mount Google Drive at /content/drive (Colab only); later cells copy project
# files from "/content/drive/My Drive/DL_project/".
from google.colab import drive
drive.mount('/content/drive')

In [0]:
from PIL import Image
from sklearn import utils
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from torch.autograd import Function
from torch.backends import cudnn
from torch.utils.data import DataLoader
from torch.utils.data import Subset, DataLoader
from torch.utils.model_zoo import load_url as load_state_dict_from_url
from torchvision import models
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision.datasets import VisionDataset
from torchvision.models import alexnet
from torchvision.transforms.functional import pad
from tqdm import tqdm
import logging
import matplotlib.pyplot as plt
import numbers
import numpy as np
import os
import os.path
import shutil
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import zipfile


## Download datasets in colab
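These cells download the datasets directly from Google Drive links, so they should work for every user without access to the project's Drive folder.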

In [0]:
# Download ROD dataset
# The download is skipped when the extraction folder already exists.
rod_destination_path = "/content/ROD"
if not os.path.isdir(rod_destination_path):
  # ROD
  # https://drive.google.com/open?id=1p1GORdB44NjtNWJ4d1xqttseM1X9lWNF
  # https://drive.google.com/open?id=168neCvaHwMffFOqjOkth-wVaP4tRFuSW
  # Two-step Google Drive download: the first request only stores the cookies
  # in ./cookie (its body is discarded), the second one reuses them and passes
  # the last field of the cookie line matching "download" as `confirm` value,
  # saving the response as ROD.zip in the current directory.
  !curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=1p1GORdB44NjtNWJ4d1xqttseM1X9lWNF" > /dev/null
  !curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=1p1GORdB44NjtNWJ4d1xqttseM1X9lWNF" -o "ROD.zip"

In [0]:
# Download synROD dataset
# The download is skipped when the extraction folder already exists.
synrod_destination_path = "/content/synROD"
if not os.path.isdir(synrod_destination_path):
  # synROD
  # https://drive.google.com/open?id=1rry4GViJLmmMpbm0B2s7MyQs5Dx8pFS3
  # https://drive.google.com/open?id=1V1fthSNAvsPRF6hLt_kf_xonw7lxAV03
  # Two-step Google Drive download: the first request only stores the cookies
  # in ./cookie (its body is discarded), the second one reuses them and passes
  # the last field of the cookie line matching "download" as `confirm` value,
  # saving the response as synROD.zip in the current directory.
  !curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=1rry4GViJLmmMpbm0B2s7MyQs5Dx8pFS3" > /dev/null
  !curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=1rry4GViJLmmMpbm0B2s7MyQs5Dx8pFS3" -o "synROD.zip"

In [0]:
# Extract ROD dataset
rod_destination_path = "/content/ROD"
if not os.path.isdir(rod_destination_path):
  with zipfile.ZipFile("/content/ROD.zip", 'r') as zip_ref:
      zip_ref.extractall(rod_destination_path)
!rm ROD.zip

# Extract synROD dataset
synrod_destination_path = "/content/synROD"
if not os.path.isdir(synrod_destination_path):
  with zipfile.ZipFile("/content/synROD.zip", 'r') as zip_ref:
      zip_ref.extractall(synrod_destination_path)
!rm synROD.zip
!rm cookie

# Root folders of the extracted datasets (each archive contains a top-level
# folder named like the dataset itself).
rod_path = "/content/ROD/ROD"
synrod_path = "/content/synROD/synROD"

# Needed only right after the first extraction of synROD, in which the
# bell_pepper class folder is wrongly named bell_papper.
_misnamed_class_dir = os.path.join(synrod_path,  "bell_papper")
if os.path.isdir(_misnamed_class_dir):
  os.rename(_misnamed_class_dir, os.path.join(synrod_path,  "bell_pepper"))
  print("Bell pepper fixed")

## Utility functions

In [0]:
class HLoss(nn.Module):
    """Entropy loss on raw logits.

    `forward` returns the entropy of the softmax distribution taken along
    dim 1, summed over every element of the input (no averaging over the batch).
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        probabilities = F.softmax(x, dim=1)
        log_probabilities = F.log_softmax(x, dim=1)
        # p * log(p) is non-positive, hence the sign flip to get the entropy
        weighted = probabilities * log_probabilities
        return -1.0 * weighted.sum()

# DATALOADER function
def collate(batch):
  """Identity collate function for the DataLoaders.

  Hands the list of samples back untouched, so no default tensor stacking is
  applied by the DataLoader.
  """
  samples = batch
  return samples

#default init weights
def init_weights(m):
    """Default weight initialization, meant to be used with `module.apply`.

    For modules whose type is exactly `nn.Conv2d` or `nn.Linear` the weight is
    Xavier-uniform initialized and the bias, when present, is set to zero.
    Every other module is left untouched.

    Args:
        m: the module to initialize.
    """
    if type(m) in (nn.Conv2d, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        # Layers created with bias=False (e.g. the ResNet convolutions) have
        # m.bias set to None; zeroing it would raise.
        if m.bias is not None:
            nn.init.zeros_(m.bias)

# helper function used to setup batches returned by the dataloaders in the way that is mentioned in the paper
def format_batch(batch, pretext_task="rotation"):
  """Turn a list of samples into the two batch dictionaries used for training.

  Every element of `batch` is a pair of (rgb, depth, label) tuples: the first
  for the main task, the second for the pretext task.

  set pretext_task == rotation to require the pretext task labels to be of type "long"
  set pretext_task == zoom (or any other value) to require the pretext task labels to be of type "float"

  Returns:
    (data, data_hat): two dicts with keys "rgb", "depth" and "label". Images
    are concatenated along a new leading dimension; main-task labels are
    always a LongTensor.
  """
  main_rgb, main_depth, main_labels = [], [], []
  pretext_rgb, pretext_depth, pretext_labels = [], [], []

  for (rgb_img, depth_img, label), (hat_rgb_img, hat_depth_img, hat_label) in batch:
    # add a leading batch dimension so the images can be concatenated below
    main_rgb.append(rgb_img[None,:])
    main_depth.append(depth_img[None,:])
    main_labels.append(label)

    pretext_rgb.append(hat_rgb_img[None,:])
    pretext_depth.append(hat_depth_img[None,:])
    pretext_labels.append(hat_label)

  data = {
      "rgb": torch.cat(main_rgb, dim=0),
      "depth": torch.cat(main_depth, dim=0),
      "label": torch.LongTensor(main_labels),
  }

  # rotation is a classification task (long labels), anything else is
  # treated as a regression task (float labels)
  pretext_label_type = torch.LongTensor if pretext_task == "rotation" else torch.FloatTensor
  data_hat = {
      "rgb": torch.cat(pretext_rgb, dim=0),
      "depth": torch.cat(pretext_depth, dim=0),
      "label": pretext_label_type(pretext_labels),
  }

  return data, data_hat


#side by side loss and accuracy plot
def make_plot(train_loss, train_acc, test_loss, test_acc):
  """Draw the loss (left) and accuracy (right) curves side by side.

  Each sequence is plotted against the index of its points, so train and test
  curves may have different lengths. The figure is created but not shown
  explicitly, and nothing is returned.
  """
  figure = plt.figure(figsize=(10,3))
  panels = (
      (figure.add_subplot(121), "loss", train_loss, test_loss),
      (figure.add_subplot(122), "accuracy", train_acc, test_acc),
  )

  for axis, title, train_curve, test_curve in panels:
    # plot all the recorded points
    axis.plot(range(0, len(train_curve)), train_curve, label="train")
    axis.plot(range(0, len(test_curve)), test_curve, label="test")
    axis.set_title(title)
    # to force a scale, call axis.set_ylim here, e.g. (0, 5) for the loss
    # panel and (0, 1.05) for the accuracy panel
    axis.grid()
    axis.legend()

In [0]:
import matplotlib.pyplot as plt

def learning_curves(training_accuracies, training_losses, validation_accuracies, validation_losses, plot_title, plot_size=(16,6)):
  """
  Plots accuracies (left panel) and losses (right panel) per epoch.

  Epochs are numbered from 1 on the x axis; `plot_title` is used as the figure
  title and `plot_size` as the figure size. The figure is shown immediately.
  """
  fig, (accuracy_axis, loss_axis) = plt.subplots(nrows=1, ncols=2, figsize=plot_size)

  panels = (
      (accuracy_axis, "Accuracy", training_accuracies, validation_accuracies),
      (loss_axis, "Loss", training_losses, validation_losses),
  )
  for axis, title, training_values, validation_values in panels:
    axis.plot(range(1, len(training_values) + 1), training_values, label="Training")
    axis.plot(range(1, len(validation_values) + 1), validation_values, label="Validation")
    axis.legend()
    axis.set_title(title)
    axis.set_xlabel("Epochs")

  fig.suptitle(plot_title)
  plt.show()

## Copy the dataset, net and transform-config classes into the current folder

In [0]:
# Copy the content of the project's dataset/, net/ and transform_config/ Drive
# folders into /content/ (requires the Drive mounted at /content/drive).
# Presumably these folders hold the synrod, rod, dnet and tconfig modules
# imported in the next cell - verify against the Drive content.
!cp -r "/content/drive/My Drive/DL_project/dataset/." "/content/"
!cp -r "/content/drive/My Drive/DL_project/net/." "/content/"
!cp -r "/content/drive/My Drive/DL_project/transform_config/." "/content/"

## Import datasets, net and configurator classes

In [0]:
from synrod import SynROD
from rod import ROD

from dnet import DNet

from tconfig import TransformConfig

# Train test function definitions

## Train test RGB/ DEPTH ONLY function definition

In [0]:
def train_net(synrod, rod, hyperparams, type="rgb", light_validation=False):
  """Fine-tune an ImageNet-pretrained resnet18 in rgb or depth mode.

  The final fully connected layer of the resnet18 is replaced by a xavier
  initialized 512 -> 51 linear classifier. The model is trained on `synrod`
  with SGD and a StepLR schedule and evaluated on `rod`. A CUDA device is
  required.

  Args:
    synrod: train dataset. Batches go through `format_batch`, so every sample
      must be a pair of (rgb, depth, label) tuples.
    rod: test dataset, with the same sample layout as `synrod`.
    hyperparams: parameters dict with the keys
      {
        lr
        batch_size
        weight_decay
        step_size
        epochs
        momentum (optional, default 0.9)
        gamma (optional, default 0.1)
      }
    type: "rgb" or "depth", the modality fed to the network.
    light_validation: if True the validation is done only in the last epoch.

  Return:
    (trained_model, train_loss, train_acc, test_loss, test_acc).
    The four lists hold per-epoch averages of the per-batch values; the test
    lists only contain the epochs in which the validation was run.
    None is returned when `type` is neither "rgb" nor "depth".
  """
  # Unknown modality: keep returning None as before, but do it before any
  # model or dataloader is built.
  if type not in ("rgb", "depth"):
    return None

  lr = hyperparams["lr"]
  batch_size = hyperparams["batch_size"]
  weight_decay = hyperparams["weight_decay"]
  step_size = hyperparams["step_size"]
  epochs = hyperparams["epochs"]
  curr_momentum = hyperparams.get("momentum", 0.9)
  curr_gamma = hyperparams.get("gamma", 0.1)

  DEVICE = "cuda"
  # The flag has to be assigned: the bare expression `cudnn.benchmark` only
  # reads it and enables nothing.
  cudnn.benchmark = True

  # dataloader definition with given batch size
  # NOTE: drop_last=True also on the test loader, so the last incomplete batch
  # of `rod` is not evaluated.
  source = DataLoader(synrod,  batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4, collate_fn=collate)
  target = DataLoader(rod,  batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4, collate_fn=collate)

  # NET DEFINITION
  model = models.resnet18(pretrained=True)
  outLayer = torch.nn.Linear(512, 51)
  nn.init.xavier_uniform_(outLayer.weight)
  # was `m.bias`, an undefined name that raised NameError on every call
  nn.init.zeros_(outLayer.bias)
  model.fc = outLayer
  model = model.to(DEVICE)

  criterion = nn.CrossEntropyLoss()

  optimizer = optim.SGD(model.parameters(), lr=lr, momentum=curr_momentum, weight_decay=weight_decay)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=curr_gamma)

  train_loss = []
  train_acc = []
  test_loss = []
  test_acc = []

  # TRAIN NET
  for i in range(epochs):
    n_iters = 0
    train_acc_val = 0
    train_loss_val = 0
    test_acc_val = 0
    test_loss_val = 0

    model.train()
    for source_batch in source:
      # reset accumulated gradients
      optimizer.zero_grad()

      S, _ = format_batch(source_batch)

      # Prepare data: `type` is also the key of the modality in the batch dict
      source_main_labels = S["label"].to(DEVICE)
      source_images = S[type].to(DEVICE)

      # TRAINING SECTION
      outputs = model(source_images)
      loss = criterion(outputs, source_main_labels)
      _, preds = torch.max(outputs.data, 1)
      train_running_corrects = torch.sum(preds == source_main_labels.data).item()
      loss.backward()
      optimizer.step()

      # cumulatives of current epoch
      train_acc_val += train_running_corrects/len(source_main_labels)
      train_loss_val += loss.item()

      n_iters += 1

    train_acc.append(train_acc_val/n_iters)
    train_loss.append(train_loss_val/n_iters)

    print("EPOCH ", i + 1)
    print("train accuracy: ", train_acc_val/n_iters)
    print("train loss: ", train_loss_val/n_iters)

    # VALIDATION SECTION
    if not light_validation or i == epochs - 1:
      model.eval()
      n_iters = 0
      # no autograd graph is needed to evaluate: saves memory and time
      with torch.no_grad():
        for target_batch in target:
          T, _ = format_batch(target_batch)

          target_main_labels = T["label"].to(DEVICE)
          target_images = T[type].to(DEVICE)

          outputs = model(target_images)
          loss = criterion(outputs, target_main_labels)
          _, preds = torch.max(outputs.data, 1)
          test_running_corrects = torch.sum(preds == target_main_labels.data).item()

          test_acc_val += test_running_corrects/len(target_main_labels)
          test_loss_val += loss.item()

          n_iters += 1

      test_acc.append(test_acc_val/n_iters)
      test_loss.append(test_loss_val/n_iters)

      print("test accuracy: ", test_acc_val/n_iters)
      print("test loss: ", test_loss_val/n_iters)

    print()
    scheduler.step()

  return model, train_loss, train_acc, test_loss, test_acc

## Train test RGB-D FROZEN function definition

In [0]:
def train_rgbd_net(synrod, rod, hyperparams, trained_RGB_net=None, trained_DEPTH_net=None, light_validation=False):
  """Merges together two features extractors (resnet18) pretrained for rgb and depth modalities.
  The features extractors are frozen and a xavier initialized fully connected
  layer is added on top of them; only this layer is trained.

  Args:
    synrod: train dataset
    rod: test dataset
    hyperparams: parameters dict with the keys
      {
        lr
        batch_size
        weight_decay
        step_size
        epochs
        momentum (optional, default 0.9)
        gamma (optional, default 0.1)
      }
    trained_RGB_net: resnet18 pretrained on RGB only images. If None it is
      trained here with `train_net` and the same hyperparams.
    trained_DEPTH_net: resnet18 pretrained on depth only images. If None it is
      trained here with `train_net` and the same hyperparams.
    light_validation: if True the validation is done only in the last epoch.

  Return:
    (trained_model, train_loss, train_acc, test_loss, test_acc).
    The four lists hold per-epoch averages of the per-batch values; the test
    lists only contain the epochs in which the validation was run.
  """
  lr = hyperparams["lr"]
  batch_size = hyperparams["batch_size"]
  weight_decay = hyperparams["weight_decay"]
  step_size = hyperparams["step_size"]
  epochs = hyperparams["epochs"]
  curr_momentum = hyperparams.get("momentum", 0.9)
  curr_gamma = hyperparams.get("gamma", 0.1)

  DEVICE = "cuda"
  # The flag has to be assigned: the bare expression `cudnn.benchmark` only
  # reads it and enables nothing.
  cudnn.benchmark = True

  # dataloader definition with given batch size
  # NOTE: drop_last=True also on the test loader, so the last incomplete batch
  # of `rod` is not evaluated.
  source = DataLoader(synrod,  batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4, collate_fn=collate)
  target = DataLoader(rod,  batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4, collate_fn=collate)

  # Feature extractors
  if trained_RGB_net is None:
    print("##### Start training RGB feature extractor #####")
    trained_RGB_net, _, _, _, _ = train_net(synrod, rod, hyperparams, type="rgb", light_validation=True)
    print("##### End training RGB feature extractor #####")
    print()
    print()

  if trained_DEPTH_net is None:
    print("##### Start training DEPTH feature extractor #####")
    trained_DEPTH_net, _, _, _, _ = train_net(synrod, rod, hyperparams, type="depth", light_validation=True)
    print("##### End training DEPTH feature extractor #####")
    print()
    print()

  # resnet2 was wrongly given the RGB net too, so the depth extractor was never
  # used. Presumably resnet1/resnet2 are the rgb/depth branches of DNet, in the
  # same order as the (rgb, depth) inputs of forward - verify against DNet.
  net = DNet(num_classes=51, resnet1=trained_RGB_net, resnet2=trained_DEPTH_net)
  net.Pbranch = None

  # Final fully connected branch, the only part that is trained. It is xavier
  # initialized as stated in the docstring.
  fusion_fc = nn.Linear(1024, 51)
  nn.init.xavier_uniform_(fusion_fc.weight)
  nn.init.zeros_(fusion_fc.bias)
  net.Mbranch = nn.Sequential(
              nn.AdaptiveAvgPool2d((1,1)),
              nn.Flatten(),
              fusion_fc
          )
  net = net.to(DEVICE)

  # to avoid that the backward pass wastes time calculating gradients for
  # parameters that will not be updated it is necessary to set the flag to false
  for param in trained_RGB_net.parameters():
    param.requires_grad = False
  for param in trained_DEPTH_net.parameters():
    param.requires_grad = False

  criterion = nn.CrossEntropyLoss()

  optimizer = optim.SGD(net.Mbranch.parameters(), lr=lr, momentum=curr_momentum, weight_decay=weight_decay)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=curr_gamma)

  train_loss = []
  train_acc = []
  test_loss = []
  test_acc = []

  # TRAIN FUSION NETWORK
  for i in range(epochs):
    n_iters = 0
    train_acc_val = 0
    train_loss_val = 0
    test_acc_val = 0
    test_loss_val = 0

    net.train()
    # requires_grad=False does not stop BatchNorm layers from updating their
    # running statistics in train mode: keep the frozen extractors in eval mode.
    # Assumes DNet reuses the modules of the given nets, as the freeze above
    # already does - TODO confirm.
    trained_RGB_net.eval()
    trained_DEPTH_net.eval()

    for source_batch in source:
      S, _ = format_batch(source_batch, pretext_task="rotation")

      # Prepare data
      source_rgb_images = S["rgb"].to(DEVICE)
      source_depth_images = S["depth"].to(DEVICE)
      source_main_labels = S["label"].to(DEVICE)

      # FUSION ONLY TRAINING SECTION
      optimizer.zero_grad()
      outputs = net.forward(source_rgb_images, source_depth_images, mode="main", debug=False)
      loss = criterion(outputs, source_main_labels)
      _, preds = torch.max(outputs.data, 1)
      running_corrects = torch.sum(preds == source_main_labels.data).item()
      loss.backward()
      optimizer.step()

      # log results obtained by training the fusion net over the current batch
      train_acc_val += running_corrects/len(source_main_labels)
      train_loss_val += loss.item()

      n_iters += 1

    print("EPOCH ", i + 1)
    print("train FUSION accuracy: ", train_acc_val/n_iters)
    print("train FUSION loss: ", train_loss_val/n_iters)
    train_loss.append(train_loss_val/n_iters)
    train_acc.append(train_acc_val/n_iters)

    # VALIDATION SECTION
    if not light_validation or i == epochs - 1:
      n_iters = 0
      net.eval()
      # no autograd graph is needed to evaluate: saves memory and time
      with torch.no_grad():
        for target_batch in target:
          T, _ = format_batch(target_batch, pretext_task="rotation")
          target_rgb_images = T["rgb"].to(DEVICE)
          target_depth_images = T["depth"].to(DEVICE)
          target_main_labels = T["label"].to(DEVICE)

          outputs = net.forward(target_rgb_images, target_depth_images, mode="main", debug=False)

          loss_T_FUSION = criterion(outputs, target_main_labels)
          _, preds = torch.max(outputs.data, 1)
          running_corrects = torch.sum(preds == target_main_labels.data).item()

          test_acc_val += running_corrects/len(target_main_labels)
          test_loss_val += loss_T_FUSION.item()

          n_iters += 1

      test_loss.append(test_loss_val/n_iters)
      test_acc.append(test_acc_val/n_iters)

      print("test target FUSION accuracy: ", test_acc_val/n_iters)
      print("test target FUSION loss: ", test_loss_val/n_iters)

    print()
    scheduler.step()

  return net, train_loss, train_acc, test_loss, test_acc

## Train test RGB-D E2E function definition

In [0]:
def train_rgbde2e_net(synrod, rod, hyperparams, light_validation=False):
  """Trains a two-branch (rgb + depth) DNet on the main task only.
  The pretext branch of the DNet is removed and the whole net is trained in
  end-to-end fashion. The main branch is the DNet default one: the xavier
  initialized head described in the paper is left commented out below.

  Args:
    synrod: train dataset
    rod: test dataset
    hyperparams: parameters dict with the keys
      {
        lr
        batch_size
        weight_decay
        step_size
        epochs
        momentum (optional, default 0.9)
        gamma (optional, default 0.1)
      }
    light_validation: if True the validation is done only in the last epoch.

  Return:
    (trained_model, train_loss, train_acc, test_loss, test_acc).
    The four lists hold per-epoch averages of the per-batch values; the test
    lists only contain the epochs in which the validation was run.
  """
  lr = hyperparams["lr"]
  batch_size = hyperparams["batch_size"]
  weight_decay = hyperparams["weight_decay"]
  step_size = hyperparams["step_size"]
  epochs = hyperparams["epochs"]
  curr_momentum = hyperparams.get("momentum", 0.9)
  curr_gamma = hyperparams.get("gamma", 0.1)

  DEVICE = "cuda"
  # The flag has to be assigned: the bare expression `cudnn.benchmark` only
  # reads it and enables nothing.
  cudnn.benchmark = True

  # dataloader definition with given batch size
  # NOTE: drop_last=True also on the test loader, so the last incomplete batch
  # of `rod` is not evaluated.
  source = DataLoader(synrod,  batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4, collate_fn=collate)
  target = DataLoader(rod,  batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4, collate_fn=collate)

  # NET DEFINITION
  net = DNet(num_classes=51)
  net.Pbranch = None

  #uncomment this section to train model as described in paper (RGB-De2e)
  #net.Mbranch = nn.Sequential(
              #nn.AdaptiveAvgPool2d((1,1)),
              #nn.Flatten(),
              #nn.Linear(1024, 51)
          #)
  #net.Mbranch.apply(init_weights)
  net = net.to(DEVICE)

  criterion = nn.CrossEntropyLoss()

  # every parameter of the net is optimized (end-to-end training)
  parameters_to_optimize = net.parameters()
  optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=curr_momentum, weight_decay=weight_decay)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=curr_gamma)

  train_loss = []
  train_acc = []
  test_loss = []
  test_acc = []

  # TRAIN NETWORK
  for i in range(epochs):
    n_iters = 0
    train_acc_val = 0
    train_loss_val = 0
    test_acc_val = 0
    test_loss_val = 0

    net.train()
    for source_batch in source:
      S, _ = format_batch(source_batch, pretext_task="rotation")

      # Prepare data
      source_rgb_images = S["rgb"].to(DEVICE)
      source_depth_images = S["depth"].to(DEVICE)
      source_main_labels = S["label"].to(DEVICE)

      # TRAINING SECTION
      optimizer.zero_grad()
      outputs = net.forward(source_rgb_images, source_depth_images, mode="main", debug=False)
      loss = criterion(outputs, source_main_labels)
      _, preds = torch.max(outputs.data, 1)
      running_corrects = torch.sum(preds == source_main_labels.data).item()
      loss.backward()
      optimizer.step()

      # log results obtained by training the net over the current batch
      train_acc_val += running_corrects/len(source_main_labels)
      train_loss_val += loss.item()

      n_iters += 1

    print("EPOCH ", i + 1)
    print("train e2e accuracy: ", train_acc_val/n_iters)
    print("train e2e loss: ", train_loss_val/n_iters)
    train_loss.append(train_loss_val/n_iters)
    train_acc.append(train_acc_val/n_iters)

    # VALIDATION SECTION
    if not light_validation or i == epochs - 1:
      n_iters = 0
      net.eval()
      # no autograd graph is needed to evaluate: saves memory and time
      with torch.no_grad():
        for target_batch in target:
          T, _ = format_batch(target_batch, pretext_task="rotation")
          target_rgb_images = T["rgb"].to(DEVICE)
          target_depth_images = T["depth"].to(DEVICE)
          target_main_labels = T["label"].to(DEVICE)

          outputs = net.forward(target_rgb_images, target_depth_images, mode="main", debug=False)

          loss_T_FUSION = criterion(outputs, target_main_labels)
          _, preds = torch.max(outputs.data, 1)
          running_corrects = torch.sum(preds == target_main_labels.data).item()

          test_acc_val += running_corrects/len(target_main_labels)
          test_loss_val += loss_T_FUSION.item()

          n_iters += 1

      test_loss.append(test_loss_val/n_iters)
      test_acc.append(test_acc_val/n_iters)

      print("test target e2e accuracy: ", test_acc_val/n_iters)
      print("test target e2e loss: ", test_loss_val/n_iters)

    print()
    scheduler.step()

  return net, train_loss, train_acc, test_loss, test_acc

## Train test OURS function definition

In [0]:
def train_test_ours(synrod, rod, hyperparams, light_validation=False):
  """Train the architecture called "OURS" in the reference paper with rotation pretext task.
  The net is trained in end-to-end fashion. At every step the loss is

    main loss (source) + em_weight/batch * entropy (target)
      + lambda * (pretext loss on source + pretext loss on target)

  Target labels of the main task are used only for the validation.

  Args:
    synrod: train dataset
    rod: test dataset
    hyperparams: parameters dict with the keys
      {
        lr
        batch_size
        weight_decay
        step_size
        epochs
        lambda (!!!) weight of the pretext task losses
        momentum (optional, default 0.9)
        gamma (optional, default 0.1)
        em_weight (optional, default 0.1) weight of the entropy term
      }
    light_validation: if True the validation is done only in the last epoch.

  Return:
    (trained_model, train_loss, train_acc, test_loss, test_acc).
    The train lists hold per-epoch averages of the main task on the source
    batches, the test lists per-epoch averages of the main task on `rod` (only
    for the epochs in which the validation was run).
  """
  lr = hyperparams["lr"]
  batch_size = hyperparams["batch_size"]
  weight_decay = hyperparams["weight_decay"]
  step_size = hyperparams["step_size"]
  epochs = hyperparams["epochs"]
  curr_momentum = hyperparams.get("momentum", 0.9)
  curr_gamma = hyperparams.get("gamma", 0.1)
  # lambda parameter used to weight the losses of main and pretext task
  lambda_ = hyperparams["lambda"]
  em_weight = hyperparams.get("em_weight", 0.1)

  DEVICE = "cuda"
  # The flag has to be assigned: the bare expression `cudnn.benchmark` only
  # reads it and enables nothing.
  cudnn.benchmark = True

  # dataloader definition with given batch size: independent loaders for the
  # original and the transformed samples, so that they are shuffled separately
  # NOTE: drop_last=True also on the test loader, so the last incomplete batch
  # of `rod` is not evaluated.
  source = DataLoader(synrod,  batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4, collate_fn=collate)
  source_rot = DataLoader(synrod,  batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4, collate_fn=collate)
  target_rot = DataLoader(rod,  batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4, collate_fn=collate)
  target = DataLoader(rod,  batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4, collate_fn=collate)

  # NET DEFINITION
  net = DNet(num_classes=51, dim_pretext=4).to(DEVICE)

  criterion = nn.CrossEntropyLoss()
  entropy_min_criterion = HLoss()

  parameters_to_optimize = net.parameters()
  optimizer = optim.SGD(parameters_to_optimize, lr=lr,
                            momentum=curr_momentum,
                            weight_decay=weight_decay)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=curr_gamma)

  # lists that accumulate loss/accuracy values over the training period
  train_loss = []
  train_acc = []
  test_loss = []
  test_acc = []

  for i in range(epochs):
    n_iters = 0
    train_main_acc_val = 0
    train_main_loss_val = 0

    train_rot_source_acc_val = 0
    train_rot_source_loss_val = 0

    train_rot_target_acc_val = 0
    train_rot_target_loss_val = 0

    test_main_acc_val = 0
    test_main_loss_val = 0

    net.train()
    # zip stops at the shortest loader, so an epoch lasts as many steps as the
    # smaller dataset provides
    for source_batch, target_batch, source_rot_batch, target_rot_batch in zip(source, target, source_rot, target_rot):
      S, _ = format_batch(source_batch, pretext_task="rotation")
      T, _ = format_batch(target_batch, pretext_task="rotation")

      _, S_hat = format_batch(source_rot_batch, pretext_task="rotation")
      _, T_hat = format_batch(target_rot_batch, pretext_task="rotation")

      # zero the gradients
      optimizer.zero_grad()

      # MAIN TASK
      # setup SOURCE DOMAIN STANDARD dataset to feed to the net
      source_rgb_images = S["rgb"].to(DEVICE)
      source_depth_images = S["depth"].to(DEVICE)
      source_main_labels = S["label"].to(DEVICE)

      # train on source original images
      outputs = net.forward(source_rgb_images, source_depth_images, mode="main")
      loss_M = criterion(outputs, source_main_labels)

      # compute stats
      _, preds = torch.max(outputs.data, 1)
      running_corrects = torch.sum(preds == source_main_labels.data).item()
      train_main_loss_val += loss_M.item()
      train_main_acc_val += running_corrects/len(source_main_labels)

      # ENTROPY MINIMIZATION
      # setup TARGET DOMAIN STANDARD dataset to feed to the net
      # (target labels can't be used for training)
      target_rgb_images = T["rgb"].to(DEVICE)
      target_depth_images = T["depth"].to(DEVICE)
      outputs = net.forward(target_rgb_images, target_depth_images, mode="main")
      loss_entropy_min = entropy_min_criterion(outputs)
      # HLoss sums over the batch: remember the batch size to average the term
      n_entropy_samples = target_rgb_images.size(0)

      # PRETEXT TASK
      # setup SOURCE DOMAIN ROTATED dataset to feed to the net
      source_rotated_rgb_images = S_hat["rgb"].to(DEVICE)
      source_rotated_depth_images = S_hat["depth"].to(DEVICE)
      source_rotated_labels = S_hat["label"].to(DEVICE)

      # train on source rotated
      outputs = net.forward(source_rotated_rgb_images, source_rotated_depth_images, mode="pretext")
      loss_P_1 = criterion(outputs, source_rotated_labels)

      # compute stats
      _, preds = torch.max(outputs.data, 1)
      running_corrects = torch.sum(preds == source_rotated_labels.data).item()
      train_rot_source_acc_val += running_corrects/len(source_rotated_labels)
      train_rot_source_loss_val += loss_P_1.item()

      # setup TARGET DOMAIN ROTATED dataset to feed to the net
      target_rotated_rgb_images = T_hat["rgb"].to(DEVICE)
      target_rotated_depth_images = T_hat["depth"].to(DEVICE)
      target_rotated_labels = T_hat["label"].to(DEVICE)

      # train on target rotated
      outputs = net.forward(target_rotated_rgb_images, target_rotated_depth_images, mode="pretext")
      loss_P_2 = criterion(outputs, target_rotated_labels)

      # compute stats
      _, preds = torch.max(outputs.data, 1)
      running_corrects = torch.sum(preds == target_rotated_labels.data).item()
      train_rot_target_acc_val += running_corrects/len(target_rotated_labels)
      train_rot_target_loss_val += loss_P_2.item()

      # BACKPROP WITH THE FULL LOSS
      # The entropy term is averaged over the batch it was computed on. It used
      # to be divided by the size of the rotated target batch, which is the
      # same number only because every loader has drop_last=True.
      loss = loss_M + (em_weight/n_entropy_samples)*loss_entropy_min + lambda_*(loss_P_1 + loss_P_2)

      loss.backward()
      # UPDATE WEIGHTS
      optimizer.step()

      n_iters += 1

    train_loss.append(train_main_loss_val/n_iters)
    train_acc.append(train_main_acc_val/n_iters)

    print("EPOCH: ", i + 1)
    print("train main accuracy: ", train_main_acc_val/n_iters)
    print("train main loss: ", train_main_loss_val/n_iters)
    print("train rot source accuracy: ", train_rot_source_acc_val/n_iters)
    print("train rot source loss: ", train_rot_source_loss_val/n_iters)
    print("train rot target accuracy: ", train_rot_target_acc_val/n_iters)
    print("train rot target loss: ", train_rot_target_loss_val/n_iters)

    # TEST RESULTS OF THE CURRENT EPOCH OF TRAINING
    if not light_validation or i == epochs - 1:
      net.eval()
      n_iters = 0
      # no autograd graph is needed to evaluate: saves memory and time
      with torch.no_grad():
        for target_batch in target:
          # Format batch
          T, _ = format_batch(target_batch, pretext_task="rotation")

          # prepare target data
          target_rgb_images = T["rgb"].to(DEVICE)
          target_depth_images = T["depth"].to(DEVICE)
          target_main_labels = T["label"].to(DEVICE)

          outputs = net.forward(target_rgb_images, target_depth_images, mode="main")
          loss_T = criterion(outputs, target_main_labels)

          # compute test stats
          _, preds = torch.max(outputs.data, 1)
          running_corrects = torch.sum(preds == target_main_labels.data).item()
          test_main_acc_val += running_corrects/len(target_main_labels)
          test_main_loss_val += loss_T.item()

          n_iters += 1

      test_loss.append(test_main_loss_val/n_iters)
      test_acc.append(test_main_acc_val/n_iters)

      print("test main target accuracy: ", test_main_acc_val/n_iters)
      print("test main target loss: ", test_main_loss_val/n_iters)

    print()
    scheduler.step()

  return net, train_loss, train_acc, test_loss, test_acc

## Train test VARIATION function definition

In [0]:
def train_test_variation(synrod, rod, hyperparams, light_validation=False):
  """Train our variation of the paper architecture with the zoom pretext task.

  The net is trained in end-to-end fashion: at every iteration a single loss
  is back-propagated, made of the main classification loss on the source
  batch, a weighted entropy term on the target batch and the lambda-weighted
  sum of the two zoom pretext (MSE) losses on source and target.

  Args:
    synrod: train dataset
    rod: test dataset
    hyperparams: parameters dict with the keys
      {
        lr
        batch_size
        weight_decay
        step_size
        epochs
        lambda (required: weight of the two pretext losses)
        momentum (optional, default 0.9)
        gamma (optional, default 0.1)
        em_weight (optional, default 0.1: weight of the entropy term)
      }
    light_validation: if True the validation is done only in the last epoch.

  Return:
    (trained_model, train_loss, train_acc, test_loss, test_acc).
    train_* hold one value per epoch; test_* hold one value per validated
    epoch (a single value when light_validation is True).
  """
  lr = hyperparams["lr"]
  batch_size = hyperparams["batch_size"]
  weight_decay = hyperparams["weight_decay"]
  step_size = hyperparams["step_size"]
  epochs = hyperparams["epochs"]
  curr_momentum = hyperparams.get("momentum", 0.9)
  curr_gamma = hyperparams.get("gamma", 0.1)
  lambda_ = hyperparams["lambda"]
  em_weight = hyperparams.get("em_weight", 0.1)
  DEVICE = "cuda"
  # The flag has to be assigned: a bare `cudnn.benchmark` expression is a no-op.
  cudnn.benchmark = True

  # dataloader definition with given batch size
  # Two independent loaders per domain: one feeds the main task, the other
  # feeds the zoom pretext task.
  N_WORKERS = 4
  source = DataLoader(synrod,  batch_size=batch_size, shuffle=True, drop_last=True, num_workers=N_WORKERS, collate_fn=collate)
  source_zoom = DataLoader(synrod,  batch_size=batch_size, shuffle=True, drop_last=True, num_workers=N_WORKERS, collate_fn=collate)
  target_zoom = DataLoader(rod,  batch_size=batch_size, shuffle=True, drop_last=True, num_workers=N_WORKERS, collate_fn=collate)
  # NOTE(review): this loader is also used for validation below, so with
  # drop_last=True the last partial batch of `rod` is never evaluated.
  target = DataLoader(rod,  batch_size=batch_size, shuffle=True, drop_last=True, num_workers=N_WORKERS, collate_fn=collate)

  # NET DEFINITION
  net = DNet(num_classes=51, dim_pretext=1)
  net = net.to(DEVICE)

  main_criterion = nn.CrossEntropyLoss()
  pretext_criterion = nn.MSELoss()
  entropy_min_criterion = HLoss()

  parameters_to_optimize = net.parameters()
  optimizer = optim.SGD(parameters_to_optimize, lr=lr,
                            momentum=curr_momentum,
                            weight_decay=weight_decay)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=curr_gamma)

  # lists that accumulate loss/accuracy values over the training period
  train_loss = []
  train_acc = []
  test_loss = []
  test_acc = []

  for i in range(epochs):
    # per-epoch accumulators (sums over batches, averaged with n_iters)
    n_iters = 0
    train_main_acc_val = 0
    train_main_loss_val = 0
    train_zoom_source_loss_val = 0
    train_zoom_target_loss_val = 0
    test_main_acc_val = 0
    test_main_loss_val = 0

    net.train()
    # zip stops as soon as the shortest of the four loaders is exhausted
    for source_batch, target_batch, source_zoom_batch, target_zoom_batch in zip(source, target, source_zoom, target_zoom):

      # format_batch returns a pair: the first element is used for the main
      # task, the second one for the pretext task
      S, _ = format_batch(source_batch, pretext_task="zoom")
      T, _ = format_batch(target_batch, pretext_task="zoom")

      _, S_hat = format_batch(source_zoom_batch, pretext_task="zoom")
      _, T_hat = format_batch(target_zoom_batch, pretext_task="zoom")

      # zero the gradients
      optimizer.zero_grad()

      # MAIN TASK
      # setup SOURCE DOMAIN STANDARD dataset to feed to the net
      source_rgb_images = S["rgb"].to(DEVICE)
      source_depth_images = S["depth"].to(DEVICE)
      source_main_labels = S["label"].to(DEVICE)
      # classification loss on the source batch
      outputs = net.forward(source_rgb_images, source_depth_images, mode="main")
      loss_M = main_criterion(outputs, source_main_labels)
      # compute stats
      _, preds = torch.max(outputs.detach(), 1)
      running_corrects = torch.sum(preds == source_main_labels).item()
      tot_samples = len(source_main_labels)
      train_main_loss_val += loss_M.item()
      train_main_acc_val += running_corrects/tot_samples

      # ENTROPY MINIMIZATION
      # setup TARGET DOMAIN STANDARD dataset to feed to the net
      target_rgb_images = T["rgb"].to(DEVICE)
      target_depth_images = T["depth"].to(DEVICE)
      # target labels in this phase can't be used for training:
      # only the main-head predictions on the target batch are needed
      outputs = net.forward(target_rgb_images, target_depth_images, mode="main")
      loss_entropy_min = entropy_min_criterion(outputs)

      # PRETEXT TASK
      # setup SOURCE DOMAIN ZOOMED dataset to feed to the net
      source_zoom_rgb_images = S_hat["rgb"].to(DEVICE)
      source_zoom_depth_images = S_hat["depth"].to(DEVICE)
      source_zoom_labels = S_hat["label"].to(DEVICE)
      # train on source zoomed
      outputs = net.forward(source_zoom_rgb_images, source_zoom_depth_images, mode="pretext")
      loss_P_1 = pretext_criterion(outputs.flatten(), source_zoom_labels)
      # compute stats
      train_zoom_source_loss_val += loss_P_1.item()

      # setup TARGET DOMAIN ZOOMED dataset to feed to the net
      target_zoom_rgb_images = T_hat["rgb"].to(DEVICE)
      target_zoom_rgb_depth_images = T_hat["depth"].to(DEVICE)
      target_zoom_labels = T_hat["label"].to(DEVICE)
      # train on target zoomed
      outputs = net.forward(target_zoom_rgb_images, target_zoom_rgb_depth_images, mode="pretext")
      loss_P_2 = pretext_criterion(outputs.flatten(), target_zoom_labels)
      # compute stats
      train_zoom_target_loss_val += loss_P_2.item()

      # BACKPROP WITH THE FULL LOSS
      # The entropy term is divided by the source batch size
      # (presumably HLoss returns a sum over the samples -- TODO confirm).
      loss = loss_M + (em_weight/tot_samples)*loss_entropy_min + lambda_*(loss_P_1 + loss_P_2)

      loss.backward()
      # UPDATE WEIGHTS (one optimizer step per batch)
      optimizer.step()

      n_iters += 1

    train_acc.append(train_main_acc_val/n_iters)
    train_loss.append(train_main_loss_val/n_iters)

    print("EPOCH ", i + 1)
    print("train main accuracy: ", train_main_acc_val/n_iters)
    print("train main loss: ", train_main_loss_val/n_iters)
    print("train zoom source loss: ", train_zoom_source_loss_val/n_iters)
    print("train zoom target loss: ", train_zoom_target_loss_val/n_iters)


    # TEST RESULTS AFTER THE CURRENT EPOCH OF TRAINING
    if not light_validation or i == epochs - 1:
      net.eval()
      n_iters = 0
      # No gradients are needed for evaluation: skip building autograd graphs.
      with torch.no_grad():
        for target_batch in target:

          # Format batch
          T, _ = format_batch(target_batch, pretext_task="zoom")

          # prepare target data
          target_rgb_images = T["rgb"].to(DEVICE)
          target_depth_images = T["depth"].to(DEVICE)
          target_main_labels = T["label"].to(DEVICE)

          outputs = net.forward(target_rgb_images, target_depth_images, mode="main")
          loss_T = main_criterion(outputs, target_main_labels)

          # compute test stats
          _, preds = torch.max(outputs, 1)
          running_corrects = torch.sum(preds == target_main_labels).item()
          test_main_acc_val += running_corrects/len(target_main_labels)
          test_main_loss_val += loss_T.item()

          n_iters += 1

      test_loss.append(test_main_loss_val/n_iters)
      test_acc.append(test_main_acc_val/n_iters)

      print("test main target accuracy: ", test_main_acc_val/n_iters)
      print("test main target loss: ", test_main_loss_val/n_iters)

    print()
    scheduler.step()

  return net, train_loss, train_acc, test_loss, test_acc

# Datasets

## ROD and synROD - rotation

In [0]:
# Transform configuration shared by both datasets (rotation pretext task).
tfConfig = TransformConfig(
  resize_shape=256,
  centercrop_shape=224,
)
# Available config types: imagenet, rgb_mod, depth_mod, rgb_depth_mod.
# The "mod" variants replace the imagenet values with the computed ones.
synrod_param_values, rod_param_values = tfConfig.get_rotation_configuration(
  config_type="imagenet",
)

In [0]:

# SynROD dataset using the "rotation" item extractor.
synrod = SynROD(
  synrod_path,
  item_extractor_fn="rotation",
  item_extractor_param_values=synrod_param_values,
  ram_mode=False,
)
# ROD dataset using the "rotation" item extractor.
rod = ROD(
  rod_path,
  item_extractor_fn="rotation",
  item_extractor_param_values=rod_param_values,
  ram_mode=False,
)

## ROD and synROD - zoom

In [0]:
# Transform configuration shared by both datasets (zoom pretext task).
tfConfig = TransformConfig(
  resize_shape=256,
  centercrop_shape=224,
)
# Available config types: imagenet, rgb_mod, depth_mod, rgb_depth_mod.
# The "mod" variants replace the imagenet values with the computed ones.
synrod_param_values, rod_param_values = tfConfig.get_zoom_configuration(
  config_type="imagenet",
)

In [0]:

# SynROD dataset using the "zoom" item extractor.
synrod = SynROD(
  synrod_path,
  item_extractor_fn="zoom",
  item_extractor_param_values=synrod_param_values,
  ram_mode=False,
)
# ROD dataset using the "zoom" item extractor.
rod = ROD(
  rod_path,
  item_extractor_fn="zoom",
  item_extractor_param_values=rod_param_values,
  ram_mode=False,
)

## ROD and synROD - decentralized zoom

In [0]:
# Transform configuration shared by both datasets (decentralized zoom pretext task).
tfConfig = TransformConfig(
  resize_shape=256,
  centercrop_shape=224,
)
# Available config types: imagenet, rgb_mod, depth_mod, rgb_depth_mod.
# The "mod" variants replace the imagenet values with the computed ones.
# NOTE(review): this calls the same get_zoom_configuration used for the plain
# zoom task -- confirm that no dedicated configuration is expected here.
synrod_param_values, rod_param_values = tfConfig.get_zoom_configuration(
  config_type="imagenet",
)

In [0]:

# SynROD dataset using the "decentralized_zoom" item extractor.
synrod = SynROD(
  synrod_path,
  item_extractor_fn="decentralized_zoom",
  item_extractor_param_values=synrod_param_values,
  ram_mode=False,
)
# ROD dataset using the "decentralized_zoom" item extractor.
rod = ROD(
  rod_path,
  item_extractor_fn="decentralized_zoom",
  item_extractor_param_values=rod_param_values,
  ram_mode=False,
)

# Tuning process

In [0]:
# Hyperparameter grid: add/remove parameters of choice.
# Every combination of the listed values is trained once.
parameters_dict = { "lr" : [0.0001, 0.0005, 0.001, 0.005, 0.01],
                    "batch_size":[16,32,64, 128],
                   "epochs":[10, 20, 30],
                   "weight_decay":[1e-1, 1e-2, 1e-3, 1e-4, 1e-5],
                    "step_size": [30, 60],
                   }

paramGrid = ParameterGrid(parameters_dict)
paramgrid_list = list(paramGrid)

log_file = "test_tuning.csv"
SAVE_PATH = "/content/drive/My Drive/DL_project/logs/" + log_file

header = ['lr', 'batch_size', 'epochs', 'weight_decay', 'step_size', "train_accuracy", "train_loss", "validation_accuracy", "validation_loss"]

# Create or overwrite the log file. The context manager guarantees the file
# is closed even if a training run raises midway through the grid.
with open(SAVE_PATH, "w") as f:
  f.write(", ".join(header) + '\n')
  # Make the header visible on disk before the (long) training runs start
  f.flush()

  for grid in paramgrid_list:
    print("CONFIG:")
    print(grid)

    # Train net
    net, train_loss, train_acc, test_loss, test_acc = train_net(synrod, rod, grid, type="rgb", light_validation=True)

    if len(test_loss) > 1: # Not in light_validation mode
      learning_curves(train_acc, train_loss, test_acc, test_loss, "Example of learning curves")
      print()

    # Append one row with the configuration and the last-epoch metrics
    config_data = [grid['lr'], grid['batch_size'], grid['epochs'], grid['weight_decay'], grid['step_size'],
                   train_acc[-1], train_loss[-1], test_acc[-1], test_loss[-1]]
    f.write(", ".join([str(el) for el in config_data]) + '\n')
    # Flush after every configuration so partial results survive a crash
    f.flush()