# Setup

## Imports 

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
from PIL import Image
from sklearn import utils
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from torch.autograd import Function
from torch.backends import cudnn
from torch.utils.data import DataLoader
from torch.utils.data import Subset, DataLoader
from torch.utils.model_zoo import load_url as load_state_dict_from_url
from torchvision import models
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision.datasets import VisionDataset
from torchvision.models import alexnet
from torchvision.transforms.functional import pad
from tqdm import tqdm
import logging
import matplotlib.pyplot as plt
import numbers
import numpy as np
import os
import os.path
import shutil
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import zipfile


## Download datasets in colab
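These cells work for any user: the dataset archives are fetched with `curl` from Google Drive download links rather than read from the mounted Drive.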

In [0]:
# Download ROD dataset
# The download is skipped when the extraction folder already exists.
rod_destination_path = "/content/ROD"
if not os.path.isdir(rod_destination_path):
  # ROD archive links on Google Drive (the first id is the one fetched below):
  # https://drive.google.com/open?id=1p1GORdB44NjtNWJ4d1xqttseM1X9lWNF
  # https://drive.google.com/open?id=168neCvaHwMffFOqjOkth-wVaP4tRFuSW
  # First request: only stores the cookies sent by the server in ./cookie; the response body is discarded.
  !curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=1p1GORdB44NjtNWJ4d1xqttseM1X9lWNF" > /dev/null
  # Second request: re-sends those cookies and passes the last field of the cookie line matching
  # "download" as the `confirm` parameter (presumably Drive's large-file confirmation token — verify);
  # the response is saved as ROD.zip in the current directory.
  !curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=1p1GORdB44NjtNWJ4d1xqttseM1X9lWNF" -o "ROD.zip"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100   408    0   408    0     0   4340      0 --:--:-- --:--:-- --:--:--  4340
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 3146M    0 3146M    0     0  59.9M      0 --:--:--  0:00:52 --:--:-- 23.4M


In [0]:
# Download synROD dataset
# The download is skipped when the extraction folder already exists.
synrod_destination_path = "/content/synROD"
if not os.path.isdir(synrod_destination_path):
  # synROD archive links on Google Drive (the first id is the one fetched below):
  # https://drive.google.com/open?id=1rry4GViJLmmMpbm0B2s7MyQs5Dx8pFS3
  # https://drive.google.com/open?id=1V1fthSNAvsPRF6hLt_kf_xonw7lxAV03
  # First request: only stores the cookies sent by the server in ./cookie; the response body is discarded.
  !curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=1rry4GViJLmmMpbm0B2s7MyQs5Dx8pFS3" > /dev/null
  # Second request: re-sends those cookies and passes the last field of the cookie line matching
  # "download" as the `confirm` parameter (presumably Drive's large-file confirmation token — verify);
  # the response is saved as synROD.zip in the current directory.
  !curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=1rry4GViJLmmMpbm0B2s7MyQs5Dx8pFS3" -o "synROD.zip"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100   408    0   408    0     0   5230      0 --:--:-- --:--:-- --:--:--  5230
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 5343M    0 5343M    0     0  40.2M      0 --:--:--  0:02:12 --:--:-- 74.0M


In [0]:
# Extract ROD dataset (skipped when the destination folder already exists)
rod_destination_path = "/content/ROD"
if not os.path.isdir(rod_destination_path):
  with zipfile.ZipFile("/content/ROD.zip", 'r') as zip_ref:
      zip_ref.extractall(rod_destination_path)
# The archive is removed unconditionally.
# NOTE(review): when the download was skipped there is no ROD.zip, so `rm` just reports a missing file.
!rm ROD.zip

# Extract synROD dataset (skipped when the destination folder already exists)
synrod_destination_path = "/content/synROD"
if not os.path.isdir(synrod_destination_path):
  with zipfile.ZipFile("/content/synROD.zip", 'r') as zip_ref:
      zip_ref.extractall(synrod_destination_path)
# Remove the archive and the cookie jar written by the download cells.
!rm synROD.zip
!rm cookie

# Dataset root folders: one directory level below the extraction folders used above.
rod_path = "/content/ROD/ROD"
synrod_path = "/content/synROD/synROD"

"""
if os.path.isdir(os.path.join(synrod_path,  "bell_papper")):
  # line below needed only for the first extraction of the synrod dataset, in which bell_pepper is wrongly named bell_papper
  os.rename(os.path.join(synrod_path,  "bell_papper"), os.path.join(synrod_path,  "bell_pepper") )
  print("Bell pepper fixed")
"""

'\nif os.path.isdir(os.path.join(synrod_path,  "bell_papper")):\n  # line below needed only for the first extraction of the synrod dataset, in which bell_pepper is wrongly named bell_papper\n  os.rename(os.path.join(synrod_path,  "bell_papper"), os.path.join(synrod_path,  "bell_pepper") )\n  print("Bell pepper fixed")\n'

## Utility functions

In [0]:

# DATALOADER function
def collate(batch):
  """Identity collate function: hand the list of samples back untouched.

  Passed as `collate_fn` to the DataLoaders so that the samples reach
  `format_batch` as a plain list instead of being merged automatically.
  """
  samples = batch
  return samples

# helper function used to setup batches returned by the dataloaders in the way that is mentioned in the paper
def format_batch(batch, pretext_task="rotation"):
  """Turn a list of (original, transformed) sample pairs into two batched dicts.

  Each element of `batch` is a pair `(tuple_, tuple_hat)`; both members are
  `(rgb_tensor, depth_tensor, label)` triples. The tensors of every triple get a
  new leading dimension and are concatenated along it.

  Args:
    batch: list of sample pairs as produced by a DataLoader using `collate`.
    pretext_task: "rotation" or "zoom_clf" makes the transformed labels a
      LongTensor; any other value (e.g. "zoom") makes them a FloatTensor.

  Returns:
    (data, data_hat): two dicts with the keys "rgb", "depth" and "label".
    `data["label"]` is always a LongTensor.
  """
  def _batched(triples, label_tensor_type):
    # Collect the three components of every triple, then merge them once.
    rgb_list, depth_list, label_list = [], [], []
    for rgb_img, depth_img, label in triples:
      rgb_list.append(rgb_img.unsqueeze(0))
      depth_list.append(depth_img.unsqueeze(0))
      label_list.append(label)
    return {
      "rgb": torch.cat(rgb_list, dim=0),
      "depth": torch.cat(depth_list, dim=0),
      "label": label_tensor_type(label_list),
    }

  plain_triples = []
  hat_triples = []
  for tuple_, tuple_hat in batch:
    plain_triples.append(tuple_)
    hat_triples.append(tuple_hat)

  # Classification pretext tasks need integer class indices, the regression one needs floats.
  if pretext_task in ("rotation", "zoom_clf"):
    hat_label_type = torch.LongTensor
  else:
    hat_label_type = torch.FloatTensor

  data = _batched(plain_triples, torch.LongTensor)
  data_hat = _batched(hat_triples, hat_label_type)
  return data, data_hat

In [0]:
import matplotlib.pyplot as plt

def learning_curves(training_accuracies, training_losses, validation_accuracies, validation_losses, plot_title, plot_size=(16,6)):
  """
  Draw two side-by-side panels: accuracy per epoch (left) and loss per epoch (right).

  Every panel shows a "Training" and a "Validation" curve; the x axis starts at
  epoch 1 and each curve is as long as the sequence it is given.
  `plot_title` becomes the figure title, `plot_size` the figure size.
  """
  fig, axes = plt.subplots(nrows=1, ncols=2, figsize=plot_size)

  panels = (
    ("Accuracy", training_accuracies, validation_accuracies),
    ("Loss", training_losses, validation_losses),
  )
  for axis, (panel_title, train_values, valid_values) in zip(axes, panels):
    for curve_label, values in (("Training", train_values), ("Validation", valid_values)):
      epochs_axis = range(1, len(values) + 1)
      axis.plot(epochs_axis, values, label=curve_label)
    axis.legend()
    axis.set_title(panel_title)
    axis.set_xlabel("Epochs")

  fig.suptitle(plot_title)
  plt.show()

In [0]:
# Entropy loss
class HLoss(nn.Module):
    """Entropy of the softmax distribution of a batch of logits.

    `forward` returns a scalar: the entropy of every row of `x` (softmax taken
    over dim 1), summed over the whole batch. No averaging is applied here.
    """

    def forward(self, x):
        probabilities = F.softmax(x, dim=1)
        log_probabilities = F.log_softmax(x, dim=1)
        weighted = probabilities * log_probabilities
        return -1.0 * weighted.sum()

## Path variables declaration

In [0]:
# Dataset root folders (same values as the ones assigned in the extraction cell).
rod_path = "/content/ROD/ROD"
synrod_path = "/content/synROD/synROD"


## Copy the dataset and net classes into the current folder

In [0]:
# Copy the contents of the project folders on the mounted Drive into /content/.
# Presumably these provide the modules imported by the next cell (synrod, rod, dnet2, tconfig, ...) — verify.
!cp -r "/content/drive/My Drive/DL_project/architecture/dataset/." "/content/"
!cp -r "/content/drive/My Drive/DL_project/architecture/net/." "/content/"
!cp -r "/content/drive/My Drive/DL_project/architecture/transform_config/." "/content/"
!cp -r "/content/drive/My Drive/DL_project/architecture/datasets_with_splits/." "/content/"
!cp -r "/content/drive/My Drive/DL_project/data_split/." "/content/"

## Import datasets, net and configurator classes

In [0]:
from synrod import SynROD
from rod import ROD

from synrodmod import SynRODMOD
from rodmod import RODMOD

from dnet2 import DNet
from tconfig import TransformConfig

# Train test function definitions

## Evaluate net

In [0]:
def evaluate_net(net, eval_dataloader, pretext_task, criterion=None, device=None):
  """Evaluate the main-task head of `net` over a whole dataloader.

  Args:
    net: model called as `net(rgb, depth, mode="main")`.
    eval_dataloader: iterable of batches in the format accepted by `format_batch`.
    pretext_task: forwarded to `format_batch` (only affects the dtype of the
      pretext labels, which are not used here).
    criterion: loss applied to the main-task outputs; defaults to
      `nn.CrossEntropyLoss()`.
    device: device the batches are moved to; defaults to the device of the
      first parameter of `net` (CPU when the net has no parameters).

  Returns:
    (accuracy, mean_loss): fraction of correctly classified samples over all
    batches, and the per-batch loss averaged over the number of batches.

  Raises:
    ValueError: if `eval_dataloader` yields no batches.
  """
  if criterion is None:
    criterion = nn.CrossEntropyLoss()
  if device is None:
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

  net.eval()
  validation_corrects = 0
  validation_main_loss = 0.0
  validation_dim = 0
  n_iters = 0

  # No gradients are needed for evaluation: skip building the autograd graph.
  with torch.no_grad():
    for source_val_batch in eval_dataloader:
      S, _ = format_batch(source_val_batch, pretext_task=pretext_task)

      # MAIN TASK: only the untransformed samples are evaluated
      source_rgb_images = S["rgb"].to(device)
      source_depth_images = S["depth"].to(device)
      source_main_labels = S["label"].to(device)

      outputs = net(source_rgb_images, source_depth_images, mode="main")
      loss_M = criterion(outputs, source_main_labels)

      preds = outputs.argmax(dim=1)
      validation_corrects += (preds == source_main_labels).sum().item()
      validation_dim += len(source_main_labels)
      validation_main_loss += loss_M.item()

      del source_rgb_images, source_depth_images, source_main_labels

      n_iters += 1

  if n_iters == 0:
    raise ValueError("evaluate_net received an empty dataloader")

  # Return only after every batch has been processed.
  return validation_corrects / validation_dim, validation_main_loss / n_iters


## Relative rotation

In [0]:
def train_test_ours(synrod, synrod_validation, rod, rod_test, hyperparams, save_folder, net_name="ours_net", light_validation=False, preexisting_net=None, test_every=1):
  """Train the architecture called "OURS" in the reference paper with the rotation pretext task.

  The net is trained in end-to-end fashion. Every optimisation step accumulates
  the gradients of four losses before a single optimizer update:
    1. cross-entropy of the main head on a source batch;
    2. 0.1 * mean entropy of the main head predictions on a target batch
       (target labels are never used);
    3. lambda * cross-entropy of the pretext head on a transformed source batch;
    4. lambda * cross-entropy of the pretext head on a transformed target batch.

  Args:
    synrod: source training dataset; items must have the pair format consumed by `format_batch`.
    synrod_validation: source dataset evaluated with `evaluate_net` (validation curves).
    rod: target dataset used for entropy minimisation and for the pretext task.
    rod_test: target dataset evaluated with `evaluate_net` (test curves).
    hyperparams: parameters dict with the keys
      {
        lr
        batch_size
        weight_decay
        step_size
        epochs
        lambda (weight of the two pretext losses, required)
        momentum (optional, default 0.9)
        gamma (optional, default 0.1)
      }
    save_folder: directory where the state dict is saved as "<net_name>.pth"
      every 5 epochs and after the last one; created if it does not exist.
    net_name: file name (without extension) of the saved state dict.
    light_validation: if True the validation is done only in the last epoch.
    preexisting_net: optional net to keep training instead of a fresh `DNet`.
    test_every: the target test set is evaluated every `test_every` epochs and
      always after the last one (default 1: every epoch).

  Returns:
    (net, train_loss, train_acc, valid_loss, valid_acc, test_loss, test_acc);
    all but `net` are lists with one entry per epoch in which the
    corresponding quantity was computed.

  Raises:
    ValueError: if `test_every` is smaller than 1 or one of the training
      dataloaders is empty.
  """
  lr = hyperparams["lr"]
  batch_size = hyperparams["batch_size"]
  weight_decay = hyperparams["weight_decay"]
  step_size = hyperparams["step_size"]
  epochs = hyperparams["epochs"]
  curr_momentum = hyperparams.get("momentum", 0.9)
  curr_gamma = hyperparams.get("gamma", 0.1)
  lambda_ = hyperparams["lambda"]
  em_weight = 0.1  # weight of the entropy-minimisation term

  if test_every < 1:
    raise ValueError("test_every must be >= 1")

  # Fall back to the CPU when no GPU is visible instead of failing on the first transfer.
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
  # The flag must be assigned: a bare `cudnn.benchmark` expression is a no-op.
  cudnn.benchmark = True

  NUM_WORKERS = 4

  def make_loader(dataset):
    # `collate` keeps the raw list of samples; `format_batch` builds the tensors later.
    return DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=NUM_WORKERS, collate_fn=collate)

  # Two independently shuffled loaders per domain: one feeds the main task,
  # the other one feeds the pretext task.
  source = make_loader(synrod)
  source_rot = make_loader(synrod)
  target_rot = make_loader(rod)
  target = make_loader(rod)

  # evaluation dataloaders
  source_validation = make_loader(synrod_validation)
  target_test = make_loader(rod_test)

  # zip() below stops at the shortest loader, so this is the number of steps per epoch
  small_dl_dim = min([len(source), len(source_rot), len(target_rot), len(target)])
  if small_dl_dim == 0:
    raise ValueError("at least one training dataloader is empty")

  # NET DEFINITION
  net = DNet(num_classes=47, dim_pretext=4).to(DEVICE) if preexisting_net is None else preexisting_net.to(DEVICE)

  criterion = nn.CrossEntropyLoss()
  entropy_min_criterion = HLoss()

  optimizer = optim.SGD(net.parameters(), lr=lr,
                        momentum=curr_momentum,
                        weight_decay=weight_decay)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=curr_gamma)

  def pretext_step(batch_hat):
    """Forward/backward of the pretext head on one transformed batch.

    Returns (unweighted loss value, number of correct predictions, batch size).
    """
    rgb = batch_hat["rgb"].to(DEVICE)
    depth = batch_hat["depth"].to(DEVICE)
    labels = batch_hat["label"].to(DEVICE)
    outputs = net(rgb, depth, mode="pretext")
    loss = criterion(outputs, labels)
    (lambda_ * loss).backward()
    corrects = (outputs.detach().argmax(dim=1) == labels).sum().item()
    return loss.item(), corrects, labels.size(0)

  # lists that accumulate loss/accuracy values over the training period
  train_loss = []
  train_acc = []
  valid_loss = []
  valid_acc = []
  test_loss = []
  test_acc = []

  for i in range(epochs):
    train_main_corrects = 0
    train_main_loss = 0
    train_main_samples = 0

    train_rot_source_corrects = 0
    train_rot_source_loss = 0
    train_rot_source_samples = 0

    train_rot_target_corrects = 0
    train_rot_target_loss = 0
    train_rot_target_samples = 0

    n_iters = 0

    print("EPOCH: ", i + 1)

    # evaluate_net switches the net to eval mode, so training mode is restored every epoch
    net.train()

    for source_batch, target_batch, source_rot_batch, target_rot_batch in tqdm(zip(source, target, source_rot, target_rot), total=small_dl_dim):
      S, _ = format_batch(source_batch)
      T, _ = format_batch(target_batch)

      _, S_hat = format_batch(source_rot_batch, pretext_task="rotation")
      _, T_hat = format_batch(target_rot_batch, pretext_task="rotation")

      # zero the gradients
      optimizer.zero_grad()

      # ------------------------- MAIN TASK --------------------------
      # supervised step on the untransformed source samples
      source_rgb_images = S["rgb"].to(DEVICE)
      source_depth_images = S["depth"].to(DEVICE)
      source_main_labels = S["label"].to(DEVICE)

      outputs = net(source_rgb_images, source_depth_images, mode="main")
      loss_M = criterion(outputs, source_main_labels)
      loss_M.backward()

      # compute stats
      preds = outputs.detach().argmax(dim=1)
      train_main_corrects += (preds == source_main_labels).sum().item()
      train_main_loss += loss_M.item()
      train_main_samples += source_main_labels.size(0)

      del source_rgb_images, source_depth_images, source_main_labels

      # entropy minimization on the untransformed target samples
      # (target labels can't be used for training)
      target_rgb_images = T["rgb"].to(DEVICE)
      target_depth_images = T["depth"].to(DEVICE)
      outputs = net(target_rgb_images, target_depth_images, mode="main")
      loss_entropy_min = entropy_min_criterion(outputs)
      # HLoss sums over the batch: normalise by the size of the batch it was computed on
      loss_ent = (em_weight / outputs.size(0)) * loss_entropy_min
      loss_ent.backward()

      del target_rgb_images, target_depth_images

      # ------------------------- PRETEXT TASK -------------------------------
      # source domain, transformed samples
      step_loss, step_corrects, step_samples = pretext_step(S_hat)
      train_rot_source_loss += step_loss
      train_rot_source_corrects += step_corrects
      train_rot_source_samples += step_samples

      # target domain, transformed samples
      step_loss, step_corrects, step_samples = pretext_step(T_hat)
      train_rot_target_loss += step_loss
      train_rot_target_corrects += step_corrects
      train_rot_target_samples += step_samples

      # UPDATE WEIGHTS
      optimizer.step()

      n_iters += 1

    # Accuracies are computed over the samples actually seen: zip() truncates
    # every epoch to the shortest loader, so the dataset sizes would be wrong denominators.
    train_loss.append(train_main_loss / n_iters)
    train_acc.append(train_main_corrects / train_main_samples)

    print("train main accuracy: ", train_main_corrects / train_main_samples)
    print("train main loss: ", train_main_loss / n_iters)
    print("train rot source accuracy: ", train_rot_source_corrects / train_rot_source_samples)
    print("train rot source loss: ", train_rot_source_loss / n_iters)
    print("train rot target accuracy: ", train_rot_target_corrects / train_rot_target_samples)
    print("train rot target loss: ", train_rot_target_loss / n_iters)

    # ---------------------- EVALUATION -----------------------------------
    if not light_validation or i == epochs - 1:
      # VALIDATION ON SOURCE
      val_acc, val_loss = evaluate_net(net, source_validation, "rotation")

      valid_loss.append(val_loss)
      valid_acc.append(val_acc)
      print("validation main accuracy: ", val_acc)
      print("validation main loss: ", val_loss)

    if i == epochs - 1 or (i + 1) % test_every == 0:
      # TEST ON TARGET
      tst_acc, tst_loss = evaluate_net(net, target_test, "rotation")

      test_loss.append(tst_loss)
      test_acc.append(tst_acc)
      print("test main target accuracy: ", tst_acc)
      print("test main target loss: ", tst_loss)

    print()
    scheduler.step()

    # SAVE NET: every 5 epochs and at the last one
    if (i + 1) % 5 == 0 or i == epochs - 1:
      os.makedirs(save_folder, exist_ok=True)
      model_save_path = os.path.join(save_folder, net_name + ".pth")
      torch.save(net.state_dict(), model_save_path)

  return net, train_loss, train_acc, valid_loss, valid_acc, test_loss, test_acc

## Relative zoom (classification)

In [0]:
def train_test_zoom_clf(synrod, synrod_validation, rod, rod_test, hyperparams, save_folder, net_name="ours_net", light_validation=False, preexisting_net=None, test_every=1):
  """Our pretext task variation, implemented as a classification problem to predict the relative zoom between rgb and depth.

  The net is trained in end-to-end fashion. Every optimisation step accumulates
  the gradients of four losses before a single optimizer update:
    1. cross-entropy of the main head on a source batch;
    2. 0.1 * mean entropy of the main head predictions on a target batch
       (target labels are never used);
    3. lambda * cross-entropy of the pretext head on a transformed source batch;
    4. lambda * cross-entropy of the pretext head on a transformed target batch.

  Args:
    synrod: source training dataset; items must have the pair format consumed by `format_batch`.
    synrod_validation: source dataset evaluated with `evaluate_net` (validation curves).
    rod: target dataset used for entropy minimisation and for the pretext task.
    rod_test: target dataset evaluated with `evaluate_net` (test curves).
    hyperparams: parameters dict with the keys
      {
        lr
        batch_size
        weight_decay
        step_size
        epochs
        lambda (weight of the two pretext losses, required)
        momentum (optional, default 0.9)
        gamma (optional, default 0.1)
      }
    save_folder: directory where the state dict is saved as "<net_name>.pth"
      every 5 epochs and after the last one; created if it does not exist.
    net_name: file name (without extension) of the saved state dict.
    light_validation: if True the validation is done only in the last epoch.
    preexisting_net: optional net to keep training instead of a fresh `DNet`
      (which is built with a 5-way pretext head).
    test_every: the target test set is evaluated every `test_every` epochs and
      always after the last one (default 1: every epoch).

  Returns:
    (net, train_loss, train_acc, valid_loss, valid_acc, test_loss, test_acc);
    all but `net` are lists with one entry per epoch in which the
    corresponding quantity was computed.

  Raises:
    ValueError: if `test_every` is smaller than 1 or one of the training
      dataloaders is empty.
  """
  lr = hyperparams["lr"]
  batch_size = hyperparams["batch_size"]
  weight_decay = hyperparams["weight_decay"]
  step_size = hyperparams["step_size"]
  epochs = hyperparams["epochs"]
  curr_momentum = hyperparams.get("momentum", 0.9)
  curr_gamma = hyperparams.get("gamma", 0.1)
  lambda_ = hyperparams["lambda"]
  em_weight = 0.1  # weight of the entropy-minimisation term

  if test_every < 1:
    raise ValueError("test_every must be >= 1")

  # Fall back to the CPU when no GPU is visible instead of failing on the first transfer.
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
  # The flag must be assigned: a bare `cudnn.benchmark` expression is a no-op.
  cudnn.benchmark = True

  NUM_WORKERS = 4

  def make_loader(dataset):
    # `collate` keeps the raw list of samples; `format_batch` builds the tensors later.
    return DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=NUM_WORKERS, collate_fn=collate)

  # Two independently shuffled loaders per domain: one feeds the main task,
  # the other one feeds the pretext task.
  source = make_loader(synrod)
  source_rot = make_loader(synrod)
  target_rot = make_loader(rod)
  target = make_loader(rod)

  # evaluation dataloaders
  source_validation = make_loader(synrod_validation)
  target_test = make_loader(rod_test)

  # zip() below stops at the shortest loader, so this is the number of steps per epoch
  small_dl_dim = min([len(source), len(source_rot), len(target_rot), len(target)])
  if small_dl_dim == 0:
    raise ValueError("at least one training dataloader is empty")

  # NET DEFINITION
  net = DNet(num_classes=47, dim_pretext=5).to(DEVICE) if preexisting_net is None else preexisting_net.to(DEVICE)

  classification_criterion = nn.CrossEntropyLoss()
  entropy_min_criterion = HLoss()

  optimizer = optim.SGD(net.parameters(), lr=lr,
                        momentum=curr_momentum,
                        weight_decay=weight_decay)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=curr_gamma)

  def pretext_step(batch_hat):
    """Forward/backward of the pretext head on one transformed batch.

    Returns (unweighted loss value, number of correct predictions, batch size).
    """
    rgb = batch_hat["rgb"].to(DEVICE)
    depth = batch_hat["depth"].to(DEVICE)
    labels = batch_hat["label"].to(DEVICE)
    outputs = net(rgb, depth, mode="pretext")
    loss = classification_criterion(outputs, labels)
    (lambda_ * loss).backward()
    corrects = (outputs.detach().argmax(dim=1) == labels).sum().item()
    return loss.item(), corrects, labels.size(0)

  # lists that accumulate loss/accuracy values over the training period
  train_loss = []
  train_acc = []
  valid_loss = []
  valid_acc = []
  test_loss = []
  test_acc = []

  for i in range(epochs):
    train_main_corrects = 0
    train_main_loss = 0
    train_main_samples = 0

    train_rot_source_corrects = 0
    train_rot_source_loss = 0
    train_rot_source_samples = 0

    train_rot_target_corrects = 0
    train_rot_target_loss = 0
    train_rot_target_samples = 0

    n_iters = 0

    print("EPOCH: ", i + 1)

    # evaluate_net switches the net to eval mode, so training mode is restored every epoch
    net.train()

    for source_batch, target_batch, source_rot_batch, target_rot_batch in tqdm(zip(source, target, source_rot, target_rot), total=small_dl_dim):
      S, _ = format_batch(source_batch)
      T, _ = format_batch(target_batch)

      _, S_hat = format_batch(source_rot_batch, pretext_task="zoom_clf")
      _, T_hat = format_batch(target_rot_batch, pretext_task="zoom_clf")

      # zero the gradients
      optimizer.zero_grad()

      # ------------------------- MAIN TASK --------------------------
      # supervised step on the untransformed source samples
      source_rgb_images = S["rgb"].to(DEVICE)
      source_depth_images = S["depth"].to(DEVICE)
      source_main_labels = S["label"].to(DEVICE)

      outputs = net(source_rgb_images, source_depth_images, mode="main")
      loss_M = classification_criterion(outputs, source_main_labels)
      loss_M.backward()

      # compute stats
      preds = outputs.detach().argmax(dim=1)
      train_main_corrects += (preds == source_main_labels).sum().item()
      train_main_loss += loss_M.item()
      train_main_samples += source_main_labels.size(0)

      del source_rgb_images, source_depth_images, source_main_labels

      # entropy minimization on the untransformed target samples
      # (target labels can't be used for training)
      target_rgb_images = T["rgb"].to(DEVICE)
      target_depth_images = T["depth"].to(DEVICE)
      outputs = net(target_rgb_images, target_depth_images, mode="main")
      loss_entropy_min = entropy_min_criterion(outputs)
      # HLoss sums over the batch: normalise by the size of the batch it was computed on
      loss_ent = (em_weight / outputs.size(0)) * loss_entropy_min
      loss_ent.backward()

      del target_rgb_images, target_depth_images

      # ------------------------- PRETEXT TASK -------------------------------
      # source domain, transformed samples
      step_loss, step_corrects, step_samples = pretext_step(S_hat)
      train_rot_source_loss += step_loss
      train_rot_source_corrects += step_corrects
      train_rot_source_samples += step_samples

      # target domain, transformed samples
      step_loss, step_corrects, step_samples = pretext_step(T_hat)
      train_rot_target_loss += step_loss
      train_rot_target_corrects += step_corrects
      train_rot_target_samples += step_samples

      # UPDATE WEIGHTS
      optimizer.step()

      n_iters += 1

    # Accuracies are computed over the samples actually seen: zip() truncates
    # every epoch to the shortest loader, so the dataset sizes would be wrong denominators.
    train_loss.append(train_main_loss / n_iters)
    train_acc.append(train_main_corrects / train_main_samples)

    print("train main accuracy: ", train_main_corrects / train_main_samples)
    print("train main loss: ", train_main_loss / n_iters)
    print("train rot source accuracy: ", train_rot_source_corrects / train_rot_source_samples)
    print("train rot source loss: ", train_rot_source_loss / n_iters)
    print("train rot target accuracy: ", train_rot_target_corrects / train_rot_target_samples)
    print("train rot target loss: ", train_rot_target_loss / n_iters)

    # ---------------------- EVALUATION -----------------------------------
    if not light_validation or i == epochs - 1:
      # VALIDATION ON SOURCE
      val_acc, val_loss = evaluate_net(net, source_validation, "zoom_clf")

      valid_loss.append(val_loss)
      valid_acc.append(val_acc)
      print("validation main accuracy: ", val_acc)
      print("validation main loss: ", val_loss)

    if i == epochs - 1 or (i + 1) % test_every == 0:
      # TEST ON TARGET
      tst_acc, tst_loss = evaluate_net(net, target_test, "zoom_clf")

      test_loss.append(tst_loss)
      test_acc.append(tst_acc)
      print("test main target accuracy: ", tst_acc)
      print("test main target loss: ", tst_loss)

    print()
    scheduler.step()

    # SAVE NET: every 5 epochs and at the last one
    if (i + 1) % 5 == 0 or i == epochs - 1:
      os.makedirs(save_folder, exist_ok=True)
      model_save_path = os.path.join(save_folder, net_name + ".pth")
      torch.save(net.state_dict(), model_save_path)

  return net, train_loss, train_acc, valid_loss, valid_acc, test_loss, test_acc

## Relative zoom (regression)

In [0]:
def train_test_zoom_clf(synrod, synrod_validation, rod, rod_test, hyperparams, save_folder, net_name="ours_net", light_validation=False, preexisting_net=None):
  """Train the net end-to-end with the relative-zoom pretext task posed as a regression.

  Every iteration accumulates the gradients of four losses before a single
  optimizer step:
    1. cross-entropy on the source main task;
    2. weighted entropy minimization on the target main-task outputs
       (target labels are never used for training);
    3. lambda-weighted MSE on the source pretext ("zoom_reg") batch;
    4. lambda-weighted MSE on the target pretext ("zoom_reg") batch.

  NOTE(review): despite the `_clf` suffix this is the regression variant. If an
  earlier cell defines a function with the same name, this definition replaces
  it. The name is kept so that existing callers keep working.

  Args:
    synrod: source training dataset (used for both main and pretext batches).
    synrod_validation: source dataset used for validation.
    rod: target training dataset (used for entropy minimization and pretext).
    rod_test: target dataset used for testing.
    hyperparams: parameters dict with the keys
      {
        lr
        batch_size
        weight_decay
        step_size
        epochs
        lambda (weight of the pretext losses, required)
        momentum (optional, default 0.9)
        gamma (optional, default 0.1)
        em_weight (optional, default 0.1, weight of the entropy loss)
      }
    save_folder: folder where the checkpoint is written (created if missing).
    net_name: checkpoint file name, without the ".pth" extension.
    light_validation: if True the validation is done only in the last epoch.
    preexisting_net: if given, this net is trained instead of a new DNet.

  Return:
    (trained_model, train_loss, train_acc, valid_loss, valid_acc, test_loss, test_acc)
    where every element after the model is a list with one entry per
    evaluated epoch.
  """
  lr = hyperparams["lr"]
  batch_size = hyperparams["batch_size"]
  weight_decay = hyperparams["weight_decay"]
  step_size = hyperparams["step_size"]
  epochs = hyperparams["epochs"]
  curr_momentum = hyperparams.get("momentum", 0.9)
  curr_gamma = hyperparams.get("gamma", 0.1)
  lambda_ = hyperparams["lambda"]
  em_weight = hyperparams.get("em_weight", 0.1)

  DEVICE = "cuda"
  # The original bare `cudnn.benchmark` expression had no effect; the flag must be assigned.
  cudnn.benchmark = True

  NUM_WORKERS = 4

  # Make sure the checkpoint folder exists before spending time on training.
  if save_folder:
    os.makedirs(save_folder, exist_ok=True)

  # dataloader definition with given batch size
  source = DataLoader(synrod, batch_size=batch_size, shuffle=True, num_workers=NUM_WORKERS, collate_fn=collate)
  source_rot = DataLoader(synrod, batch_size=batch_size, shuffle=True, num_workers=NUM_WORKERS, collate_fn=collate)
  target_rot = DataLoader(rod, batch_size=batch_size, shuffle=True, num_workers=NUM_WORKERS, collate_fn=collate)
  target = DataLoader(rod, batch_size=batch_size, shuffle=True, num_workers=NUM_WORKERS, collate_fn=collate)

  # evaluation dataloaders (no data augmentation on these)
  source_validation = DataLoader(synrod_validation, batch_size=batch_size, shuffle=True, num_workers=NUM_WORKERS, collate_fn=collate)
  target_test = DataLoader(rod_test, batch_size=batch_size, shuffle=True, num_workers=NUM_WORKERS, collate_fn=collate)

  # zip() stops at the shortest loader, so this is the number of iterations per epoch
  small_dl_dim = min([len(source), len(source_rot), len(target_rot), len(target)])

  # datasets dimensions (denominators of the printed accuracies)
  source_dim = len(synrod)
  target_dim = len(rod)

  # NET DEFINITION
  net = DNet(num_classes=47, dim_pretext=1).to(DEVICE) if preexisting_net is None else preexisting_net.to(DEVICE)

  classification_criterion = nn.CrossEntropyLoss()
  regression_criterion = nn.MSELoss()
  entropy_min_criterion = HLoss()

  parameters_to_optimize = net.parameters()
  optimizer = optim.SGD(parameters_to_optimize, lr=lr,
                        momentum=curr_momentum,
                        weight_decay=weight_decay)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=curr_gamma)

  # lists that accumulate loss/accuracy values over the training period
  train_loss = []
  train_acc = []
  valid_loss = []
  valid_acc = []
  test_loss = []
  test_acc = []

  for i in range(epochs):
    train_main_corrects = 0
    train_main_loss = 0

    train_rot_source_corrects = 0
    train_rot_source_loss = 0

    train_rot_target_corrects = 0
    train_rot_target_loss = 0

    n_iters = 0

    print("EPOCH: ", i + 1)

    # Back to training mode once per epoch (evaluation runs at the end of the previous one).
    net.train()

    batches = zip(source, target, source_rot, target_rot)
    for source_batch, target_batch, source_rot_batch, target_rot_batch in tqdm(batches, total=small_dl_dim):

      S, _ = format_batch(source_batch)
      T, _ = format_batch(target_batch)

      _, S_hat = format_batch(source_rot_batch, pretext_task="zoom_reg")
      _, T_hat = format_batch(target_rot_batch, pretext_task="zoom_reg")

      # zero the gradients; the four backward() calls below accumulate into them
      optimizer.zero_grad()

      # ------------------------- MAIN TASK --------------------------
      # setup SOURCE DOMAIN STANDARD dataset to feed to the net
      source_rgb_images = S["rgb"].to(DEVICE)
      source_depth_images = S["depth"].to(DEVICE)
      source_main_labels = S["label"].to(DEVICE)

      # train on source original images
      outputs = net.forward(source_rgb_images, source_depth_images, mode="main")
      loss_M = classification_criterion(outputs, source_main_labels)
      loss_M.backward()

      # compute stats
      _, preds = torch.max(outputs.data, 1)
      running_corrects = torch.sum(preds == source_main_labels.data).item()
      train_main_loss += loss_M.item()
      train_main_corrects += running_corrects

      # source batch size, used below to scale the entropy loss
      tot_samples = len(source_main_labels)

      del source_rgb_images, source_depth_images, source_main_labels

      # entropy minimization
      # setup TARGET DOMAIN STANDARD dataset to feed to the net
      target_rgb_images = T["rgb"].to(DEVICE)
      target_depth_images = T["depth"].to(DEVICE)
      # target labels in this phase can't be used for training
      outputs = net.forward(target_rgb_images, target_depth_images, mode="main")
      loss_entropy_min = entropy_min_criterion(outputs)
      loss_ent = (em_weight / tot_samples) * loss_entropy_min
      loss_ent.backward()

      del target_rgb_images, target_depth_images

      # ------------------------- PRETEXT TASK -------------------------------
      # setup SOURCE DOMAIN pretext (zoom) dataset to feed to the net
      source_rotated_rgb_images = S_hat["rgb"].to(DEVICE)
      source_rotated_depth_images = S_hat["depth"].to(DEVICE)
      source_rotated_labels = S_hat["label"].to(DEVICE)

      # train on source pretext batch
      outputs = net.forward(source_rotated_rgb_images, source_rotated_depth_images, mode="pretext")
      loss_P_1 = regression_criterion(outputs.flatten(), source_rotated_labels)
      lossP1 = lambda_ * loss_P_1
      lossP1.backward()

      # compute stats
      # NOTE(review): with dim_pretext=1 the pretext output presumably has a
      # single column, so the argmax is always 0 and this "accuracy" is not a
      # meaningful metric for the regression. It is kept only so the printed
      # summary is unchanged; rely on the loss instead.
      _, preds = torch.max(outputs.data, 1)
      running_corrects = torch.sum(preds == source_rotated_labels.data).item()
      train_rot_source_corrects += running_corrects
      train_rot_source_loss += loss_P_1.item()

      del source_rotated_rgb_images, source_rotated_depth_images, source_rotated_labels

      # setup TARGET DOMAIN pretext (zoom) dataset to feed to the net
      target_rotated_rgb_images = T_hat["rgb"].to(DEVICE)
      target_rotated_depth_images = T_hat["depth"].to(DEVICE)
      target_rotated_labels = T_hat["label"].to(DEVICE)

      # train on target pretext batch
      outputs = net.forward(target_rotated_rgb_images, target_rotated_depth_images, mode="pretext")
      loss_P_2 = regression_criterion(outputs.flatten(), target_rotated_labels)
      lossP2 = lambda_ * loss_P_2
      lossP2.backward()

      # compute stats (same caveat as for the source pretext batch)
      _, preds = torch.max(outputs.data, 1)
      running_corrects = torch.sum(preds == target_rotated_labels.data).item()
      train_rot_target_corrects += running_corrects
      train_rot_target_loss += loss_P_2.item()

      del target_rotated_rgb_images, target_rotated_depth_images, target_rotated_labels

      # UPDATE WEIGHTS
      optimizer.step()

      n_iters += 1

    train_loss.append(train_main_loss / n_iters)
    train_acc.append(train_main_corrects / source_dim)

    print("train main accuracy: ", train_main_corrects / source_dim)
    print("train main loss: ", train_main_loss / n_iters)
    print("train rot source accuracy: ", train_rot_source_corrects / source_dim)
    print("train rot source loss: ", train_rot_source_loss / n_iters)
    print("train rot target accuracy: ", train_rot_target_corrects / target_dim)
    print("train rot target loss: ", train_rot_target_loss / n_iters)

    # ---------------------- EVALUATION -----------------------------------
    if not light_validation or i == epochs - 1:
      # VALIDATION ON SOURCE
      val_acc, val_loss = evaluate_net(net, source_validation, "zoom_reg")

      valid_loss.append(val_loss)
      valid_acc.append(val_acc)
      print("validation main accuracy: ", val_acc)
      print("validation main loss: ", val_loss)

    # NOTE(review): (i + 5) % 1 is always 0, so the test runs at every epoch.
    # The condition is left untouched because callers plot per-epoch test curves.
    if i == epochs - 1 or (i + 5) % 1 == 0:
      # TEST ON TARGET
      tst_acc, tst_loss = evaluate_net(net, target_test, "zoom_reg")

      # The original appended the undefined name `tst_losss` (NameError).
      test_loss.append(tst_loss)
      test_acc.append(tst_acc)
      print("test main target accuracy: ", tst_acc)
      print("test main target loss: ", tst_loss)

    print()
    scheduler.step()

    # SAVE NET: every 5 epochs and at the last one
    if (i + 1) % 5 == 0 or i == epochs - 1:
      model_save_path = os.path.join(save_folder, net_name + ".pth")
      torch.save(net.state_dict(), model_save_path)

  return net, train_loss, train_acc, valid_loss, valid_acc, test_loss, test_acc

# Experiment section

### define normalization transformations

In [0]:
# Resize to 256 and center-crop to 224 (or use resize_shape=224).
tfConfig = TransformConfig(
    resize_shape=256,
    centercrop_shape=224,
)

# Config types are imagenet, rgb_mod, depth_mod, rgb_depth_mod;
# "mod" corresponds to the modification of imagenet weights with the computed ones.
synrod_param_values, rod_param_values = tfConfig.get_rotation_configuration(
    config_type="imagenet",
)

### configure datasets

In [0]:
# synROD dataset built from the "train1" split file
synrod_train = SynRODMOD(
    synrod_path,
    item_extractor_fn="rotation",
    item_extractor_param_values=synrod_param_values,
    synarid_path="/content/synARID_50k-split_sync_train1.txt",
)

# synROD dataset built from the "test1" split file
synrod_test = SynRODMOD(
    synrod_path,
    item_extractor_fn="rotation",
    item_extractor_param_values=synrod_param_values,
    synarid_path="/content/synARID_50k-split_sync_test1.txt",
)

# ROD dataset, configured with its own parameter values
rod = RODMOD(
    rod_path,
    item_extractor_fn="rotation",
    item_extractor_param_values=rod_param_values,
    rod_split_path="/content/rod-split_sync.txt",
)

# Train 

In [0]:
# Save net
# Folder that receives the checkpoints, and base name of the checkpoint file.
save_folder = "/content/net_dumps"
# Create exactly the folder that is used later. The previous `!mkdir "net_dumps"`
# created a directory relative to the current working directory, which only
# matches save_folder when the notebook runs from /content.
os.makedirs(save_folder, exist_ok=True)
net_name = "kaiming_init_20epochs"

In [0]:
# Hyperparameter grid: each key maps to the list of values to try.
parameters_dict = {
    "lr": [3e-4],
    "batch_size": [64],
    "epochs": [20],
    "weight_decay": [5e-2],
    "step_size": [7],
    "lambda": [1.0],
}

# Train once per hyperparameter combination, then plot the collected curves.
for grid in ParameterGrid(parameters_dict):
  (net,
   train_loss, train_acc,
   valid_loss, valid_acc,
   test_loss, test_acc) = train_test_ours(
      synrod_train,
      synrod_test,
      rod,  # passed twice: the same ROD dataset fills both of these positional arguments
      rod,
      grid,
      save_folder,
      net_name,
      light_validation=False,
      preexisting_net=None,
  )

  learning_curves(train_acc, train_loss, valid_acc, valid_loss, "Kaiming init")
  learning_curves([], [], test_acc, test_loss, "Test - Kaiming init")

# Load net
Run this cell only if you need to reload previously saved weights from `save_folder`.

In [0]:
# Load net from folder
# The checkpoint path is rebuilt from the same folder and name used when saving.
net_save_path = os.path.join(save_folder, net_name + ".pth")

# Read the saved weights first, then copy them into a newly built net.
state_dict = torch.load(net_save_path, map_location="cuda")
net2 = DNet(47, dim_pretext=5)
net2.load_state_dict(state_dict)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/checkpoints/resnet18-5c106cde.pth


HBox(children=(FloatProgress(value=0.0, max=46827520.0), HTML(value='')))




<All keys matched successfully>