# EMBRACENET APPLICATION

In [None]:
import torch
import torch.nn as nn


class EmbraceNet(nn.Module):
  """Fuse several modality feature vectors into a single embraced vector.

  Each modality is first mapped to a common length by its own "docking"
  layer (a linear layer followed by ReLU) unless docking is bypassed. For
  every output component one modality is then drawn at random, and the
  component is copied from that modality's docked vector.
  """

  def __init__(self, device, input_size_list, embracement_size=256, bypass_docking=False):
    """
    Build the docking layers of an EmbraceNet module.
    Args:
      device: A "torch.device()" object on which the default availability and
        selection-probability tensors are created in forward().
      input_size_list: A list with the feature length of each modality.
      embracement_size: The length of the embraced output ("c" in the paper).
      bypass_docking: If True, no docking layers are created and every input
        must already have a shape of [batch_size, embracement_size].
    """
    super().__init__()

    self.device = device
    self.input_size_list = input_size_list
    self.embracement_size = embracement_size
    self.bypass_docking = bypass_docking

    if bypass_docking:
      return

    # one linear docking layer per modality, registered as docking_0, docking_1, ...
    for index, in_features in enumerate(input_size_list):
      self.add_module('docking_%d' % index, nn.Linear(in_features, embracement_size))


  def forward(self, input_list, availabilities=None, selection_probabilities=None):
    """
    Embrace the given modality inputs into one tensor.
    Args:
      input_list: A list of input tensors, one per entry of input_size_list.
      availabilities: A 2-D tensor of shape [batch_size, num_modalities] marking
        which modalities are present for each sample. None means all are present.
      selection_probabilities: A 2-D tensor of shape [batch_size, num_modalities]
        with the probability of picking each modality ("p" in the paper). None
        means every modality is equally likely.
    Returns:
      A 2-D tensor of shape [batch_size, embracement_size] that is the embraced output.
    """

    # validate the number of modalities and read the batch size
    num_modalities = len(input_list)
    assert num_modalities == len(self.input_size_list)
    batch_size = input_list[0].shape[0]
    mask_shape = (batch_size, num_modalities)

    # docking: bring every modality to the embracement size
    if self.bypass_docking:
      docked = input_list
    else:
      docked = [
          nn.functional.relu(getattr(self, 'docking_%d' % index)(features))
          for index, features in enumerate(input_list)
      ]

    # availability mask (defaults to "everything available")
    if availabilities is None:
      availabilities = torch.ones(mask_shape, dtype=torch.float, device=self.device)
    else:
      availabilities = availabilities.float()

    # zero the probability of unavailable modalities, then renormalise each row
    if selection_probabilities is None:
      selection_probabilities = torch.ones(mask_shape, dtype=torch.float, device=self.device)
    weights = torch.mul(selection_probabilities, availabilities)
    weights = weights / weights.sum(dim=-1, keepdim=True)

    # [batch_size, embracement_size, num_modalities]
    stacked = torch.stack(docked, dim=-1)

    # draw one modality index per output component: [batch_size, embracement_size]
    picks = torch.multinomial(weights, num_samples=self.embracement_size, replacement=True)
    # [batch_size, embracement_size, num_modalities]
    selector = nn.functional.one_hot(picks, num_classes=num_modalities).float()

    # keep only the selected modality for each component and collapse the last axis
    return (stacked * selector).sum(dim=-1)

In [None]:
import torch.nn as nn

class VGG_pre(nn.Module):
    """Small two-stage convolutional feature extractor for 3-channel images.

    Each stage is Conv2d(5x5, padding 2) -> BatchNorm2d -> ReLU -> MaxPool2d(2),
    so every stage halves the spatial resolution. Dropout follows each stage
    and the result is flattened to one vector per sample.
    """

    def __init__(self):
        super().__init__()
        # attribute names and Sequential positions define the state_dict keys
        self.layer1 = self._conv_stage(3, 32)
        self.layer2 = self._conv_stage(32, 64)

        self.drop_out1 = nn.Dropout(p=0.3)
        self.drop_out2 = nn.Dropout(p=0.4)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Return one conv / batch-norm / ReLU / max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def forward(self, x):
        """Return the flattened features, shape [batch_size, 64 * H/4 * W/4]."""
        features = self.drop_out1(self.layer1(x))
        features = self.drop_out2(self.layer2(features))
        # keep the batch axis, merge channels and spatial axes
        return features.reshape(features.size(0), -1)

In [None]:
from collections import defaultdict

# DataLoaders for each input branch, keyed by 'left', 'central' and 'right'.
# NOTE: because these are defaultdicts, looking up any other key silently
# inserts and returns 0 instead of raising KeyError.
training_loader = defaultdict(
    lambda: 0,
    {
        'left': loader_imag_train_L,
        'central': loader_imag_train_C,
        'right': loader_imag_train_R,
    },
)

validation_loader = defaultdict(
    lambda: 0,
    {
        'left': loader_imag_validation_L,
        'central': loader_imag_validation_C,
        'right': loader_imag_validation_R,
    },
)

In [None]:
training_loader

defaultdict(<function __main__.<lambda>>,
            {'central': <torch.utils.data.dataloader.DataLoader at 0x7f0810cb6e10>,
             'left': <torch.utils.data.dataloader.DataLoader at 0x7f07ffb169d0>,
             'right': <torch.utils.data.dataloader.DataLoader at 0x7f07ffb92250>})

In [None]:
import torch.nn as nn
import torch.optim as optim


class EmbraceNetMultimodal(nn.Module):
  """Three-branch image classifier fused with EmbraceNet.

  Each of the left / central / right inputs goes through its own VGG_pre
  feature extractor whose parameters are frozen. The three feature vectors
  are fused by an EmbraceNet module and a final linear layer maps the
  embraced vector to class scores.

  NOTE: only `requires_grad` is switched off for the branches. Their
  BatchNorm and Dropout layers still follow this module's train()/eval()
  mode.
  """

  def __init__(self, device, n_classes, hyperparameters_tuning=False, args=None):
    """
    Args:
      device: torch.device forwarded to the EmbraceNet module.
      n_classes: Number of output classes of the final linear layer.
      hyperparameters_tuning: If True the branches are initialised from the
        'best_model_*_hp.pt' files, otherwise from 'best_model_*_test.pt'.
      args: Optional extra configuration; stored but not used by this class.
    """
    super(EmbraceNetMultimodal, self).__init__()

    # input parameters
    self.device = device
    self.n_classes = n_classes
    self.args = args
    self.hyperparameters_tuning = hyperparameters_tuning

    # one convolutional feature extractor per input
    self.VGG_left = VGG_pre()
    self.VGG_central = VGG_pre()
    self.VGG_right = VGG_pre()

    # 'hp' files are used while tuning hyperparameters, 'test' files for final testing
    suffix = 'hp' if self.hyperparameters_tuning else 'test'
    branches = ((self.VGG_left, 'L'), (self.VGG_central, 'C'), (self.VGG_right, 'R'))
    for branch, tag in branches:
      load_model(branch, 'best_model_%s_%s.pt' % (tag, suffix))

    # freeze the branches so that only the fusion and output layers are trained
    for branch, _ in branches:
      for param in branch.parameters():
        param.requires_grad = False

    # length of the flattened VGG_pre output; must match the image size in use
    self.pre_output_size = (5*64*16) #5120

    # embracenet
    self.embracenet = EmbraceNet(device=self.device,
                                 input_size_list=[self.pre_output_size, self.pre_output_size, self.pre_output_size],
                                 embracement_size=512)

    # post embracement layers
    self.post = nn.Linear(in_features=512, out_features=self.n_classes)


  def forward(self, x, availabilities=None, selection_probabilities=None):
    """
    Args:
      x: Sequence of three batches (left, central, right).
      availabilities: Optional [batch_size, 3] tensor forwarded to EmbraceNet
        (column order: left, central, right). None means all are available.
      selection_probabilities: Optional [batch_size, 3] tensor forwarded to
        EmbraceNet. None means uniform selection.
    Returns:
      Raw class scores (logits). No softmax is applied here because
      nn.CrossEntropyLoss already applies log-softmax internally.
    """
    x_left, x_central, x_right = x

    x_left_final = self.VGG_left(x_left)
    x_central_final = self.VGG_central(x_central)
    x_right_final = self.VGG_right(x_right)

    # embrace; the optional masks/probabilities used to be accepted but dropped
    embraced = self.embracenet([x_left_final, x_central_final, x_right_final],
                               availabilities=availabilities,
                               selection_probabilities=selection_probabilities)

    # employ final layers
    return self.post(embraced)

In [None]:
# training configuration shared by the cells below
num_classes = 5    # number of target classes
num_epochs = 50    # maximum number of training epochs
# expects raw logits; log-softmax is applied inside the loss
criterion = nn.CrossEntropyLoss()
#learning_rate=0.0001
#optimizer = torch.optim.Adam(model.parameters(),lr = learning_rate)

#optimizer = torch.optim.Adam([x_left, x_central, x_right],lr = learning_rate)

In [None]:
import optuna
import torch.nn as nn
#import thop
import torch.optim as optim
import pickle
import re

class Param_Search_Multimodal():

  """Tunes the optimizer type and learning rate with an Optuna study.

    Every trial trains a fresh copy of `model`, reports the mean F1 test
    score after each epoch and pickles the trained copy to
    "<study_name><trial number>.pickle".

    Relies on the notebook-level names `tqdm` and `F1`.

    Parameters:
    ------------------
    model (torch.nn.Module): neural network model used as template for each trial.
    train_loader (dict): dictionary of training DataLoader objects, one per modality.
    test_loader (dict): dictionary of testing DataLoader objects, one per modality.
    criterion : loss function for training the model.
    num_epochs (int): number of epochs.
    n_trials (int): total number of completed trials the study should reach.
    study_name (str): name of the Optuna study object.
    modalities (sequence of str): keys of the loader dictionaries, in the order
        in which their batches are passed to the model. The targets are taken
        from the first one.
        Default: ('FFNN', 'CNN')

    Attributes:
    ------------------
    best_model: the pickled model of the best trial, set by run_trial().

    Returns:
    ------------------
    Prints values of the optimised hyperparameters and saves the model of each trial.
    """

  def __init__(self,
               model,
               train_loader,
               test_loader,
               criterion,
               num_epochs,
               n_trials,
               study_name,
               modalities=('FFNN', 'CNN')):
    self.model = model
    self.train_loader = train_loader
    self.test_loader = test_loader
    self.criterion = criterion
    self.num_epochs = num_epochs
    self.n_trials = n_trials
    self.study_name = study_name
    self.modalities = tuple(modalities)
    if not self.modalities:
      raise ValueError("modalities must contain at least one loader key")
    # all loaders are iterated in lockstep, so the first one gives the batch count
    self.len_train_loader = len(train_loader[self.modalities[0]])
    self.len_test_loader = len(test_loader[self.modalities[0]])
    self.best_model = None

  def _iter_batches(self, loaders):
    """Yield (inputs, target) per step; inputs holds one double tensor per modality."""
    for batches in zip(*(loaders[name] for name in self.modalities)):
      inputs = [features.double() for features, _ in batches]
      # every loader is assumed to yield the same targets; use the first one
      target = batches[0][1]
      yield inputs, target

  def objective(self, trial):
    """Defines the objective to be optimised (F1 test score) and saves
    each final model.
    """
    import copy

    # train an independent copy so that trials do not inherit each other's weights
    model = copy.deepcopy(self.model).double()

    # generate the possible optimizers
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # defined up front so that num_epochs == 0 still returns a value
    f1_test = 0.0

    # Define the training and testing phases
    for epoch in tqdm(range(1, self.num_epochs + 1)):
      train_loss = 0.0
      test_loss = 0.0
      f1_test = 0.0

      # set the model in training modality
      model.train()
      for inputs, target in tqdm(self._iter_batches(self.train_loader),
                                 desc='Training model', total=self.len_train_loader):
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(inputs)
        # calculate the batch loss
        loss = self.criterion(output, target)
        # backward pass: compute gradient of the loss wrt model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update training loss
        train_loss += loss.item()

      # set the model in testing modality; no gradients are needed here
      model.eval()
      with torch.no_grad():
        for inputs, target in tqdm(self._iter_batches(self.test_loader),
                                   desc='Testing model', total=self.len_test_loader):
          # forward pass: compute predicted outputs by passing inputs to the model
          output = model(inputs)
          # calculate the batch loss
          loss = self.criterion(output, target)
          # update test loss
          test_loss += loss.item()
          # accumulate the F1 test score of the batch
          f1_test += F1(output, target)

      # calculate epoch score by dividing by the number of batches
      f1_test /= self.len_test_loader
      # pass the score of the epoch to the study to monitor the intermediate objective values
      trial.report(f1_test, epoch)

    # save the final model named with the number of the trial
    with open("{}{}.pickle".format(self.study_name, trial.number), "wb") as fout:
      pickle.dump(model, fout)

    # return F1 score to the study
    return f1_test



  def run_trial(self):
    """Runs Optuna study and stores the best model in class attribute 'best_model'."""

    # create a new study or load a pre-existing study. use sqlite backend to store the study.
    study = optuna.create_study(study_name=self.study_name, direction="maximize",
                                storage='sqlite:///SA_optuna_tuning.db', load_if_exists=True)

    def trials_in(state):
      return [t for t in study.trials if t.state == state]

    complete_trials = trials_in(optuna.trial.TrialState.COMPLETE)

    # if the number of already completed trials is lower than the total number of trials
    # passed as argument, perform only the remaining trials
    remaining = self.n_trials - len(complete_trials)
    if remaining > 0:
      study.optimize(self.objective, n_trials=remaining)
      complete_trials = trials_in(optuna.trial.TrialState.COMPLETE)
    pruned_trials = trials_in(optuna.trial.TrialState.PRUNED)

    # store the best model found in the class
    # NOTE: pickle.load executes arbitrary code; only load files written by objective()
    with open("{}{}.pickle".format(self.study_name, study.best_trial.number), "rb") as fin:
      self.best_model = pickle.load(fin)

    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
      print("    {}: {}".format(key, value))


In [None]:
from tqdm.auto import tqdm

def _iter_modal_batches(loaders, modalities):
  """Yield (inputs, target) per step; inputs holds one double tensor per modality."""
  for batches in zip(*(loaders[name] for name in modalities)):
    inputs = [features.double() for features, _ in batches]
    # every loader is assumed to yield the same targets; use the first one
    yield inputs, batches[0][1]


def fit_multimodal(model,
                   train_loader,
                   test_loader,
                   criterion,
                   optimizer,
                   num_epochs,
                   filename_path,
                   verbose=True,
                   patience=7,
                   delta=0,
                   modalities=('FFNN', 'CNN')):
  """Performs the training of the multimodal model. It implements also early stopping.

    Relies on the notebook-level names `tqdm`, `F1` and `EarlyStopping`.

    Parameters:
    ------------------
    model (torch.nn.Module): neural network model.
    train_loader (dict): dictionary of training DataLoader objects, one per modality.
    test_loader (dict): dictionary of testing DataLoader objects, one per modality.
    criterion: loss function for training the model.
    optimizer (torch.optim): optimization algorithm for training the model.
    num_epochs (int): number of epochs.
    filename_path (str): prefix of the per-epoch checkpoint files, which are
        written to 'exp/<filename_path>_<epoch>.pt'.
    verbose (bool): prints the training error, test error, F1 training score, F1 test score
        at each epoch.
        Default: True
    patience (int): number of epochs in which the test error is not anymore decreasing
        before stopping the training.
        Default: 7
    delta (int): minimum decrease in the test error to continue with the training.
        Default: 0
    modalities (sequence of str): keys of the loader dictionaries, in the order
        in which their batches are passed to the model. The targets are taken
        from the first one.
        Default: ('FFNN', 'CNN')

    Returns:
    ------------------
    Lists of F1 training scores and F1 test scores at each epoch.
    Prints training error, test error, F1 training score, F1 test score at each epoch.
    """
  import os

  basepath = 'exp'
  # torch.save does not create missing directories
  os.makedirs(basepath, exist_ok=True)

  # keep track of epoch scores
  f1_train_scores = []
  f1_test_scores = []

  # convert model data type to double
  model = model.double()

  # define early stopping
  early_stopping = EarlyStopping(patience=patience, delta=delta, verbose=True)

  modalities = tuple(modalities)
  if not modalities:
    raise ValueError("modalities must contain at least one loader key")
  # all loaders are iterated in lockstep, so the first one gives the batch count
  len_train_loader = len(train_loader[modalities[0]])
  len_test_loader = len(test_loader[modalities[0]])

  for epoch in tqdm(range(1, num_epochs + 1), desc='Epochs'):
    train_loss = 0.0
    test_loss = 0.0

    f1_train = 0.0
    f1_test = 0.0

    # if there is already a trained model stored for a specific epoch, load the model
    # and don't retrain the model
    PATH = os.path.join(basepath, filename_path + '_' + str(epoch) + '.pt')
    if os.path.exists(PATH):
      checkpoint = torch.load(PATH)
      model.load_state_dict(checkpoint['model_state_dict'])
      f1_train = checkpoint['F1_train']
      f1_test = checkpoint['F1_test']
      train_loss = checkpoint['train_loss']
      test_loss = checkpoint['test_loss']

    else:
      # set the model in training modality
      model.train()
      for inputs, target in tqdm(_iter_modal_batches(train_loader, modalities),
                                 desc='Training model', total=len_train_loader):
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(inputs)
        # calculate the batch loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss wrt model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update training loss
        train_loss += loss.item()
        # accumulate the F1 training score of the batch
        f1_train += F1(output, target)

      # set the model in testing modality; no gradients are needed here
      model.eval()
      with torch.no_grad():
        for inputs, target in tqdm(_iter_modal_batches(test_loader, modalities),
                                   desc='Testing model', total=len_test_loader):
          # forward pass: compute predicted outputs by passing inputs to the model
          output = model(inputs)
          # calculate the batch loss
          loss = criterion(output, target)
          # update test loss
          test_loss += loss.item()
          # accumulate the F1 test score of the batch
          f1_test += F1(output, target)

      # save the model weights, epoch, scores and losses of the freshly trained epoch.
      # The F1 values are stored as un-normalised sums, which is what the
      # loading branch above expects.
      torch.save({'epoch': epoch,
                  'model_state_dict': model.state_dict(),
                  'F1_train': f1_train,
                  'F1_test': f1_test,
                  'train_loss': train_loss,
                  'test_loss': test_loss},
                 PATH)

    # calculate epoch score by dividing by the number of batches
    f1_train /= len_train_loader
    f1_test /= len_test_loader
    # store epoch scores
    f1_train_scores.append(f1_train)
    f1_test_scores.append(f1_test)

    # print training/test statistics
    if verbose:
      print('Epoch: {} \tTraining F1 score: {:.4f} \tTest F1 score: {:.4f} \tTraining Loss: {:.4f} \tTest Loss: {:.4f}'.format(
      epoch, f1_train, f1_test, train_loss, test_loss))

    # early stop the model if the test loss is not improving
    early_stopping(test_loss, model)
    if early_stopping.early_stop:
      print('Early stopping the training')
      # reload the checkpoint from `patience` epochs ago, i.e. the last epoch
      # before the test loss stopped improving
      best_checkpoint = torch.load(os.path.join(basepath, filename_path + '_' + '{}'.format(epoch-patience) + '.pt'))
      model.load_state_dict(best_checkpoint['model_state_dict'])
      break

  # return the scores at each epoch
  return f1_train_scores, f1_test_scores

## Hyperparameters tuning

### Model testing