# SENTIMENT ANALYSIS AND OPINION MINING PROJECT
## PUBLIC OPINION ON NEWS

https://island.ricerca.di.unimi.it/~alfio/shared/textsent/2020-21/projects.html

The New York Times Comments dataset (https://www.kaggle.com/aashita/nyt-comments?select=CommentsApril2017.csv) contains articles published on the New York Times and the comments they received from readers. <br>
The task of the project is to analyze, using the variables *editorsSelection*, *recommendations*, and *replyCount* as targets, the rate of success of a comment. This rate of success should be intepreted as a measure of how much controversial was the commented article. On this base, the project aims at studing which topics (indicated by the features sectionName and/or newDesk) were mostly controversial. Optionally, the project could also determine if a comment opinion is against or if favor of the article.

# 2. CLASSIFICATION OF CONTROVERSIAL COMMENTS WITH A MULTIMODAL NEURAL NETWORK

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pylab as plt

import os
import pickle
from tqdm.auto import tqdm
#from glob import glob
import json
from sklearn.model_selection import train_test_split

import torch
import torch.nn.functional as F
#import torchvision
#import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import gensim
import random

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import RobustScaler
from scipy.sparse import csr_matrix
from scipy.sparse import coo_matrix, hstack
from sklearn.feature_extraction.text import TfidfVectorizer

## DATA LOADER

In [4]:
pip install gensim

Collecting gensim
  Downloading gensim-4.0.1-cp37-cp37m-manylinux1_x86_64.whl (23.9 MB)
[K     |████████████████████████████████| 23.9 MB 7.5 MB/s eta 0:00:01
Collecting smart-open>=1.8.1
  Downloading smart_open-5.0.0-py3-none-any.whl (56 kB)
[K     |████████████████████████████████| 56 kB 7.5 MB/s  eta 0:00:01
[?25hInstalling collected packages: smart-open, gensim
Successfully installed gensim-4.0.1 smart-open-5.0.0
Note: you may need to restart the kernel to use updated packages.


In [6]:
!unzip PROJECT_SA/dataset_cleansed_version.csv.zip -d PROJECT_SA

Archive:  PROJECT_SA/dataset_cleansed_version.csv.zip
  End-of-central-directory signature not found.  Either this file is not
  a zipfile, or it constitutes one disk of a multi-part archive.  In the
  latter case the central directory and zipfile comment will be found on
  the last disk(s) of this archive.
unzip:  cannot find zipfile directory in one of PROJECT_SA/dataset_cleansed_version.csv.zip or
        PROJECT_SA/dataset_cleansed_version.csv.zip.zip, and cannot find PROJECT_SA/dataset_cleansed_version.csv.zip.ZIP, period.


In [6]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'


class Data_Load():
  def  __init__(self, 
               data_path, 
               network_type=None):
    self.data_path = data_path
    self.network_type = network_type
    self.X = []
    self.y =  []

    if (self.network_type != 'FFNN' and self.network_type != 'CNN' and self.network_type != 'ENC_CNN'):
            raise ValueError(
            "Argument 'network_type' has an incorrect value: use 'FFNN', 'CNN', 'ENC_CNN'")


  def load(self):
    
    self.y =  pd.read_csv(
      os.path.join('PROJECT_SA', self.data_path),
      usecols = ['editorsSelection_TARGET','recommendations_TARGET','replyCount_TARGET']
      ) 
    
    self.y  = self.y .astype({
        'editorsSelection_TARGET': 'int',
        'recommendations_TARGET': 'int',
        'replyCount_TARGET': 'int'})
 
    
    # retrieve part of the dataset needed
    if self.network_type == 'FFNN':
      self.X  = pd.read_csv(
        os.path.join('PROJECT_SA',self.data_path),
        usecols = ['approveDate','commentType','createDate','depth','picURL','sharing','timespeople',
                  'trusted','updateDate','userTitle','sectionName_x','newDesk_x','articleWordCount_x','printPage_x',
                  'typeOfMaterial_x','documentType','pubDate','source']
        ) 
    
      self.X  = self.X .astype({
        'approveDate': 'float64',
        'createDate': 'float64',
        'depth': 'object',
        'picURL': 'object',
        'sharing': 'object',
        'timespeople': 'object',
        'trusted':'object',
        'updateDate': 'float64',
        'articleWordCount_x' : 'float64',
        'printPage_x' : 'object'
                    })
      
    elif self.network_type == 'CNN':
      self.X = pd.read_csv(
        os.path.join('PROJECT_SA',self.data_path),
        usecols = ['keywords']
        ) 
    
    elif self.network_type == 'ENC_CNN':
      self.X = pd.read_csv(
        os.path.join('PROJECT_SA',self.data_path),
        usecols = ['commentBody']
        ) 
    

    random.seed(123)
    self.X = self.X.sample(frac=1)
    self.y = self.y.iloc[self.X.index]
    self.X = self.X.reset_index(drop=True)
    self.y = self.y.reset_index(drop=True)

    return self.X, self.y




class Data_Preprocess():
  def __init__(self, 
               X,
               y, 
               network_type=None,
               usage=None, 
               test_size=0.25, 
               validation_size=0.15):
  
    self.X = X
    self.y = y
    self.network_type = network_type
    self.usage = usage
    self.test_size = test_size
    self.validation_size = validation_size

    self.vectorizer = TfidfVectorizer()
    self.robust_scaler = RobustScaler()
    self.onehot_encoder = OneHotEncoder(sparse=False)
    self.label_encoder = LabelEncoder()

    self.X_processed = torch.empty((0))
      
    self.X_train = []
    self.y_train = []
    self.X_val = []
    self.y_val = []
    self.X_test = []
    self.y_test = []

    if (self.network_type != 'FFNN' and self.network_type != 'CNN' and self.network_type != 'ENC_CNN'):
            raise ValueError(
            "Argument 'network_type' has an incorrect value: use 'FFNN', 'CNN', 'ENC_CNN'")

    if (self.usage != 'hyper_tuning' and self.usage != 'model_testing'):
            raise ValueError(
            "Argument 'usage' has an incorrect value: use 'hyper_tuning', 'model_testing'")



  def data_process_features(self):

    assert not ( (self.X.isna().sum(axis=0)).any() )
    assert not ( (self.y.isna().sum(axis=0)).any() )

    if self.X_processed.shape[0] == 0:

        # process data
      for col in sorted(self.X.columns):

        if self.X[col].dtype == object:

            # one hot encoding 
            # (resulting matrix is stored in an efficient way since it is sparse)
          X = ( csr_matrix( 
                      self.onehot_encoder
                      .fit_transform( 
                          self.label_encoder
                          .fit_transform(self.X[col])
                          .reshape(X.shape[0], 1) )))
              
          if self.X_processed.shape[0] == 0:
            self.X_processed = X
          else:
            assert (self.X_processed.shape[0] == X.shape[0])
            self.X_processed = hstack((self.X_processed, X))

        elif self.X[col].dtype == np.float64:

              # robust scaling of data, which might not be normally distributed
              # (resulting matrix is stored in an efficient way since it is sparse)
          X = csr_matrix( self.robust_scaler
                              .fit_transform(
                                  self.X[col].values.reshape(-1,1)
                              ) )
              
          if self.X_processed.shape[0] == 0:
            self.X_processed = X
          else:
            assert(self.X_processed.shape[0] == X.shape[0])
            self.X_processed = hstack((self.X_processed, X))

    self.X = []

      # change compressed matrix format for allowing slicing
    self.X_processed = self.X_processed.tocsr()
    

  def data_process_tfidf_doc_embedding(self):
    self.X_processed = self.vectorizer.fit_transform(self.X['keywords'].values.astype('U'))

    self.X = []


  def tag_docs(self):
    for i, line in enumerate(self.X):
      tokens = gensim.utils.simple_preprocess(line)
      yield gensim.models.doc2vec.TaggedDocument(tokens, [i])

  def data_process_doc2vec_embedding(self):
    # retrieve the word context with skipgram
    X = list(self.tag_docs())

    model = gensim.models.doc2vec.Doc2Vec(vector_size=50, min_count=1, epochs=50, dbow_words=0, window=5) #max_vocab_size
    model.build_vocab(X)
    model.train(X, total_examples=model.corpus_count, epochs=model.epochs)

    self.X_processed = csr_matrix( [ np.array( model.infer_vector( str(x).split()), dtype=float) for x in tqdm( self.X['commentBody'],
                                                                                     desc='Vectorising Documents' ) ] ).tocsr() Documents' ) ] )

    self.X = []


  def split_data(self):

    assert (self.X_processed.shape[0] == self.y.shape[0])
    self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X_processed, self.y,
                                                      test_size=self.test_size, 
                                                      shuffle=False) 
    del self.X_processed
    del self.y


    if self.usage == 'hyper_tuning':
      assert (self.X_train.shape[0] == self.y_train.shape[0])

      del self.X_test
      del self.y_test

      self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(self.X_train, self.y_train,
                                                      test_size=self.validation_size, 
                                                      shuffle=False) 
      
  def process(self):

    if self.network_type == 'FFNN':
      self.data_process_features()

    elif self.network_type == 'CNN':
      self.data_process_tfidf_doc_embedding()

    elif self.network_type == 'ENC_CNN':
      self.data_process_doc2vec_embedding()

    self.split_data()
    
    if self.usage == 'model_testing':
      return self.X_train, self.y_train, self.X_test, self.y_test
    if self.usage == 'hyper_tuning':
      return self.X_train, self.y_train, self.X_val, self.y_val


class Dataset_Wrap(Dataset):
  def __init__(self, X, y, network_type):
    self.X = X
    self.y = y
    self.network_type = network_type

  def __len__(self):
    return (self.X.shape[0])  

  def __getitem__(self, i):
    assert not ( self.X.shape[0] == 0 )
      # return corresponding data in the sparse format
    data = ( torch.tensor(self.X[i].todense()) ).reshape(-1)
    
    if self.network_type != 'FFNN':
      data = torch.reshape(data, (1, len(data)))

      
    labels = torch.tensor(self.y.values[i].astype(int))
    y_1, y_2, y_3 = labels
          
    return (data.to(device), y_1.to(device), y_2.to(device), y_3.to(device))




Remove userLocation and inline due to RAM problems in processing them since they have too many features

In [7]:
def build_DataLoader(
    data_path, 
    network_type, 
    usage, 
    test_size=0.25, 
    validation_size=0.15,
    batch_size = 100):

  X, y = Data_Load(
        data_path=data_path,
        network_type=network_type).load()
  
  X_train, y_train, X_test, y_test = Data_Preprocess(
      X=X,
      y=y, 
      network_type=network_type,
      usage=usage, 
      test_size=test_size, 
      validation_size=validation_size).process()
  
  train_wrap = Dataset_Wrap(X_train, y_train, network_type=network_type)
  test_wrap = Dataset_Wrap(X_test, y_test, network_type=network_type)
  loader_train = DataLoader(dataset = train_wrap, batch_size = batch_size, shuffle=True)             
  loader_test = DataLoader(dataset = test_wrap, batch_size = batch_size*2, shuffle=False)     

  return  loader_train, loader_test
  #return  X_test, y_test

## FUNCTIONS SETUP

In [8]:
pip install optuna

Note: you may need to restart the kernel to use updated packages.


In [9]:
from sklearn.metrics import f1_score

def F1(output, target):
  pred = torch.argmax(output, dim=1)
  return f1_score(pred.cpu().detach().numpy(), target.cpu().detach().numpy(), average='weighted')

In [62]:
import optuna
import torch.nn as nn
#import thop
import torch.optim as optim
import pickle
import re

import sqlite3
from sqlalchemy import create_engine
engine = create_engine('sqlite:///SA_optuna_tuning.db')


class Param_Search():

  def __init__(self, 
               model, 
               train_loader, 
               test_loader,
               criterion,
               num_epochs,
               study_name,
               n_trials=4,
               ):
    self.model = model
    self.train_loader = train_loader
    self.test_loader = test_loader
    self.criterion = criterion
    self.num_epochs = num_epochs
    self.study_name = study_name
    self.n_trials = n_trials
    self.best_model = None

  def objective(self, trial):

    # Generate the model.
    model = self.model

    # Generate the optimizers.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop"])
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-1)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    model = model.double()

    
    for epoch in tqdm(range(1, self.num_epochs + 1), desc='Epochs'):
      train_loss = 0.0
      test_loss = 0.0
      f1_test = 0.0
    
      model.train()
      for data, target1, target2, target3 in tqdm(self.train_loader, desc='Passing dataset'):
        
        # clear-the-gradients-of-all-optimized-variables
        optimizer.zero_grad()
        # forward-pass: compute-predicted-outputs-by-passing-inputs-to-the-model
        output1, output2, output3 = model(data.double())
        # calculate-the-batch-loss
        loss = self.criterion(output1, target1) + self.criterion(output2, target2) + self.criterion(output3, target3)
        # backward-pass: compute-gradient-of-the-loss-wrt-model-parameters
        loss.backward()
        # perform-a-ingle-optimization-step (parameter-update)
        optimizer.step()
        # update-training-loss
        train_loss += loss.item()
        
    # validate-the-model
      model.eval()
      for data, target1, target2, target3 in self.test_loader:  
        output1, output2, output3 = model(data.double())
        loss = self.criterion(output1, target1) + self.criterion(output2, target2) + self.criterion(output3, target3)
        # update-average-validation-loss 
        test_loss += loss.item()
        f1_test += (( F1(output1,target1) + F1(output2,target2) + F1(output3,target3) )/3)*len(target1)

      f1_test /= len(self.test_loader)
    # moltiplica x label e dividi per test o train loader
    
      trial.report(f1_test, epoch)

    # save all models
    with open("{}.pickle".format(trial.number), "wb") as fout:
      pickle.dump(model, fout)
        
    return f1_test



  def run_trial(self):
    # use sqlite backend to save the study
    study = optuna.create_study(study_name=self.study_name, direction="maximize", 
                                storage='sqlite:///SA_optuna_tuning.db', load_if_exists=True)

    study.optimize(self.objective, n_trials=self.n_trials)
    pruned_trials = [t for t in study.trials if t.state == optuna.structs.TrialState.PRUNED]
    complete_trials = [t for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]

    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
      print("    {}: {}".format(key, value))
    
    with open('PROJECT_SA/params_best_model_FFNN_hp', 'wb') as f:
      pickle.dump(trial.params, f)
# moltiplica per il batch size!
    with open("{}.pickle".format(study.best_trial.number), "rb") as fin:
      best_model = pickle.load(fin)
    
    # store only best model
    self.best_model = best_model

  def save_best_model(self, path):
    # remove last layers of the model
    model_param = self.best_model.state_dict()
    
    for key,value in model_param.copy().items():
      if re.findall('single', key):
        del model_param[str(key)]

    # save model with best hyperparameters on google drive
    basepath = 'PROJECT_SA'
    path = os.path.join(basepath, path)

    torch.save(model_param, path)

    return model_param

In [63]:
from tqdm.auto import tqdm

def fit(model, train_loader, test_loader, criterion, optimizer, num_epochs, filename_path, verbose=True): 

  basepath = 'PROJECT_SA'

  # keeping track of losses 
  train_losses = []
  test_losses = []
  f1_train_scores = []
  f1_test_scores = []

  model = model.double()

  for epoch in tqdm(range(1, num_epochs + 1), desc='Epochs'):
    # keep track of training and validation loss
    train_loss = 0.0
    test_loss = 0.0
    
    f1_train = 0.0
    f1_test = 0.0
    
    PATH = os.path.join(basepath, filename_path)
    
    if os.path.exists(PATH):
      checkpoint = torch.load(PATH)
      model.load_state_dict(checkpoint)
        
    else:
      model.train()
      for data, target1, target2, target3 in tqdm(train_loader, desc='Training model'):
        
        # clear-the-gradients-of-all-optimized-variables
        optimizer.zero_grad()
        # forward-pass: compute-predicted-outputs-by-passing-inputs-to-the-model
        output1, output2, output3 = model(data.double())
        # calculate-the-batch-loss
        loss = criterion(output1, target1) + criterion(output2, target2) + criterion(output3, target3)
        # backward-pass: compute-gradient-of-the-loss-wrt-model-parameters
        loss.backward()
        # perform-a-ingle-optimization-step (parameter-update)
        optimizer.step()
        # update-training-loss
        train_loss += loss.item()

        f1_train += (( F1(output1,target1) + F1(output2,target2) + F1(output3,target3) )/3)*len(target1)

        
    # validate-the-model
    model.eval()
    for data, target1, target2, target3 in tqdm(test_loader, desc='Testing model'):
      
      output1, output2, output3 = model(data.double())
        
      loss = criterion(output1, target1) + criterion(output2, target2) + criterion(output3, target3)
        
        # update-average-validation-loss 
      test_loss += loss.item()
          
      f1_test += (( F1(output1,target1) + F1(output2,target2) + F1(output3,target3) ) /3)*len(target1)
    
    # append losses
    train_losses.append(train_loss)
    test_losses.append(test_loss)

    f1_train /= len(train_loader)
    f1_test /= len(test_loader)
    
    f1_train_scores.append(f1_train)    
    f1_test_scores.append(f1_test)
        
    # save the model at each epoch
    model_param = model.state_dict()
    path = os.path.join(basepath, filename_path)
    torch.save(model_param, path)
      
    # print-training/validation-statistics 
    if verbose == True:
      print('Epoch: {} \tTraining Loss: {:.6f} \tTest Loss: {:.6f} \tTraining F1 score: {:.6f} \tTest F1 score: {:.6f}'.format(
      epoch, train_loss, test_loss, f1_train, f1_test))
      
      


  # return the scores at each epoch
  return f1_test_scores, f1_train_scores

In [12]:
# load pre-trained model from google drive and update the current model

def load_model(model, path):
  basepath = 'PROJECT_SA'
  path = os.path.join(basepath, path)
  checkpoint = torch.load(path)
  model.load_state_dict(checkpoint)
  model.eval() # to set dropout and batch normalization layers to evaluation 

In [155]:
for key,value in model.state_dict().copy().items():
    print(key)

module.layer1.0.weight
module.layer1.0.bias
module.layer1.2.weight
module.layer1.2.bias
module.single_layer2_1.0.weight
module.single_layer2_1.0.bias
module.single_layer2_2.0.weight
module.single_layer2_2.0.bias
module.single_layer2_3.0.weight
module.single_layer2_3.0.bias
module.single_last_layer1.weight
module.single_last_layer1.bias
module.single_last_layer2.weight
module.single_last_layer2.bias
module.single_last_layer3.weight
module.single_last_layer3.bias


In [13]:
# to save the best tested model 

def save_best_model(model, path):
    model_param = model.state_dict()
    for key,value in model_param.copy().items():
      if re.findall('single', key):
        del model_param[str(key)]

    basepath = 'PROJECT_SA'
    path = os.path.join(basepath, path)

    torch.save(model_param, path)

## 1. FEED FORWARD NN

In [15]:
import torch.nn as nn

class FFNN_multitask(nn.Module):
  def __init__(self):
    super(FFNN_multitask, self).__init__()
 # batch_size * input variables # [500, 261]
    
    self.layer1 = nn.Sequential(
        nn.Linear(261, 4000), # 500x261 and 130500x4000
        nn.ReLU(),
        nn.Linear(4000, 2000),
        nn.ReLU()) 

    self.single_layer2_1 = nn.Sequential(
    nn.Linear(2000, 1000),
    nn.ReLU())

    self.single_layer2_2 = nn.Sequential(
    nn.Linear(2000, 1000),
    nn.ReLU())

    self.single_layer2_3 = nn.Sequential(
    nn.Linear(2000, 1000),
    nn.ReLU())

    self.single_last_layer1 = nn.Linear(1000, 2) 
    self.single_last_layer2 = nn.Linear(1000, 2)
    self.single_last_layer3 = nn.Linear(1000, 2)  

    self.drop_out1 = nn.Dropout(p=0.3)
    self.drop_out2 = nn.Dropout(p=0.4) 

  def forward(self, x):
      
      # first block in common
    out = self.layer1(x)
    out = self.drop_out1(out)

      # single blocks for each label
    out1 = self.single_layer2_1(out)
    out1 = self.drop_out2(out1)
    out1 = self.single_last_layer1(out1)

    out2 = self.single_layer2_2(out) 
    out2 = self.drop_out2(out2)
    out2 = self.single_last_layer2(out2)

    out3 = self.single_layer2_3(out) 
    out3 = self.drop_out2(out3)
    out3 = self.single_last_layer3(out3)
 
    return out1, out2, out3

### Hyperparameters Tuning

In [16]:
train_loader, test_loader = build_DataLoader(
        data_path='dataset_cleansed_version.csv',
        network_type='FFNN', 
        usage='hyper_tuning'
    )

In [17]:
model=FFNN_multitask()

if torch.cuda.device_count() >= 1:
  print("Let's use", torch.cuda.device_count(), "GPUs!")
  model = nn.DataParallel(model)

model.to(device)

Let's use 1 GPUs!


DataParallel(
  (module): FFNN_multitask(
    (layer1): Sequential(
      (0): Linear(in_features=261, out_features=4000, bias=True)
      (1): ReLU()
      (2): Linear(in_features=4000, out_features=2000, bias=True)
      (3): ReLU()
    )
    (single_layer2_1): Sequential(
      (0): Linear(in_features=2000, out_features=1000, bias=True)
      (1): ReLU()
    )
    (single_layer2_2): Sequential(
      (0): Linear(in_features=2000, out_features=1000, bias=True)
      (1): ReLU()
    )
    (single_layer2_3): Sequential(
      (0): Linear(in_features=2000, out_features=1000, bias=True)
      (1): ReLU()
    )
    (single_last_layer1): Linear(in_features=1000, out_features=2, bias=True)
    (single_last_layer2): Linear(in_features=1000, out_features=2, bias=True)
    (single_last_layer3): Linear(in_features=1000, out_features=2, bias=True)
    (drop_out1): Dropout(p=0.3, inplace=False)
    (drop_out2): Dropout(p=0.4, inplace=False)
  )
)

In [18]:
num_epochs = 30
criterion = nn.CrossEntropyLoss()

In [19]:
param_search = Param_Search(model, train_loader, test_loader,
            criterion, num_epochs, n_trials=1, study_name='hp_FFNN')

param_search.run_trial()

[32m[I 2021-05-20 14:29:07,007][0m A new study created in memory with name: no-name-12a14d71-7d9f-428d-b49c-9ac725e6ed49[0m


Epochs:   0%|          | 0/30 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

[32m[I 2021-05-21 00:52:57,496][0m Trial 0 finished with value: 2.593005354483164 and parameters: {'optimizer': 'RMSprop', 'lr': 0.0003091193417317815}. Best is trial 0 with value: 2.593005354483164.[0m


Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  2.593005354483164
  Params: 
    optimizer: RMSprop
    lr: 0.0003091193417317815


In [20]:
best_model_FFNN_hp = param_search.save_best_model('best_model_FFNN_hp.pt')

### Model Testing

In [None]:
train_loader, test_loader = build_DataLoader(
        data_path='dataset_cleansed_version.csv',
        network_type='FFNN', 
        usage='model_testing'
    )

In [None]:
model=FFNN_multitask()

if torch.cuda.device_count() > 1:
  print("Let's use", torch.cuda.device_count(), "GPUs!")
  model = nn.DataParallel(model)

model.to(device)

In [None]:
num_epochs = 30
criterion = nn.CrossEntropyLoss()

In [None]:
best_lr = 0.0003091193417317815
optimizer = optim.RMSprop(model.parameters(), lr=best_lr)

In [None]:
F1_train, F1_test = fit(model, train_loader, test_loader,
                        criterion, optimizer, num_epochs, filename_path='ffnn_testing.pt', verbose=True)

#save_best_model(model, 'best_model_FFNN_test.pt')

## 2. CONVOLUTIONAL NN 

In [14]:
import torch.nn as nn

class CNN_multitask(nn.Module):
  def __init__(self, fc_layer_size):
    super(CNN_multitask, self).__init__()
    self.fc_layer_size = fc_layer_size 
    
    self.layer1 = nn.Sequential(
            nn.Conv1d(1, 32, kernel_size=15, stride=2, padding=1), #The average word length in English language is 4.7 characters.
            nn.BatchNorm1d(32), 
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=10, stride=2))

    self.layer2 = nn.Sequential(
            nn.Conv1d(32, 32, kernel_size=15, stride=2, padding=1),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=10, stride=2))
    
    self.single_layer3_1 = nn.Sequential(
            nn.Conv1d(32, 64, kernel_size=15, stride=2, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=10, stride=2))
    
    self.single_layer3_2 = nn.Sequential(
            nn.Conv1d(32, 64, kernel_size=15, stride=2, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=10, stride=2))
    
    self.single_layer3_3 = nn.Sequential(
            nn.Conv1d(32, 64, kernel_size=15, stride=2, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=10, stride=2))
        
    self.drop_out1 = nn.Dropout(p=0.3)
    self.drop_out2 = nn.Dropout(p=0.4)
    self.drop_out3 = nn.Dropout(p=0.5)
    
    self.single_last_layer1_1 = nn.Linear(self.fc_layer_size, 1000) 
    self.single_last_layer2_1 = nn.Linear(1000, 5)

    self.single_last_layer1_2 = nn.Linear(self.fc_layer_size, 1000) 
    self.single_last_layer2_2 = nn.Linear(1000, 5)

    self.single_last_layer1_3 = nn.Linear(self.fc_layer_size, 1000) 
    self.single_last_layer2_3 = nn.Linear(1000, 5)


  def forward(self, x):
      
      # first blocks in common
    out = self.layer1(x)
    out = self.drop_out1(out)
    out = self.layer2(out)
    out = self.drop_out2(out)

      # single blocks for each label
    out1 = self.single_layer3_1(out)
    out1 = self.drop_out3(out1)
    out1 = out1.reshape(out1.size(0), -1) 
    out1 = self.single_last_layer1_1(out1)
    out1 = self.single_last_layer2_1(out1)

    out2 = self.single_layer3_2(out) 
    out2 = self.drop_out2(out2)
    out2 = out2.reshape(out2.size(0), -1) 
    out2 = self.single_last_layer1_2(out2)
    out2 = self.single_last_layer2_2(out2)

    out3 = self.single_layer3_3(out) 
    out3 = self.drop_out3(out3)
    out3 = out3.reshape(out3.size(0), -1) 
    out3 = self.single_last_layer1_3(out3)
    out3 = self.single_last_layer2_3(out3)

 
    return out1, out2, out3

### Hyperparameters Tuning

In [15]:
train_loader, test_loader = build_DataLoader(
        data_path='dataset_cleansed_version.csv',
        network_type='CNN', 
        usage='hyper_tuning'
    )

In [132]:
for key,value in model.state_dict().copy().items():
    print(key)

layer1.0.weight
layer1.0.bias
layer1.1.weight
layer1.1.bias
layer1.1.running_mean
layer1.1.running_var
layer1.1.num_batches_tracked
layer2.0.weight
layer2.0.bias
layer2.1.weight
layer2.1.bias
layer2.1.running_mean
layer2.1.running_var
layer2.1.num_batches_tracked
single_layer3_1.0.weight
single_layer3_1.0.bias
single_layer3_1.1.weight
single_layer3_1.1.bias
single_layer3_1.1.running_mean
single_layer3_1.1.running_var
single_layer3_1.1.num_batches_tracked
single_layer3_2.0.weight
single_layer3_2.0.bias
single_layer3_2.1.weight
single_layer3_2.1.bias
single_layer3_2.1.running_mean
single_layer3_2.1.running_var
single_layer3_2.1.num_batches_tracked
single_layer3_3.0.weight
single_layer3_3.0.bias
single_layer3_3.1.weight
single_layer3_3.1.bias
single_layer3_3.1.running_mean
single_layer3_3.1.running_var
single_layer3_3.1.num_batches_tracked
single_last_layer1_1.weight
single_last_layer1_1.bias
single_last_layer2_1.weight
single_last_layer2_1.bias
single_last_layer1_2.weight
single_last_lay

In [18]:
model=CNN_multitask(fc_layer_size=7936)

if torch.cuda.device_count() >= 1:
  print("Let's use", torch.cuda.device_count(), "GPUs!")
  model = nn.DataParallel(model)

model.to(device)

CNN_multitask(
  (layer1): Sequential(
    (0): Conv1d(1, 32, kernel_size=(15,), stride=(2,), padding=(1,))
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool1d(kernel_size=10, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv1d(32, 32, kernel_size=(15,), stride=(2,), padding=(1,))
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool1d(kernel_size=10, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (single_layer3_1): Sequential(
    (0): Conv1d(32, 64, kernel_size=(15,), stride=(2,), padding=(1,))
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool1d(kernel_size=10, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (single_layer3_2): Sequential(
    (0): Conv1d(32, 64, kernel_size=(15,), stride=(2,), padding=(1,))
    (1): B

In [17]:
num_epochs = 30
criterion = nn.CrossEntropyLoss()

In [None]:
param_search = Param_Search(model, train_loader, test_loader,
            criterion, num_epochs, n_trials=1,study_name='hp_CNN')

param_search.run_trial()

[32m[I 2021-05-21 00:53:39,094][0m A new study created in memory with name: no-name-627df471-00cf-49db-829d-b091a86dd604[0m


Epochs:   0%|          | 0/30 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

Passing dataset:   0%|          | 0/13842 [00:00<?, ?it/s]

In [None]:
best_model_CNN_hp = param_search.save_best_model('best_model_CNN_hp.pt')

### Model Testing

In [None]:
train_loader, test_loader = build_DataLoader(
        data_path='dataset_cleansed_version.csv',
        network_type='CNN', 
        usage='model_testing'
    )

In [None]:
model=CNN_multitask()

if torch.cuda.device_count() >= 1:
  print("Let's use", torch.cuda.device_count(), "GPUs!")
  model = nn.DataParallel(model)

model.to(device)

In [None]:
num_epochs = 50
criterion = nn.CrossEntropyLoss()

In [None]:
best_lr = 0.6363636255264282
optimizer = optim.RMSprop(model.parameters(), lr=best_lr)

In [None]:
F1_test = fit(model, loader_train, loader_test, 
                        criterion, optimizer, num_epochs, verbose=True)

save_best_model(model, 'best_model_CNN_test.pt')

## 3. EMBEDDING + CONVOLUTIONAL NN 

### Hyperparameters Tuning

In [None]:
train_loader, test_loader = build_DataLoader(
        data_path='dataset_cleansed_version.csv',
        network_type='ENC_CNN', 
        usage='hyper_tuning'
    )

In [None]:
model=CNN_multitask()

if torch.cuda.device_count() >= 1:
  print("Let's use", torch.cuda.device_count(), "GPUs!")
  model = nn.DataParallel(model)

model.to(device)

In [None]:
num_epochs = 50
criterion = nn.CrossEntropyLoss()

In [None]:
param_search = Param_Search(model, train_loader, test_loader,
            criterion, num_epochs, n_trials=2, study_name='hp_ENC_CNN')

param_search.run_trial()
best_model_ENC_CNN_hp = param_search.save_best_model('best_model_ENC_CNN_hp.pt')

### Model Testing

In [None]:
train_loader, test_loader = build_DataLoader(
        data_path='dataset_cleansed_version.csv',
        network_type='ENC_CNN', 
        usage='model_testing'
    )

In [None]:
model=CNN_multitask()

if torch.cuda.device_count() >= 1:
  print("Let's use", torch.cuda.device_count(), "GPUs!")
  model = nn.DataParallel(model)

model.to(device)

In [None]:
num_epochs = 50
criterion = nn.CrossEntropyLoss()

In [None]:
best_lr = 0.6363636255264282
optimizer = optim.RMSprop(model.parameters(), lr=best_lr)

In [None]:
F1_test = fit(model, loader_train, loader_test, 
                        criterion, optimizer, num_epochs, verbose=True)

save_best_model(model, 'best_model_ENC_CNN_test.pt')

## 4. MULTIMODAL NN

In [None]:
import optuna
import torch.nn as nn
#import thop
import torch.optim as optim
import pickle
import re

class Param_Search_Multimodal():

  def __init__(self, 
               model, 
               train_loader, 
               test_loader,
               criterion,
               num_epochs,
               n_trials=4,
               study_name):
    self.model = model
    self.train_loader = train_loader
    self.test_loader = test_loader
    self.criterion = criterion
    self.num_epochs = num_epochs
    self.n_trials = n_trials
    self.study_name = study_name
    self.best_model = None

  def objective(self, trial):

    # Generate the model.
    model = self.model

    # Generate the optimizers.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop"])
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-1)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    model = model.double()
    
    for epoch in tqdm(range(1, num_epochs + 1)):
    # keep-track-of-training-and-validation-loss
      train_loss = 0.0
      test_loss = 0.0
      f1_test = 0.0
    
    # training-the-model
      model.train()
      for load1, load2, load3 in tqdm(self.train_loaders, desc='Passing dataset'):
        x_1, target1, target2, target3 = load1
        x_2, _, _, _ = load2
        x_3, _, _, _ = load3

        # clear-the-gradients-of-all-optimized-variables
        optimizer.zero_grad()
        # forward-pass: compute-predicted-outputs-by-passing-inputs-to-the-model
        output1, output2, output3 = model([x_1.double(), x_2.double(), x_3.double()])
        # calculate-the-batch-loss
        loss = self.criterion(output1, target1) + self.criterion(output2, target2) + self.criterion(output3, target3)
        # backward-pass: compute-gradient-of-the-loss-wrt-model-parameters
        loss.backward()
        # perform-a-ingle-optimization-step (parameter-update)
        optimizer.step()
        # update-training-loss
        train_loss += loss.item() 
            
        
    # validate-the-model
      model.eval()
      for load1, load2, load3 in self.test_loaders:
        x_1, target1, target2, target3 = load1
        x_2, _,_,_ = load2
        x_3, _,_,_ = load3

        output1, output2, output3 = model([x_1.double(), x_2.double(), x_3.double()]
        loss = self.criterion(output1, target1) + self.criterion(output2, target2) + self.criterion(output3, target3)
        # update-average-validation-loss 
        test_loss += loss.item() 

        f1_test = (( F1(output1,target1) + F1(output2,target2) + F1(output3,target3) )/3)*len(target1)

      f1_test /= len(self.test_loader)
    
      trial.report(f1_test, epoch)

    # save all models
    with open("{}.pickle".format(trial.number), "wb") as fout:
      pickle.dump(model, fout)
        
    return f1_test



  def run_trial(self):

    # use sqlite backend to save the study
    study = optuna.create_study(study_name=self.study_name, direction="maximize", 
                                storage='sqlite:///SA_optuna_tuning.db', load_if_exists=True)

    study.optimize(self.objective, n_trials=self.n_trials)
    pruned_trials = [t for t in study.trials if t.state == optuna.structs.TrialState.PRUNED]
    complete_trials = [t for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]

    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
      print("    {}: {}".format(key, value))
    
    with open('PROJECT_SA/params_best_model_FFNN_hp', 'wb') as f:
      pickle.dump(trial.params, f)

    with open("{}.pickle".format(study.best_trial.number), "rb") as fin:
      best_model = pickle.load(fin)
    
    # store only best model
    self.best_model = best_model

  def save_best_model(self, path):
    # remove last layers of the model
    model_param = self.best_model.state_dict()
    
    for key,value in model_param.copy().items():
      if re.findall('single', key):
        del model_param[str(key)]

    # save model with best hyperparameters on google drive
    basepath = 'PROJECT_SA'
    path = os.path.join(basepath, path)

    torch.save(model_param, path)

    return model_param



In [None]:
from tqdm.auto import tqdm

def fit_multimodal(
    model, 
    train_loaders, 
    test_loaders, 
    criterion, 
    optimizer, 
    num_epochs,
    filename_path,
    verbose=True
        ): 

# keeping-track-of-losses 
  train_losses = []
  test_losses = []
  f1_train_scores = []
  f1_test_scores = []

  model = model.double()

  for epoch in tqdm(range(1, num_epochs + 1)):
    # keep-track-of-training-and-validation-loss
    train_loss = 0.0
    test_loss = 0.0
    
    f1_train = 0.0
    f1_test = 0.0
      
    PATH = os.path.join(basepath, filename_path)
    
    if os.path.exists(PATH):
      checkpoint = torch.load(PATH)
      model.load_state_dict(checkpoint)

    else:
    # training-the-model
      model.train()
      for load1, load2, load3 in tqdm(train_loaders, desc='Training model'):
        x_1, target1, target2, target3 = load1
        x_2, _,_,_ = load2
        x_3, _,_,_ = load3

       #  assert label_left == label_central and label_left == label_right

        # clear-the-gradients-of-all-optimized-variables
        optimizer.zero_grad()
        # forward-pass: compute-predicted-outputs-by-passing-inputs-to-the-model
        output1, output2, output3 = model([x_1.double(), x_2.double(), x_3.double()])
        # calculate-the-batch-loss
        loss = criterion(output1, target1) + criterion(output2, target2) + criterion(output3, target3)
        # backward-pass: compute-gradient-of-the-loss-wrt-model-parameters
        loss.backward()
        # perform-a-ingle-optimization-step (parameter-update)
        optimizer.step()
        # update-training-loss
        train_loss += loss.item() 

        f1_train = (( F1(output1,target1) + F1(output2,target2) + F1(output3,target3) )/3)*len(target1)
          #accuracy_train = accuracy(output, target)

        
    # validate-the-model
    model.eval()
    for load1, load2, load3 in tqdm(test_loaders, desc='Testing model'):
      x_1, target1, target2, target3 = load1
      x_2, _,_,_ = load2
      x_3, _,_,_ = load3

      #  assert label_left == label_central and label_left == label_right
      output1, output2, output3 = model([x_1.double(), x_2.double(), x_3.double()])
      loss = criterion(output1, target1) + criterion(output2, target2) + criterion(output3, target3)
      # update-average-validation-loss 
      test_loss += loss.item() 
      f1_test = (( F1(output1,target1) + F1(output2,target2) + F1(output3,target3) )/3)*len(target1)
  

    # calculate-average-losses
    train_losses.append(train_loss)
    test_losses.append(test_loss)
    
    f1_train /= len(train_loader)
    f1_test /= len(test_loader)
    
    f1_train_scores.append(f1_train)    
    f1_test_scores.append(f1_test)
    
    # save the model at each epoch
    model_param = model.state_dict()
    path = os.path.join(basepath, filename_path)
    torch.save(model_param, path)

    # print-training/validation-statistics 
    if verbose == True:
      print('Epoch: {} \tTraining Loss: {:.6f} \tTest Loss: {:.6f} \tTraining F1 Score : {:.6f} \tTest F1 Score: {:.6f}'.format(
      epoch, train_loss, test_loss, f1_train, f1_test))

  # return the scores at each epoch
  return f1_test_scores, f1_train_scores

In [None]:
import torch.nn as nn

class FFNN_multitask_pre(nn.Module):
  def __init__(self):
    super(FFNN_multitask, self).__init__()
 # batch_size * input variables # [500, 261]
    
    self.layer1 = nn.Sequential(
        nn.Linear(261, 4000), # 500x261 and 130500x4000
        nn.ReLU(),
        nn.Linear(4000, 2000),
        nn.ReLU()) 

    self.drop_out1 = nn.Dropout(p=0.3)

  def forward(self, x):
      
      # first block in common
    out = self.layer1(x)
    out = self.drop_out1(out)
    out = out.reshape(out.size(0), -1) 
 
    return out

In [None]:
import torch.nn as nn

class CNN_multitask_pre(nn.Module):
  def __init__(self):
    super(CNN_multitask, self).__init__()
    
    self.layer1 = nn.Sequential(
            nn.Conv2d(2, 32, kernel_size=5, stride=1, padding=2), #The average word length in English language is 4.7 characters.
            nn.BatchNorm2d(32), 
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2))

    self.layer2 = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2))
        
    self.drop_out1 = nn.Dropout(p=0.3)
    self.drop_out2 = nn.Dropout(p=0.4)


  def forward(self, x):
      
      # first blocks in common
    out = self.layer1(x)
    out = self.drop_out1(out)
    out = self.layer2(out)
    out = self.drop_out2(out)
    out = out.reshape(out.size(0), -1) 
 
    return out

In [None]:
import torch.nn as nn
import torch.optim as optim


class MultimodalMultitask_NN(nn.Module):
  def __init__(self, device, n_classes, hyperparameters_tuning=False, args=None):
    super(MultimodalMultitask_NN, self).__init__()
    #Like in other object-oriented languages, it allows you to call 
    #methods of the superclass in your subclass. The primary use case of 
    #this is to extend the functionality of the inherited method.
    
    # input parameters
    self.device = device
    self.n_classes = n_classes
    self.args = args
    self.hyperparameters_tuning = hyperparameters_tuning

    
    # VGG convolutional neural network
    self.FFNN_multitask = FFNN_multitask_pre()
    self.CNN_multitask = CNN_multitask_pre()
    self.ENC_CNN_multitask = ENC_CNN_multitask_pre()

    # load previously trained models to find optimal hyperparameters
    if self.hyperparameters_tuning:
      load_model(self.FFNN_multitask, 'best_model_FFNN_hp.pt')
      load_model(self.CNN_multitask, 'best_model_CNN_hp.pt')
      load_model(self.ENC_CNN_multitask, 'best_model_ENC_CNN_hp.pt')
    
    # load previously trained models for final testing
    else:
      load_model(self.FFNN_multitask, 'best_model_FFNN_test.pt')
      load_model(self.CNN_multitask, 'best_model_CNN_test.pt')
      load_model(self.ENC_CNN_multitask, 'best_model_ENC_CNN_test.pt')

    # freeze layers
    for param in self.FFNN_multitask.parameters():
      param.requires_grad = False
    for param in self.CNN_multitask.parameters():
      param.requires_grad = False
    for param in self.ENC_CNN_multitask.parameters():
      param.requires_grad = False
      
    #self.pre_output_size = (5*64*16) #5120

    # post concat layers

    self.post_layer1 = nn.Sequential(
        nn.Linear(261, 4000), # 500x261 and 130500x4000
        nn.ReLU()) 

    self.single_post_layer2_1 = nn.Sequential(
    nn.Linear(2000, 1000),
    nn.ReLU())

    self.single_post_layer2_2 = nn.Sequential(
    nn.Linear(2000, 1000),
    nn.ReLU())

    self.single_post_layer2_3 = nn.Sequential(
    nn.Linear(2000, 1000),
    nn.ReLU())

    self.single_post_last_layer1 = nn.Linear(1000, 2) 
    self.single_post_last_layer2 = nn.Linear(1000, 2)
    self.single_post_last_layer3 = nn.Linear(1000, 2)  

    self.drop_out1 = nn.Dropout(p=0.3)
    self.drop_out2 = nn.Dropout(p=0.4) 

  
  def forward(self, x):

    x_1, x_2, x_3 = x

    out_1 = self.FFNN_multitask(x_1)
    out_2 = self.CNN_multitask(x_2)
    out_3 = self.ENC_CNN_multitask(x_3)

    
    # concat layer
    out = torch.cat((out_1, out_2, out_3), dim=1)

    # final layer in common
    out = self.post_layer1(out)
    out = self.drop_out1(out)

    # final single layers
    out1 = self.single_post_layer2_1(out)
    out1 = self.drop_out2(out1)
    out1 = self.single_post_last_layer1(out1)

    out2 = self.single_post_layer2_2(out)
    out2 = self.drop_out2(out2)
    out2 = self.single_post_last_layer2(out2)

    out3 = self.single_post_layer2_3(out)
    out3 = self.drop_out2(out3)
    out3 = self.single_post_last_layer3(out3)

    # output softmax
    return out1, out2, out3
    
   # return nn.functional.log_softmax(output, dim=-1) #not needed since it's already applied
   #by cross-entropy loss

### Hyperparameters tuning

In [None]:
train_loader_1, test_loader_1 = build_DataLoader(
        data_path='dataset_cleansed_version.csv',
        network_type='FFNN', 
        usage='hyper_tuning'
    )

train_loader_2, test_loader_2 = build_DataLoader(
        data_path='dataset_cleansed_version.csv',
        network_type='CNN', 
        usage='hyper_tuning'
    )

train_loader_3, test_loader_3 = build_DataLoader(
        data_path='dataset_cleansed_version.csv',
        network_type='ENC_CNN', 
        usage='hyper_tuning'
    )

In [None]:
model=MultimodalMultitask_NN(fc_layer_size=3840)

if torch.cuda.device_count() >= 1:
  print("Let's use", torch.cuda.device_count(), "GPUs!")
  model = nn.DataParallel(model)

model.to(device)

In [None]:
num_epochs = 1
criterion = nn.CrossEntropyLoss()

In [None]:
param_search = Param_Search_Multimodal(
    model, 
    zip(train_loader_1, train_loader_2, train_loader_3), 
    zip(test_loader_1, test_loader_2, test_loader_3),
    criterion, 
    num_epochs, 
    n_trials=1,
    study_name='hp_MM'
    )

param_search.run_trial()
best_model_MM_hp = grid_search.save_best_model('best_model_MM_hp.pt', network_type='MM')

### Model Testing

In [None]:
train_loader_1, test_loader_1 = build_DataLoader(
        data_path='dataset_cleansed_version.csv',
        network_type='FFNN', 
        usage='model_testing'
    )

train_loader_2, test_loader_2 = build_DataLoader(
        data_path='dataset_cleansed_version.csv',
        network_type='CNN', 
        usage='model_testing'
    )

train_loader_3, test_loader_3 = build_DataLoader(
        data_path='dataset_cleansed_version.csv',
        network_type='ENC_CNN', 
        usage='model_testing'
    )

In [None]:
num_epochs = 1
criterion = nn.CrossEntropyLoss()

In [54]:
import sqlite3
engine = create_engine('sqlite:///SA_optuna_tuning.db')

study_name = 'example-study'  # Unique identifier of the study.
study = optuna.create_study(study_name=study_name, storage='sqlite:///SA_optuna_tuning.db', load_if_exists=True))