<a href="https://www.kaggle.com/code/vincemarcs/mvsa-fusion-models-ml?scriptVersionId=101138015" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
SEED = 61

import os
import re
import gc
import h5py
import torch
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import tensorflow_addons as tfa
import matplotlib.pyplot as plt

from tqdm import tqdm
from nltk import tokenize
from IPython.display import display_html
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import BorderlineSMOTE
from transformers import BertTokenizer, BertForMaskedLM, BertModel
from tensorflow.python.keras.layers import Layer, InputSpec, Lambda

from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA
from sklearn.model_selection import RepeatedKFold, KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay, classification_report

from keras import backend as K
from keras import initializers,regularizers,constraints
from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical
from keras.layers import Reshape, Input, Embedding, Flatten, Dense, Dropout, BatchNormalization, Activation #, merge
from keras.layers import TimeDistributed, LSTM, GRU, Bidirectional, Convolution1D, MaxPooling1D, MaxPooling2D
from keras.layers.core import RepeatVector #, Reshape
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.models import Sequential, Model, load_model

def reset_seeds():
    np.random.seed(SEED) 
    python_random.seed(SEED)
    tf.random.set_seed(SEED)
    os.environ["PYTHONHASHSEED"] = str(SEED)

# from tensorflow.keras import Model
# from attention import Attention_input1, Attention_input2
# from keras.optimizers import SGD, RMSprop, Adagrad

In [2]:
def read_hdf5(path):
    read_file = h5py.File(path, 'r')
    
    feature_names = list(read_file.keys())
    loaded_data = []
    
    for name in feature_names:
        dataset = read_file[name][:]
        if dataset.dtype == np.dtype('object'):
            dataset = np.array([x.decode('UTF-8') for x in dataset])            
        loaded_data.append((name, dataset))

    return loaded_data

def loadz(path):
    data = np.load(path)['arr_0']
    return data

In [3]:
def load_labels(path):
    data = read_hdf5(path)

    for x in data:
        if x[0] == 'multimodal-labels':
            labels = x[1]
        if x[0] == 'text-labels':
            text_labels = x[1]
        if x[0] == 'image-labels':
            image_labels = x[1]
        
    return labels, text_labels, image_labels

def merge_mvsa(mvsa_single, mvsa_multiple):
    mvsa = np.concatenate((mvsa_single, mvsa_multiple), axis=0)
    return mvsa

def load_mvsa_feature(feature_name, merge=False):
    folder_path = os.path.join('../input/mvsa-features/', feature_name)
    single_file = 'mvsa-single-{}.npz'.format(feature_name)
    multiple_file = 'mvsa-multiple-{}.npz'.format(feature_name)
    mvsa_single = loadz(os.path.join(folder_path, single_file))
    mvsa_multiple = loadz(os.path.join(folder_path, multiple_file))
    
    if merge == True:
        return merge_mvsa(mvsa_single, mvsa_multiple)
    
    return mvsa_single, mvsa_multiple

In [4]:
def get_features(feature_names):
    mvsa_single_features = []
    mvsa_multiple_features = []

    for name in feature_names:
        
        name_split = name.split('-')
        textual = name_split[1]
        visual = name_split[0]
        
        if textual == 'bert':
            textual = 'bert-base'

        textual_features = load_mvsa_feature(textual)
        visual_features = load_mvsa_feature(visual)

        if 'pos' in name and 'ner' not in name:
            temp = []
            pos_features = load_mvsa_feature('pos-tfidf')
            temp.append(np.concatenate((textual_features[0], pos_features[0]), axis=1))
            temp.append(np.concatenate((textual_features[1], pos_features[1]), axis=1))
            textual_features = temp

        elif 'pos' not in name and 'ner' in name:
            temp = []
            ner_features = load_mvsa_feature('ner-tfidf')
            temp.append(np.concatenate((textual_features[0], ner_features[0]), axis=1))
            temp.append(np.concatenate((textual_features[1], ner_features[1]), axis=1))
            textual_features = temp

        elif 'pos' in name and 'ner' in name:
            temp = []
            pos_features = load_mvsa_feature('pos-tfidf')
            ner_features = load_mvsa_feature('ner-tfidf')
            temp.append(np.concatenate((textual_features[0], pos_features[0], ner_features[0]), axis=1))
            temp.append(np.concatenate((textual_features[1], pos_features[1], ner_features[1]), axis=1))
            textual_features = temp
    
        mvsa_single_features.append([textual_features[0], visual_features[0]])
        mvsa_multiple_features.append([textual_features[1], visual_features[1]])
        
    return mvsa_single_features, mvsa_multiple_features

In [5]:
# e.g. validation_split=0.1 -----> 8:1:1 ratio of train, val, test
def split_data(data, validation_split):
    num_val = int(validation_split * data.shape[0])
    data_train = data[:-(num_val*2)]
    data_val = data[-(num_val*2):-(num_val)]
    data_test = data[-num_val:]
    return data_train, data_val, data_test

In [6]:
def shuffle_mvsa(mvsa_features, labels):
    shuffled_features = []
    random_idx = np.random.permutation(len(labels))
    for i in range(len(mvsa_features)):
        x, y = mvsa_features[i][0][random_idx], mvsa_features[i][1][random_idx]
        shuffled_features.append([x, y])
    return shuffled_features, labels[random_idx]

def preprocess_inputs(X1, X2, y):
    y = le.fit_transform(y)
    y = to_categorical(np.asarray(y))
    
    X1_train, X1_val, X1_test = split_data(X1, VALIDATION_SPLIT)
    X2_train, X2_val, X2_test = split_data(X2, VALIDATION_SPLIT)
    y_train, y_val, y_test = split_data(y, VALIDATION_SPLIT)

    oversample = BorderlineSMOTE(sampling_strategy='minority', random_state=SEED, kind='borderline-2')
#     oversample = SMOTE(sampling_strategy='minority', random_state=SEED)
    X1_train, _ = oversample.fit_resample(X1_train, y_train)
    X2_train, y_train = oversample.fit_resample(X2_train, y_train)

    return {'texts': [X1_train, X1_val, X1_test], 'images': [X2_train, X2_val, X2_test], 'labels':[y_train, y_val, y_test]}

def get_preprocess_input(feature_names, mvsa_features, labels):
    mvsa_features_shuffled, labels_shuffled = shuffle_mvsa(mvsa_features, labels)
    mvsa_features_split = []
    for i in range(len(feature_names)):
        preprocess_splits = preprocess_inputs(mvsa_features_shuffled[i][0], mvsa_features_shuffled[i][1], labels_shuffled)
        mvsa_features_split.append(preprocess_splits)
    return mvsa_features_split

def process_dup(names):
    new_names = []
    for i in range(len(names)):
        count_dup = 0
        for j in range(0, i+1):
            if names[i] == names[j]:
                count_dup += 1
        if count_dup > 1:
            new_names.append(names[i] + '-' + str(count_dup))
        else:
            new_names.append(names[i])
    return new_names

In [7]:
def weighted_average(weights, probs):
    ''' Calculate the weighted average probability distribution from all input probs and its weights 
    weights: weights list (or array)
    probs: probability distributions array list
    '''
    output_probs = []
    weighted_probs = [probs[i] * weights[i] for i in range(len(weights))]
    for i in range(len(probs[0])):
        sum_prob = np.zeros(len(probs[0][0]))
        for j in range(len(weights)):
            sum_prob = np.sum((sum_prob, weighted_probs[j][i]), axis=0)
        output_probs.append(sum_prob)
    return np.asarray(output_probs, dtype='float32')

def get_average_weights(*scores, inverse=False):
    ''' Get the corresponding weight of each input score 
    inverse: (bool) get inverse weights value in case of the smaller score value, the bigger weight value (such as model loss)
    '''
    
    weights = []
    for score in scores:
        weights.append(score/np.sum(scores))
    
    if inverse == True:
        inverse_weights = []
        inverse = [1/weight for weight in weights]
        for inv in inverse:
            inverse_weights.append(inv/np.sum(inverse))
        weights = inverse_weights

    return weights

In [8]:
NUM_CLASSES = 3
f1_macro = tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='macro', name='f1_macro')
f1_weighted = tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='weighted', name='f1_weighted')
    
def create_model_text(input_shape, lstm=True):
    text_input = Input(shape=input_shape)
    dropout = Dropout(DROPOUT_INPUT) (text_input)
    if lstm == True:
        text_reshape = Reshape((1, -1)) (dropout)
        text_lstm = LSTM(NUM_LSTM) (text_reshape)
        dropout = Dropout(DROPOUT_LSTM) (text_lstm)
    outputs = Dense(NUM_CLASSES, activation='softmax') (dropout)
    model = Model(text_input, outputs)
    model.compile(optimizer=OPTIMIZER, loss=LOSS, metrics=['accuracy', f1_macro, f1_weighted])
    return model

def create_model_image(input_shape, lstm=True):
    image_input = Input(shape=input_shape)
    dropout = Dropout(0.2) (image_input)    
    if lstm == True:
        image_reshape = Reshape((1, -1)) (dropout)
        image_lstm = LSTM(NUM_LSTM_IMG) (image_reshape)
        dropout = Dropout(0.2) (image_lstm)
    outputs = Dense(NUM_CLASSES, activation='softmax') (dropout)
    model = Model(image_input, outputs)
    model.compile(optimizer=OPTIMIZER, loss=LOSS, metrics=['accuracy', f1_macro, f1_weighted])
    return model

def create_model_IF(text_shape, image_shape, lstm=True):
    text_input = Input(shape=text_shape)
    image_input = Input(shape=image_shape)
    
    text_dropout = Dropout(DROPOUT_INPUT) (text_input)    
    image_dropout = Dropout(0.2) (image_input)    

    if lstm == True:
        text_reshape = Reshape((1, -1)) (text_dropout)
        text_lstm = LSTM(NUM_LSTM) (text_reshape)
        text_dropout = Dropout(DROPOUT_LSTM) (text_lstm)    

        image_reshape = Reshape((1, -1)) (image_dropout)
        image_lstm = LSTM(NUM_LSTM_IMG) (image_reshape)
        image_dropout = Dropout(0.2) (image_lstm)    

    text_image_concat = tf.keras.layers.Concatenate(axis=1)([text_dropout, image_dropout])
        
    concat_self_attention = tf.keras.layers.Attention() ([text_image_concat, text_image_concat])
    dropout = Dropout(DROPOUT_ATT) (concat_self_attention)
    
    outputs = Dense(NUM_CLASSES, activation='softmax') (dropout)
    model = Model([text_input, image_input], outputs)
    model.compile(optimizer=OPTIMIZER, loss=LOSS, metrics=['accuracy', f1_macro, f1_weighted])
    return model

2022-07-18 14:35:11.889320: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-18 14:35:11.890599: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-18 14:35:11.891322: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-18 14:35:11.892150: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [9]:
def run_and_evaluate_IF(name, data, verbose=0, lstm=True):
    '''  Train and test Intermediate Fusion model Multimodal Labels 
    X1: text input
    X2: image input
    y: multimodal labels
    '''
    
    X1_train, X1_val, X1_test = data['texts']
    X2_train, X2_val, X2_test = data['images']
    y_train, y_val, y_test = data['labels']
    
    early_stopping_IF = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    
    if 'multiple' in name:
        batch_size = 256
    else:
        batch_size = BATCH_SIZE # 128

    checkpoint_IF_path = './model_checkpoint/{}.h5'.format(name.split('-')[0] + '-' + '-'.join(name.split('-')[2:]))
    history_IF_path = './model_history/{}.npy'.format(name.split('-')[0] + '-' + '-'.join(name.split('-')[2:]))

    if os.path.exists(checkpoint_IF_path) and os.path.exists(history_IF_path):
        model_IF = load_model(checkpoint_IF_path)
        history_IF = pickle.load(open(history_IF_path, 'rb'))
    else:
        model_IF = create_model_IF(X1_train.shape[1:], X2_train.shape[1:], lstm=lstm)
        checkpoint_IF = ModelCheckpoint(checkpoint_IF_path, save_best_only=True, verbose=verbose)
        history_IF = model_IF.fit([X1_train, X2_train], y_train, validation_data=([X1_val, X2_val], y_val), 
                            epochs=EPOCHS, batch_size=batch_size, verbose=verbose,
                            callbacks=[checkpoint_IF, early_stopping_IF])
        if not os.path.exists(os.path.split(history_IF_path)[0]):
            os.makedirs(os.path.split(history_IF_path)[0])
        pickle.dump(history_IF.history, open(history_IF_path, 'wb'))
        history_IF = history_IF.history
        model_IF = load_model(checkpoint_IF_path)

    if verbose == 1:
        best_epoch = np.argmin(history['val_loss'])
        print('\nCheckpoint loaded at epoch:', best_epoch)

    return history_IF, evaluate_model_IF(model_IF, X1_test, X2_test, y_test, verbose=verbose)

def evaluate_model_IF(model, X_texts, X_images, y_test, verbose=1):

    loss, acc, f1_macro, f1_weighted = model.evaluate([X_texts, X_images], y_test, verbose=verbose)

    if verbose == 1:
        print('Loss:', loss)
        print('Accuracy:', acc)
        print('Macro F1-score:', f1_macro)
        print('Weighted F1-score:', f1_weighted)

        y_pred = model.predict([X_texts, X_images])
        matrix = confusion_matrix(le.inverse_transform(y_test.argmax(axis=1)), le.inverse_transform(y_pred.argmax(axis=1)), 
                                  labels=list(le.classes_))
        cm_disp = ConfusionMatrixDisplay(confusion_matrix=matrix,
                                  display_labels=list(le.classes_))
        cm_disp.plot()
        plt.show()
        
    return acc, f1_macro, f1_weighted

In [10]:
def run_and_evaluate_LF(name, data, verbose=0, lstm=True):
    ''' Train and test Late Fusion model only with Multimodal Labels 
    X1: text input
    X2: image input
    y: multimodal labels
    y1: text labels
    y2: image labels
    verbose: 0 or 1 to print tracking on progress
    '''

    X1_train, X1_val, X1_test = data['texts']
    X2_train, X2_val, X2_test = data['images']
    y_train, y_val, y_test = data['labels']

    early_stopping_text = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    early_stopping_image = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)

    if 'multiple' in name:
        batch_size = 256
    else:
        batch_size = BATCH_SIZE # 128

    checkpoint_text_path = './model_checkpoint/{}.h5'.format(name.split('-')[0] + '-' + '-'.join(name.split('-')[3:]))
    checkpoint_image_path = './model_checkpoint/{}.h5'.format(name.split('-')[0] + '-' + name.split('-')[2])    
    history_text_path = './model_history/{}.npy'.format(name.split('-')[0] + '-' + '-'.join(name.split('-')[3:]))
    history_image_path = './model_history/{}.npy'.format(name.split('-')[0] + '-' + name.split('-')[2])

    if os.path.exists(checkpoint_text_path) and os.path.exists(history_text_path):
        model_text = load_model(checkpoint_text_path)
        history_text = pickle.load(open(history_text_path, 'rb'))
    else:
        model_text = create_model_text(X1_train.shape[1:], lstm=lstm)
        checkpoint_text = ModelCheckpoint(checkpoint_text_path, save_best_only=True, verbose=verbose)
        history_text = model_text.fit(X1_train, y_train, validation_data=(X1_val, y_val), 
                                  epochs=EPOCHS, batch_size=batch_size, verbose=verbose,
                                  callbacks=[checkpoint_text, early_stopping_text])
        if not os.path.exists(os.path.split(history_text_path)[0]):
            os.makedirs(os.path.split(history_text_path)[0])
        pickle.dump(history_text.history, open(history_text_path, 'wb'))
        history_text = history_text.history
        model_text = load_model(checkpoint_text_path)
        
    if os.path.exists(checkpoint_image_path) and os.path.exists(history_image_path):
        model_image = load_model(checkpoint_image_path)
        history_image = pickle.load(open(history_image_path, 'rb'))
    else:
        model_image = create_model_image(X2_train.shape[1:], lstm=lstm)
        checkpoint_image = ModelCheckpoint(checkpoint_image_path, 
                                       save_best_only=True, verbose=verbose)
        history_image = model_image.fit(X2_train, y_train, validation_data=(X2_val, y_val), 
                                epochs=EPOCHS, batch_size=batch_size, verbose=verbose,
                                callbacks=[checkpoint_image, early_stopping_image])
        pickle.dump(history_image.history, open(history_image_path, 'wb'))
        history_image = history_image.history
        model_image = load_model(checkpoint_image_path)

    y_pred_text = model_text.predict(X1_test)
    y_pred_image = model_image.predict(X2_test)

    best_epoch_text = np.argmin(history_text['val_loss'])
    best_epoch_image = np.argmin(history_image['val_loss'])

    val_acc_text = history_text['val_accuracy'][best_epoch_text]
    val_acc_image = history_image['val_accuracy'][best_epoch_image]

    weights = get_average_weights(val_acc_text, val_acc_image)

    y_pred = weighted_average(weights, np.asarray([y_pred_text, y_pred_image], dtype='float32'))

    if verbose == 1:
        print('Checkpoint of text model loaded at epoch:', best_epoch_text)
        print('Checkpoint of image model loaded at epoch:', best_epoch_image)
        print('Weights of text and image models:', weights)
    
    eval_text = evaluate_model_uni(model_text, X1_test, y_test, verbose=verbose)
    eval_image = evaluate_model_uni(model_image, X2_test, y_test, verbose=verbose)
    eval_LF = evaluate_model_LF(y_test, y_pred, verbose=verbose)
    return eval_text, eval_image, eval_LF

In [11]:
def run_and_evaluate_HF(name, data, verbose=0, lstm=True):
    ''' Train Hybrid Fusion model and test only with Multimodal Labels
    X1: text input
    X2: image input
    y: labels
    verbose: 0 or 1 to print tracking on progress
    '''

    X1_train, X1_val, X1_test = data['texts']
    X2_train, X2_val, X2_test = data['images']
    y_train, y_val, y_test = data['labels']

    early_stopping_text = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    early_stopping_image = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    early_stopping_IF = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)

    if 'multiple' in name:
        batch_size = 256
    else:
        batch_size = BATCH_SIZE # 128
        
    checkpoint_text_path = './model_checkpoint/{}.h5'.format(name.split('-')[0] + '-' + '-'.join(name.split('-')[3:]))
    checkpoint_image_path = './model_checkpoint/{}.h5'.format(name.split('-')[0] + '-' + name.split('-')[2])
    checkpoint_IF_path = './model_checkpoint/{}.h5'.format(name.split('-')[0] + '-' + '-'.join(name.split('-')[2:]))

    history_text_path = './model_history/{}.npy'.format(name.split('-')[0] + '-' + '-'.join(name.split('-')[3:]))
    history_image_path = './model_history/{}.npy'.format(name.split('-')[0] + '-' + name.split('-')[2])
    history_IF_path = './model_history/{}.npy'.format(name.split('-')[0] + '-' + '-'.join(name.split('-')[2:]))

    if os.path.exists(checkpoint_text_path) and os.path.exists(history_text_path):
        model_text = load_model(checkpoint_text_path)
        history_text = pickle.load(open(history_text_path, 'rb'))
    else:
        model_text = create_model_text(X1_train.shape[1:], lstm=lstm)
        checkpoint_text = ModelCheckpoint(checkpoint_text_path, save_best_only=True, verbose=verbose)
        history_text = model_text.fit(X1_train, y_train, validation_data=(X1_val, y_val), 
                                  epochs=EPOCHS, batch_size=batch_size, verbose=verbose,
                                  callbacks=[checkpoint_text, early_stopping_text])
        if not os.path.exists(os.path.split(history_text_path)[0]):
            os.makedirs(os.path.split(history_text_path)[0])
        pickle.dump(history_text.history, open(history_text_path, 'wb'))
        history_text = history_text.history
        model_text = load_model(checkpoint_text_path)
        
    if os.path.exists(checkpoint_image_path) and os.path.exists(history_image_path):
        model_image = load_model(checkpoint_image_path)
        history_image = pickle.load(open(history_image_path, 'rb'))
    else:
        model_image = create_model_image(X2_train.shape[1:], lstm=lstm)        
        checkpoint_image = ModelCheckpoint(checkpoint_image_path, save_best_only=True, verbose=verbose)
        history_image = model_image.fit(X2_train, y_train, validation_data=(X2_val, y_val), 
                                epochs=EPOCHS, batch_size=batch_size, verbose=verbose,
                                callbacks=[checkpoint_image, early_stopping_image])
        pickle.dump(history_image.history, open(history_image_path, 'wb'))
        history_image = history_image.history
        model_image = load_model(checkpoint_image_path)

    if os.path.exists(checkpoint_IF_path) and os.path.exists(history_IF_path):
        model_IF = load_model(checkpoint_IF_path)
        history_IF = pickle.load(open(history_IF_path, 'rb'))
    else:
        model_IF = create_model_IF(X1_train.shape[1:], X2_train.shape[1:], lstm=lstm)
        checkpoint_IF = ModelCheckpoint(checkpoint_IF_path, save_best_only=True, verbose=verbose)
        history_IF = model_IF.fit([X1_train, X2_train], y_train, validation_data=([X1_val, X2_val], y_val), 
                            epochs=EPOCHS, batch_size=batch_size, verbose=verbose,
                            callbacks=[checkpoint_IF, early_stopping_IF])
        if not os.path.exists(os.path.split(history_IF_path)[0]):
            os.makedirs(os.path.split(history_IF_path)[0])
        pickle.dump(history_IF.history, open(history_IF_path, 'wb'))
        history_IF = history_IF.history
        model_IF = load_model(checkpoint_IF_path)
    
    y_pred_text = model_text.predict(X1_test)
    y_pred_image = model_image.predict(X2_test)
    y_pred_IF = model_IF.predict([X1_test, X2_test])
    
    best_epoch_text = np.argmin(history_text['val_loss'])
    best_epoch_image = np.argmin(history_image['val_loss'])
    best_epoch_IF = np.argmin(history_IF['val_loss'])
    
    val_acc_text = history_text['val_accuracy'][best_epoch_text]
    val_acc_image = history_image['val_accuracy'][best_epoch_image]
    val_acc_IF = history_IF['val_accuracy'][best_epoch_IF]
    
    weights = get_average_weights(val_acc_text, val_acc_image, val_acc_IF)

    y_pred = weighted_average(weights, np.asarray([y_pred_text, y_pred_image, y_pred_IF], dtype='float32'))

    if verbose == 1:
        print('Checkpoint of text model loaded at epoch:', best_epoch_text)
        print('Checkpoint of image model loaded at epoch:', best_epoch_image)
        print('Checkpoint of IF model loaded at epoch:', best_epoch_IF)

    eval_text = evaluate_model_uni(model_text, X1_test, y_test, verbose=verbose)
    eval_image = evaluate_model_uni(model_image, X2_test, y_test, verbose=verbose)
    eval_IF = evaluate_model_IF(model_IF, X1_test, X2_test, y_test, verbose=verbose)
    eval_HF = evaluate_model_LF(y_test, y_pred, verbose=verbose)
    return eval_text, eval_image, eval_IF, eval_HF

In [12]:
def evaluate_model_LF(y_true, y_pred, verbose=0):
    
    y_pred = le.inverse_transform(y_pred.argmax(axis=1))
    y_true = le.inverse_transform(y_true.argmax(axis=1))

    acc = accuracy_score(y_true, y_pred)
    f1_macro = f1_score(y_true, y_pred, average='macro')
    f1_weighted = f1_score(y_true, y_pred, average='weighted')

    if verbose == 1:
        print(classification_report(y_true, y_pred))
        matrix = confusion_matrix(y_true, y_pred,
                                  labels=list(le.classes_))
        cm_disp = ConfusionMatrixDisplay(confusion_matrix=matrix,
                                  display_labels=list(le.classes_))
        cm_disp.plot()
        plt.show()

    return acc, f1_macro, f1_weighted

In [13]:
def evaluate_model_uni(model, X_test, y_test, verbose=1):
    
    loss, acc, f1_macro, f1_weighted = model.evaluate(X_test, y_test, verbose=verbose)
    
    if verbose == 1:
        print('Loss:', loss)
        print('Accuracy:', acc)
        print('Macro F1-score:', f1_macro)
        print('Weighted F1-score:', f1_weighted)
        
    return acc, f1_macro, f1_weighted

In [14]:
def plot_metrics(history):
    fig = plt.figure(figsize=(20, 5))

    fig.add_subplot(1, 4, 1)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('LOSS')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='best')

    fig.add_subplot(1, 4, 2)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('ACCURACY')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='best')

    fig.add_subplot(1, 4, 3)
    plt.plot(history.history['f1_macro'])
    plt.plot(history.history['val_f1_macro'])
    plt.title('Macro F1-SCORE')
    plt.ylabel('f1-macro')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='best')
    
    fig.add_subplot(1, 4, 4)
    plt.plot(history.history['f1_weighted'])
    plt.plot(history.history['val_f1_weighted'])
    plt.title('Weighted F1-SCORE')
    plt.ylabel('f1-weighted')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='best')

    plt.show()

In [15]:
def style_dataframe(dataframe):
    return dataframe.style.highlight_max(subset=['Accuracy', 'F1-macro', 'F1-weighted'], props='color:lawngreen', axis=0)\
                          .highlight_min(subset=['Accuracy', 'F1-macro', 'F1-weighted'], props='color:tomato', axis=0)

def highlight_neg(cell):
    if type(cell) != str and cell < 0 :
        return 'color: tomato'
    else:
        return 'color: lawngreen'

def display_dataframes(dfs, names=[], index=False):
    def to_df(x):
        if isinstance(x, pd.Series):
            return pd.DataFrame(x)
        else:
            return x
    html_str = ''
    if names:
        html_str += ('<tr>' + 
                     ''.join(f'<td style="text-align:center">{name}</td>' for name in names) + 
                     '</tr>')
    html_str += ('<tr>' + 
                 ''.join(f'<td style="vertical-align:top"> {to_df(df).to_html()}</td>' 
                         for df in dfs) + 
                 '</tr>')
    html_str = f'<table>{html_str}</table>'
    html_str = html_str.replace('table','table style="display:inline"')
    display_html(html_str, raw=True)

# Load data

In [16]:
# ## Choose best
# # Load text feature
# mvsa_single_bert, mvsa_multiple_bert = load_mvsa_feature('bert-base')
# mvsa_single_pos_bow, mvsa_multiple_pos_bow = load_mvsa_feature('pos-bow')
# mvsa_single_pos_tfidf, mvsa_multiple_pos_tfidf = load_mvsa_feature('pos-tfidf')
# mvsa_single_ner_bow, mvsa_multiple_ner_bow = load_mvsa_feature('ner-bow')
# mvsa_single_ner_tfidf, mvsa_multiple_ner_tfidf = load_mvsa_feature('ner-tfidf')

# ## Load image feature
# mvsa_single_vgg16, mvsa_multiple_vgg16 = load_mvsa_feature('vgg16')
# mvsa_single_vgg19, mvsa_multiple_vgg19 = load_mvsa_feature('vgg19')
# mvsa_single_resnet50, mvsa_multiple_resnet50 = load_mvsa_feature('resnet50')
# mvsa_single_resnet101, mvsa_multiple_resnet101 = load_mvsa_feature('resnet101')
# mvsa_single_resnet152, mvsa_multiple_resnet152 = load_mvsa_feature('resnet152')
# mvsa_single_densenet121, mvsa_multiple_densenet121 = load_mvsa_feature('densenet121')
# mvsa_single_densenet169, mvsa_multiple_densenet169 = load_mvsa_feature('densenet169')
# mvsa_single_densenet201, mvsa_multiple_densenet201 = load_mvsa_feature('densenet201')

# mvsa_single_bert_pos = np.concatenate((mvsa_single_bert, mvsa_single_pos_tfidf), axis=1)
# mvsa_single_bert_ner = np.concatenate((mvsa_single_bert, mvsa_single_ner_tfidf), axis=1)
# mvsa_single_bert_pos_ner = np.concatenate((mvsa_single_bert, mvsa_single_pos_tfidf, mvsa_single_ner_tfidf), axis=1)

# mvsa_multiple_bert_pos = np.concatenate((mvsa_multiple_bert, mvsa_multiple_pos_tfidf), axis=1)
# mvsa_multiple_bert_ner = np.concatenate((mvsa_multiple_bert, mvsa_multiple_ner_tfidf), axis=1)
# mvsa_multiple_bert_pos_ner = np.concatenate((mvsa_multiple_bert, mvsa_multiple_pos_tfidf, mvsa_multiple_ner_tfidf), axis=1)

In [17]:
mvsa_single_multimodal_labels, mvsa_single_text_labels, mvsa_single_image_labels = load_labels('../input/mvsa-features/labels/mvsa-single-labels.hdf5')
mvsa_multiple_multimodal_labels, mvsa_multiple_text_labels, mvsa_multiple_image_labels = load_labels('../input/mvsa-features/labels/mvsa-multiple-labels.hdf5')

le = LabelEncoder()
le.fit(mvsa_multiple_multimodal_labels)
NUM_CLASSES = len(le.classes_) # = 3

In [18]:
feature_names = ['resnet101-bert', 'resnet101-bert-lstm', 'densenet201-bert-lstm', 'densenet201-bert-pos-lstm', 'densenet201-bert-ner-lstm', 'densenet201-bert-pos-ner-lstm']
visual_feature_names = process_dup([name.split('-')[0] for name in feature_names])
textual_feature_names = process_dup(['-'.join(name.split('-')[1:]) for name in feature_names])
# for i in range(len(feature_names)):
#     x = ' '.join(feature_names[i].split('-')[1:]).rstrip()
#     if len(x.split()) > 1:
#         x = '-'.join(x.rstrip('-lstm').rstrip().split())
#     textual_feature_names.append(x)

In [19]:
mvsa_single_features, mvsa_multiple_features = get_features(feature_names)

In [20]:
reset_seeds()
EPOCHS = 100
BATCH_SIZE = 128
VALIDATION_SPLIT = 0.1
EARLY_STOPPING = 10
NUM_LSTM = 128
NUM_LSTM_IMG = 128
DROPOUT_LSTM = 0.5
DROPOUT_INPUT = 0.4
DROPOUT_ATT = 0.2
OPTIMIZER = 'adam'
LOSS = 'categorical_crossentropy'

In [21]:
mvsa_single_features_split = get_preprocess_input(feature_names, mvsa_single_features, mvsa_single_multimodal_labels)
mvsa_multiple_features_split = get_preprocess_input(feature_names, mvsa_multiple_features, mvsa_multiple_multimodal_labels)

In [22]:
# a = preprocess_inputs(mvsa_single_features[-1][0], mvsa_single_features[-1][1], mvsa_single_multimodal_labels)
# run_and_evaluate_LF('test', a, verbose=1)

# Intermediate Fusion

In [23]:
print('MVSA-Single with Intermediate Fusion')
scores = []
for i in range(len(feature_names)):
    print('MVSA-Single:', feature_names[i])
    if 'lstm' in feature_names[i]:
        _, score = run_and_evaluate_IF('single-IF-' + feature_names[i], mvsa_single_features_split[i], verbose=0)
    else:
        _, score = run_and_evaluate_IF('single-IF-' + feature_names[i], mvsa_single_features_split[i], verbose=0, lstm=False)
    scores.append(score)
df0_single_scores_IF = pd.DataFrame(scores, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)

print('\nMVSA-Multiple with Intermediate Fusion')
scores = []
for i in range(len(feature_names)):
    print('MVSA-Multiple:', feature_names[i])
    if 'lstm' in feature_names[i]:
        _, score = run_and_evaluate_IF('multiple-IF-' + feature_names[i], mvsa_multiple_features_split[i], verbose=0)
    else:
        _, score = run_and_evaluate_IF('multiple-IF-' + feature_names[i], mvsa_multiple_features_split[i], verbose=0, lstm=False)
    scores.append(score)
df0_multiple_scores_IF = pd.DataFrame(scores, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)

MVSA-Single with Intermediate Fusion
MVSA-Single: resnet101-bert


2022-07-18 14:35:45.133377: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


MVSA-Single: resnet101-bert-lstm


2022-07-18 14:35:53.274656: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


MVSA-Single: densenet201-bert-lstm
MVSA-Single: densenet201-bert-pos-lstm
MVSA-Single: densenet201-bert-ner-lstm
MVSA-Single: densenet201-bert-pos-ner-lstm

MVSA-Multiple with Intermediate Fusion
MVSA-Multiple: resnet101-bert
MVSA-Multiple: resnet101-bert-lstm
MVSA-Multiple: densenet201-bert-lstm
MVSA-Multiple: densenet201-bert-pos-lstm
MVSA-Multiple: densenet201-bert-ner-lstm
MVSA-Multiple: densenet201-bert-pos-ner-lstm


# Late Fusion

In [24]:
print('MVSA-Single with Late Fusion')
scores_text = []
scores_image = []
scores_LF = []
for i in range(len(feature_names)):
    print('MVSA-Single:', feature_names[i])
    if 'lstm' in feature_names[i]:
        score_text, score_image, score_LF = run_and_evaluate_LF('single-LF-' + feature_names[i], mvsa_single_features_split[i], verbose=0)
    else:
        score_text, score_image, score_LF = run_and_evaluate_LF('single-LF-' + feature_names[i], mvsa_single_features_split[i], verbose=0, lstm=False)
    scores_text.append(score_text)
    scores_image.append(score_image)
    scores_LF.append(score_LF)

df1_single_scores_text = pd.DataFrame(scores_text, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=textual_feature_names)
df1_single_scores_image = pd.DataFrame(scores_image, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=visual_feature_names)
df1_single_scores_LF = pd.DataFrame(scores_LF, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)

print('\nMVSA-Multiple with Late Fusion')
scores_text = []
scores_image = []
scores_LF = []
for i in range(len(feature_names)):
    print('MVSA-Multiple:', feature_names[i])
    if 'lstm' in feature_names[i]:
        score_text, score_image, score_LF = run_and_evaluate_LF('multiple-LF-' + feature_names[i], mvsa_multiple_features_split[i], verbose=0)
    else:
        score_text, score_image, score_LF = run_and_evaluate_LF('multiple-LF-' + feature_names[i], mvsa_multiple_features_split[i], verbose=0, lstm=False)
    scores_text.append(score_text)
    scores_image.append(score_image)
    scores_LF.append(score_LF)
df1_multiple_scores_text = pd.DataFrame(scores_text, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=textual_feature_names)
df1_multiple_scores_image = pd.DataFrame(scores_image, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=visual_feature_names)
df1_multiple_scores_LF = pd.DataFrame(scores_LF, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)

MVSA-Single with Late Fusion
MVSA-Single: resnet101-bert
MVSA-Single: resnet101-bert-lstm
MVSA-Single: densenet201-bert-lstm
MVSA-Single: densenet201-bert-pos-lstm
MVSA-Single: densenet201-bert-ner-lstm
MVSA-Single: densenet201-bert-pos-ner-lstm

MVSA-Multiple with Late Fusion
MVSA-Multiple: resnet101-bert
MVSA-Multiple: resnet101-bert-lstm
MVSA-Multiple: densenet201-bert-lstm
MVSA-Multiple: densenet201-bert-pos-lstm
MVSA-Multiple: densenet201-bert-ner-lstm
MVSA-Multiple: densenet201-bert-pos-ner-lstm


# Hybrid Fusion

In [25]:
print('MVSA-Single with Hybrid Fusion')
scores_text = []
scores_image = []
scores_IF = []
scores_HF = []
for i in range(len(feature_names)):
    print('MVSA-Single:', feature_names[i])
    if 'lstm' in feature_names[i]:
        score_text, score_image, score_IF, score_HF = run_and_evaluate_HF('single-HF-' + feature_names[i], mvsa_single_features_split[i], verbose=0)
    else:
        score_text, score_image, score_IF, score_HF = run_and_evaluate_HF('single-HF-' + feature_names[i], mvsa_single_features_split[i], verbose=0, lstm=False)
    scores_text.append(score_text)
    scores_image.append(score_image)
    scores_IF.append(score_IF)
    scores_HF.append(score_HF)
df2_single_scores_text = pd.DataFrame(scores_text, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=textual_feature_names)
df2_single_scores_image = pd.DataFrame(scores_image, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=visual_feature_names)
df2_single_scores_IF = pd.DataFrame(scores_IF, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)
df2_single_scores_HF = pd.DataFrame(scores_HF, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)

print('\nMVSA-Multiple with Hybrid Fusion')
scores_text = []
scores_image = []
scores_IF = []
scores_HF = []
for i in range(len(feature_names)):
    print('MVSA-Multiple:', feature_names[i])
    if 'lstm' in feature_names[i]:
        score_text, score_image, score_IF, score_HF = run_and_evaluate_HF('multiple-HF-' + feature_names[i], mvsa_multiple_features_split[i], verbose=0)
    else:
        score_text, score_image, score_IF, score_HF = run_and_evaluate_HF('multiple-HF-' + feature_names[i], mvsa_multiple_features_split[i], verbose=0, lstm=False)
    scores_text.append(score_text)
    scores_image.append(score_image)
    scores_IF.append(score_IF)
    scores_HF.append(score_HF)
df2_multiple_scores_text = pd.DataFrame(scores_text, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=textual_feature_names)
df2_multiple_scores_image = pd.DataFrame(scores_image, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=visual_feature_names)
df2_multiple_scores_IF = pd.DataFrame(scores_IF, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)
df2_multiple_scores_HF = pd.DataFrame(scores_HF, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)

MVSA-Single with Hybrid Fusion
MVSA-Single: resnet101-bert
MVSA-Single: resnet101-bert-lstm
MVSA-Single: densenet201-bert-lstm
MVSA-Single: densenet201-bert-pos-lstm
MVSA-Single: densenet201-bert-ner-lstm
MVSA-Single: densenet201-bert-pos-ner-lstm

MVSA-Multiple with Hybrid Fusion
MVSA-Multiple: resnet101-bert
MVSA-Multiple: resnet101-bert-lstm
MVSA-Multiple: densenet201-bert-lstm
MVSA-Multiple: densenet201-bert-pos-lstm
MVSA-Multiple: densenet201-bert-ner-lstm
MVSA-Multiple: densenet201-bert-pos-ner-lstm


# Display results

In [26]:
print('Intermediate Fusion')
display_dataframes((style_dataframe(df0_single_scores_IF), style_dataframe(df0_multiple_scores_IF)), 
                   names=['MVSA-Single', 'MVSA-Multiple'])

Intermediate Fusion


Unnamed: 0_level_0,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.578714,0.282958,0.452327
resnet101-bert-lstm,0.658537,0.546403,0.660821
densenet201-bert-lstm,0.651885,0.554396,0.659153
densenet201-bert-pos-lstm,0.660754,0.577675,0.66819
densenet201-bert-ner-lstm,0.631929,0.540632,0.640241
densenet201-bert-pos-ner-lstm,0.665188,0.554806,0.656275
resnet101-bert,0.537603,0.391603,0.540861
resnet101-bert-lstm,0.645711,0.412775,0.580503
densenet201-bert-lstm,0.678026,0.481398,0.640153
densenet201-bert-pos-lstm,0.668625,0.48235,0.636878

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.578714,0.282958,0.452327
resnet101-bert-lstm,0.658537,0.546403,0.660821
densenet201-bert-lstm,0.651885,0.554396,0.659153
densenet201-bert-pos-lstm,0.660754,0.577675,0.66819
densenet201-bert-ner-lstm,0.631929,0.540632,0.640241
densenet201-bert-pos-ner-lstm,0.665188,0.554806,0.656275

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.537603,0.391603,0.540861
resnet101-bert-lstm,0.645711,0.412775,0.580503
densenet201-bert-lstm,0.678026,0.481398,0.640153
densenet201-bert-pos-lstm,0.668625,0.48235,0.636878
densenet201-bert-ner-lstm,0.670975,0.478857,0.635001
densenet201-bert-pos-ner-lstm,0.678026,0.47377,0.634052


In [27]:
EPOCHS

100

In [28]:
print('LATE FUSION \n')
print('MVSA-Single')
print(display_dataframes((style_dataframe(df1_single_scores_text), style_dataframe(df1_single_scores_image), 
                          style_dataframe(df1_single_scores_LF)), 
                         names=['Model Text', 'Model Image', 'Model LF']))
print('\nMVSA-Multiple')
print(display_dataframes((style_dataframe(df1_multiple_scores_text), style_dataframe(df1_multiple_scores_image), style_dataframe(df1_multiple_scores_LF)), 
                   names=['Model Text', 'Model Image', 'Model LF']))

LATE FUSION 

MVSA-Single


Unnamed: 0_level_0,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_2,Accuracy,F1-macro,F1-weighted
bert,0.574279,0.473881,0.585845
bert-lstm,0.589800,0.497960,0.600763
bert-lstm-2,0.589800,0.497960,0.600763
bert-pos-lstm,0.618625,0.509129,0.627721
bert-ner-lstm,0.651885,0.532070,0.649449
bert-pos-ner-lstm,0.603104,0.509213,0.613814
resnet101,0.476718,0.279234,0.410403
resnet101-2,0.476718,0.279234,0.410403
densenet201,0.638581,0.543812,0.639814
densenet201-2,0.638581,0.543812,0.639814

Unnamed: 0,Accuracy,F1-macro,F1-weighted
bert,0.574279,0.473881,0.585845
bert-lstm,0.5898,0.49796,0.600763
bert-lstm-2,0.5898,0.49796,0.600763
bert-pos-lstm,0.618625,0.509129,0.627721
bert-ner-lstm,0.651885,0.53207,0.649449
bert-pos-ner-lstm,0.603104,0.509213,0.613814

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101,0.476718,0.279234,0.410403
resnet101-2,0.476718,0.279234,0.410403
densenet201,0.638581,0.543812,0.639814
densenet201-2,0.638581,0.543812,0.639814
densenet201-3,0.638581,0.543812,0.639814
densenet201-4,0.638581,0.543812,0.639814

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.59867,0.466078,0.581165
resnet101-bert-lstm,0.609756,0.491336,0.607429
densenet201-bert-lstm,0.660754,0.556291,0.657866
densenet201-bert-pos-lstm,0.660754,0.550094,0.659843
densenet201-bert-ner-lstm,0.691796,0.569708,0.679533
densenet201-bert-pos-ner-lstm,0.682927,0.577165,0.67882


None

MVSA-Multiple


Unnamed: 0_level_0,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_2,Accuracy,F1-macro,F1-weighted
bert,0.629260,0.428222,0.587688
bert-lstm,0.657462,0.458283,0.618529
bert-lstm-2,0.657462,0.458283,0.618529
bert-pos-lstm,0.659224,0.448318,0.617495
bert-ner-lstm,0.665100,0.454829,0.622851
bert-pos-ner-lstm,0.659224,0.458649,0.621711
resnet101,0.556404,0.334855,0.515499
resnet101-2,0.556404,0.334855,0.515499
densenet201,0.640423,0.392655,0.583561
densenet201-2,0.640423,0.392655,0.583561

Unnamed: 0,Accuracy,F1-macro,F1-weighted
bert,0.62926,0.428222,0.587688
bert-lstm,0.657462,0.458283,0.618529
bert-lstm-2,0.657462,0.458283,0.618529
bert-pos-lstm,0.659224,0.448318,0.617495
bert-ner-lstm,0.6651,0.454829,0.622851
bert-pos-ner-lstm,0.659224,0.458649,0.621711

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101,0.556404,0.334855,0.515499
resnet101-2,0.556404,0.334855,0.515499
densenet201,0.640423,0.392655,0.583561
densenet201-2,0.640423,0.392655,0.583561
densenet201-3,0.640423,0.392655,0.583561
densenet201-4,0.640423,0.392655,0.583561

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.63631,0.411713,0.577299
resnet101-bert-lstm,0.655112,0.422977,0.588269
densenet201-bert-lstm,0.678026,0.448307,0.61098
densenet201-bert-pos-lstm,0.673913,0.434266,0.604073
densenet201-bert-ner-lstm,0.679788,0.449127,0.612472
densenet201-bert-pos-ner-lstm,0.67215,0.4352,0.604415


None


In [29]:
print('HYBRID FUSION \n')
print('MVSA-Single')
print(display_dataframes((style_dataframe(df2_single_scores_text), style_dataframe(df2_single_scores_image), 
                          style_dataframe(df2_single_scores_IF), style_dataframe(df2_single_scores_HF)), 
                   names=['Model Text', 'Model Image', 'Model IF', 'Model HF']))
print('\nMVSA-Multiple')
print(display_dataframes((style_dataframe(df2_multiple_scores_text), style_dataframe(df2_multiple_scores_image), 
                          style_dataframe(df2_multiple_scores_IF), style_dataframe(df2_multiple_scores_HF)), 
                   names=['Model Text', 'Model Image', 'Model IF', 'Model HF']))

HYBRID FUSION 

MVSA-Single


Unnamed: 0_level_0,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_2,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_3,Accuracy,F1-macro,F1-weighted
bert,0.574279,0.473881,0.585845
bert-lstm,0.589800,0.497960,0.600763
bert-lstm-2,0.589800,0.497960,0.600763
bert-pos-lstm,0.618625,0.509129,0.627721
bert-ner-lstm,0.651885,0.532070,0.649449
bert-pos-ner-lstm,0.603104,0.509213,0.613814
resnet101,0.476718,0.279234,0.410403
resnet101-2,0.476718,0.279234,0.410403
densenet201,0.638581,0.543812,0.639814
densenet201-2,0.638581,0.543812,0.639814

Unnamed: 0,Accuracy,F1-macro,F1-weighted
bert,0.574279,0.473881,0.585845
bert-lstm,0.5898,0.49796,0.600763
bert-lstm-2,0.5898,0.49796,0.600763
bert-pos-lstm,0.618625,0.509129,0.627721
bert-ner-lstm,0.651885,0.53207,0.649449
bert-pos-ner-lstm,0.603104,0.509213,0.613814

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101,0.476718,0.279234,0.410403
resnet101-2,0.476718,0.279234,0.410403
densenet201,0.638581,0.543812,0.639814
densenet201-2,0.638581,0.543812,0.639814
densenet201-3,0.638581,0.543812,0.639814
densenet201-4,0.638581,0.543812,0.639814

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.578714,0.282958,0.452327
resnet101-bert-lstm,0.658537,0.546403,0.660821
densenet201-bert-lstm,0.651885,0.554396,0.659153
densenet201-bert-pos-lstm,0.660754,0.577675,0.66819
densenet201-bert-ner-lstm,0.631929,0.540632,0.640241
densenet201-bert-pos-ner-lstm,0.665188,0.554806,0.656275

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.607539,0.431153,0.549056
resnet101-bert-lstm,0.631929,0.516667,0.627257
densenet201-bert-lstm,0.667406,0.569748,0.668975
densenet201-bert-pos-lstm,0.654102,0.55385,0.654572
densenet201-bert-ner-lstm,0.665188,0.549695,0.662165
densenet201-bert-pos-ner-lstm,0.667406,0.566051,0.660495


None

MVSA-Multiple


Unnamed: 0_level_0,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_2,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_3,Accuracy,F1-macro,F1-weighted
bert,0.629260,0.428222,0.587688
bert-lstm,0.657462,0.458283,0.618529
bert-lstm-2,0.657462,0.458283,0.618529
bert-pos-lstm,0.659224,0.448318,0.617495
bert-ner-lstm,0.665100,0.454829,0.622851
bert-pos-ner-lstm,0.659224,0.458649,0.621711
resnet101,0.556404,0.334855,0.515499
resnet101-2,0.556404,0.334855,0.515499
densenet201,0.640423,0.392655,0.583561
densenet201-2,0.640423,0.392655,0.583561

Unnamed: 0,Accuracy,F1-macro,F1-weighted
bert,0.62926,0.428222,0.587688
bert-lstm,0.657462,0.458283,0.618529
bert-lstm-2,0.657462,0.458283,0.618529
bert-pos-lstm,0.659224,0.448318,0.617495
bert-ner-lstm,0.6651,0.454829,0.622851
bert-pos-ner-lstm,0.659224,0.458649,0.621711

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101,0.556404,0.334855,0.515499
resnet101-2,0.556404,0.334855,0.515499
densenet201,0.640423,0.392655,0.583561
densenet201-2,0.640423,0.392655,0.583561
densenet201-3,0.640423,0.392655,0.583561
densenet201-4,0.640423,0.392655,0.583561

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.537603,0.391603,0.540861
resnet101-bert-lstm,0.645711,0.412775,0.580503
densenet201-bert-lstm,0.678026,0.481398,0.640153
densenet201-bert-pos-lstm,0.668625,0.48235,0.636878
densenet201-bert-ner-lstm,0.670975,0.478857,0.635001
densenet201-bert-pos-ner-lstm,0.678026,0.47377,0.634052

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.650411,0.4269,0.584193
resnet101-bert-lstm,0.653349,0.422017,0.587285
densenet201-bert-lstm,0.675676,0.453805,0.615898
densenet201-bert-pos-lstm,0.670975,0.447364,0.611442
densenet201-bert-ner-lstm,0.676263,0.450318,0.616198
densenet201-bert-pos-ner-lstm,0.674501,0.44458,0.612284


None


In [30]:
# get max values of late and intermediate fusion
df_single_higher_fusion = pd.DataFrame(np.where(df1_single_scores_LF.gt(df0_single_scores_IF.values), 
                                                df1_single_scores_LF.values, df0_single_scores_IF.values),
                                       columns=['Accuracy','F1-macro','F1-weighted'], index=feature_names)

df_multiple_higher_fusion = pd.DataFrame(np.where(df1_multiple_scores_LF.gt(df0_multiple_scores_IF.values),
                                                  df1_multiple_scores_LF.values, df0_multiple_scores_IF.values),
                                         columns=['Accuracy','F1-macro','F1-weighted'], index=feature_names)

df_single_subtract = df2_single_scores_HF.subtract(df_single_higher_fusion)
df_multiple_subtract = df2_multiple_scores_HF.subtract(df_multiple_higher_fusion)

In [31]:
print('Compare Hybrid Fusion with other Fusion Models (>scores)')
display_dataframes((df_single_subtract.style.applymap(highlight_neg), df_multiple_subtract.style.applymap(highlight_neg)), 
                   names=['MVSA-Single', 'MVSA-Multiple'])

Compare Hybrid Fusion with other Fusion Models (>scores)


Unnamed: 0_level_0,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.008869,-0.034925,-0.032109
resnet101-bert-lstm,-0.026608,-0.029736,-0.033564
densenet201-bert-lstm,0.006652,0.013457,0.009822
densenet201-bert-pos-lstm,-0.006652,-0.023825,-0.013618
densenet201-bert-ner-lstm,-0.026608,-0.020013,-0.017368
densenet201-bert-pos-ner-lstm,-0.015521,-0.011114,-0.018326
resnet101-bert,0.014101,0.015188,0.006894
resnet101-bert-lstm,-0.001763,-0.00096,-0.000984
densenet201-bert-lstm,-0.002350,-0.027593,-0.024255
densenet201-bert-pos-lstm,-0.002938,-0.034986,-0.025436

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.008869,-0.034925,-0.032109
resnet101-bert-lstm,-0.026608,-0.029736,-0.033564
densenet201-bert-lstm,0.006652,0.013457,0.009822
densenet201-bert-pos-lstm,-0.006652,-0.023825,-0.013618
densenet201-bert-ner-lstm,-0.026608,-0.020013,-0.017368
densenet201-bert-pos-ner-lstm,-0.015521,-0.011114,-0.018326

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.014101,0.015188,0.006894
resnet101-bert-lstm,-0.001763,-0.00096,-0.000984
densenet201-bert-lstm,-0.00235,-0.027593,-0.024255
densenet201-bert-pos-lstm,-0.002938,-0.034986,-0.025436
densenet201-bert-ner-lstm,-0.003525,-0.028539,-0.018802
densenet201-bert-pos-ner-lstm,-0.003525,-0.02919,-0.021768


# Drafts

In [32]:
# def get_preprocess_input(feature_names, mvsa_single_features, mvsa_multiple_features):

#     mvsa_single_features_split = []
#     mvsa_multiple_features_split = []
#     splited_single = dict()
#     splited_multiplw = dict()

#     for i in range(len(feature_names)):
#         visual = feature_names[i].split('-')[0]
#         textual = ' '.join(feature_names[i].split('-')[1:]).rstrip()
#         if len(textual.split()) > 1:
#             textual = textual.rstrip('lstm').rstrip()
            
#         if textual in splited_single.keys()

#         single_splits = preprocess_inputs(mvsa_single_features[i][0], mvsa_single_features[i][1], mvsa_single_multimodal_labels)
#         multiple_splits = preprocess_inputs(mvsa_multiple_features[i][0], mvsa_multiple_features[i][1], mvsa_multiple_multimodal_labels)
        
#         splited_single[textual] = single_splits['texts']
#         splited_single[visual] = single_splits['images']
#         splited_single['labels'] = single_splits['labels']
        
#         splited_multiple[textual] = multiple_splits['texts']
#         splited_multiple[visual] = multiple_splits['images']
#         splited_multiple['labels'] = multiple_splits['labels']
        
#         mvsa_single_features_split.append(single_splits)
#         mvsa_multiple_features_split.append(multiple_splits)
#     return mvsa_single_features_split, mvsa_multiple_features_split

In [33]:
# mvsa_single_features_split = []
# mvsa_multiple_features_split = []

# for x in mvsa_single_features:
#     splits = preprocess_inputs(x[0], x[1], mvsa_single_multimodal_labels)
#     mvsa_single_features_split.append(splits)

# for x in mvsa_multiple_features:
#     splits = preprocess_inputs(x[0], x[1], mvsa_multiple_multimodal_labels)
#     mvsa_multiple_features_split.append(splits)

In [34]:
# import shutil
# def remove_folder(path):
#     # check if folder exists
#     if os.path.exists(path):
#          # remove if exists
#          shutil.rmtree(path)
#     else:
#          # throw your exception to handle this special scenario
#          raise XXError("your exception") 
# remove_folder("./model_checkpoint")
# remove_folder("./model_history")