<a href="https://www.kaggle.com/code/vincemarcs/mvsa-fusion-models-ml?scriptVersionId=101392502" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
!pip install keras-self-attention

Collecting keras-self-attention
  Downloading keras-self-attention-0.51.0.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25l- done
Building wheels for collected packages: keras-self-attention
  Building wheel for keras-self-attention (setup.py) ... [?25l- \ done
[?25h  Created wheel for keras-self-attention: filename=keras_self_attention-0.51.0-py3-none-any.whl size=18912 sha256=4bf7e7b26d09c7c3463ce30371f39a5596e46d3ec6b96d094a0048b44e3a34c3
  Stored in directory: /root/.cache/pip/wheels/95/b1/a8/5ee00cc137940b2f6fa198212e8f45d813d0e0d9c3a04035a3
Successfully built keras-self-attention
Installing collected packages: keras-self-attention
Successfully installed keras-self-attention-0.51.0
[0m

In [2]:
SEED = 61

import os
import re
import gc
import h5py
import torch
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import tensorflow_addons as tfa
import matplotlib.pyplot as plt

from tqdm import tqdm
from nltk import tokenize
from IPython.display import display_html
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import BorderlineSMOTE
from keras_self_attention import SeqSelfAttention
from transformers import BertTokenizer, BertForMaskedLM, BertModel
from tensorflow.python.keras.layers import Layer, InputSpec, Lambda

from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA
from sklearn.model_selection import RepeatedKFold, KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay, classification_report

from keras import backend as K
from keras import initializers,regularizers,constraints
from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical
from keras.layers import Reshape, Input, Embedding, Flatten, Dense, Dropout, BatchNormalization, Activation #, merge
from keras.layers import TimeDistributed, LSTM, GRU, Bidirectional, Convolution1D, MaxPooling1D, MaxPooling2D, GlobalMaxPooling1D
from keras.layers.core import RepeatVector #, Reshape
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.models import Sequential, Model, load_model

def reset_seeds():
    """Re-apply the notebook-wide ``SEED`` to every random source used here.

    Seeds, in turn, the ``PYTHONHASHSEED`` environment variable, Python's
    ``random`` module, NumPy's global generator and TensorFlow's global
    generator.
    """
    seed_value = SEED
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up,
    # so setting it here presumably only affects child processes - confirm.
    os.environ["PYTHONHASHSEED"] = str(seed_value)
    python_random.seed(seed_value)
    np.random.seed(seed_value)
    tf.random.set_seed(seed_value)

# from tensorflow.keras import Model
# from attention import Attention_input1, Attention_input2
# from keras.optimizers import SGD, RMSprop, Adagrad

In [3]:
def read_hdf5(path):
    """Load every top-level dataset of an HDF5 file into memory.

    Args:
        path: Path of the HDF5 file to read.

    Returns:
        A list of ``(name, array)`` tuples, one per top-level key, in the
        order reported by the file. Datasets with ``object`` dtype are
        converted to arrays of decoded UTF-8 strings.
    """
    loaded_data = []

    # The context manager closes the file even when a read fails; the file
    # handle used to stay open for the lifetime of the kernel.
    with h5py.File(path, 'r') as read_file:
        for name in list(read_file.keys()):
            # `[:]` copies the dataset into memory, so the data stays valid
            # after the file is closed.
            dataset = read_file[name][:]
            if dataset.dtype == np.dtype('object'):
                # Object datasets are expected to hold byte strings; entries
                # that are already `str` are kept as they are instead of
                # crashing on a missing `.decode`.
                dataset = np.array([x.decode('UTF-8') if isinstance(x, bytes) else x
                                    for x in dataset])
            loaded_data.append((name, dataset))

    return loaded_data

def loadz(path):
    """Return the array stored under the default key ``arr_0`` of an ``.npz`` archive.

    Args:
        path: Path of the ``.npz`` file (as written by ``np.savez`` with a
            single positional array).

    Returns:
        The array saved as ``arr_0``, fully loaded into memory.
    """
    # `np.load` on an .npz returns a lazy archive holding an open file handle;
    # the `with` block releases it once the array has been read.
    with np.load(path) as archive:
        return archive['arr_0']

In [4]:
def load_labels(path):
    """Read the three label arrays stored in an MVSA label file.

    Args:
        path: Path of the HDF5 label file, read through ``read_hdf5``.

    Returns:
        A tuple ``(multimodal_labels, text_labels, image_labels)`` taken from
        the datasets named ``'multimodal-labels'``, ``'text-labels'`` and
        ``'image-labels'``.

    Raises:
        KeyError: If any of the three datasets is absent from the file
            (previously this surfaced as an ``UnboundLocalError``).
    """
    datasets = dict(read_hdf5(path))
    required = ('multimodal-labels', 'text-labels', 'image-labels')

    missing = [key for key in required if key not in datasets]
    if missing:
        raise KeyError('Missing label dataset(s) {} in {}'.format(missing, path))

    return tuple(datasets[key] for key in required)

def merge_mvsa(mvsa_single, mvsa_multiple):
    """Stack the MVSA-Single array on top of the MVSA-Multiple array (axis 0)."""
    parts = [mvsa_single, mvsa_multiple]
    return np.concatenate(parts, axis=0)

def load_mvsa_feature(feature_name, merge=False):
    """Load one pre-extracted feature set for both MVSA datasets.

    Args:
        feature_name: Name of the feature; used both as the sub-folder of
            ``../input/mvsa-features/`` and inside the ``.npz`` file names.
        merge: When equal to ``True``, return a single array with the
            MVSA-Single rows followed by the MVSA-Multiple rows.

    Returns:
        ``(single, multiple)`` arrays, or the merged array when ``merge`` is set.
    """
    feature_dir = os.path.join('../input/mvsa-features/', feature_name)
    single_path = os.path.join(feature_dir, 'mvsa-single-{}.npz'.format(feature_name))
    multiple_path = os.path.join(feature_dir, 'mvsa-multiple-{}.npz'.format(feature_name))

    single_array = loadz(single_path)
    multiple_array = loadz(multiple_path)

    if not (merge == True):
        return single_array, multiple_array
    return merge_mvsa(single_array, multiple_array)

In [5]:
def get_features(feature_names):
    """Load the (textual, visual) feature pair for every requested combination.

    Each name is split on ``'-'``: the first part is the visual feature and
    the second the textual feature (``'bert'`` is loaded as ``'bert-base'``).
    When the name contains ``'pos'`` and/or ``'ner'``, the matching
    ``'pos-tfidf'`` / ``'ner-tfidf'`` features are concatenated to the textual
    features along axis 1, POS first and NER second.

    Args:
        feature_names: Iterable of combination names such as
            ``'densenet201-bert-pos-lstm'``.

    Returns:
        ``(mvsa_single_features, mvsa_multiple_features)``: two lists holding
        one ``[textual, visual]`` pair per name.
    """
    single_pairs, multiple_pairs = [], []
    # Substring tag looked up in the full name -> extra feature set to append.
    optional_extras = (('pos', 'pos-tfidf'), ('ner', 'ner-tfidf'))

    for name in feature_names:
        tokens = name.split('-')
        textual_key = tokens[1]
        visual_key = tokens[0]
        if textual_key == 'bert':
            textual_key = 'bert-base'

        textual_features = load_mvsa_feature(textual_key)
        visual_features = load_mvsa_feature(visual_key)

        extra_keys = [key for tag, key in optional_extras if tag in name]
        if extra_keys:
            extras = [load_mvsa_feature(key) for key in extra_keys]
            # Index 0 is the MVSA-Single array, index 1 the MVSA-Multiple array.
            textual_features = [
                np.concatenate((textual_features[subset], *(extra[subset] for extra in extras)), axis=1)
                for subset in (0, 1)
            ]

        single_pairs.append([textual_features[0], visual_features[0]])
        multiple_pairs.append([textual_features[1], visual_features[1]])

    return single_pairs, multiple_pairs

In [6]:
# e.g. validation_split=0.1 -----> 8:1:1 ratio of train, val, test
def split_data(data, validation_split):
    """Split ``data`` into contiguous train / validation / test parts.

    The validation and test parts each hold ``int(validation_split * n)``
    rows taken from the end of ``data`` (validation first, then test); the
    train part is everything before them.

    Args:
        data: Array-like with a ``shape`` attribute, sliced along axis 0.
        validation_split: Fraction of the rows used for validation and,
            separately, for test.

    Returns:
        ``(data_train, data_val, data_test)``.
    """
    num_samples = data.shape[0]
    num_val = int(validation_split * num_samples)

    # Use non-negative boundaries. With negative indices a zero-sized split
    # misbehaves: `data[:-0]` is empty and `data[-0:]` is the whole array, so
    # a tiny dataset ended up with no training rows and everything in test.
    val_start = max(num_samples - 2 * num_val, 0)
    test_start = max(num_samples - num_val, 0)

    data_train = data[:val_start]
    data_val = data[val_start:test_start]
    data_test = data[test_start:]
    return data_train, data_val, data_test

In [7]:
def shuffle_mvsa(mvsa_features, labels, indices):
    """Reorder every feature pair and the labels with the same index array.

    Args:
        mvsa_features: List of ``[textual, visual]`` array pairs.
        labels: Label array aligned with the feature rows.
        indices: Index array applied to each array (e.g. a fixed permutation).

    Returns:
        ``(shuffled_features, shuffled_labels)`` where ``shuffled_features``
        is a new list of ``[textual, visual]`` pairs.
    """
    reordered = [[pair[0][indices], pair[1][indices]] for pair in mvsa_features]
    return reordered, labels[indices]

def preprocess_inputs(X1, X2, y):
    """One-hot encode the labels and split both modalities into train/val/test.

    The global label encoder ``le`` is re-fitted on ``y`` by this call, and
    the global ``VALIDATION_SPLIT`` fraction drives ``split_data``.

    Args:
        X1: Textual feature array.
        X2: Visual feature array.
        y: Label array aligned with ``X1`` and ``X2``.

    Returns:
        A dict with keys ``'texts'``, ``'images'`` and ``'labels'``, each
        mapping to a ``[train, val, test]`` list.
    """
    one_hot_labels = to_categorical(np.asarray(le.fit_transform(y)))

    text_parts = split_data(X1, VALIDATION_SPLIT)
    image_parts = split_data(X2, VALIDATION_SPLIT)
    label_parts = split_data(one_hot_labels, VALIDATION_SPLIT)

    # Oversampling experiment, currently disabled:
# #     oversample = BorderlineSMOTE(sampling_strategy='minority', random_state=SEED, kind='borderline-2')
#     oversample = SMOTE(sampling_strategy='minority', random_state=SEED)
#     X1_train, _ = oversample.fit_resample(X1_train, y_train)
#     X2_train, y_train = oversample.fit_resample(X2_train, y_train)

    return {'texts': list(text_parts), 'images': list(image_parts), 'labels': list(label_parts)}

# def get_preprocess_input(feature_names, mvsa_features, labels):
#     mvsa_features_shuffled, labels_shuffled = shuffle_mvsa(mvsa_features, labels)
#     mvsa_features_split = []
#     for i in range(len(feature_names)):
#         preprocess_splits = preprocess_inputs(mvsa_features_shuffled[i][0], mvsa_features_shuffled[i][1], labels_shuffled)
#         mvsa_features_split.append(preprocess_splits)
#     return mvsa_features_split

def process_dup(names):
    """Make repeated names unique by suffixing their occurrence number.

    The first occurrence of a name is kept unchanged; the k-th occurrence
    (k > 1) becomes ``'<name>-<k>'``.

    Args:
        names: List of names, possibly containing duplicates.

    Returns:
        A new list of the same length with disambiguated names.
    """
    def occurrences_up_to(position):
        # Count how often the name at `position` appears at or before it.
        current = names[position]
        return sum(1 for earlier in names[:position + 1] if current == earlier)

    disambiguated = []
    for position, current in enumerate(names):
        seen = occurrences_up_to(position)
        disambiguated.append(current + '-' + str(seen) if seen > 1 else current)
    return disambiguated

In [8]:
# Number of output classes used by the metric objects below and by the Dense
# output layer of every create_model_* function. The "Load data" cell later
# re-assigns NUM_CLASSES from the fitted label encoder.
NUM_CLASSES = 3
# F1 metrics (macro- and weighted-averaged) passed to `compile()` by all three
# model factories in this cell.
# NOTE(review): the same two metric instances are reused by every compiled
# model - confirm that sharing their state across models is intended.
f1_macro = tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='macro', name='f1_macro')
f1_weighted = tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='weighted', name='f1_weighted')
    
def create_model_text(input_shape, lstm=True):
    """Build and compile the text-only classifier.

    Input dropout is always applied. When ``lstm`` is ``True`` the features
    are reshaped to a one-step sequence, passed through an LSTM with
    ``NUM_LSTM`` units and a second dropout. A softmax layer with
    ``NUM_CLASSES`` units produces the prediction.

    Args:
        input_shape: Shape of one textual feature sample (without batch axis).
        lstm: Whether to insert the Reshape -> LSTM -> Dropout stage.

    Returns:
        The compiled Keras model (optimizer ``OPTIMIZER``, loss ``LOSS``).
    """
    inputs = Input(shape=input_shape)
    hidden = Dropout(DROPOUT_INPUT)(inputs)

    if lstm == True:
        as_sequence = Reshape((1, -1))(hidden)
        encoded = LSTM(NUM_LSTM)(as_sequence)
        hidden = Dropout(DROPOUT_LSTM)(encoded)

    predictions = Dense(NUM_CLASSES, activation='softmax')(hidden)
    classifier = Model(inputs, predictions)
    tracked_metrics = ['accuracy', f1_macro, f1_weighted]
    classifier.compile(optimizer=OPTIMIZER, loss=LOSS, metrics=tracked_metrics)
    return classifier

def create_model_image(input_shape, lstm=True):
    """Build and compile the image-only classifier.

    Mirrors the text model but uses the image-specific hyper-parameters
    ``DROPOUT_INPUT_IMG``, ``NUM_LSTM_IMG`` and ``DROPOUT_LSTM_IMG``.

    Args:
        input_shape: Shape of one visual feature sample (without batch axis).
        lstm: Whether to insert the Reshape -> LSTM -> Dropout stage.

    Returns:
        The compiled Keras model (optimizer ``OPTIMIZER``, loss ``LOSS``).
    """
    inputs = Input(shape=input_shape)
    hidden = Dropout(DROPOUT_INPUT_IMG)(inputs)

    if lstm == True:
        as_sequence = Reshape((1, -1))(hidden)
        encoded = LSTM(NUM_LSTM_IMG)(as_sequence)
        hidden = Dropout(DROPOUT_LSTM_IMG)(encoded)

    predictions = Dense(NUM_CLASSES, activation='softmax')(hidden)
    classifier = Model(inputs, predictions)
    tracked_metrics = ['accuracy', f1_macro, f1_weighted]
    classifier.compile(optimizer=OPTIMIZER, loss=LOSS, metrics=tracked_metrics)
    return classifier

def create_model_IF(text_shape, image_shape, lstm=True):
    """Build and compile the intermediate-fusion (text + image) classifier.

    Each modality gets input dropout and, when ``lstm`` is ``True``, its own
    Reshape -> LSTM -> Dropout branch. The two branches are concatenated,
    reshaped to a one-step sequence, passed through self-attention, dropout
    and global max pooling, and classified with a softmax layer.

    Args:
        text_shape: Shape of one textual feature sample (without batch axis).
        image_shape: Shape of one visual feature sample (without batch axis).
        lstm: Whether to insert the per-modality LSTM stage.

    Returns:
        The compiled two-input Keras model (optimizer ``OPTIMIZER``,
        loss ``LOSS``).
    """
    text_input = Input(shape=text_shape)
    image_input = Input(shape=image_shape)

    text_dropout = Dropout(DROPOUT_INPUT) (text_input)
    image_dropout = Dropout(DROPOUT_INPUT_IMG) (image_input)

    if lstm == True:
        text_reshape = Reshape((1, -1)) (text_dropout)
        text_lstm = LSTM(NUM_LSTM) (text_reshape)
        text_dropout = Dropout(DROPOUT_LSTM) (text_lstm)

        image_reshape = Reshape((1, -1)) (image_dropout)
        image_lstm = LSTM(NUM_LSTM_IMG) (image_reshape)
        image_dropout = Dropout(DROPOUT_LSTM_IMG) (image_lstm)

    text_image_concat = tf.keras.layers.Concatenate(axis=1)([text_dropout, image_dropout])
    concat_reshape = Reshape((1, -1)) (text_image_concat)
    self_attention = SeqSelfAttention() (concat_reshape)
    attention_dropout = Dropout(DROPOUT_ATT) (self_attention)
    # Pool the dropout output. The pooling used to read `self_attention`
    # directly, which left the DROPOUT_ATT layer disconnected from the model
    # so that hyper-parameter had no effect.
    pooled = GlobalMaxPooling1D () (attention_dropout)
    outputs = Dense(NUM_CLASSES, activation='softmax') (pooled)
    model = Model([text_input, image_input], outputs)
    model.compile(optimizer=OPTIMIZER, loss=LOSS, metrics=['accuracy', f1_macro, f1_weighted])
    return model

2022-07-21 10:06:29.829807: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [9]:
def weighted_average(weights, probs):
    ''' Calculate the weighted sum of several probability distributions.

    weights: weights list (or array), one weight per model
    probs: list (or stacked array) of per-model probability arrays, each of
           shape (samples, classes); only the first len(weights) are used

    Returns a float32 array of shape (samples, classes) holding
    sum_m weights[m] * probs[m].
    '''
    weight_vector = np.asarray(weights, dtype='float64')
    if weight_vector.size == 0:
        # Nothing to combine: keep returning an all-zero distribution array.
        return np.zeros(np.shape(probs[0]), dtype='float32')

    # Stack exactly one distribution per weight -> (models, samples, classes).
    stacked = np.asarray([probs[i] for i in range(len(weight_vector))], dtype='float64')
    # A single contraction over the model axis replaces the nested Python
    # loops over samples and models.
    averaged = np.tensordot(weight_vector, stacked, axes=1)
    return np.asarray(averaged, dtype='float32')

def get_average_weights(*scores, inverse=False):
    ''' Turn scores into weights that sum to one.

    scores: one score per model
    inverse: (bool) when True, weights are proportional to the reciprocal of
             the normalised scores, so a smaller score (e.g. a loss) yields a
             bigger weight

    Returns a list with one weight per score.
    '''
    def normalise(values):
        # Divide every value by the sum of all values.
        total = np.sum(values)
        return [value / total for value in values]

    weights = normalise(scores)
    if inverse == True:
        reciprocals = [1 / weight for weight in weights]
        weights = normalise(reciprocals)
    return weights

In [10]:
def run_and_evaluate_IF(name, X1, X2, y, verbose=0, lstm=True):
    """Train (or reuse) the intermediate-fusion model and evaluate it on the test split.

    The checkpoint and history file names are built from ``name`` split on
    ``'-'``: part 0 followed by parts 2 and onwards. Training is skipped when
    both files already exist.

    Args:
        name: Experiment name; a name containing ``'multiple'`` selects a
            batch size of 256, otherwise ``BATCH_SIZE`` is used.
        X1: Textual feature array.
        X2: Visual feature array.
        y: Label array.
        verbose: Verbosity forwarded to Keras and to the evaluation helper.
        lstm: Forwarded to ``create_model_IF``.

    Returns:
        ``(history, scores)`` where ``history`` is the training-history dict
        and ``scores`` is the ``(accuracy, f1_macro, f1_weighted)`` tuple from
        ``evaluate_model_IF``.
    """
    data = preprocess_inputs(X1, X2, y)
    X1_train, X1_val, X1_test = data['texts']
    X2_train, X2_val, X2_test = data['images']
    y_train, y_val, y_test = data['labels']

    if 'multiple' in name:
        batch_size = 256
    else:
        batch_size = BATCH_SIZE # 128

    name_parts = name.split('-')
    model_id = name_parts[0] + '-' + '-'.join(name_parts[2:])
    checkpoint_IF_path = './model_checkpoint/{}.h5'.format(model_id)
    history_IF_path = './model_history/{}.npy'.format(model_id)

    if not(os.path.exists(checkpoint_IF_path) and os.path.exists(history_IF_path)):
        print('Create new IF model:', os.path.split(checkpoint_IF_path)[1])

        model_IF = create_model_IF(X1_train.shape[1:], X2_train.shape[1:], lstm=lstm)
        checkpoint_IF = ModelCheckpoint(checkpoint_IF_path, save_best_only=True, verbose=verbose)
        early_stopping_IF = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)

        history_IF = model_IF.fit([X1_train, X2_train], y_train, validation_data=([X1_val, X2_val], y_val),
                            epochs=EPOCHS, batch_size=batch_size, verbose=verbose,
                            callbacks=[checkpoint_IF, early_stopping_IF])
        os.makedirs(os.path.split(history_IF_path)[0], exist_ok=True)
        # `with` closes the file handle; the bare open() call used to leak it.
        with open(history_IF_path, 'wb') as history_file:
            pickle.dump(history_IF.history, history_file)

    model_IF = load_model(checkpoint_IF_path, custom_objects={'SeqSelfAttention': SeqSelfAttention})
    # The history file is a pickle written by this function; never point this
    # path at a file from an untrusted source.
    with open(history_IF_path, 'rb') as history_file:
        history_IF = pickle.load(history_file)

    # The checkpoint keeps the best model by validation loss, so the epoch
    # with the lowest `val_loss` is the one that was restored.
    best_epoch = np.argmin(history_IF['val_loss'])
    print('Model IF checkpoint loaded at epoch:', best_epoch)

    return history_IF, evaluate_model_IF(model_IF, X1_test, X2_test, y_test, verbose=verbose)

In [11]:
def run_and_evaluate_LF(name, X1, X2, y, verbose=0, lstm=True):
    """Late fusion: train (or reuse) the text and image models and combine their predictions.

    File names are built from ``name`` split on ``'-'``: the text model uses
    part 0 plus parts 3 and onwards, the image model part 0 plus part 2 (with
    a ``'-lstm'`` suffix when ``lstm`` is ``True``). A model is trained only
    when its checkpoint or history file is missing. The two test predictions
    are averaged with weights proportional to each model's validation
    accuracy at its best (lowest ``val_loss``) epoch.

    Args:
        name: Experiment name; a name containing ``'multiple'`` selects a
            batch size of 256, otherwise ``BATCH_SIZE`` is used.
        X1: Textual feature array.
        X2: Visual feature array.
        y: Label array.
        verbose: Verbosity forwarded to Keras and to the evaluation helpers.
        lstm: Forwarded to the model factories.

    Returns:
        ``(eval_text, eval_image, eval_LF)``, each an
        ``(accuracy, f1_macro, f1_weighted)`` tuple.
    """
    data = preprocess_inputs(X1, X2, y)
    X1_train, X1_val, X1_test = data['texts']
    X2_train, X2_val, X2_test = data['images']
    y_train, y_val, y_test = data['labels']

    early_stopping_text = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    early_stopping_image = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)

    if 'multiple' in name:
        batch_size = 256
    else:
        batch_size = BATCH_SIZE # 128

    name_parts = name.split('-')
    text_id = name_parts[0] + '-' + '-'.join(name_parts[3:])
    image_id = name_parts[0] + '-' + name_parts[2]
    if lstm == True:
        image_id = image_id + '-lstm'

    checkpoint_text_path = './model_checkpoint/{}.h5'.format(text_id)
    checkpoint_image_path = './model_checkpoint/{}.h5'.format(image_id)
    history_text_path = './model_history/{}.npy'.format(text_id)
    history_image_path = './model_history/{}.npy'.format(image_id)

    if not (os.path.exists(checkpoint_text_path) and os.path.exists(history_text_path)):
        print('Create new text model:', os.path.split(checkpoint_text_path)[1])

        model_text = create_model_text(X1_train.shape[1:], lstm=lstm)
        checkpoint_text = ModelCheckpoint(checkpoint_text_path, save_best_only=True, verbose=verbose)
        history_text = model_text.fit(X1_train, y_train, validation_data=(X1_val, y_val),
                                  epochs=EPOCHS, batch_size=batch_size, verbose=verbose,
                                  callbacks=[checkpoint_text, early_stopping_text])
        os.makedirs(os.path.split(history_text_path)[0], exist_ok=True)
        # `with` closes the file handle; the bare open() call used to leak it.
        with open(history_text_path, 'wb') as history_file:
            pickle.dump(history_text.history, history_file)

    if not(os.path.exists(checkpoint_image_path) and os.path.exists(history_image_path)):
        print('Create new image model:', os.path.split(checkpoint_image_path)[1])

        model_image = create_model_image(X2_train.shape[1:], lstm=lstm)
        checkpoint_image = ModelCheckpoint(checkpoint_image_path,
                                       save_best_only=True, verbose=verbose)
        history_image = model_image.fit(X2_train, y_train, validation_data=(X2_val, y_val),
                                epochs=EPOCHS, batch_size=batch_size, verbose=verbose,
                                callbacks=[checkpoint_image, early_stopping_image])
        # This branch used to rely on the text branch having created the folder.
        os.makedirs(os.path.split(history_image_path)[0], exist_ok=True)
        with open(history_image_path, 'wb') as history_file:
            pickle.dump(history_image.history, history_file)

    model_text = load_model(checkpoint_text_path)
    model_image = load_model(checkpoint_image_path)

    # History files are pickles written by this notebook; never point these
    # paths at files from an untrusted source.
    with open(history_text_path, 'rb') as history_file:
        history_text = pickle.load(history_file)
    with open(history_image_path, 'rb') as history_file:
        history_image = pickle.load(history_file)

    y_pred_text = model_text.predict(X1_test)
    y_pred_image = model_image.predict(X2_test)

    best_epoch_text = np.argmin(history_text['val_loss'])
    best_epoch_image = np.argmin(history_image['val_loss'])
    print('Model text checkpoint loaded at epoch:', best_epoch_text)
    print('Model image checkpoint loaded at epoch:', best_epoch_image)

    # Weight each model by its validation accuracy at the restored epoch.
    val_acc_text = history_text['val_accuracy'][best_epoch_text]
    val_acc_image = history_image['val_accuracy'][best_epoch_image]
    weights = get_average_weights(val_acc_text, val_acc_image)
    print('Model weights (text, image):', weights)

    y_pred = weighted_average(weights, np.asarray([y_pred_text, y_pred_image], dtype='float32'))

    eval_text = evaluate_model_uni(model_text, X1_test, y_test, verbose=verbose)
    eval_image = evaluate_model_uni(model_image, X2_test, y_test, verbose=verbose)
    eval_LF = evaluate_model_LF(y_test, y_pred, verbose=verbose)
    return eval_text, eval_image, eval_LF

In [12]:
def run_and_evaluate_HF(name, X1, X2, y, verbose=0, lstm=True):
    """Hybrid fusion: combine the text, image and intermediate-fusion model predictions.

    File names are built from ``name`` split on ``'-'``: the text model uses
    part 0 plus parts 3 and onwards, the image model part 0 plus part 2 (with
    a ``'-lstm'`` suffix when ``lstm`` is ``True``) and the IF model part 0
    plus parts 2 and onwards. A model is trained only when its checkpoint or
    history file is missing. The three test predictions are averaged with
    weights proportional to each model's validation accuracy at its best
    (lowest ``val_loss``) epoch.

    Args:
        name: Experiment name; a name containing ``'multiple'`` selects a
            batch size of 256, otherwise ``BATCH_SIZE`` is used.
        X1: Textual feature array.
        X2: Visual feature array.
        y: Label array.
        verbose: Verbosity forwarded to Keras and to the evaluation helpers.
        lstm: Forwarded to the model factories.

    Returns:
        ``(eval_text, eval_image, eval_IF, eval_HF)``, each an
        ``(accuracy, f1_macro, f1_weighted)`` tuple.
    """
    data = preprocess_inputs(X1, X2, y)
    X1_train, X1_val, X1_test = data['texts']
    X2_train, X2_val, X2_test = data['images']
    y_train, y_val, y_test = data['labels']

    early_stopping_text = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    early_stopping_image = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    early_stopping_IF = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)

    if 'multiple' in name:
        batch_size = 256
    else:
        batch_size = BATCH_SIZE # 128

    name_parts = name.split('-')
    text_id = name_parts[0] + '-' + '-'.join(name_parts[3:])
    image_id = name_parts[0] + '-' + name_parts[2]
    fusion_id = name_parts[0] + '-' + '-'.join(name_parts[2:])
    if lstm == True:
        image_id = image_id + '-lstm'

    checkpoint_text_path = './model_checkpoint/{}.h5'.format(text_id)
    checkpoint_image_path = './model_checkpoint/{}.h5'.format(image_id)
    checkpoint_IF_path = './model_checkpoint/{}.h5'.format(fusion_id)

    history_text_path = './model_history/{}.npy'.format(text_id)
    history_image_path = './model_history/{}.npy'.format(image_id)
    history_IF_path = './model_history/{}.npy'.format(fusion_id)

    if not(os.path.exists(checkpoint_text_path) and os.path.exists(history_text_path)):
        print('Create new text model:', os.path.split(checkpoint_text_path)[1])

        model_text = create_model_text(X1_train.shape[1:], lstm=lstm)
        checkpoint_text = ModelCheckpoint(checkpoint_text_path, save_best_only=True, verbose=verbose)
        history_text = model_text.fit(X1_train, y_train, validation_data=(X1_val, y_val),
                                  epochs=EPOCHS, batch_size=batch_size, verbose=verbose,
                                  callbacks=[checkpoint_text, early_stopping_text])
        os.makedirs(os.path.split(history_text_path)[0], exist_ok=True)
        # `with` closes the file handle; the bare open() call used to leak it.
        with open(history_text_path, 'wb') as history_file:
            pickle.dump(history_text.history, history_file)

    if not(os.path.exists(checkpoint_image_path) and os.path.exists(history_image_path)):
        print('Create new image model:', os.path.split(checkpoint_image_path)[1])

        model_image = create_model_image(X2_train.shape[1:], lstm=lstm)
        checkpoint_image = ModelCheckpoint(checkpoint_image_path, save_best_only=True, verbose=verbose)
        history_image = model_image.fit(X2_train, y_train, validation_data=(X2_val, y_val),
                                epochs=EPOCHS, batch_size=batch_size, verbose=verbose,
                                callbacks=[checkpoint_image, early_stopping_image])
        # This branch used to rely on the text branch having created the folder.
        os.makedirs(os.path.split(history_image_path)[0], exist_ok=True)
        with open(history_image_path, 'wb') as history_file:
            pickle.dump(history_image.history, history_file)

    if not(os.path.exists(checkpoint_IF_path) and os.path.exists(history_IF_path)):
        print('Create new IF model:', os.path.split(checkpoint_IF_path)[1])

        model_IF = create_model_IF(X1_train.shape[1:], X2_train.shape[1:], lstm=lstm)
        checkpoint_IF = ModelCheckpoint(checkpoint_IF_path, save_best_only=True, verbose=verbose)
        history_IF = model_IF.fit([X1_train, X2_train], y_train, validation_data=([X1_val, X2_val], y_val),
                            epochs=EPOCHS, batch_size=batch_size, verbose=verbose,
                            callbacks=[checkpoint_IF, early_stopping_IF])
        os.makedirs(os.path.split(history_IF_path)[0], exist_ok=True)
        with open(history_IF_path, 'wb') as history_file:
            pickle.dump(history_IF.history, history_file)

    model_text = load_model(checkpoint_text_path)
    model_image = load_model(checkpoint_image_path)
    model_IF = load_model(checkpoint_IF_path, custom_objects={'SeqSelfAttention': SeqSelfAttention})

    # History files are pickles written by this notebook; never point these
    # paths at files from an untrusted source.
    with open(history_image_path, 'rb') as history_file:
        history_image = pickle.load(history_file)
    with open(history_text_path, 'rb') as history_file:
        history_text = pickle.load(history_file)
    with open(history_IF_path, 'rb') as history_file:
        history_IF = pickle.load(history_file)

    y_pred_text = model_text.predict(X1_test)
    y_pred_image = model_image.predict(X2_test)
    y_pred_IF = model_IF.predict([X1_test, X2_test])

    best_epoch_text = np.argmin(history_text['val_loss'])
    best_epoch_image = np.argmin(history_image['val_loss'])
    best_epoch_IF = np.argmin(history_IF['val_loss'])
    print('Model text checkpoint loaded at epoch:', best_epoch_text)
    print('Model image checkpoint loaded at epoch:', best_epoch_image)
    print('Model IF checkpoint loaded at epoch:', best_epoch_IF)

    # Weight each model by its validation accuracy at the restored epoch.
    val_acc_text = history_text['val_accuracy'][best_epoch_text]
    val_acc_image = history_image['val_accuracy'][best_epoch_image]
    val_acc_IF = history_IF['val_accuracy'][best_epoch_IF]

    weights = get_average_weights(val_acc_text, val_acc_image, val_acc_IF)
    print('Model weights (text, image, IF):', weights)
    y_pred = weighted_average(weights, np.asarray([y_pred_text, y_pred_image, y_pred_IF], dtype='float32'))

    eval_text = evaluate_model_uni(model_text, X1_test, y_test, verbose=verbose)
    eval_image = evaluate_model_uni(model_image, X2_test, y_test, verbose=verbose)
    eval_IF = evaluate_model_IF(model_IF, X1_test, X2_test, y_test, verbose=verbose)
    eval_HF = evaluate_model_LF(y_test, y_pred, verbose=verbose)
    return eval_text, eval_image, eval_IF, eval_HF

In [13]:
def evaluate_model_IF(model, X_texts, X_images, y_test, verbose=1):
    """Evaluate a two-input (text + image) model on the test data.

    With ``verbose == 1`` the loss and metrics are printed and a confusion
    matrix of the decoded labels is plotted.

    Args:
        model: Compiled model whose ``evaluate`` returns loss, accuracy,
            macro F1 and weighted F1, in that order.
        X_texts: Textual test features.
        X_images: Visual test features.
        y_test: One-hot encoded test labels.
        verbose: Verbosity forwarded to ``model.evaluate``.

    Returns:
        ``(accuracy, f1_macro, f1_weighted)``.
    """
    test_loss, test_acc, test_f1_macro, test_f1_weighted = model.evaluate(
        [X_texts, X_images], y_test, verbose=verbose)
    scores = (test_acc, test_f1_macro, test_f1_weighted)

    if verbose != 1:
        return scores

    print('Loss:', test_loss)
    print('Accuracy:', test_acc)
    print('Macro F1-score:', test_f1_macro)
    print('Weighted F1-score:', test_f1_weighted)

    predictions = model.predict([X_texts, X_images])
    class_names = list(le.classes_)
    true_names = le.inverse_transform(y_test.argmax(axis=1))
    predicted_names = le.inverse_transform(predictions.argmax(axis=1))
    matrix = confusion_matrix(true_names, predicted_names, labels=class_names)
    ConfusionMatrixDisplay(confusion_matrix=matrix, display_labels=list(le.classes_)).plot()
    plt.show()

    return scores

def evaluate_model_LF(y_true, y_pred, verbose=0):
    """Score fused predictions against the true labels.

    Both inputs are reduced with ``argmax(axis=1)`` and mapped back to label
    names through the global encoder ``le``. With ``verbose == 1`` a
    classification report is printed and a confusion matrix is plotted.

    Args:
        y_true: One-hot encoded true labels.
        y_pred: Per-class scores for each sample.
        verbose: Set to 1 for the report and the plot.

    Returns:
        ``(accuracy, f1_macro, f1_weighted)``.
    """
    predicted_names = le.inverse_transform(y_pred.argmax(axis=1))
    true_names = le.inverse_transform(y_true.argmax(axis=1))

    scores = (
        accuracy_score(true_names, predicted_names),
        f1_score(true_names, predicted_names, average='macro'),
        f1_score(true_names, predicted_names, average='weighted'),
    )

    if verbose == 1:
        print(classification_report(true_names, predicted_names))
        class_names = list(le.classes_)
        matrix = confusion_matrix(true_names, predicted_names, labels=class_names)
        ConfusionMatrixDisplay(confusion_matrix=matrix, display_labels=list(le.classes_)).plot()
        plt.show()

    return scores

def evaluate_model_uni(model, X_test, y_test, verbose=1):
    """Evaluate a single-input model on the test data.

    Args:
        model: Compiled model whose ``evaluate`` returns loss, accuracy,
            macro F1 and weighted F1, in that order.
        X_test: Test features.
        y_test: One-hot encoded test labels.
        verbose: Verbosity forwarded to ``model.evaluate``; with 1 the loss
            and metrics are also printed.

    Returns:
        ``(accuracy, f1_macro, f1_weighted)``.
    """
    test_loss, test_acc, test_f1_macro, test_f1_weighted = model.evaluate(X_test, y_test, verbose=verbose)

    if verbose == 1:
        report_lines = (
            ('Loss:', test_loss),
            ('Accuracy:', test_acc),
            ('Macro F1-score:', test_f1_macro),
            ('Weighted F1-score:', test_f1_weighted),
        )
        for caption, value in report_lines:
            print(caption, value)

    return test_acc, test_f1_macro, test_f1_weighted

In [14]:
def plot_metrics(history):
    """Plot train/validation curves for loss, accuracy and both F1 scores.

    Args:
        history: Either a Keras ``History`` object or the plain history dict
            (as stored by the ``run_and_evaluate_*`` functions). It must hold
            the keys ``loss``, ``accuracy``, ``f1_macro``, ``f1_weighted`` and
            their ``val_`` counterparts.
    """
    # A Keras History keeps the per-epoch values in `.history`; a plain dict
    # (e.g. one loaded back from disk) is used as it is.
    metrics = getattr(history, 'history', history)

    # (history key, panel title, y-axis label)
    panels = [
        ('loss', 'LOSS', 'loss'),
        ('accuracy', 'ACCURACY', 'accuracy'),
        ('f1_macro', 'Macro F1-SCORE', 'f1-macro'),
        ('f1_weighted', 'Weighted F1-SCORE', 'f1-weighted'),
    ]

    fig = plt.figure(figsize=(20, 5))
    for position, (key, title, ylabel) in enumerate(panels, start=1):
        fig.add_subplot(1, len(panels), position)
        plt.plot(metrics[key])
        plt.plot(metrics['val_' + key])
        plt.title(title)
        plt.ylabel(ylabel)
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='best')

    plt.show()

In [15]:
def style_dataframe(dataframe):
    """Return a Styler marking the best and worst score of each metric column.

    In the ``Accuracy``, ``F1-macro`` and ``F1-weighted`` columns the
    column-wise maximum is coloured lawngreen and the minimum tomato.
    """
    metric_columns = ['Accuracy', 'F1-macro', 'F1-weighted']
    styler = dataframe.style
    styler = styler.highlight_max(subset=list(metric_columns), props='color:lawngreen', axis=0)
    styler = styler.highlight_min(subset=list(metric_columns), props='color:tomato', axis=0)
    return styler

def highlight_neg(cell):
    """Return the CSS colour for a table cell: tomato for negative numbers, lawngreen otherwise.

    Cells of type ``str`` are never compared with zero and always get lawngreen.
    """
    is_negative_number = type(cell) != str and cell < 0
    return 'color: tomato' if is_negative_number else 'color: lawngreen'

def display_dataframes(dfs, names=[], index=False):
    """Render several tables side by side in a single notebook output.

    Each item of ``dfs`` is converted to HTML with its own ``to_html()`` and
    placed in one cell of an outer one-row table; an optional caption row is
    put above it.

    Args:
        dfs: iterable of objects exposing ``to_html()``. A ``pd.Series`` is
            wrapped in a ``pd.DataFrame`` first; anything else is used as-is.
        names: optional captions, one per table, shown centred above the
            tables. The default list is never mutated.
        index: unused; kept so existing callers that pass it keep working.

    Returns:
        None. The HTML is emitted through ``display_html`` as a side effect,
        so callers should not wrap this call in ``print()``.
    """
    def to_df(x):
        return pd.DataFrame(x) if isinstance(x, pd.Series) else x

    rows = []
    if names:
        caption_cells = ''.join(f'<td style="text-align:center">{name}</td>' for name in names)
        rows.append('<tr>' + caption_cells + '</tr>')
    table_cells = ''.join(f'<td style="vertical-align:top"> {to_df(df).to_html()}</td>'
                          for df in dfs)
    rows.append('<tr>' + table_cells + '</tr>')
    html_str = '<table>' + ''.join(rows) + '</table>'

    # Style only the opening <table ...> tags. Replacing the bare word
    # 'table' also rewrote the closing tags into
    # '</table style="display:inline">' and corrupted any caption, index
    # label or cell text that happened to contain "table".
    html_str = html_str.replace('<table', '<table style="display:inline"')
    display_html(html_str, raw=True)

# Load data

In [16]:
# load_labels returns three values per dataset; only the first one
# (the multimodal labels) is used in this notebook.
mvsa_single_multimodal_labels, _, _ = load_labels(
    '../input/mvsa-features/labels/mvsa-single-labels.hdf5'
)
mvsa_multiple_multimodal_labels, _, _ = load_labels(
    '../input/mvsa-features/labels/mvsa-multiple-labels.hdf5'
)

# The encoder is fitted on the MVSA-Multiple labels and shared afterwards.
le = LabelEncoder().fit(mvsa_multiple_multimodal_labels)
NUM_CLASSES = len(le.classes_) # = 3

# Encoded integer id -> original label string.
mapping = dict(enumerate(le.classes_))
print(mapping)

{0: 'negative', 1: 'neutral', 2: 'positive'}


In [17]:
# Every entry reads '<visual model>-<textual pipeline>': the part before the
# first '-' names the image features, the remainder names the text features.
feature_names = [
    'resnet101-bert',
    'resnet101-bert-lstm',
    'densenet201-bert-lstm',
    'densenet201-bert-pos-lstm',
    # 'densenet201-bert-ner-lstm',  # currently excluded from the experiments
    'densenet201-bert-pos-ner-lstm',
]

# process_dup is defined earlier in the notebook; judging by the result tables
# further down it disambiguates repeated names (e.g. 'densenet201-2').
visual_feature_names = process_dup([name.partition('-')[0] for name in feature_names])
textual_feature_names = process_dup([name.partition('-')[2] for name in feature_names])
# for i in range(len(feature_names)):
#     x = ' '.join(feature_names[i].split('-')[1:]).rstrip()
#     if len(x.split()) > 1:
#         x = '-'.join(x.rstrip('-lstm').rstrip().split())
#     textual_feature_names.append(x)

In [18]:
# Load the features for every entry of feature_names, once per dataset.
# Later cells read each result as features[i][0] and features[i][1] for the
# i-th feature name (presumably the text and image parts, in that order —
# confirm against get_features and the run_and_evaluate_* helpers).
mvsa_single_features, mvsa_multiple_features = get_features(feature_names)

In [19]:
# # Fix random indices for consistency between other experiments
# mvsa_single_features, mvsa_single_multimodal_labels = shuffle_mvsa(mvsa_single_features, mvsa_single_multimodal_labels, np.load('../input/mvsa-shuffle-indices/mvsa-single-shuffle-indices.npy'))
# mvsa_multiple_features, mvsa_multiple_multimodal_labels = shuffle_mvsa(mvsa_multiple_features, mvsa_multiple_multimodal_labels, np.load('../input/mvsa-shuffle-indices/mvsa-multiple-shuffle-indices.npy'))

In [20]:
# Global training configuration for the fusion experiments below.
# These names are not used in the cells that follow, so they are presumably
# read as globals by the model-building / training helpers defined earlier in
# the notebook. The meanings noted here are inferred from the names only —
# confirm against those helpers.
# reset_seeds() is defined earlier (presumably re-seeds the RNGs — confirm).
reset_seeds()
# Training loop settings (EARLY_STOPPING is presumably a patience in epochs).
EPOCHS = 100
BATCH_SIZE = 128
VALIDATION_SPLIT = 0.1
EARLY_STOPPING = 10

# Presumably the text branch: LSTM units and dropout rates.
NUM_LSTM = 256
DROPOUT_INPUT = 0.0
DROPOUT_LSTM = 0.2

# Presumably the image branch: same settings, currently identical values.
NUM_LSTM_IMG = 256
DROPOUT_INPUT_IMG = 0.0
DROPOUT_LSTM_IMG = 0.2

# Presumably attention dropout, plus the optimizer and loss used at compile time.
DROPOUT_ATT = 0.0
OPTIMIZER = 'adam'
LOSS = 'categorical_crossentropy'

# Intermediate Fusion

In [21]:
def _intermediate_fusion_scores(log_prefix, model_prefix, features, labels):
    """Run run_and_evaluate_IF once per feature set and tabulate the scores.

    Args:
        log_prefix: text printed in front of each feature name.
        model_prefix: string prepended to the feature name to form the
            model name passed to run_and_evaluate_IF.
        features: per-feature-set inputs; features[i][0] and features[i][1]
            are forwarded for the i-th entry of feature_names.
        labels: labels forwarded unchanged to run_and_evaluate_IF.

    Returns:
        DataFrame indexed by feature_names with columns
        'Accuracy', 'F1-macro' and 'F1-weighted'.
    """
    collected = []
    for idx, feature_name in enumerate(feature_names):
        print(log_prefix, feature_name)
        # Feature sets without 'lstm' in their name pass lstm=False; all
        # others leave the argument out and rely on the helper's default.
        lstm_override = {} if 'lstm' in feature_name else {'lstm': False}
        _, score = run_and_evaluate_IF(model_prefix + feature_name,
                                       features[idx][0],
                                       features[idx][1],
                                       labels,
                                       verbose=0, **lstm_override)
        collected.append(score)
        print()
    return pd.DataFrame(collected, columns=['Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)


print('MVSA-Single with Intermediate Fusion')
df0_single_scores_IF = _intermediate_fusion_scores('MVSA-Single:', 'single-IF-',
                                                   mvsa_single_features,
                                                   mvsa_single_multimodal_labels)

print('----------------------------------------')
print('\nMVSA-Multiple with Intermediate Fusion')
df0_multiple_scores_IF = _intermediate_fusion_scores('MVSA-Multiple:', 'multiple-IF-',
                                                     mvsa_multiple_features,
                                                     mvsa_multiple_multimodal_labels)

MVSA-Single with Intermediate Fusion
MVSA-Single: resnet101-bert
Create new IF model: single-resnet101-bert.h5


2022-07-21 10:06:37.354597: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Model IF checkpoint loaded at epoch: 10

MVSA-Single: resnet101-bert-lstm
Create new IF model: single-resnet101-bert-lstm.h5
Model IF checkpoint loaded at epoch: 6

MVSA-Single: densenet201-bert-lstm
Create new IF model: single-densenet201-bert-lstm.h5
Model IF checkpoint loaded at epoch: 1

MVSA-Single: densenet201-bert-pos-lstm
Create new IF model: single-densenet201-bert-pos-lstm.h5
Model IF checkpoint loaded at epoch: 1

MVSA-Single: densenet201-bert-pos-ner-lstm
Create new IF model: single-densenet201-bert-pos-ner-lstm.h5
Model IF checkpoint loaded at epoch: 1

----------------------------------------

MVSA-Multiple with Intermediate Fusion
MVSA-Multiple: resnet101-bert
Create new IF model: multiple-resnet101-bert.h5
Model IF checkpoint loaded at epoch: 12

MVSA-Multiple: resnet101-bert-lstm
Create new IF model: multiple-resnet101-bert-lstm.h5
Model IF checkpoint loaded at epoch: 7

MVSA-Multiple: densenet201-bert-lstm
Create new IF model: multiple-densenet201-bert-lstm.h5
Model I

# Late Fusion

In [22]:
def _late_fusion_scores(log_prefix, model_prefix, features, labels):
    """Run run_and_evaluate_LF once per feature set and tabulate the scores.

    Args:
        log_prefix: text printed in front of each feature name.
        model_prefix: string prepended to the feature name to form the
            model name passed to run_and_evaluate_LF.
        features: per-feature-set inputs; features[i][0] and features[i][1]
            are forwarded for the i-th entry of feature_names.
        labels: labels forwarded unchanged to run_and_evaluate_LF.

    Returns:
        Three DataFrames (text model, image model, late-fusion model), each
        with columns 'Accuracy', 'F1-macro' and 'F1-weighted', indexed by
        textual_feature_names, visual_feature_names and feature_names
        respectively.
    """
    metric_columns = ['Accuracy', 'F1-macro', 'F1-weighted']
    text_rows, image_rows, fusion_rows = [], [], []
    for idx, feature_name in enumerate(feature_names):
        print(log_prefix, feature_name)
        # Feature sets without 'lstm' in their name pass lstm=False; all
        # others leave the argument out and rely on the helper's default.
        lstm_override = {} if 'lstm' in feature_name else {'lstm': False}
        score_text, score_image, score_LF = run_and_evaluate_LF(model_prefix + feature_name,
                                                                features[idx][0],
                                                                features[idx][1],
                                                                labels,
                                                                verbose=0, **lstm_override)
        text_rows.append(score_text)
        image_rows.append(score_image)
        fusion_rows.append(score_LF)
        print()

    return (pd.DataFrame(text_rows, columns=metric_columns, index=textual_feature_names),
            pd.DataFrame(image_rows, columns=metric_columns, index=visual_feature_names),
            pd.DataFrame(fusion_rows, columns=metric_columns, index=feature_names))


print('MVSA-Single with Late Fusion')
(df1_single_scores_text,
 df1_single_scores_image,
 df1_single_scores_LF) = _late_fusion_scores('MVSA-Single:', 'single-LF-',
                                             mvsa_single_features,
                                             mvsa_single_multimodal_labels)

print('--------------------------------')
print('\nMVSA-Multiple with Late Fusion')
(df1_multiple_scores_text,
 df1_multiple_scores_image,
 df1_multiple_scores_LF) = _late_fusion_scores('MVSA-Multiple:', 'multiple-LF-',
                                               mvsa_multiple_features,
                                               mvsa_multiple_multimodal_labels)

MVSA-Single with Late Fusion
MVSA-Single: resnet101-bert
Create new text model: single-bert.h5
Create new image model: single-resnet101.h5
Model text checkpoint loaded at epoch: 17
Model image checkpoint loaded at epoch: 4
Model weights (text, image): [0.5398230185336202, 0.4601769814663798]

MVSA-Single: resnet101-bert-lstm
Create new text model: single-bert-lstm.h5
Create new image model: single-resnet101-lstm.h5
Model text checkpoint loaded at epoch: 4
Model image checkpoint loaded at epoch: 4
Model weights (text, image): [0.541300535460688, 0.458699464539312]

MVSA-Single: densenet201-bert-lstm
Create new image model: single-densenet201-lstm.h5
Model text checkpoint loaded at epoch: 4
Model image checkpoint loaded at epoch: 1
Model weights (text, image): [0.5141903262607549, 0.4858096737392451]

MVSA-Single: densenet201-bert-pos-lstm
Create new text model: single-bert-pos-lstm.h5
Model text checkpoint loaded at epoch: 4
Model image checkpoint loaded at epoch: 1
Model weights (text,

# Hybrid Fusion

In [23]:
def _hybrid_fusion_scores(log_prefix, model_prefix, features, labels):
    """Run run_and_evaluate_HF once per feature set and tabulate the scores.

    Args:
        log_prefix: text printed in front of each feature name.
        model_prefix: string prepended to the feature name to form the
            model name passed to run_and_evaluate_HF.
        features: per-feature-set inputs; features[i][0] and features[i][1]
            are forwarded for the i-th entry of feature_names.
        labels: labels forwarded unchanged to run_and_evaluate_HF.

    Returns:
        Four DataFrames (text model, image model, intermediate-fusion model,
        hybrid-fusion model), each with columns 'Accuracy', 'F1-macro' and
        'F1-weighted'. The first is indexed by textual_feature_names, the
        second by visual_feature_names, the last two by feature_names.
    """
    metric_columns = ['Accuracy', 'F1-macro', 'F1-weighted']
    text_rows, image_rows, if_rows, hf_rows = [], [], [], []
    for idx, feature_name in enumerate(feature_names):
        print(log_prefix, feature_name)
        # Feature sets without 'lstm' in their name pass lstm=False; all
        # others leave the argument out and rely on the helper's default.
        lstm_override = {} if 'lstm' in feature_name else {'lstm': False}
        score_text, score_image, score_IF, score_HF = run_and_evaluate_HF(model_prefix + feature_name,
                                                                          features[idx][0],
                                                                          features[idx][1],
                                                                          labels,
                                                                          verbose=0, **lstm_override)
        text_rows.append(score_text)
        image_rows.append(score_image)
        if_rows.append(score_IF)
        hf_rows.append(score_HF)
        print()

    return (pd.DataFrame(text_rows, columns=metric_columns, index=textual_feature_names),
            pd.DataFrame(image_rows, columns=metric_columns, index=visual_feature_names),
            pd.DataFrame(if_rows, columns=metric_columns, index=feature_names),
            pd.DataFrame(hf_rows, columns=metric_columns, index=feature_names))


print('MVSA-Single with Hybrid Fusion')
(df2_single_scores_text,
 df2_single_scores_image,
 df2_single_scores_IF,
 df2_single_scores_HF) = _hybrid_fusion_scores('MVSA-Single:', 'single-HF-',
                                               mvsa_single_features,
                                               mvsa_single_multimodal_labels)

print('----------------------------------')
print('\nMVSA-Multiple with Hybrid Fusion')
(df2_multiple_scores_text,
 df2_multiple_scores_image,
 df2_multiple_scores_IF,
 df2_multiple_scores_HF) = _hybrid_fusion_scores('MVSA-Multiple:', 'multiple-HF-',
                                                 mvsa_multiple_features,
                                                 mvsa_multiple_multimodal_labels)

MVSA-Single with Hybrid Fusion
MVSA-Single: resnet101-bert
Model text checkpoint loaded at epoch: 17
Model image checkpoint loaded at epoch: 4
Model IF checkpoint loaded at epoch: 10
Model weights (text, image, IF): [0.3513824917974442, 0.29953916165842837, 0.34907834654412745]

MVSA-Single: resnet101-bert-lstm
Model text checkpoint loaded at epoch: 4
Model image checkpoint loaded at epoch: 4
Model IF checkpoint loaded at epoch: 6
Model weights (text, image, IF): [0.35039818395954275, 0.29692832138249836, 0.3526734946579589]

MVSA-Single: densenet201-bert-lstm
Model text checkpoint loaded at epoch: 4
Model image checkpoint loaded at epoch: 1
Model IF checkpoint loaded at epoch: 1
Model weights (text, image, IF): [0.33995585349203306, 0.3211920447273425, 0.33885210178062447]

MVSA-Single: densenet201-bert-pos-lstm
Model text checkpoint loaded at epoch: 4
Model image checkpoint loaded at epoch: 1
Model IF checkpoint loaded at epoch: 1
Model weights (text, image, IF): [0.33966480839569085

# Display results

In [24]:
# Intermediate Fusion scores of both datasets, shown side by side with the
# best / worst value of each metric column highlighted.
print('Intermediate Fusion')
if_tables = tuple(style_dataframe(scores)
                  for scores in (df0_single_scores_IF, df0_multiple_scores_IF))
display_dataframes(if_tables, names=['MVSA-Single', 'MVSA-Multiple'])

Intermediate Fusion


Unnamed: 0_level_0,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.685144,0.484852,0.665077
resnet101-bert-lstm,0.691796,0.507728,0.686279
densenet201-bert-lstm,0.702882,0.529247,0.685298
densenet201-bert-pos-lstm,0.696231,0.515765,0.662206
densenet201-bert-pos-ner-lstm,0.694013,0.482955,0.659877
resnet101-bert,0.658049,0.425614,0.588077
resnet101-bert-lstm,0.660400,0.458568,0.607102
densenet201-bert-lstm,0.653937,0.383523,0.578272
densenet201-bert-pos-lstm,0.653937,0.3688,0.570745
densenet201-bert-pos-ner-lstm,0.662750,0.42914,0.598846

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.685144,0.484852,0.665077
resnet101-bert-lstm,0.691796,0.507728,0.686279
densenet201-bert-lstm,0.702882,0.529247,0.685298
densenet201-bert-pos-lstm,0.696231,0.515765,0.662206
densenet201-bert-pos-ner-lstm,0.694013,0.482955,0.659877

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.658049,0.425614,0.588077
resnet101-bert-lstm,0.6604,0.458568,0.607102
densenet201-bert-lstm,0.653937,0.383523,0.578272
densenet201-bert-pos-lstm,0.653937,0.3688,0.570745
densenet201-bert-pos-ner-lstm,0.66275,0.42914,0.598846


In [25]:
# Late Fusion scores: text-only model, image-only model and their late fusion,
# shown side by side for each dataset.
# display_dataframes renders the tables itself and returns None, so it is
# called directly; wrapping it in print() only added a stray "None" line
# under each group of tables.
print('LATE FUSION \n')
print('MVSA-Single')
display_dataframes((style_dataframe(df1_single_scores_text), style_dataframe(df1_single_scores_image),
                    style_dataframe(df1_single_scores_LF)),
                   names=['Model Text', 'Model Image', 'Model LF'])
print('\nMVSA-Multiple')
display_dataframes((style_dataframe(df1_multiple_scores_text), style_dataframe(df1_multiple_scores_image),
                    style_dataframe(df1_multiple_scores_LF)),
                   names=['Model Text', 'Model Image', 'Model LF'])

LATE FUSION 

MVSA-Single


Unnamed: 0_level_0,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_2,Accuracy,F1-macro,F1-weighted
bert,0.678492,0.498274,0.668883
bert-lstm,0.682927,0.496424,0.676699
bert-lstm-2,0.682927,0.496424,0.676699
bert-pos-lstm,0.689579,0.513381,0.683022
bert-pos-ner-lstm,0.678492,0.494491,0.670036
resnet101,0.603104,0.250807,0.453788
resnet101-2,0.605322,0.255455,0.458822
densenet201,0.631929,0.403440,0.577151
densenet201-2,0.631929,0.403440,0.577151
densenet201-3,0.631929,0.403440,0.577151

Unnamed: 0,Accuracy,F1-macro,F1-weighted
bert,0.678492,0.498274,0.668883
bert-lstm,0.682927,0.496424,0.676699
bert-lstm-2,0.682927,0.496424,0.676699
bert-pos-lstm,0.689579,0.513381,0.683022
bert-pos-ner-lstm,0.678492,0.494491,0.670036

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101,0.603104,0.250807,0.453788
resnet101-2,0.605322,0.255455,0.458822
densenet201,0.631929,0.40344,0.577151
densenet201-2,0.631929,0.40344,0.577151
densenet201-3,0.631929,0.40344,0.577151

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.696231,0.4681,0.649377
resnet101-bert-lstm,0.696231,0.457255,0.65538
densenet201-bert-lstm,0.707317,0.515738,0.680127
densenet201-bert-pos-lstm,0.698448,0.511811,0.670896
densenet201-bert-pos-ner-lstm,0.700665,0.500778,0.670977


None

MVSA-Multiple


Unnamed: 0_level_0,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_2,Accuracy,F1-macro,F1-weighted
bert,0.652174,0.395419,0.569745
bert-lstm,0.659812,0.469962,0.615442
bert-lstm-2,0.659812,0.469962,0.615442
bert-pos-lstm,0.663925,0.430271,0.587615
bert-pos-ner-lstm,0.655112,0.471063,0.611821
resnet101,0.639248,0.259976,0.499026
resnet101-2,0.639835,0.260122,0.499306
densenet201,0.641011,0.283595,0.514231
densenet201-2,0.641011,0.283595,0.514231
densenet201-3,0.641011,0.283595,0.514231

Unnamed: 0,Accuracy,F1-macro,F1-weighted
bert,0.652174,0.395419,0.569745
bert-lstm,0.659812,0.469962,0.615442
bert-lstm-2,0.659812,0.469962,0.615442
bert-pos-lstm,0.663925,0.430271,0.587615
bert-pos-ner-lstm,0.655112,0.471063,0.611821

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101,0.639248,0.259976,0.499026
resnet101-2,0.639835,0.260122,0.499306
densenet201,0.641011,0.283595,0.514231
densenet201-2,0.641011,0.283595,0.514231
densenet201-3,0.641011,0.283595,0.514231

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.641011,0.268291,0.504117
resnet101-bert-lstm,0.646298,0.295803,0.517949
densenet201-bert-lstm,0.654524,0.343297,0.546975
densenet201-bert-pos-lstm,0.651586,0.323651,0.531924
densenet201-bert-pos-ner-lstm,0.656287,0.350686,0.549574


None


In [26]:
# Hybrid Fusion scores: text-only, image-only, intermediate-fusion and
# hybrid-fusion models, shown side by side for each dataset.
# display_dataframes renders the tables itself and returns None, so it is
# called directly; wrapping it in print() only added a stray "None" line
# under each group of tables.
print('HYBRID FUSION \n')
print('MVSA-Single')
display_dataframes((style_dataframe(df2_single_scores_text), style_dataframe(df2_single_scores_image),
                    style_dataframe(df2_single_scores_IF), style_dataframe(df2_single_scores_HF)),
                   names=['Model Text', 'Model Image', 'Model IF', 'Model HF'])
print('\nMVSA-Multiple')
display_dataframes((style_dataframe(df2_multiple_scores_text), style_dataframe(df2_multiple_scores_image),
                    style_dataframe(df2_multiple_scores_IF), style_dataframe(df2_multiple_scores_HF)),
                   names=['Model Text', 'Model Image', 'Model IF', 'Model HF'])

HYBRID FUSION 

MVSA-Single


Unnamed: 0_level_0,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_2,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_3,Accuracy,F1-macro,F1-weighted
bert,0.678492,0.498274,0.668883
bert-lstm,0.682927,0.496424,0.676699
bert-lstm-2,0.682927,0.496424,0.676699
bert-pos-lstm,0.689579,0.513381,0.683022
bert-pos-ner-lstm,0.678492,0.494491,0.670036
resnet101,0.603104,0.250807,0.453788
resnet101-2,0.605322,0.255455,0.458822
densenet201,0.631929,0.403440,0.577151
densenet201-2,0.631929,0.403440,0.577151
densenet201-3,0.631929,0.403440,0.577151

Unnamed: 0,Accuracy,F1-macro,F1-weighted
bert,0.678492,0.498274,0.668883
bert-lstm,0.682927,0.496424,0.676699
bert-lstm-2,0.682927,0.496424,0.676699
bert-pos-lstm,0.689579,0.513381,0.683022
bert-pos-ner-lstm,0.678492,0.494491,0.670036

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101,0.603104,0.250807,0.453788
resnet101-2,0.605322,0.255455,0.458822
densenet201,0.631929,0.40344,0.577151
densenet201-2,0.631929,0.40344,0.577151
densenet201-3,0.631929,0.40344,0.577151

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.685144,0.484852,0.665077
resnet101-bert-lstm,0.691796,0.507728,0.686279
densenet201-bert-lstm,0.702882,0.529247,0.685298
densenet201-bert-pos-lstm,0.696231,0.515765,0.662206
densenet201-bert-pos-ner-lstm,0.694013,0.482955,0.659877

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.685144,0.482956,0.651235
resnet101-bert-lstm,0.709534,0.511889,0.688697
densenet201-bert-lstm,0.718404,0.52728,0.696729
densenet201-bert-pos-lstm,0.7051,0.511388,0.676691
densenet201-bert-pos-ner-lstm,0.7051,0.496143,0.673685


None

MVSA-Multiple


Unnamed: 0_level_0,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_2,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_3,Accuracy,F1-macro,F1-weighted
bert,0.652174,0.395419,0.569745
bert-lstm,0.659812,0.469962,0.615442
bert-lstm-2,0.659812,0.469962,0.615442
bert-pos-lstm,0.663925,0.430271,0.587615
bert-pos-ner-lstm,0.655112,0.471063,0.611821
resnet101,0.639248,0.259976,0.499026
resnet101-2,0.639835,0.260122,0.499306
densenet201,0.641011,0.283595,0.514231
densenet201-2,0.641011,0.283595,0.514231
densenet201-3,0.641011,0.283595,0.514231

Unnamed: 0,Accuracy,F1-macro,F1-weighted
bert,0.652174,0.395419,0.569745
bert-lstm,0.659812,0.469962,0.615442
bert-lstm-2,0.659812,0.469962,0.615442
bert-pos-lstm,0.663925,0.430271,0.587615
bert-pos-ner-lstm,0.655112,0.471063,0.611821

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101,0.639248,0.259976,0.499026
resnet101-2,0.639835,0.260122,0.499306
densenet201,0.641011,0.283595,0.514231
densenet201-2,0.641011,0.283595,0.514231
densenet201-3,0.641011,0.283595,0.514231

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.658049,0.425614,0.588077
resnet101-bert-lstm,0.6604,0.458568,0.607102
densenet201-bert-lstm,0.653937,0.383523,0.578272
densenet201-bert-pos-lstm,0.653937,0.3688,0.570745
densenet201-bert-pos-ner-lstm,0.66275,0.42914,0.598846

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,0.649236,0.326564,0.531965
resnet101-bert-lstm,0.6604,0.369435,0.558693
densenet201-bert-lstm,0.656287,0.355545,0.557187
densenet201-bert-pos-lstm,0.651586,0.332082,0.538461
densenet201-bert-pos-ner-lstm,0.662162,0.387319,0.572778


None


In [27]:
def _best_of_late_and_intermediate(late_scores, intermediate_scores):
    """Pick, cell by cell, the higher of the Late and Intermediate Fusion scores.

    The Late Fusion value is taken where it is strictly greater; otherwise
    the Intermediate Fusion value is kept. The comparison is positional
    (raw values), so both frames must have the same shape and ordering.
    """
    late_is_higher = late_scores.gt(intermediate_scores.values)
    best_values = np.where(late_is_higher, late_scores.values, intermediate_scores.values)
    return pd.DataFrame(best_values,
                        columns=['Accuracy','F1-macro','F1-weighted'], index=feature_names)


# Strongest non-hybrid baseline per feature set and metric.
df_single_higher_fusion = _best_of_late_and_intermediate(df1_single_scores_LF, df0_single_scores_IF)
df_multiple_higher_fusion = _best_of_late_and_intermediate(df1_multiple_scores_LF, df0_multiple_scores_IF)

# Hybrid Fusion minus that baseline: positive means Hybrid Fusion wins.
df_single_subtract = df2_single_scores_HF.subtract(df_single_higher_fusion)
df_multiple_subtract = df2_multiple_scores_HF.subtract(df_multiple_higher_fusion)

In [28]:
# Hybrid Fusion minus the better of Late / Intermediate Fusion, per dataset;
# highlight_neg colours negative differences tomato and the rest lawngreen.
print('Compare Hybrid Fusion with other Fusion Models (>scores)')
difference_tables = tuple(diff.style.applymap(highlight_neg)
                          for diff in (df_single_subtract, df_multiple_subtract))
display_dataframes(difference_tables, names=['MVSA-Single', 'MVSA-Multiple'])

Compare Hybrid Fusion with other Fusion Models (>scores)


Unnamed: 0_level_0,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Accuracy,F1-macro,F1-weighted
resnet101-bert,-0.011086,-0.001897,-0.013842
resnet101-bert-lstm,0.013304,0.004161,0.002418
densenet201-bert-lstm,0.011086,-0.001967,0.011431
densenet201-bert-pos-lstm,0.006652,-0.004377,0.005795
densenet201-bert-pos-ner-lstm,0.004435,-0.004636,0.002708
resnet101-bert,-0.008813,-0.09905,-0.056112
resnet101-bert-lstm,-0.000000,-0.089132,-0.04841
densenet201-bert-lstm,0.001763,-0.027978,-0.021085
densenet201-bert-pos-lstm,-0.002350,-0.036718,-0.032284
densenet201-bert-pos-ner-lstm,-0.000588,-0.041821,-0.026068

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,-0.011086,-0.001897,-0.013842
resnet101-bert-lstm,0.013304,0.004161,0.002418
densenet201-bert-lstm,0.011086,-0.001967,0.011431
densenet201-bert-pos-lstm,0.006652,-0.004377,0.005795
densenet201-bert-pos-ner-lstm,0.004435,-0.004636,0.002708

Unnamed: 0,Accuracy,F1-macro,F1-weighted
resnet101-bert,-0.008813,-0.09905,-0.056112
resnet101-bert-lstm,-0.0,-0.089132,-0.04841
densenet201-bert-lstm,0.001763,-0.027978,-0.021085
densenet201-bert-pos-lstm,-0.00235,-0.036718,-0.032284
densenet201-bert-pos-ner-lstm,-0.000588,-0.041821,-0.026068


# Drafts

In [29]:
# import shutil
# def remove_folder(path):
#     # check if folder exists
#     if os.path.exists(path):
#          # remove if exists
#          shutil.rmtree(path)
#     else:
#          # throw your exception to handle this special scenario
#          raise XXError("your exception") 
# remove_folder("./model_checkpoint")
# remove_folder("./model_history")

In [30]:
# ## Choose best
# # Load text feature
# mvsa_single_bert, mvsa_multiple_bert = load_mvsa_feature('bert-base')
# mvsa_single_pos_bow, mvsa_multiple_pos_bow = load_mvsa_feature('pos-bow')
# mvsa_single_pos_tfidf, mvsa_multiple_pos_tfidf = load_mvsa_feature('pos-tfidf')
# mvsa_single_ner_bow, mvsa_multiple_ner_bow = load_mvsa_feature('ner-bow')
# mvsa_single_ner_tfidf, mvsa_multiple_ner_tfidf = load_mvsa_feature('ner-tfidf')

# ## Load image feature
# mvsa_single_vgg16, mvsa_multiple_vgg16 = load_mvsa_feature('vgg16')
# mvsa_single_vgg19, mvsa_multiple_vgg19 = load_mvsa_feature('vgg19')
# mvsa_single_resnet50, mvsa_multiple_resnet50 = load_mvsa_feature('resnet50')
# mvsa_single_resnet101, mvsa_multiple_resnet101 = load_mvsa_feature('resnet101')
# mvsa_single_resnet152, mvsa_multiple_resnet152 = load_mvsa_feature('resnet152')
# mvsa_single_densenet121, mvsa_multiple_densenet121 = load_mvsa_feature('densenet121')
# mvsa_single_densenet169, mvsa_multiple_densenet169 = load_mvsa_feature('densenet169')
# mvsa_single_densenet201, mvsa_multiple_densenet201 = load_mvsa_feature('densenet201')

# mvsa_single_bert_pos = np.concatenate((mvsa_single_bert, mvsa_single_pos_tfidf), axis=1)
# mvsa_single_bert_ner = np.concatenate((mvsa_single_bert, mvsa_single_ner_tfidf), axis=1)
# mvsa_single_bert_pos_ner = np.concatenate((mvsa_single_bert, mvsa_single_pos_tfidf, mvsa_single_ner_tfidf), axis=1)

# mvsa_multiple_bert_pos = np.concatenate((mvsa_multiple_bert, mvsa_multiple_pos_tfidf), axis=1)
# mvsa_multiple_bert_ner = np.concatenate((mvsa_multiple_bert, mvsa_multiple_ner_tfidf), axis=1)
# mvsa_multiple_bert_pos_ner = np.concatenate((mvsa_multiple_bert, mvsa_multiple_pos_tfidf, mvsa_multiple_ner_tfidf), axis=1)

In [31]:
# mvsa_single_features_split = get_preprocess_input(feature_names, mvsa_single_features, mvsa_single_multimodal_labels)
# mvsa_multiple_features_split = get_preprocess_input(feature_names, mvsa_multiple_features, mvsa_multiple_multimodal_labels)
# mvsa_single_features, mvsa_single_multimodal_labels = shuffle_mvsa(mvsa_single_features, mvsa_single_multimodal_labels)
# mvsa_multiple_features, mvsa_multiple_multimodal_labels = shuffle_mvsa(mvsa_multiple_features, mvsa_multiple_multimodal_labels)

In [32]:
# # save shuffle indices for other experiments consistency (temporary fix)
# random_idx_single = np.random.permutation(len(mvsa_single_multimodal_labels))
# random_idx_multiple = np.random.permutation(len(mvsa_multiple_multimodal_labels))
# np.save('mvsa-single-shuffle-indices.npy', random_idx_single)
# np.save('mvsa-multiple-shuffle-indices.npy', random_idx_multiple)