In [1]:
SEED = 61

import numpy as np
import tensorflow as tf
import random as python_random
import os

def reset_seeds():
    """Re-seed the random number generators used in this notebook with SEED.

    Seeds Python's ``random`` module, NumPy's global generator and
    TensorFlow's global generator, and exports ``PYTHONHASHSEED``.
    """
    # NOTE(review): PYTHONHASHSEED is normally read when the interpreter
    # starts, so setting it here most likely only affects child processes --
    # confirm whether hash determinism is actually required.
    os.environ["PYTHONHASHSEED"] = str(SEED)
    for seed_fn in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_fn(SEED)
    
import os
import re
import gc
import tensorflow_addons as tfa
import h5py
import torch
import matplotlib.pyplot as plt
import pandas as pd

from tqdm import tqdm
from nltk import tokenize

from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay, classification_report

from keras import backend as K
from keras import initializers,regularizers,constraints
from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical
from keras.layers import Reshape, Input, Embedding, Flatten, Dense, Dropout, BatchNormalization, Activation #, merge
from keras.layers import TimeDistributed, LSTM, GRU, Bidirectional, Convolution1D, MaxPooling1D, MaxPooling2D
from keras.layers.core import RepeatVector #, Reshape
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.models import Sequential, Model, load_model
from sklearn.model_selection import RepeatedKFold, KFold
from sklearn.model_selection import cross_val_score

from tensorflow.python.keras.layers import Layer, InputSpec, Lambda

from transformers import BertTokenizer, BertForMaskedLM, BertModel

# from tensorflow.keras import Model
# from attention import Attention_input1, Attention_input2
# from keras.optimizers import SGD, RMSprop, Adagrad

In [2]:
def read_hdf5(path):
    """Load every top-level dataset of an HDF5 file into memory.

    Parameters
    ----------
    path : str
        Location of the HDF5 file to read.

    Returns
    -------
    list of (str, numpy.ndarray)
        One ``(name, data)`` pair per top-level key of the file. Datasets
        whose dtype is ``object`` are decoded element by element from UTF-8
        bytes to ``str``.
    """
    loaded_data = []

    # The context manager releases the file handle even when reading a
    # dataset raises; previously the file was never closed.
    with h5py.File(path, 'r') as read_file:
        for name in list(read_file.keys()):
            # [:] copies the dataset into memory so it outlives the file.
            dataset = read_file[name][:]
            if dataset.dtype == np.dtype('object'):
                dataset = np.array([x.decode('UTF-8') for x in dataset])
            loaded_data.append((name, dataset))

    return loaded_data

def loadz(path):
    """Return the array stored under ``'arr_0'`` in an ``.npz`` archive.

    ``'arr_0'`` is the key ``np.savez`` assigns to the first positional array.

    Parameters
    ----------
    path : str
        Location of the ``.npz`` file.

    Returns
    -------
    numpy.ndarray
        The array saved as ``arr_0``.
    """
    # np.load keeps the archive open until it is closed explicitly; the
    # context manager closes it once the array has been read into memory.
    with np.load(path) as archive:
        return archive['arr_0']

In [3]:
def load_labels(path):
    """Read the three label arrays stored in an MVSA label file.

    Parameters
    ----------
    path : str
        HDF5 file readable by ``read_hdf5``.

    Returns
    -------
    tuple
        ``(multimodal_labels, text_labels, image_labels)`` taken from the
        datasets named ``'multimodal-labels'``, ``'text-labels'`` and
        ``'image-labels'``.

    Raises
    ------
    KeyError
        If any of the three datasets is absent from the file. The original
        code failed with an opaque ``UnboundLocalError`` in that case.
    """
    datasets = dict(read_hdf5(path))
    required = ('multimodal-labels', 'text-labels', 'image-labels')

    missing = [key for key in required if key not in datasets]
    if missing:
        raise KeyError('{} is missing label dataset(s): {}'.format(path, ', '.join(missing)))

    labels, text_labels, image_labels = (datasets[key] for key in required)
    return labels, text_labels, image_labels

def merge_mvsa(mvsa_single, mvsa_multiple):
    """Stack the MVSA-Single rows on top of the MVSA-Multiple rows (axis 0)."""
    return np.concatenate([mvsa_single, mvsa_multiple], axis=0)

def load_mvsa_feature(feature_name, merge=False, base_dir='../input/mvsa-features/'):
    """Load one pre-extracted feature for both MVSA datasets.

    Files are expected at
    ``<base_dir>/<feature_name>/mvsa-{single,multiple}-<feature_name>.npz``.

    Parameters
    ----------
    feature_name : str
        Name of the feature; used as both the sub-folder and the file suffix.
    merge : bool, optional
        When truthy, return a single array with the MVSA-Single rows followed
        by the MVSA-Multiple rows instead of a pair.
    base_dir : str, optional
        Root folder of the feature files. Defaults to the location that was
        previously hard-coded, so existing calls behave as before.

    Returns
    -------
    numpy.ndarray or (numpy.ndarray, numpy.ndarray)
        The merged array if ``merge`` is truthy, otherwise
        ``(mvsa_single, mvsa_multiple)``.
    """
    folder_path = os.path.join(base_dir, feature_name)
    single_file = 'mvsa-single-{}.npz'.format(feature_name)
    multiple_file = 'mvsa-multiple-{}.npz'.format(feature_name)

    mvsa_single = loadz(os.path.join(folder_path, single_file))
    mvsa_multiple = loadz(os.path.join(folder_path, multiple_file))

    if merge:
        return merge_mvsa(mvsa_single, mvsa_multiple)

    return mvsa_single, mvsa_multiple

In [4]:
# e.g. validation_split=0.1 -----> 8:1:1 ratio of train, val, test
def split_data(data, validation_split):
    """Split ``data`` into train / validation / test parts taken in order.

    The last ``num_val`` rows form the test set, the ``num_val`` rows before
    them form the validation set and everything earlier is the training set,
    where ``num_val = int(validation_split * len(data))``. No shuffling is
    performed.

    Parameters
    ----------
    data : numpy.ndarray
        Array split along its first axis.
    validation_split : float
        Fraction of rows used for validation and, again, for test.

    Returns
    -------
    tuple
        ``(data_train, data_val, data_test)``.
    """
    num_samples = data.shape[0]
    num_val = int(validation_split * num_samples)

    # Use explicit, clamped boundaries instead of negative slices: with
    # num_val == 0 the original `data[:-0]` / `data[-0:]` returned an empty
    # training set and put every row into the test set.
    val_start = max(num_samples - 2 * num_val, 0)
    test_start = max(num_samples - num_val, 0)

    data_train = data[:val_start]
    data_val = data[val_start:test_start]
    data_test = data[test_start:]
    return data_train, data_val, data_test

In [5]:
def weighted_average(weights, probs):
    '''
    Combine the predictions of several models into one weighted sum.

    weights: weights list (or array), one entry per model
    probs: probability distributions array, indexed as probs[model][sample][class]

    Returns a float32 array with one row per sample.
    '''
    # Scale every model's predictions by that model's weight.
    scaled = [probs[idx] * weight for idx, weight in enumerate(weights)]

    blended = []
    for sample in range(len(probs[0])):
        total = np.zeros(len(probs[0][0]))
        # Accumulate the models in order, starting from a float64 zero vector.
        for member in scaled:
            total = total + member[sample]
        blended.append(total)

    return np.asarray(blended, dtype='float32')

In [6]:
def get_average_weights(*scores, inverse=False):
    """Turn per-model scores into weights that sum to 1.

    Parameters
    ----------
    *scores : float
        One score per model.
    inverse : bool, optional
        When truthy, the normalised weights are inverted and re-normalised so
        that a smaller score gets a bigger weight (useful when the score is a
        loss).

    Returns
    -------
    list
        One weight per score, in the same order.
    """
    # Normalise once; the total does not change inside the loop.
    total = np.sum(scores)
    weights = [score / total for score in scores]

    if inverse:
        # Kept in their own name so the boolean `inverse` flag is not
        # overwritten by a list, as it was before.
        reciprocals = [1 / weight for weight in weights]
        reciprocal_total = np.sum(reciprocals)
        weights = [value / reciprocal_total for value in reciprocals]

    return weights

In [7]:
def create_model_text(input_shape):
    """Build and compile the text-only classifier with an LSTM encoder.

    input_shape: shape of one text feature vector (without the batch axis).

    Returns a compiled Keras model trained with categorical cross-entropy and
    reporting accuracy and macro F1.
    """
    macro_f1 = tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='macro')

    text_input = Input(shape=input_shape)
    # Reshape((1, -1)) presents each sample to the LSTM as a one-step sequence.
    as_sequence = Reshape((1, -1))(text_input)
    encoded = LSTM(NUM_LSTM)(as_sequence)
    class_probs = Dense(NUM_CLASSES, activation='softmax')(encoded)

    model = Model(inputs=text_input, outputs=class_probs)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', macro_f1],
    )
    return model

In [8]:
def create_model_text_no_lstm(input_shape):
    """Build and compile the text-only classifier without an LSTM.

    The model is a single softmax layer applied directly to the text vector.

    input_shape: shape of one text feature vector (without the batch axis).
    """
    macro_f1 = tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='macro')

    text_input = Input(shape=input_shape)
    class_probs = Dense(NUM_CLASSES, activation='softmax')(text_input)

    model = Model(inputs=text_input, outputs=class_probs)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', macro_f1],
    )
    return model

In [9]:
def create_model_image(input_shape):
    """Build and compile the image-only classifier.

    The model is a single softmax layer applied directly to the image vector.

    input_shape: shape of one image feature vector (without the batch axis).
    """
    macro_f1 = tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='macro')

    image_input = Input(shape=input_shape)
    class_probs = Dense(NUM_CLASSES, activation='softmax')(image_input)

    model = Model(inputs=image_input, outputs=class_probs)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', macro_f1],
    )
    return model

In [10]:
def create_model_IF(text_shape, image_shape):
    """Build and compile the fused text + image classifier (LSTM text branch).

    The text vector goes through an LSTM, is concatenated with the image
    vector, passed through a Keras Attention layer used as self-attention and
    classified by a softmax layer.

    text_shape: shape of one text feature vector (without the batch axis).
    image_shape: shape of one image feature vector (without the batch axis).

    The returned model expects its inputs as ``[text, image]``.
    """
    macro_f1 = tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='macro')

    image_input = Input(shape=image_shape)
    text_input = Input(shape=text_shape)

    # Text branch: one-step sequence -> LSTM encoding.
    text_encoded = LSTM(NUM_LSTM)(Reshape((1, -1))(text_input))

    fused = tf.keras.layers.Concatenate(axis=1)([text_encoded, image_input])
    # NOTE(review): `fused` looks rank-2 (batch, features) here, whereas Keras
    # documents Attention for (batch, steps, dim) inputs -- with rank-2 input
    # the scores may be computed across the samples of a batch. Confirm this
    # is intended before changing it; a change would alter reported results.
    attended = tf.keras.layers.Attention()([fused, fused])
    class_probs = Dense(NUM_CLASSES, activation='softmax')(attended)

    model = Model(inputs=[text_input, image_input], outputs=class_probs)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', macro_f1],
    )
    return model

In [11]:
def create_model_IF_no_lstm(text_shape, image_shape):
    """Build and compile the fused text + image classifier without an LSTM.

    The raw text vector is concatenated with the image vector, passed through
    a Keras Attention layer used as self-attention and classified by a
    softmax layer.

    text_shape: shape of one text feature vector (without the batch axis).
    image_shape: shape of one image feature vector (without the batch axis).

    The returned model expects its inputs as ``[text, image]``.
    """
    macro_f1 = tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='macro')

    image_input = Input(shape=image_shape)
    text_input = Input(shape=text_shape)

    fused = tf.keras.layers.Concatenate(axis=1)([text_input, image_input])
    # NOTE(review): `fused` looks rank-2 (batch, features) here, whereas Keras
    # documents Attention for (batch, steps, dim) inputs -- with rank-2 input
    # the scores may be computed across the samples of a batch. Confirm this
    # is intended before changing it; a change would alter reported results.
    attended = tf.keras.layers.Attention()([fused, fused])
    class_probs = Dense(NUM_CLASSES, activation='softmax')(attended)

    model = Model(inputs=[text_input, image_input], outputs=class_probs)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', macro_f1],
    )
    return model

In [12]:
def old_run_and_evaluate_HF(name, X1, X2, y, verbose=0):
    # train and test only with multimodal labels
    '''
    Train the text, image and fused (IF) models on a single label set and
    score their weighted-average ensemble on the held-out test split.

    name: prefix of the checkpoint files written under ./model_checkpoint/
    X1: text input
    X2: image input
    y: labels
    verbose: 0 or 1 to print tracking on progress

    Returns the (accuracy, macro F1) tuple produced by evaluate_LF.
    '''
    # Encode the labels as integer class ids, then as one-hot rows.
    # NOTE(review): fit_transform re-fits the module-level encoder `le` on
    # every call; evaluate_LF later decodes with that same encoder.
    y = le.fit_transform(y)
    y = to_categorical(np.asarray(y))
    
    # Ordered (unshuffled) train / val / test split, identical for inputs and labels.
    X1_train, X1_val, X1_test = split_data(X1, VALIDATION_SPLIT)
    X2_train, X2_val, X2_test = split_data(X2, VALIDATION_SPLIT)
    y_train, y_val, y_test = split_data(y, VALIDATION_SPLIT)
    
    model_text = create_model_text(X1_train.shape[1:])
    model_image = create_model_image(X2_train.shape[1:])
    model_IF = create_model_IF(X1_train.shape[1:], X2_train.shape[1:])

    # One EarlyStopping instance per model, all watching val_loss.
    early_stopping1 = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    early_stopping2 = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    early_stopping3 = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)

    # save_best_only=True keeps only the best model seen so far on disk.
    # NOTE(review): assumes ./model_checkpoint/ exists or can be created -- confirm.
    checkpoint_text = ModelCheckpoint('./model_checkpoint/{}-text.h5'.format(name), save_best_only=True, verbose=verbose)
    checkpoint_image = ModelCheckpoint('./model_checkpoint/{}-image.h5'.format(name), save_best_only=True, verbose=verbose)
    checkpoint_IF = ModelCheckpoint('./model_checkpoint/{}-IF.h5'.format(name), save_best_only=True, verbose=verbose)

    history_text = model_text.fit(X1_train, y_train, validation_data=(X1_val, y_val), 
                        epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=verbose,
                        callbacks=[checkpoint_text, early_stopping1])
    
    history_image = model_image.fit(X2_train, y_train, validation_data=(X2_val, y_val), 
                        epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=verbose,
                        callbacks=[checkpoint_image, early_stopping2])
    
    history_IF = model_IF.fit([X1_train, X2_train], y_train, validation_data=([X1_val, X2_val], y_val), 
                        epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=verbose,
                        callbacks=[checkpoint_IF, early_stopping3])
    
    best_val_loss_text = np.min(history_text.history['val_loss'])
    best_val_loss_image = np.min(history_image.history['val_loss'])
    best_val_loss_IF = np.min(history_IF.history['val_loss'])
    
    # inverse=True: the model with the lowest validation loss gets the largest weight.
    weights = get_average_weights(best_val_loss_text, best_val_loss_image, best_val_loss_IF,
                                  inverse=True)

    # Replace the in-memory models with the checkpoints saved during training.
    model_text = load_model('./model_checkpoint/{}-text.h5'.format(name))
    model_image = load_model('./model_checkpoint/{}-image.h5'.format(name))
    model_IF = load_model('./model_checkpoint/{}-IF.h5'.format(name))

    y_pred_text = model_text.predict(X1_test)
    y_pred_image = model_image.predict(X2_test)
    y_pred_IF = model_IF.predict([X1_test, X2_test])
    
    # The order of the stacked predictions matches the order of `weights`.
    y_pred = weighted_average(weights, np.asarray([y_pred_text, y_pred_image, y_pred_IF], dtype='float32'))
    
    # np.argmin returns a 0-based index into the per-epoch history list.
    best_epoch_text = np.argmin(history_text.history['val_loss'])
    best_epoch_image = np.argmin(history_image.history['val_loss'])
    best_epoch_IF = np.argmin(history_IF.history['val_loss'])

    print('Checkpoint of text model loaded at epoch:', best_epoch_text)
    print('Checkpoint of image model loaded at epoch:', best_epoch_image)
    print('Checkpoint of IF model loaded at epoch:', best_epoch_IF)

    return evaluate_LF(y_test, y_pred, verbose=verbose)

In [13]:
def run_and_evaluate_HF(name, X1, X2, y, y1, y2, verbose=0):
    # train with separate labels of each modality
    # test with multimodal labels
    '''
    Train the text model on text labels, the image model on image labels and
    the fused (IF) model on multimodal labels, then score their
    weighted-average ensemble against the multimodal test labels.

    name: prefix of the checkpoint files written under ./model_checkpoint/
    X1: text input
    X2: image input
    y: multimodal labels
    y1: text labels
    y2: image labels
    verbose: 0 or 1 to print tracking on progress

    Returns the (accuracy, macro F1) tuple produced by evaluate_LF.
    '''
    # Encode each label set as integer class ids, then as one-hot rows.
    # NOTE(review): the module-level encoder `le` is re-fitted three times and
    # is left fitted on y2, while evaluate_LF later uses it to decode
    # predictions scored against y. This presumably relies on all three label
    # sets containing the same classes -- confirm.
    y = le.fit_transform(y)
    y = to_categorical(np.asarray(y))
    
    y1 = le.fit_transform(y1)
    y1 = to_categorical(np.asarray(y1))
    
    y2 = le.fit_transform(y2)
    y2 = to_categorical(np.asarray(y2))
    
    # Ordered (unshuffled) train / val / test split, identical for inputs and labels.
    X1_train, X1_val, X1_test = split_data(X1, VALIDATION_SPLIT)
    X2_train, X2_val, X2_test = split_data(X2, VALIDATION_SPLIT)
    y_train, y_val, y_test = split_data(y, VALIDATION_SPLIT)
    y1_train, y1_val, y1_test = split_data(y1, VALIDATION_SPLIT)
    y2_train, y2_val, y2_test = split_data(y2, VALIDATION_SPLIT)

    model_text = create_model_text(X1_train.shape[1:])
    model_image = create_model_image(X2_train.shape[1:])
    model_IF = create_model_IF(X1_train.shape[1:], X2_train.shape[1:])

    # One EarlyStopping instance per model, all watching val_loss.
    early_stopping1 = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    early_stopping2 = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    early_stopping3 = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)

    # save_best_only=True keeps only the best model seen so far on disk.
    # NOTE(review): assumes ./model_checkpoint/ exists or can be created -- confirm.
    checkpoint_text = ModelCheckpoint('./model_checkpoint/{}-text.h5'.format(name), save_best_only=True, verbose=verbose)
    checkpoint_image = ModelCheckpoint('./model_checkpoint/{}-image.h5'.format(name), save_best_only=True, verbose=verbose)
    checkpoint_IF = ModelCheckpoint('./model_checkpoint/{}-IF.h5'.format(name), save_best_only=True, verbose=verbose)

    # Each unimodal model is fitted and validated on its own modality's labels.
    history_text = model_text.fit(X1_train, y1_train, validation_data=(X1_val, y1_val), 
                        epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=verbose,
                        callbacks=[checkpoint_text, early_stopping1])
    
    history_image = model_image.fit(X2_train, y2_train, validation_data=(X2_val, y2_val), 
                        epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=verbose,
                        callbacks=[checkpoint_image, early_stopping2])
    
    # The fused model is fitted and validated on the multimodal labels.
    history_IF = model_IF.fit([X1_train, X2_train], y_train, validation_data=([X1_val, X2_val], y_val), 
                        epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=verbose,
                        callbacks=[checkpoint_IF, early_stopping3])
    
    # Note that the three validation losses are measured against different label sets.
    best_val_loss_text = np.min(history_text.history['val_loss'])
    best_val_loss_image = np.min(history_image.history['val_loss'])
    best_val_loss_IF = np.min(history_IF.history['val_loss'])
    
    # inverse=True: the model with the lowest validation loss gets the largest weight.
    weights = get_average_weights(best_val_loss_text, best_val_loss_image, best_val_loss_IF,
                                  inverse=True)

    # Replace the in-memory models with the checkpoints saved during training.
    model_text = load_model('./model_checkpoint/{}-text.h5'.format(name))
    model_image = load_model('./model_checkpoint/{}-image.h5'.format(name))
    model_IF = load_model('./model_checkpoint/{}-IF.h5'.format(name))

    y_pred_text = model_text.predict(X1_test)
    y_pred_image = model_image.predict(X2_test)
    y_pred_IF = model_IF.predict([X1_test, X2_test])
    
    # The order of the stacked predictions matches the order of `weights`.
    y_pred = weighted_average(weights, np.asarray([y_pred_text, y_pred_image, y_pred_IF], dtype='float32'))
    
    # np.argmin returns a 0-based index into the per-epoch history list.
    best_epoch_text = np.argmin(history_text.history['val_loss'])
    best_epoch_image = np.argmin(history_image.history['val_loss'])
    best_epoch_IF = np.argmin(history_IF.history['val_loss'])

    print('Checkpoint of text model loaded at epoch:', best_epoch_text)
    print('Checkpoint of image model loaded at epoch:', best_epoch_image)
    print('Checkpoint of IF model loaded at epoch:', best_epoch_IF)

    # The ensemble is scored against the multimodal test labels only.
    return evaluate_LF(y_test, y_pred, verbose=verbose)

In [14]:
def run_and_evaluate_HF_no_lstm(name, X1, X2, y, y1, y2, verbose=0):
    '''
    Same pipeline as run_and_evaluate_HF, but the text and fused models are
    built without an LSTM (create_model_text_no_lstm / create_model_IF_no_lstm).

    name: prefix of the checkpoint files written under ./model_checkpoint/
    X1: text input
    X2: image input
    y: multimodal labels
    y1: text labels
    y2: image labels
    verbose: 0 or 1 to print tracking on progress

    Returns the (accuracy, macro F1) tuple produced by evaluate_LF.
    '''
    # Encode each label set as integer class ids, then as one-hot rows.
    # NOTE(review): the module-level encoder `le` is re-fitted three times and
    # is left fitted on y2, while evaluate_LF later uses it to decode
    # predictions scored against y. This presumably relies on all three label
    # sets containing the same classes -- confirm.
    y = le.fit_transform(y)
    y = to_categorical(np.asarray(y))
    
    y1 = le.fit_transform(y1)
    y1 = to_categorical(np.asarray(y1))
    
    y2 = le.fit_transform(y2)
    y2 = to_categorical(np.asarray(y2))
    
    # Ordered (unshuffled) train / val / test split, identical for inputs and labels.
    X1_train, X1_val, X1_test = split_data(X1, VALIDATION_SPLIT)
    X2_train, X2_val, X2_test = split_data(X2, VALIDATION_SPLIT)
    y_train, y_val, y_test = split_data(y, VALIDATION_SPLIT)
    y1_train, y1_val, y1_test = split_data(y1, VALIDATION_SPLIT)
    y2_train, y2_val, y2_test = split_data(y2, VALIDATION_SPLIT)

    model_text = create_model_text_no_lstm(X1_train.shape[1:])
    model_image = create_model_image(X2_train.shape[1:])
    model_IF = create_model_IF_no_lstm(X1_train.shape[1:], X2_train.shape[1:])

    # One EarlyStopping instance per model, all watching val_loss.
    early_stopping1 = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    early_stopping2 = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    early_stopping3 = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)

    # save_best_only=True keeps only the best model seen so far on disk.
    # NOTE(review): assumes ./model_checkpoint/ exists or can be created -- confirm.
    checkpoint_text = ModelCheckpoint('./model_checkpoint/{}-text.h5'.format(name), save_best_only=True, verbose=verbose)
    checkpoint_image = ModelCheckpoint('./model_checkpoint/{}-image.h5'.format(name), save_best_only=True, verbose=verbose)
    checkpoint_IF = ModelCheckpoint('./model_checkpoint/{}-IF.h5'.format(name), save_best_only=True, verbose=verbose)

    # Each unimodal model is fitted and validated on its own modality's labels.
    history_text = model_text.fit(X1_train, y1_train, validation_data=(X1_val, y1_val), 
                        epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=verbose,
                        callbacks=[checkpoint_text, early_stopping1])
    
    history_image = model_image.fit(X2_train, y2_train, validation_data=(X2_val, y2_val), 
                        epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=verbose,
                        callbacks=[checkpoint_image, early_stopping2])
    
    # The fused model is fitted and validated on the multimodal labels.
    history_IF = model_IF.fit([X1_train, X2_train], y_train, validation_data=([X1_val, X2_val], y_val), 
                        epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=verbose,
                        callbacks=[checkpoint_IF, early_stopping3])
    
    # Note that the three validation losses are measured against different label sets.
    best_val_loss_text = np.min(history_text.history['val_loss'])
    best_val_loss_image = np.min(history_image.history['val_loss'])
    best_val_loss_IF = np.min(history_IF.history['val_loss'])
    
    # inverse=True: the model with the lowest validation loss gets the largest weight.
    weights = get_average_weights(best_val_loss_text, best_val_loss_image, best_val_loss_IF,
                                  inverse=True)

    # Replace the in-memory models with the checkpoints saved during training.
    model_text = load_model('./model_checkpoint/{}-text.h5'.format(name))
    model_image = load_model('./model_checkpoint/{}-image.h5'.format(name))
    model_IF = load_model('./model_checkpoint/{}-IF.h5'.format(name))

    y_pred_text = model_text.predict(X1_test)
    y_pred_image = model_image.predict(X2_test)
    y_pred_IF = model_IF.predict([X1_test, X2_test])
    
    # The order of the stacked predictions matches the order of `weights`.
    y_pred = weighted_average(weights, np.asarray([y_pred_text, y_pred_image, y_pred_IF], dtype='float32'))
    
    # np.argmin returns a 0-based index into the per-epoch history list.
    best_epoch_text = np.argmin(history_text.history['val_loss'])
    best_epoch_image = np.argmin(history_image.history['val_loss'])
    best_epoch_IF = np.argmin(history_IF.history['val_loss'])

    print('Checkpoint of text model loaded at epoch:', best_epoch_text)
    print('Checkpoint of image model loaded at epoch:', best_epoch_image)
    print('Checkpoint of IF model loaded at epoch:', best_epoch_IF)

    # The ensemble is scored against the multimodal test labels only.
    return evaluate_LF(y_test, y_pred, verbose=verbose)

In [15]:
def evaluate_LF(y_true, y_pred, verbose=0):
    """Score predicted class distributions against one-hot ground truth.

    y_true: one-hot encoded true labels, one row per sample
    y_pred: predicted class scores, one row per sample
    verbose: when 1, also print a classification report and plot the
             confusion matrix

    Both inputs are reduced with argmax and decoded through the module-level
    label encoder ``le``. Returns ``(accuracy, macro F1)``.
    """
    predicted = le.inverse_transform(y_pred.argmax(axis=1))
    expected = le.inverse_transform(y_true.argmax(axis=1))

    scores = (
        accuracy_score(expected, predicted),
        f1_score(expected, predicted, average='macro'),
    )

    # Nothing else to do unless the detailed report was requested.
    if verbose != 1:
        return scores

    print(classification_report(expected, predicted))
    ConfusionMatrixDisplay(
        confusion_matrix=confusion_matrix(expected, predicted, labels=list(le.classes_)),
        display_labels=list(le.classes_),
    ).plot()
    plt.show()

    return scores

In [16]:
def plot_metrics(history):
    """Plot train vs. validation curves for loss, accuracy and F1 side by side.

    history: object whose ``history`` dict holds the per-epoch lists
             'loss', 'accuracy', 'f1_score' and their 'val_' counterparts
             (as returned by ``model.fit``).
    """
    # (history key, panel title, y-axis label) for each of the three panels.
    panels = (
        ('loss', 'LOSS', 'loss'),
        ('accuracy', 'ACCURACY', 'accuracy'),
        ('f1_score', 'F1-SCORE', 'f1-score'),
    )

    fig = plt.figure(figsize=(20, 5))

    for position, (metric, title, axis_label) in enumerate(panels, start=1):
        fig.add_subplot(1, 3, position)
        plt.plot(history.history[metric])
        plt.plot(history.history['val_' + metric])
        plt.title(title)
        plt.ylabel(axis_label)
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='best')

    plt.show()

In [17]:
def style_dataframe(dataframe):
    """Return a Styler colouring the best and worst 'Accuracy' / 'F1-score'.

    Per column, the maximum is shown in lawngreen and the minimum in tomato.
    """
    score_columns = ('Accuracy', 'F1-score')
    best_marked = dataframe.style.highlight_max(
        subset=list(score_columns), props='color:lawngreen', axis=0)
    return best_marked.highlight_min(
        subset=list(score_columns), props='color:tomato', axis=0)

In [18]:
from IPython.display import display_html

def display_dataframes(dfs, names=[], index=False):
    """Render several tables side by side in the notebook output.

    Parameters
    ----------
    dfs : iterable
        Objects exposing ``to_html()`` (DataFrame, Styler, ...). A
        ``pd.Series`` is converted to a one-column DataFrame first.
    names : list of str, optional
        Captions shown in a header row above the tables, one per table.
        The row is omitted when empty. The default list is never mutated.
    index : bool, optional
        Unused by this function; kept so existing calls keep working.
    """
    def to_df(x):
        # A Series is wrapped in a DataFrame before rendering.
        return pd.DataFrame(x) if isinstance(x, pd.Series) else x

    html_str = ''
    if names:
        html_str += ('<tr>' +
                     ''.join(f'<td style="text-align:center">{name}</td>' for name in names) +
                     '</tr>')
    html_str += ('<tr>' +
                 ''.join(f'<td style="vertical-align:top"> {to_df(df).to_html()}</td>'
                         for df in dfs) +
                 '</tr>')
    html_str = f'<table>{html_str}</table>'

    # Only opening tags get the inline style. Replacing every occurrence of
    # 'table' (as before) also rewrote the closing tags into
    # '</table style=...>' and corrupted any cell or column text containing
    # the word "table".
    html_str = html_str.replace('<table', '<table style="display:inline"')
    display_html(html_str, raw=True)

# Load data

In [19]:
# Load every pre-extracted feature; the best combinations are chosen further down.
# Each call returns an (MVSA-Single, MVSA-Multiple) pair of arrays.

# Text features
mvsa_single_bert, mvsa_multiple_bert = load_mvsa_feature('bert-base')
mvsa_single_pos_bow, mvsa_multiple_pos_bow = load_mvsa_feature('pos-bow')
mvsa_single_pos_tfidf, mvsa_multiple_pos_tfidf = load_mvsa_feature('pos-tfidf')
mvsa_single_ner_bow, mvsa_multiple_ner_bow = load_mvsa_feature('ner-bow')
mvsa_single_ner_tfidf, mvsa_multiple_ner_tfidf = load_mvsa_feature('ner-tfidf')

# Image features
mvsa_single_vgg16, mvsa_multiple_vgg16 = load_mvsa_feature('vgg16')
mvsa_single_vgg19, mvsa_multiple_vgg19 = load_mvsa_feature('vgg19')
mvsa_single_resnet50, mvsa_multiple_resnet50 = load_mvsa_feature('resnet50')
mvsa_single_resnet101, mvsa_multiple_resnet101 = load_mvsa_feature('resnet101')
mvsa_single_resnet152, mvsa_multiple_resnet152 = load_mvsa_feature('resnet152')
mvsa_single_densenet121, mvsa_multiple_densenet121 = load_mvsa_feature('densenet121')
mvsa_single_densenet169, mvsa_multiple_densenet169 = load_mvsa_feature('densenet169')
mvsa_single_densenet201, mvsa_multiple_densenet201 = load_mvsa_feature('densenet201')

In [20]:
# Combined text features: BERT + POS TF-IDF, then additionally + NER TF-IDF.
# The three-part variant extends the two-part one column-wise, which yields
# the same columns in the same order as concatenating all three at once.
mvsa_single_bert_pos = np.concatenate(
    [mvsa_single_bert, mvsa_single_pos_tfidf], axis=1)
mvsa_single_bert_pos_ner = np.concatenate(
    [mvsa_single_bert_pos, mvsa_single_ner_tfidf], axis=1)

mvsa_multiple_bert_pos = np.concatenate(
    [mvsa_multiple_bert, mvsa_multiple_pos_tfidf], axis=1)
mvsa_multiple_bert_pos_ner = np.concatenate(
    [mvsa_multiple_bert_pos, mvsa_multiple_ner_tfidf], axis=1)

In [21]:
# Multimodal, text-only and image-only labels of each dataset.
(mvsa_single_multimodal_labels,
 mvsa_single_text_labels,
 mvsa_single_image_labels) = load_labels('../input/mvsa-features/labels/mvsa-single-labels.hdf5')

(mvsa_multiple_multimodal_labels,
 mvsa_multiple_text_labels,
 mvsa_multiple_image_labels) = load_labels('../input/mvsa-features/labels/mvsa-multiple-labels.hdf5')

In [22]:
# Selected text representation: BERT embedding followed by the POS TF-IDF vector.
mvsa_single_text_features = np.concatenate(
    [mvsa_single_bert, mvsa_single_pos_tfidf], axis=1)
mvsa_multiple_text_features = np.concatenate(
    [mvsa_multiple_bert, mvsa_multiple_pos_tfidf], axis=1)

# Selected image representation: the DenseNet-201 features, used as they are.
mvsa_single_image_features, mvsa_multiple_image_features = (
    mvsa_single_densenet201, mvsa_multiple_densenet201)

# Per-sample vector shapes (batch axis dropped).
print('Text vector shape:', mvsa_single_text_features.shape[1:])
print('Image vector shape:', mvsa_single_image_features.shape[1:])

Text vector shape: (802,)
Image vector shape: (1920,)


In [23]:
# prepare all features data
# feature_names[i] labels the [text features, image features] pair stored at
# index i of the two lists below; the three lists must stay aligned.
# Entries 0 and 1 use the same arrays: the run loops pick the LSTM or no-LSTM
# pipeline depending on whether 'lstm' appears in the name.

feature_names = ['resnet152-bert', 'resnet152-bert-lstm', 'densenet201-bert-pos-lstm', 'densenet201-bert-pos-ner-lstm']

mvsa_single_features = [[mvsa_single_bert, mvsa_single_resnet152],
                        [mvsa_single_bert, mvsa_single_resnet152],
                        [mvsa_single_bert_pos, mvsa_single_densenet201],
                        [mvsa_single_bert_pos_ner, mvsa_single_densenet201]]

mvsa_multiple_features = [[mvsa_multiple_bert, mvsa_multiple_resnet152],
                          [mvsa_multiple_bert, mvsa_multiple_resnet152],
                          [mvsa_multiple_bert_pos, mvsa_multiple_densenet201],
                          [mvsa_multiple_bert_pos_ner, mvsa_multiple_densenet201]]

In [24]:
# Shared label encoder, initially fitted on the MVSA-Single multimodal labels.
# The training functions re-fit it; evaluate_LF uses it to decode class ids.
le = preprocessing.LabelEncoder().fit(mvsa_single_multimodal_labels)

# Number of distinct classes seen by the encoder (3 according to the original note).
NUM_CLASSES = len(le.classes_)

In [25]:
# Seed all random number generators once, before any model is built.
reset_seeds()
EPOCHS = 100            # maximum number of training epochs per model
BATCH_SIZE = 128
VALIDATION_SPLIT = 0.1  # fraction of rows for validation and, again, for test (8:1:1)
# NOTE(review): the patience equals EPOCHS, so early stopping presumably
# never triggers within a run -- confirm this is intended.
EARLY_STOPPING = 100    # patience (in epochs) passed to EarlyStopping
NUM_LSTM = 128          # number of LSTM units in the text branch
DROPOUT_RATE = 0.1      # only referenced by commented-out Dropout layers in the code shown

In [26]:
print('MVSA-Single\n')
# Train and score every feature combination on MVSA-Single.
mvsa_single_scores = []

for i in range(len(feature_names)):
    print('MVSA-Single:', feature_names[i])
    # Combinations whose name contains 'lstm' use the LSTM text branch.
    runner = run_and_evaluate_HF if 'lstm' in feature_names[i] else run_and_evaluate_HF_no_lstm
    scores = runner('single-' + feature_names[i],
                    mvsa_single_features[i][0],
                    mvsa_single_features[i][1],
                    mvsa_single_multimodal_labels,
                    mvsa_single_text_labels,
                    mvsa_single_image_labels,
                    verbose=0)
    mvsa_single_scores.append(scores)
    print()

# One row per feature combination, columns (accuracy, macro F1).
df_single_scores = pd.DataFrame(mvsa_single_scores, columns=['Accuracy', 'F1-score'], index=feature_names)

MVSA-Single

MVSA-Single: resnet152-bert


2022-07-01 14:14:12.440562: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-01 14:14:12.441719: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-01 14:14:12.442405: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-01 14:14:12.443315: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Checkpoint of text model loaded at epoch: 37
Checkpoint of image model loaded at epoch: 93
Checkpoint of IF model loaded at epoch: 57

MVSA-Single: resnet152-bert-lstm


2022-07-01 14:15:03.816508: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Checkpoint of text model loaded at epoch: 10
Checkpoint of image model loaded at epoch: 93
Checkpoint of IF model loaded at epoch: 62

MVSA-Single: densenet201-bert-pos-lstm
Checkpoint of text model loaded at epoch: 6
Checkpoint of image model loaded at epoch: 6
Checkpoint of IF model loaded at epoch: 5

MVSA-Single: densenet201-bert-pos-ner-lstm
Checkpoint of text model loaded at epoch: 6
Checkpoint of image model loaded at epoch: 5
Checkpoint of IF model loaded at epoch: 8



In [27]:
print('MVSA-Multiple\n')
# Train and score every feature combination on MVSA-Multiple.
mvsa_multiple_scores = []

for i in range(len(feature_names)):
    print('MVSA-Multiple:', feature_names[i])
    # Combinations whose name contains 'lstm' use the LSTM text branch.
    runner = run_and_evaluate_HF if 'lstm' in feature_names[i] else run_and_evaluate_HF_no_lstm
    scores = runner('multiple-' + feature_names[i],
                    mvsa_multiple_features[i][0],
                    mvsa_multiple_features[i][1],
                    mvsa_multiple_multimodal_labels,
                    mvsa_multiple_text_labels,
                    mvsa_multiple_image_labels,
                    verbose=0)
    mvsa_multiple_scores.append(scores)
    print()

# One row per feature combination, columns (accuracy, macro F1).
df_multiple_scores = pd.DataFrame(mvsa_multiple_scores, columns=['Accuracy', 'F1-score'], index=feature_names)

MVSA-Multiple

MVSA-Multiple: resnet152-bert
Checkpoint of text model loaded at epoch: 13
Checkpoint of image model loaded at epoch: 24
Checkpoint of IF model loaded at epoch: 10

MVSA-Multiple: resnet152-bert-lstm
Checkpoint of text model loaded at epoch: 2
Checkpoint of image model loaded at epoch: 24
Checkpoint of IF model loaded at epoch: 76

MVSA-Multiple: densenet201-bert-pos-lstm
Checkpoint of text model loaded at epoch: 2
Checkpoint of image model loaded at epoch: 2
Checkpoint of IF model loaded at epoch: 2

MVSA-Multiple: densenet201-bert-pos-ner-lstm
Checkpoint of text model loaded at epoch: 3
Checkpoint of image model loaded at epoch: 2
Checkpoint of IF model loaded at epoch: 2



In [28]:
# Show both score tables side by side, with the best and worst values highlighted.
styled_score_tables = (
    style_dataframe(df_single_scores),
    style_dataframe(df_multiple_scores),
)
display_dataframes(styled_score_tables, names=['MVSA-Single', 'MVSA-Multiple'])

Unnamed: 0_level_0,Accuracy,F1-score
Unnamed: 0_level_1,Accuracy,F1-score
resnet152-bert,0.577726,0.443661
resnet152-bert-lstm,0.535963,0.393433
densenet201-bert-pos-lstm,0.642691,0.487239
densenet201-bert-pos-ner-lstm,0.638051,0.499243
resnet152-bert,0.639194,0.286297
resnet152-bert-lstm,0.641636,0.294535
densenet201-bert-pos-lstm,0.671551,0.48446
densenet201-bert-pos-ner-lstm,0.659951,0.481986
MVSA-Single,MVSA-Multiple,
Accuracy  F1-score  resnet152-bert  0.577726  0.443661  resnet152-bert-lstm  0.535963  0.393433  densenet201-bert-pos-lstm  0.642691  0.487239  densenet201-bert-pos-ner-lstm  0.638051  0.499243,Accuracy  F1-score  resnet152-bert  0.639194  0.286297  resnet152-bert-lstm  0.641636  0.294535  densenet201-bert-pos-lstm  0.671551  0.484460  densenet201-bert-pos-ner-lstm  0.659951  0.481986,

Unnamed: 0,Accuracy,F1-score
resnet152-bert,0.577726,0.443661
resnet152-bert-lstm,0.535963,0.393433
densenet201-bert-pos-lstm,0.642691,0.487239
densenet201-bert-pos-ner-lstm,0.638051,0.499243

Unnamed: 0,Accuracy,F1-score
resnet152-bert,0.639194,0.286297
resnet152-bert-lstm,0.641636,0.294535
densenet201-bert-pos-lstm,0.671551,0.48446
densenet201-bert-pos-ner-lstm,0.659951,0.481986
