<a href="https://www.kaggle.com/code/vincemarcs/mvsa-image-models?scriptVersionId=101004654" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
# Single random seed shared by reset_seeds() and the SMOTE oversamplers
# (passed as their random_state).
SEED = 61


import os
import re
import gc
import h5py
import torch
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
import tensorflow_addons as tfa

from tqdm import tqdm
from nltk import tokenize

from sklearn import preprocessing
from sklearn.decomposition import PCA
from IPython.display import display_html
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import BorderlineSMOTE

from keras import backend as K
from keras import initializers,regularizers,constraints
from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical
from keras.layers import Reshape, Input, Embedding, Flatten, Dense, Dropout, BatchNormalization, Activation, RepeatVector, Permute
from keras.layers import TimeDistributed, LSTM, GRU, Bidirectional, Convolution1D, MaxPooling1D, MaxPool2D, Convolution2D
from keras.layers.core import RepeatVector, Reshape
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.models import Sequential, Model, load_model
from sklearn.model_selection import cross_val_score

def reset_seeds():
    """Seed NumPy, Python's ``random`` module and TensorFlow from ``SEED``.

    ``SEED`` is also exported as the ``PYTHONHASHSEED`` environment variable.
    """
    seed_value = SEED
    os.environ["PYTHONHASHSEED"] = str(seed_value)
    # Each library keeps its own generator, so all of them are seeded.
    for seed_fn in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_fn(seed_value)

# from tensorflow.python.keras.layers import Layer, InputSpec, Lambda
# from tensorflow.keras import Model
# from attention import Attention_input1, Attention_input2
# from keras.optimizers import SGD, RMSprop, Adagrad

In [2]:
def read_hdf5(path):
    """Load every top-level dataset of an HDF5 file into memory.

    Args:
        path: Location of the HDF5 file to read.

    Returns:
        A list of ``(name, array)`` tuples, one per top-level key, in the
        order reported by the file's ``keys()``. Datasets whose dtype is
        ``object`` are decoded from UTF-8 bytes into an array of ``str``.
    """
    loaded_data = []

    # The context manager closes the file handle even when a read fails;
    # previously the handle was never closed.
    with h5py.File(path, 'r') as read_file:
        for name in read_file.keys():
            # [:] copies the dataset into memory, so the result stays valid
            # after the file is closed.
            dataset = read_file[name][:]
            if dataset.dtype == np.dtype('object'):
                dataset = np.array([x.decode('UTF-8') for x in dataset])
            loaded_data.append((name, dataset))

    return loaded_data

def loadz(path):
    """Return the array stored under ``'arr_0'`` in an ``.npz`` archive.

    Args:
        path: Location of the ``.npz`` file.

    Returns:
        The array saved under the key ``'arr_0'``.
    """
    # np.load keeps the archive open until it is closed explicitly; the
    # context manager releases the file handle once the array is read.
    with np.load(path) as archive:
        return archive['arr_0']

In [3]:
def load_labels(path):
    """Read the three label arrays from an MVSA labels HDF5 file.

    Args:
        path: Location of the HDF5 file, read through ``read_hdf5``.

    Returns:
        A tuple ``(labels, text_labels, image_labels)`` holding the datasets
        named ``'multimodal-labels'``, ``'text-labels'`` and
        ``'image-labels'`` respectively.

    Raises:
        KeyError: If any of the three datasets is absent from the file.
    """
    datasets = dict(read_hdf5(path))
    required = ('multimodal-labels', 'text-labels', 'image-labels')

    # Fail with a clear message instead of an UnboundLocalError at return.
    missing = [key for key in required if key not in datasets]
    if missing:
        raise KeyError('{} is missing label dataset(s): {}'.format(path, ', '.join(missing)))

    labels, text_labels, image_labels = (datasets[key] for key in required)
    return labels, text_labels, image_labels

def load_mvsa_feature(feature_name, merge=False):
    """Load the MVSA-Single and MVSA-Multiple arrays for one feature set.

    The files are read from
    ``../input/mvsa-features/<feature_name>/mvsa-{single,multiple}-<feature_name>.npz``.

    Args:
        feature_name: Name of the feature folder / file suffix (e.g. ``'vgg16'``).
        merge: When truthy, return one array with both subsets concatenated
            (single first) instead of a pair.

    Returns:
        ``(mvsa_single, mvsa_multiple)``, or the merged array when ``merge``
        is truthy.
    """
    folder_path = os.path.join('../input/mvsa-features/', feature_name)
    single_file = 'mvsa-single-{}.npz'.format(feature_name)
    multiple_file = 'mvsa-multiple-{}.npz'.format(feature_name)
    mvsa_single = loadz(os.path.join(folder_path, single_file))
    mvsa_multiple = loadz(os.path.join(folder_path, multiple_file))

    if merge:
        return merge_mvsa(mvsa_single, mvsa_multiple)

    return mvsa_single, mvsa_multiple

def load_mvsa_images(merge=False):
    """Load the raw MVSA image arrays from ``../input/mvsa-data``.

    Every ``.npz`` file in the folder is considered. A file whose second
    dash-separated name component is ``'single'`` is taken as MVSA-Single;
    any other ``.npz`` file is taken as MVSA-Multiple.

    Args:
        merge: When truthy, return one array with both subsets concatenated
            (single first) instead of a pair.

    Returns:
        ``(mvsa_single, mvsa_multiple)``, or the merged array when ``merge``
        is truthy.

    Raises:
        FileNotFoundError: If either archive cannot be found in the folder.
    """
    folder_path = '../input/mvsa-data'
    mvsa_single_images_path = None
    mvsa_multiple_images_path = None

    for file_name in os.listdir(folder_path):
        # splitext tolerates names without an extension or with extra dots,
        # where indexing the result of split('.') would raise or mis-detect.
        if os.path.splitext(file_name)[1] != '.npz':
            continue
        name_parts = file_name.split('-')
        full_path = os.path.join(folder_path, file_name)
        if len(name_parts) > 1 and name_parts[1] == 'single':
            mvsa_single_images_path = full_path
        else:
            mvsa_multiple_images_path = full_path

    if mvsa_single_images_path is None or mvsa_multiple_images_path is None:
        raise FileNotFoundError(
            'Expected both a single and a multiple .npz archive in {}'.format(folder_path))

    mvsa_single = loadz(mvsa_single_images_path)
    mvsa_multiple = loadz(mvsa_multiple_images_path)

    if merge:
        return merge_mvsa(mvsa_single, mvsa_multiple)

    return mvsa_single, mvsa_multiple

def merge_mvsa(mvsa_single, mvsa_multiple):
    """Stack the two arrays along the first axis, ``mvsa_single`` first."""
    stacked = np.concatenate([mvsa_single, mvsa_multiple])
    return stacked

In [4]:
def plot_metrics(history):
    """Plot train/validation curves for loss, accuracy and both F1 scores.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch values. It must contain ``loss``, ``accuracy``,
            ``f1_macro``, ``f1_weighted`` and their ``val_``-prefixed twins.
    """
    # (history key, panel title, y-axis label), in left-to-right order.
    panels = (
        ('loss', 'LOSS', 'loss'),
        ('accuracy', 'ACCURACY', 'accuracy'),
        ('f1_macro', 'Macro F1-SCORE', 'f1-macro'),
        ('f1_weighted', 'Weighted F1-SCORE', 'f1-weighted'),
    )

    fig = plt.figure(figsize=(20, 5))
    curves = history.history

    for position, (metric, title, y_label) in enumerate(panels, start=1):
        fig.add_subplot(1, 4, position)
        plt.plot(curves[metric])
        plt.plot(curves['val_' + metric])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='best')

    plt.show()

In [5]:
# e.g. validation_split=0.1 -----> 8:1:1 ratio of train, val, test
def split_data(data, validation_split):
    """Split a sequence into consecutive train / validation / test slices.

    The validation and test parts each hold
    ``int(validation_split * len(data))`` items taken from the end of
    ``data``; everything before them is the training part.

    Args:
        data: Any sliceable sequence supporting ``len`` (list, NumPy array, ...).
        validation_split: Fraction of the data used for validation, and the
            same fraction again for test.

    Returns:
        A tuple ``(data_train, data_val, data_test)``.
    """
    total = len(data)
    num_val = int(validation_split * total)

    # Explicit non-negative boundaries: negative slicing breaks when
    # num_val == 0, because data[:-0] is empty and data[-0:] is everything.
    val_start = max(total - 2 * num_val, 0)
    test_start = max(total - num_val, 0)

    data_train = data[:val_start]
    data_val = data[val_start:test_start]
    data_test = data[test_start:]
    return data_train, data_val, data_test

# e.g. validation_split=0.1 -----> 8:1:1 ratio of train, val, test
def split_tf_data(data, validation_split):
    """Split a dataset exposing ``take``/``skip`` into train, val and test.

    The first ``int((1 - 2 * validation_split) * len(data))`` elements form
    the training part. Of what remains, the first
    ``int(validation_split * len(data))`` elements are the test part and
    everything after them is the validation part.

    Args:
        data: Dataset supporting ``len``, ``take`` and ``skip``.
        validation_split: Fraction used for the held-out parts.

    Returns:
        A tuple ``(train_dataset, val_dataset, test_dataset)``.
    """
    total = len(data)
    n_train = int((1-validation_split*2) * total)
    n_holdout = int(validation_split * total)

    held_out = data.skip(n_train)
    return data.take(n_train), held_out.skip(n_holdout), held_out.take(n_holdout)

In [6]:
def create_model_pretrained(input_shape):
    """Build and compile the attention-style classifier for extracted features.

    Args:
        input_shape: Shape of one input sample (without the batch dimension).

    Returns:
        A compiled Keras ``Model`` using Adam, categorical cross-entropy and
        the metrics accuracy, ``f1_macro`` and ``f1_weighted``.
    """
    tracked_metrics = ['accuracy']
    tracked_metrics.append(
        tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='macro', name='f1_macro'))
    tracked_metrics.append(
        tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='weighted', name='f1_weighted'))

    features = Input(shape=input_shape)

    # soft attention: score -> softmax -> repeat NUM_HIDDEN times -> flatten
    scores = Dense(1, activation='tanh')(features)
    scores = Flatten()(scores)
    weights = Activation('softmax')(scores)
    weights = RepeatVector(NUM_HIDDEN)(weights)
    weights = Permute([2, 1])(weights)
    weights = Flatten()(weights)

    # NOTE(review): the output layer consumes only the attention tensor; the
    # input features are never combined with it. Confirm this is intended.
    predictions = Dense(NUM_CLASSES, activation='softmax')(weights)

    classifier = Model(features, predictions)
    classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=tracked_metrics)
    return classifier

def create_model_cnn(input_shape):
    """Build and compile a three-stage convolutional classifier.

    Each stage is a 5x5 ``Convolution2D`` (32, 64, then 128 filters, ReLU)
    followed by 2x2 max pooling. The result is flattened and passed through
    a 64-unit ReLU layer and a softmax output with ``NUM_CLASSES`` units.

    Args:
        input_shape: Shape of one input sample (without the batch dimension).

    Returns:
        A compiled Keras ``Model`` using Adam, categorical cross-entropy and
        the metrics accuracy, ``f1_macro`` and ``f1_weighted``.
    """
    tracked_metrics = ['accuracy']
    tracked_metrics.append(
        tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='macro', name='f1_macro'))
    tracked_metrics.append(
        tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='weighted', name='f1_weighted'))

    image_inputs = Input(shape=input_shape)

    x = image_inputs
    for n_filters in (32, 64, 128):
        x = Convolution2D(n_filters, kernel_size=(5,5), strides=(1,1), activation='relu')(x)
        x = MaxPool2D(pool_size=(2,2))(x)

    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    predictions = Dense(NUM_CLASSES, activation='softmax')(x)

    classifier = Model(image_inputs, predictions)
    classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=tracked_metrics)
    return classifier

In [7]:
def evaluate_model(model, X_test, y_test, checkpoint=None, verbose=1):
    """Evaluate a model on test data, optionally reloading a saved checkpoint.

    Args:
        model: Model to evaluate; ignored when ``checkpoint`` is given.
        X_test: Test inputs passed to ``model.evaluate``.
        y_test: Test targets passed to ``model.evaluate``.
        checkpoint: If not ``None``, the model is loaded from
            ``./model_checkpoint/<checkpoint>.h5`` and evaluated instead.
        verbose: Forwarded to ``model.evaluate``; the scores are also printed
            when it equals 1.

    Returns:
        A tuple ``(loss, acc, f1_macro, f1_weighted)``.
    """
    if checkpoint is not None:
        model = load_model('./model_checkpoint/{}.h5'.format(checkpoint))

    scores = model.evaluate(X_test, y_test, verbose=verbose)
    loss, acc, f1_macro, f1_weighted = scores

    if verbose == 1:
        report = (
            ('Loss:', loss),
            ('Accuracy:', acc),
            ('Macro F1-score:', f1_macro),
            ('Weighted F1-score:', f1_weighted),
        )
        for label, value in report:
            print(label, value)

    return loss, acc, f1_macro, f1_weighted

In [8]:
def run_and_evaluate(name, X, y, verbose=0):
    """Train the attention-style model on one feature set and score it.

    Steps: encode the labels with the global ``le`` and one-hot them, shuffle,
    split with ``split_data``, oversample the training part with
    BorderlineSMOTE, fit with checkpointing and early stopping, then evaluate
    the saved checkpoint on the test part.

    Args:
        name: Checkpoint name; the model is saved to
            ``./model_checkpoint/<name>.h5``.
        X: Feature array, indexable by an integer index array.
        y: Raw labels aligned with ``X``.
        verbose: Verbosity forwarded to Keras; when 1 the best epoch is printed.

    Returns:
        ``(history, scores)`` where ``scores`` is the tuple returned by
        ``evaluate_model``.
    """
    encoded = le.fit_transform(y)
    one_hot = to_categorical(np.asarray(encoded))

    # Shuffle features and targets with the same permutation.
    order = np.random.permutation(len(one_hot))
    X_shuffled, y_shuffled = X[order], one_hot[order]

    X_train, X_val, X_test = split_data(X_shuffled, VALIDATION_SPLIT)
    y_train, y_val, y_test = split_data(y_shuffled, VALIDATION_SPLIT)

    # Only the training part is oversampled.
    sampler = BorderlineSMOTE(sampling_strategy='all', random_state=SEED, kind='borderline-1')
    X_train, y_train = sampler.fit_resample(X_train, y_train)

    model = create_model_pretrained(X_train.shape[1:])
    stopper = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    saver = ModelCheckpoint('./model_checkpoint/{}.h5'.format(name), save_best_only=True, verbose=verbose)

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        verbose=verbose,
        callbacks=[saver, stopper],
    )

    if verbose == 1:
        best_epoch = np.argmin(history.history['val_loss'])
        print('Checkpoint loaded at epoch:', best_epoch)

    test_scores = evaluate_model(model, X_test, y_test, checkpoint=name, verbose=verbose)
    return history, test_scores

In [9]:
def run_and_evaluate_cnn(name, X, y, verbose=0):
    """Train the CNN model on image data and score it on the test part.

    Steps: encode the labels with the global ``le`` and one-hot them, shuffle,
    split with ``split_data``, oversample the training part with
    BorderlineSMOTE, fit with checkpointing and early stopping, then evaluate
    the saved checkpoint on the test part.

    Args:
        name: Checkpoint name; the model is saved to
            ``./model_checkpoint/<name>.h5``.
        X: Image array, indexable by an integer index array.
        y: Raw labels aligned with ``X``.
        verbose: Verbosity forwarded to Keras; when 1 the best epoch is printed.

    Returns:
        ``(history, scores)`` where ``scores`` is the tuple returned by
        ``evaluate_model``, matching ``run_and_evaluate`` so callers can
        unpack both the same way.
    """
    y = le.fit_transform(y)
    y = to_categorical(np.asarray(y))

    random_idx = np.random.permutation(len(y))
    X, y = X[random_idx], y[random_idx]

    X_train, X_val, X_test = split_data(X, VALIDATION_SPLIT)
    y_train, y_val, y_test = split_data(y, VALIDATION_SPLIT)

    oversample = BorderlineSMOTE(sampling_strategy='minority', random_state=SEED, kind='borderline-1')
    # The sampler works on 2-D (n_samples, n_features) input, so each sample
    # is flattened for resampling and restored to its original shape after.
    sample_shape = tuple(X_train.shape[1:])
    flat_width = int(np.prod(sample_shape))
    X_flat, y_train = oversample.fit_resample(X_train.reshape(len(X_train), flat_width), y_train)
    X_train = X_flat.reshape((len(X_flat),) + sample_shape)

    train_set = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(BATCH_SIZE)
    val_set = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(BATCH_SIZE)

    model = create_model_cnn(sample_shape)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    checkpoint = ModelCheckpoint('./model_checkpoint/{}.h5'.format(name), save_best_only=True, verbose=verbose)

    # The datasets are already batched, so batch_size is not passed to fit();
    # Keras rejects batch_size for dataset inputs.
    history = model.fit(train_set, validation_data=val_set,
                        epochs=EPOCHS, verbose=verbose,
                        callbacks=[checkpoint, early_stopping])
    if verbose == 1:
        best_epoch = np.argmin(history.history['val_loss'])
        print('Checkpoint loaded at epoch:', best_epoch)

    # The function used to return None, which broke the callers'
    # `history, score = ...` unpacking.
    return history, evaluate_model(model, X_test, y_test, checkpoint=name, verbose=verbose)

In [10]:
def style_dataframe(dataframe):
    return dataframe.style.highlight_max(subset=['Accuracy', 'F1-macro', 'F1-weighted'], props='color:lawngreen', axis=0)\
                          .highlight_min(subset=['Accuracy', 'F1-macro', 'F1-weighted'], props='color:tomato', axis=0)\
                          .highlight_min(subset=['Loss'], props='color:lawngreen', axis=0)\
                          .highlight_max(subset=['Loss'], props='color:tomato', axis=0)

def display_dataframes(dfs, names=None, index=False):
    """Render several DataFrames (or Stylers / Series) side by side as HTML.

    Args:
        dfs: Iterable of objects exposing ``to_html()``; a ``pd.Series`` is
            first converted to a DataFrame.
        names: Optional iterable of captions, one per entry of ``dfs``,
            shown in a header row. No header row is emitted when empty/None.
        index: Currently unused; kept for backward compatibility.
    """
    def to_df(x):
        return pd.DataFrame(x) if isinstance(x, pd.Series) else x

    rows = []
    if names:
        rows.append('<tr>' +
                    ''.join(f'<td style="text-align:center">{name}</td>' for name in names) +
                    '</tr>')
    rows.append('<tr>' +
                ''.join(f'<td style="vertical-align:top"> {to_df(df).to_html()}</td>'
                        for df in dfs) +
                '</tr>')
    html_str = '<table>{}</table>'.format(''.join(rows))
    # Only opening tags get the inline style; replacing every occurrence of
    # 'table' also rewrote closing tags into invalid '</table style=...>'.
    html_str = html_str.replace('<table', '<table style="display:inline"')
    display_html(html_str, raw=True)

# Load data

In [11]:
# Each labels file yields (multimodal, text, image) label arrays.
(mvsa_single_multimodal_labels,
 mvsa_single_text_labels,
 mvsa_single_image_labels) = load_labels('../input/mvsa-features/labels/mvsa-single-labels.hdf5')
(mvsa_multiple_multimodal_labels,
 mvsa_multiple_text_labels,
 mvsa_multiple_image_labels) = load_labels('../input/mvsa-features/labels/mvsa-multiple-labels.hdf5')

# Merged variants: MVSA-Single entries first, then MVSA-Multiple.
mvsa_multimodal_labels = merge_mvsa(mvsa_single_multimodal_labels, mvsa_multiple_multimodal_labels)
mvsa_text_labels = merge_mvsa(mvsa_single_text_labels, mvsa_multiple_text_labels)
mvsa_image_labels = merge_mvsa(mvsa_single_image_labels, mvsa_multiple_image_labels)

# Shared label encoder, fitted on the merged multimodal labels.
le = preprocessing.LabelEncoder().fit(mvsa_multimodal_labels)
NUM_CLASSES = len(le.classes_) # =3

In [12]:
# Load every pretrained feature set once and keep three parallel lists
# (single, multiple, merged), all aligned with feature_names.
feature_names = [
    'xception',
    'vgg16', 'vgg19',
    'resnet50', 'resnet101', 'resnet152',
    'densenet121', 'densenet169', 'densenet201',
]

mvsa_single_features, mvsa_multiple_features, mvsa_features = [], [], []

for name in tqdm(feature_names):
    data = load_mvsa_feature(name)
    single_part, multiple_part = data[0], data[1]
    merge_data = merge_mvsa(single_part, multiple_part)

    mvsa_single_features.append(single_part)
    mvsa_multiple_features.append(multiple_part)
    mvsa_features.append(merge_data)

100%|██████████| 9/9 [00:15<00:00,  1.67s/it]


# Run models and display evaluation

In [13]:
# Seed all random generators before the experiment cells run.
reset_seeds()
EPOCHS = 100  # epochs passed to model.fit
BATCH_SIZE = 128  # batch size used for training (and for batching the CNN datasets)
VALIDATION_SPLIT = 0.1  # fraction held out for validation, and again for test (see split_data)
EARLY_STOPPING = 10  # patience given to the EarlyStopping callback
NUM_HIDDEN = 128  # repeat count for RepeatVector in create_model_pretrained

## With original image labels

In [14]:
# Train one model per feature set on each MVSA subset, using the original
# image labels, and collect the test scores into DataFrames.
score_columns = ['Loss', 'Accuracy', 'F1-macro', 'F1-weighted']

print('MVSA-Single: With original image labels')
mvsa_single_histories = []
mvsa_single_scores = []
for feature_name, features in zip(feature_names, mvsa_single_features):
    print('MVSA-Single:', feature_name)
    # The 'cnn' entry is routed to the CNN runner; all others to run_and_evaluate.
    runner = run_and_evaluate_cnn if feature_name == 'cnn' else run_and_evaluate
    history, score = runner('single-OL-' + feature_name, features, mvsa_single_image_labels, verbose=0)
    mvsa_single_histories.append(history)
    mvsa_single_scores.append(score)
    print()
df_single_scores = pd.DataFrame(mvsa_single_scores, columns=score_columns, index=feature_names)

print('MVSA-Multiple: With original image labels')
mvsa_multiple_histories = []
mvsa_multiple_scores = []
for feature_name, features in zip(feature_names, mvsa_multiple_features):
    if feature_name == 'cnn':
        history, score = run_and_evaluate_cnn('multiple-OL-' + feature_name, features, mvsa_multiple_image_labels, verbose=1)
    else:
        history, score = run_and_evaluate('multiple-OL-' + feature_name, features, mvsa_multiple_image_labels, verbose=0)
    mvsa_multiple_histories.append(history)
    mvsa_multiple_scores.append(score)
    print()
df_multiple_scores = pd.DataFrame(mvsa_multiple_scores, columns=score_columns, index=feature_names)

# Element-wise mean of the two score tables.
mvsa_average_scores = np.mean([mvsa_single_scores, mvsa_multiple_scores], axis=0)
df_average_scores = pd.DataFrame(mvsa_average_scores, columns=score_columns, index=feature_names)

MVSA-Single: With original image labels
MVSA-Single: xception


2022-07-16 19:11:00.146817: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-16 19:11:00.147999: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-16 19:11:00.148713: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-16 19:11:00.150018: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil


MVSA-Single: vgg16

MVSA-Single: vgg19

MVSA-Single: resnet50

MVSA-Single: resnet101

MVSA-Single: resnet152

MVSA-Single: densenet121

MVSA-Single: densenet169

MVSA-Single: densenet201

MVSA-Multiple: With original image labels



2022-07-16 19:12:47.153854: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 373161984 exceeds 10% of free system memory.
2022-07-16 19:12:47.550397: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 373161984 exceeds 10% of free system memory.





2022-07-16 19:13:29.348007: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 375963648 exceeds 10% of free system memory.
2022-07-16 19:13:29.743758: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 375963648 exceeds 10% of free system memory.











## With multimodal labels

In [15]:
# Same experiment as with the image labels, but trained against the
# multimodal labels of each MVSA subset.
score_columns = ['Loss', 'Accuracy', 'F1-macro', 'F1-weighted']

print('MVSA-Single: With multimodal labels')
mvsa_single_histories2 = []
mvsa_single_scores2 = []
for feature_name, features in zip(feature_names, mvsa_single_features):
    print('MVSA-Single:', feature_name)
    history, score = run_and_evaluate('single-ML-' + feature_name, features, mvsa_single_multimodal_labels, verbose=0)
    mvsa_single_histories2.append(history)
    mvsa_single_scores2.append(score)
    print()
df_single_scores2 = pd.DataFrame(mvsa_single_scores2, columns=score_columns, index=feature_names)

print('MVSA-Multiple: With multimodal labels')
mvsa_multiple_histories2 = []
mvsa_multiple_scores2 = []
for feature_name, features in zip(feature_names, mvsa_multiple_features):
    print('MVSA-Multiple:', feature_name)
    history, score = run_and_evaluate('multiple-ML-' + feature_name, features, mvsa_multiple_multimodal_labels, verbose=0)
    mvsa_multiple_histories2.append(history)
    mvsa_multiple_scores2.append(score)
    print()
df_multiple_scores2 = pd.DataFrame(mvsa_multiple_scores2, columns=score_columns, index=feature_names)

# Element-wise mean of the two score tables.
mvsa_average_scores2 = np.mean([mvsa_single_scores2, mvsa_multiple_scores2], axis=0)
df_average_scores2 = pd.DataFrame(mvsa_average_scores2, columns=score_columns, index=feature_names)

MVSA-Single: With multimodal labels
MVSA-Single: xception

MVSA-Single: vgg16

MVSA-Single: vgg19

MVSA-Single: resnet50

MVSA-Single: resnet101

MVSA-Single: resnet152

MVSA-Single: densenet121

MVSA-Single: densenet169

MVSA-Single: densenet201

MVSA-Multiple: With multimodal labels
MVSA-Multiple: xception

MVSA-Multiple: vgg16


2022-07-16 19:17:42.664529: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 445956096 exceeds 10% of free system memory.



MVSA-Multiple: vgg19

MVSA-Multiple: resnet50

MVSA-Multiple: resnet101

MVSA-Multiple: resnet152

MVSA-Multiple: densenet121

MVSA-Multiple: densenet169

MVSA-Multiple: densenet201



## With merged MVSA data

In [16]:
# Experiments on the merged (single + multiple) data: first with the
# original image labels, then with the multimodal labels.
score_columns = ['Loss', 'Accuracy', 'F1-macro', 'F1-weighted']

print('Both MVSA: With original image labels')
mvsa_histories3 = []
mvsa_scores3 = []
for feature_name, features in zip(feature_names, mvsa_features):
    print('Both MVSA:', feature_name)
    history, score = run_and_evaluate('merge-OL-' + feature_name, features, mvsa_image_labels, verbose=0)
    mvsa_histories3.append(history)
    mvsa_scores3.append(score)
    print()
df_scores3 = pd.DataFrame(mvsa_scores3, columns=score_columns, index=feature_names)

print('Both MVSA: With multimodal labels')
mvsa_histories4 = []
mvsa_scores4 = []
for feature_name, features in zip(feature_names, mvsa_features):
    print('Both MVSA:', feature_name)
    history, score = run_and_evaluate('merge-ML-' + feature_name, features, mvsa_multimodal_labels, verbose=0)
    mvsa_histories4.append(history)
    mvsa_scores4.append(score)
    print()
df_scores4 = pd.DataFrame(mvsa_scores4, columns=score_columns, index=feature_names)

Both MVSA: With original image labels
Both MVSA: xception

Both MVSA: vgg16

Both MVSA: vgg19

Both MVSA: resnet50

Both MVSA: resnet101

Both MVSA: resnet152

Both MVSA: densenet121

Both MVSA: densenet169

Both MVSA: densenet201

Both MVSA: With multimodal labels
Both MVSA: xception

Both MVSA: vgg16

Both MVSA: vgg19

Both MVSA: resnet50

Both MVSA: resnet101

Both MVSA: resnet152

Both MVSA: densenet121

Both MVSA: densenet169

Both MVSA: densenet201



# Display results

In [17]:
# Side-by-side score tables for the runs trained on the original image labels.
print('With original image labels\n')
display_dataframes(
    tuple(style_dataframe(scores) for scores in (df_single_scores, df_multiple_scores, df_average_scores)),
    names=['MVSA-Single', 'MVSA-Multiple', 'Average'],
)

With original image labels



Unnamed: 0_level_0,Loss,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Loss,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_2,Loss,Accuracy,F1-macro,F1-weighted
xception,1.054548,0.538803,0.233429,0.377317
vgg16,1.062425,0.569845,0.241996,0.413701
vgg19,1.052269,0.541020,0.234053,0.379882
resnet50,1.074689,0.521064,0.228377,0.356997
resnet101,1.045285,0.545455,0.235294,0.385027
resnet152,1.053954,0.534368,0.232177,0.372204
densenet121,1.082569,0.527716,0.230285,0.364576
densenet169,1.063227,0.574279,0.243192,0.418981
densenet201,1.077033,0.560976,0.239583,0.403201
xception,1.029477,0.565805,0.240901,0.408908

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,1.054548,0.538803,0.233429,0.377317
vgg16,1.062425,0.569845,0.241996,0.413701
vgg19,1.052269,0.54102,0.234053,0.379882
resnet50,1.074689,0.521064,0.228377,0.356997
resnet101,1.045285,0.545455,0.235294,0.385027
resnet152,1.053954,0.534368,0.232177,0.372204
densenet121,1.082569,0.527716,0.230285,0.364576
densenet169,1.063227,0.574279,0.243192,0.418981
densenet201,1.077033,0.560976,0.239583,0.403201

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,1.029477,0.565805,0.240901,0.408908
vgg16,1.019414,0.576381,0.243757,0.42149
vgg19,1.033594,0.555817,0.238167,0.397132
resnet50,1.012134,0.554642,0.237843,0.395753
resnet101,1.056545,0.384841,0.185264,0.213891
resnet152,1.043184,0.548179,0.236053,0.388198
densenet121,1.017849,0.554054,0.237681,0.395065
densenet169,1.043015,0.554642,0.237843,0.395753
densenet201,1.01663,0.554642,0.237843,0.395753

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,1.042013,0.552304,0.237165,0.393113
vgg16,1.04092,0.573113,0.242877,0.417596
vgg19,1.042931,0.548418,0.23611,0.388507
resnet50,1.043412,0.537853,0.23311,0.376375
resnet101,1.050915,0.465148,0.210279,0.299459
resnet152,1.048569,0.541273,0.234115,0.380201
densenet121,1.050209,0.540885,0.233983,0.37982
densenet169,1.053121,0.56446,0.240518,0.407367
densenet201,1.046831,0.557809,0.238713,0.399477


In [18]:
# Side-by-side score tables for the runs trained on the multimodal labels.
print('With multimodal labels\n')
display_dataframes(
    tuple(style_dataframe(scores) for scores in (df_single_scores2, df_multiple_scores2, df_average_scores2)),
    names=['MVSA-Single', 'MVSA-Multiple', 'Average'],
)

With multimodal labels



Unnamed: 0_level_0,Loss,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Loss,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_2,Loss,Accuracy,F1-macro,F1-weighted
xception,1.037735,0.549889,0.236528,0.390193
vgg16,1.063173,0.576497,0.243788,0.421629
vgg19,1.034420,0.611973,0.253095,0.464662
resnet50,1.046799,0.598670,0.249653,0.448379
resnet101,1.075334,0.603104,0.250807,0.453788
resnet152,1.059452,0.636364,0.259259,0.494949
densenet121,1.042632,0.587583,0.246741,0.434943
densenet169,1.054547,0.589800,0.247327,0.43762
densenet201,1.041065,0.589800,0.247327,0.43762
xception,1.033827,0.667450,0.266855,0.534336

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,1.037735,0.549889,0.236528,0.390193
vgg16,1.063173,0.576497,0.243788,0.421629
vgg19,1.03442,0.611973,0.253095,0.464662
resnet50,1.046799,0.59867,0.249653,0.448379
resnet101,1.075334,0.603104,0.250807,0.453788
resnet152,1.059452,0.636364,0.259259,0.494949
densenet121,1.042632,0.587583,0.246741,0.434943
densenet169,1.054547,0.5898,0.247327,0.43762
densenet201,1.041065,0.5898,0.247327,0.43762

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,1.033827,0.66745,0.266855,0.534336
vgg16,1.033603,0.6604,0.265157,0.525328
vgg19,0.986844,0.670388,0.267558,0.538102
resnet50,1.042523,0.66745,0.266855,0.534336
resnet101,1.059898,0.671563,0.267838,0.539611
resnet152,1.008665,0.678613,0.269513,0.548686
densenet121,1.01773,0.673913,0.268398,0.542631
densenet169,1.019091,0.653349,0.263445,0.516364
densenet201,1.048005,0.680964,0.270069,0.551721

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,1.035781,0.60867,0.251691,0.462265
vgg16,1.048388,0.618448,0.254473,0.473479
vgg19,1.010632,0.641181,0.260326,0.501382
resnet50,1.044661,0.63306,0.258254,0.491358
resnet101,1.067616,0.637334,0.259323,0.496699
resnet152,1.034058,0.657489,0.264386,0.521818
densenet121,1.030181,0.630748,0.25757,0.488787
densenet169,1.036819,0.621575,0.255386,0.476992
densenet201,1.044535,0.635382,0.258698,0.494671


In [19]:
# Side-by-side score tables for the runs on the merged MVSA data.
print('With both MVSA merged together\n')
display_dataframes(
    tuple(style_dataframe(scores) for scores in (df_scores3, df_scores4)),
    names=['Original image labels', 'Multimodal labels'],
)

With both MVSA merged together



Unnamed: 0_level_0,Loss,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Loss,Accuracy,F1-macro,F1-weighted
xception,1.049950,0.552253,0.237183,0.392955
vgg16,1.063368,0.354854,0.174609,0.185882
vgg19,1.030849,0.563864,0.240372,0.406612
resnet50,1.051347,0.558755,0.238975,0.400586
resnet101,1.054521,0.558291,0.238848,0.400039
resnet152,1.042467,0.572225,0.242639,0.416532
densenet121,1.058003,0.570831,0.242263,0.414874
densenet169,1.042707,0.557826,0.23872,0.399493
densenet201,1.075828,0.559684,0.23923,0.401679
xception,1.019876,0.660938,0.265287,0.526015

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,1.04995,0.552253,0.237183,0.392955
vgg16,1.063368,0.354854,0.174609,0.185882
vgg19,1.030849,0.563864,0.240372,0.406612
resnet50,1.051347,0.558755,0.238975,0.400586
resnet101,1.054521,0.558291,0.238848,0.400039
resnet152,1.042467,0.572225,0.242639,0.416532
densenet121,1.058003,0.570831,0.242263,0.414874
densenet169,1.042707,0.557826,0.23872,0.399493
densenet201,1.075828,0.559684,0.23923,0.401679

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,1.019876,0.660938,0.265287,0.526015
vgg16,1.025927,0.640037,0.260172,0.499559
vgg19,1.031437,0.655365,0.263936,0.518922
resnet50,1.015492,0.654436,0.26371,0.517743
resnet101,1.0617,0.660474,0.265175,0.525423
resnet152,1.061815,0.637715,0.259595,0.496643
densenet121,1.036565,0.640502,0.260287,0.500143
densenet169,1.039863,0.647004,0.261891,0.508334
densenet201,1.024191,0.668834,0.267186,0.53611


# Drafts

In [20]:
# # load separate
# mvsa_single_images, mvsa_multiple_images = load_mvsa_images()
# mvsa_single_xception, mvsa_multiple_xception = load_mvsa_feature('xception')
# mvsa_single_vgg16, mvsa_multiple_vgg16 = load_mvsa_feature('vgg16')
# mvsa_single_vgg19, mvsa_multiple_vgg19 = load_mvsa_feature('vgg19')
# mvsa_single_resnet50, mvsa_multiple_resnet50 = load_mvsa_feature('resnet50')
# mvsa_single_resnet101, mvsa_multiple_resnet101 = load_mvsa_feature('resnet101')
# mvsa_single_resnet152, mvsa_multiple_resnet152 = load_mvsa_feature('resnet152')
# mvsa_single_densenet121, mvsa_multiple_densenet121 = load_mvsa_feature('densenet121')
# mvsa_single_densenet169, mvsa_multiple_densenet169 = load_mvsa_feature('densenet169')
# mvsa_single_densenet201, mvsa_multiple_densenet201 = load_mvsa_feature('densenet201')

# # load merge
# mvsa_images = merge_mvsa(mvsa_single_images, mvsa_multiple_images)
# mvsa_xception = merge_mvsa(mvsa_single_xception, mvsa_multiple_xception)
# mvsa_vgg16 = merge_mvsa(mvsa_single_vgg16, mvsa_multiple_vgg16)
# mvsa_vgg19 = merge_mvsa(mvsa_single_vgg19, mvsa_multiple_vgg19)
# mvsa_resnet50 = merge_mvsa(mvsa_single_resnet50, mvsa_multiple_resnet50)
# mvsa_resnet101 = merge_mvsa(mvsa_single_resnet101, mvsa_multiple_resnet101)
# mvsa_resnet152 = merge_mvsa(mvsa_single_resnet152, mvsa_multiple_resnet152)
# mvsa_densenet121 = merge_mvsa(mvsa_single_densenet121, mvsa_multiple_densenet121)
# mvsa_densenet169 = merge_mvsa(mvsa_single_densenet169, mvsa_multiple_densenet169)
# mvsa_densenet201 = merge_mvsa(mvsa_single_densenet201, mvsa_multiple_densenet201)

# # prepare all features data
# feature_names = ['cnn', 'xception', 'vgg16', 'vgg19', 'resnet50', 'resnet101', 'resnet152', 'densenet121', 'densenet169', 'densenet201']

# mvsa_single_features = [mvsa_single_images,
#                         mvsa_single_xception,
#                         mvsa_single_vgg16, mvsa_single_vgg19, 
#                         mvsa_single_resnet50, mvsa_single_resnet101, mvsa_single_resnet152, 
#                         mvsa_single_densenet121, mvsa_single_densenet169, mvsa_single_densenet201]

# mvsa_multiple_features = [mvsa_multiple_images,
#                           mvsa_multiple_xception,
#                           mvsa_multiple_vgg16, mvsa_multiple_vgg19, 
#                           mvsa_multiple_resnet50, mvsa_multiple_resnet101, mvsa_multiple_resnet152, 
#                           mvsa_multiple_densenet121, mvsa_multiple_densenet169, mvsa_multiple_densenet201]

# mvsa_features = [mvsa_images,
#                  mvsa_xception,
#                  mvsa_vgg16, mvsa_vgg19, 
#                  mvsa_resnet50, mvsa_resnet101, mvsa_resnet152, 
#                  mvsa_densenet121, mvsa_densenet169, mvsa_densenet201]