<a href="https://www.kaggle.com/code/vincemarcs/mvsa-image-models?scriptVersionId=101006479" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
# Seed value applied by reset_seeds() and passed as random_state to the SMOTE oversamplers.
SEED = 61


import os
import re
import gc
import h5py
import torch
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
import tensorflow_addons as tfa

from tqdm import tqdm
from nltk import tokenize

from sklearn import preprocessing
from sklearn.decomposition import PCA
from IPython.display import display_html
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import BorderlineSMOTE

from keras import backend as K
from keras import initializers,regularizers,constraints
from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical
from keras.layers import Reshape, Input, Embedding, Flatten, Dense, Dropout, BatchNormalization, Activation, RepeatVector, Permute
from keras.layers import TimeDistributed, LSTM, GRU, Bidirectional, Convolution1D, MaxPooling1D, MaxPool2D, Convolution2D
from keras.layers.core import RepeatVector, Reshape
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.models import Sequential, Model, load_model
from sklearn.model_selection import cross_val_score

def reset_seeds():
    """Seed NumPy, Python's ``random`` and TensorFlow with SEED.

    Also exports SEED as the ``PYTHONHASHSEED`` environment variable.
    """
    # Same order as before: NumPy, stdlib random, then TensorFlow.
    for seeder in (np.random.seed, python_random.seed, tf.random.set_seed):
        seeder(SEED)
    os.environ["PYTHONHASHSEED"] = str(SEED)

# from tensorflow.python.keras.layers import Layer, InputSpec, Lambda
# from tensorflow.keras import Model
# from attention import Attention_input1, Attention_input2
# from keras.optimizers import SGD, RMSprop, Adagrad

In [2]:
def read_hdf5(path):
    """Read every top-level dataset of an HDF5 file into memory.

    Args:
        path: Location of the HDF5 file.

    Returns:
        A list of ``(name, array)`` tuples, one per top-level key, in the
        order reported by the file. Arrays with ``object`` dtype are
        converted to arrays of UTF-8 decoded strings.
    """
    loaded_data = []

    # The context manager closes the file handle even when a read fails;
    # previously the handle was left open for the life of the process.
    with h5py.File(path, 'r') as read_file:
        for name in read_file.keys():
            # [:] copies the dataset into a NumPy array, so the result
            # stays valid after the file is closed.
            dataset = read_file[name][:]
            if dataset.dtype == np.dtype('object'):
                dataset = np.array([x.decode('UTF-8') for x in dataset])
            loaded_data.append((name, dataset))

    return loaded_data

def loadz(path):
    """Load the first positional array (``arr_0``) from an ``.npz`` archive.

    Args:
        path: Location of the ``.npz`` file.

    Returns:
        The array stored under the key ``'arr_0'``.
    """
    # np.load on an .npz returns an archive object that keeps the file
    # open; the context manager closes it once the array has been read.
    with np.load(path) as archive:
        return archive['arr_0']

In [3]:
def load_labels(path):
    """Load the three label arrays stored in an MVSA labels HDF5 file.

    Args:
        path: Location of the labels HDF5 file.

    Returns:
        A tuple ``(multimodal_labels, text_labels, image_labels)`` taken
        from the datasets named ``'multimodal-labels'``, ``'text-labels'``
        and ``'image-labels'``.

    Raises:
        KeyError: If any of the three datasets is absent from the file.
    """
    datasets = dict(read_hdf5(path))
    required = ('multimodal-labels', 'text-labels', 'image-labels')

    # Fail with a clear message instead of an UnboundLocalError at the
    # return statement when the file does not have the expected layout.
    missing = [key for key in required if key not in datasets]
    if missing:
        raise KeyError(f'{path} is missing required dataset(s): {missing}')

    return tuple(datasets[key] for key in required)

def load_mvsa_feature(feature_name, merge=False):
    """Load the pre-extracted feature arrays of one backbone for both MVSA subsets.

    Args:
        feature_name: Name of the feature folder under
            ``../input/mvsa-features/``; also used in the file names.
        merge: When True, return the two subsets concatenated.

    Returns:
        ``(mvsa_single, mvsa_multiple)`` by default, or a single merged
        array when ``merge`` is True.
    """
    folder_path = os.path.join('../input/mvsa-features/', feature_name)

    single_path = os.path.join(folder_path, 'mvsa-single-{}.npz'.format(feature_name))
    multiple_path = os.path.join(folder_path, 'mvsa-multiple-{}.npz'.format(feature_name))

    mvsa_single = loadz(single_path)
    mvsa_multiple = loadz(multiple_path)

    if not merge == True:
        return mvsa_single, mvsa_multiple
    return merge_mvsa(mvsa_single, mvsa_multiple)

def load_mvsa_images(merge=False):
    """Load the raw image arrays of both MVSA subsets from ``../input/mvsa-data``.

    Every ``.npz`` file in the folder is inspected. A file whose second
    dash-separated name component is ``'single'`` is taken as the
    MVSA-Single archive; any other ``.npz`` file is taken as the
    MVSA-Multiple archive.

    Args:
        merge: When truthy, return the two subsets concatenated.

    Returns:
        ``(mvsa_single, mvsa_multiple)`` by default, or a single merged
        array when ``merge`` is truthy.

    Raises:
        FileNotFoundError: If either archive cannot be found in the folder.
    """
    folder_path = '../input/mvsa-data'
    mvsa_single_images_path = None
    mvsa_multiple_images_path = None

    # os.listdir yields bare file names, so no path splitting is needed.
    for file_name in os.listdir(folder_path):
        # splitext copes with names that contain no dot or several dots,
        # where indexing split('.')[1] would raise or pick the wrong part.
        if os.path.splitext(file_name)[1] != '.npz':
            continue
        name_parts = file_name.split('-')
        if len(name_parts) > 1 and name_parts[1] == 'single':
            mvsa_single_images_path = os.path.join(folder_path, file_name)
        else:
            mvsa_multiple_images_path = os.path.join(folder_path, file_name)

    if mvsa_single_images_path is None or mvsa_multiple_images_path is None:
        raise FileNotFoundError(
            'Expected both an MVSA-Single and an MVSA-Multiple .npz archive in '
            + folder_path
        )

    mvsa_single = loadz(mvsa_single_images_path)
    mvsa_multiple = loadz(mvsa_multiple_images_path)

    if merge:
        return merge_mvsa(mvsa_single, mvsa_multiple)

    return mvsa_single, mvsa_multiple

def merge_mvsa(mvsa_single, mvsa_multiple):
    """Stack the MVSA-Single rows on top of the MVSA-Multiple rows (axis 0)."""
    return np.concatenate([mvsa_single, mvsa_multiple], axis=0)

In [4]:
def plot_metrics(history):
    """Plot train/validation curves for loss, accuracy and both F1 scores.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists. It must contain ``loss``, ``accuracy``,
            ``f1_macro`` and ``f1_weighted``, plus their ``val_`` versions.
    """
    # (history key, panel title, y-axis label) for each of the four panels.
    panels = (
        ('loss', 'LOSS', 'loss'),
        ('accuracy', 'ACCURACY', 'accuracy'),
        ('f1_macro', 'Macro F1-SCORE', 'f1-macro'),
        ('f1_weighted', 'Weighted F1-SCORE', 'f1-weighted'),
    )

    fig = plt.figure(figsize=(20, 5))
    curves = history.history

    for position, (metric, title, y_label) in enumerate(panels, start=1):
        fig.add_subplot(1, 4, position)
        plt.plot(curves[metric])
        plt.plot(curves['val_' + metric])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='best')

    plt.show()

In [5]:
# e.g. validation_split=0.1 -----> 8:1:1 ratio of train, val, test
def split_data(data, validation_split):
    """Split a sequence into contiguous train / validation / test parts.

    The last ``int(validation_split * len(data))`` items form the test
    part, the same number of items before them form the validation part,
    and everything earlier is the training part.

    Args:
        data: Any sliceable sequence with a length (list, NumPy array, ...).
        validation_split: Fraction of the data used for validation; the
            same fraction is used again for the test part.

    Returns:
        A tuple ``(data_train, data_val, data_test)``.
    """
    total = len(data)
    num_val = int(validation_split * total)

    # Explicit non-negative boundaries: slicing with -0 (when num_val is 0)
    # used to yield an empty training set and put all data in the test set.
    test_start = max(total - num_val, 0)
    val_start = max(total - 2 * num_val, 0)

    data_train = data[:val_start]
    data_val = data[val_start:test_start]
    data_test = data[test_start:]
    return data_train, data_val, data_test

# e.g. validation_split=0.1 -----> 8:1:1 ratio of train, val, test
def split_tf_data(data, validation_split):
    """Split a dataset exposing ``take``/``skip``/``len`` into train, val and test.

    The first ``int((1 - 2 * validation_split) * len(data))`` elements are
    the training part. Of the remaining elements, the first
    ``int(validation_split * len(data))`` form the test part and whatever
    follows forms the validation part.

    Returns:
        A tuple ``(train_dataset, val_dataset, test_dataset)``.
    """
    total = len(data)
    n_train = int((1-validation_split*2) * total)
    n_test = int(validation_split * total)

    train_dataset = data.take(n_train)
    held_out = data.skip(n_train)
    # NOTE(review): validation receives the remainder after the test slice,
    # so it is not capped at the validation fraction; confirm this is intended.
    val_dataset = held_out.skip(n_test)
    test_dataset = held_out.take(n_test)
    return train_dataset, val_dataset, test_dataset

In [6]:
def create_model_pretrained(input_shape):
    """Build and compile the LSTM classifier used on pre-extracted features.

    The input is reshaped to a one-step sequence, passed through an LSTM
    with NUM_HIDDEN units and classified by a softmax layer with
    NUM_CLASSES outputs.

    Args:
        input_shape: Shape of one sample, without the batch dimension.

    Returns:
        A compiled Keras ``Model`` using Adam and categorical cross-entropy,
        reporting accuracy, macro F1 and weighted F1.
    """
    macro_f1 = tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='macro', name='f1_macro')
    weighted_f1 = tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='weighted', name='f1_weighted')

    features = Input(shape=input_shape)

    # A soft-attention variant was prototyped at this point and is disabled.
    # Reshape((1, -1)) presents each sample as a one-step sequence for the LSTM.
    sequence = Reshape((1, -1))(features)
    encoded = LSTM(NUM_HIDDEN)(sequence)

    probabilities = Dense(NUM_CLASSES, activation='softmax')(encoded)

    model = Model(features, probabilities)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', macro_f1, weighted_f1],
    )
    return model

def create_model_cnn(input_shape):
    """Build and compile a small three-stage CNN classifier.

    Each stage is a 5x5 ReLU convolution (32, 64, then 128 filters)
    followed by 2x2 max pooling. The result is flattened, passed through
    a 64-unit ReLU layer and classified by a softmax layer with
    NUM_CLASSES outputs.

    Args:
        input_shape: Shape of one sample, without the batch dimension.

    Returns:
        A compiled Keras ``Model`` using Adam and categorical cross-entropy,
        reporting accuracy, macro F1 and weighted F1.
    """
    macro_f1 = tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='macro', name='f1_macro')
    weighted_f1 = tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='weighted', name='f1_weighted')

    image_inputs = Input(shape=input_shape)

    x = image_inputs
    for n_filters in (32, 64, 128):
        x = Convolution2D(n_filters, kernel_size=(5,5), strides=(1,1), activation='relu')(x)
        x = MaxPool2D(pool_size=(2,2))(x)

    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    outputs = Dense(NUM_CLASSES, activation='softmax')(x)

    model = Model(image_inputs, outputs)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', macro_f1, weighted_f1],
    )
    return model

In [7]:
def evaluate_model(model, X_test, y_test, checkpoint=None, verbose=1):
    """Evaluate a model on the test data and optionally print the scores.

    Args:
        model: Model exposing ``evaluate(X, y, verbose=...)`` that returns
            loss, accuracy, macro F1 and weighted F1, in that order.
        X_test: Test inputs.
        y_test: Test targets.
        checkpoint: When not None, the model saved at
            ``./model_checkpoint/<checkpoint>.h5`` is loaded and evaluated
            instead of ``model``.
        verbose: Passed to ``evaluate``; the scores are printed when it is 1.

    Returns:
        A tuple ``(loss, acc, f1_macro, f1_weighted)``.
    """
    if checkpoint is not None:
        model = load_model('./model_checkpoint/{}.h5'.format(checkpoint))

    scores = model.evaluate(X_test, y_test, verbose=verbose)
    loss, acc, f1_macro, f1_weighted = scores

    if verbose == 1:
        report = (
            ('Loss:', loss),
            ('Accuracy:', acc),
            ('Macro F1-score:', f1_macro),
            ('Weighted F1-score:', f1_weighted),
        )
        for label, value in report:
            print(label, value)

    return loss, acc, f1_macro, f1_weighted

In [8]:
def run_and_evaluate(name, X, y, verbose=0):
    """Train the LSTM classifier on one feature set and score it on a held-out test split.

    Steps: refit the module-level label encoder ``le`` on ``y`` and one-hot
    encode; shuffle; split into train/val/test using VALIDATION_SPLIT;
    oversample the training part with BorderlineSMOTE; train with
    checkpointing and early stopping; evaluate the saved checkpoint.

    Args:
        name: Checkpoint name; the model is saved to
            ``./model_checkpoint/<name>.h5``.
        X: Feature array indexed by sample along the first axis.
        y: Labels, one per sample.
        verbose: Verbosity forwarded to Keras and to ``evaluate_model``.

    Returns:
        ``(history, scores)`` where ``scores`` is the tuple returned by
        ``evaluate_model``.
    """
    encoded = le.fit_transform(y)
    y = to_categorical(np.asarray(encoded))

    # Shuffle samples and targets with the same permutation.
    shuffle_order = np.random.permutation(len(y))
    X = X[shuffle_order]
    y = y[shuffle_order]

    X_train, X_val, X_test = split_data(X, VALIDATION_SPLIT)
    y_train, y_val, y_test = split_data(y, VALIDATION_SPLIT)

    # Only the training part is oversampled; val and test stay untouched.
    sampler = BorderlineSMOTE(sampling_strategy='all', random_state=SEED, kind='borderline-1')
    X_train, y_train = sampler.fit_resample(X_train, y_train)

    model = create_model_pretrained(X_train.shape[1:])
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    checkpoint = ModelCheckpoint('./model_checkpoint/{}.h5'.format(name), save_best_only=True, verbose=verbose)

    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        verbose=verbose,
        callbacks=[checkpoint, early_stopping],
    )

    if verbose == 1:
        print('Checkpoint loaded at epoch:', np.argmin(history.history['val_loss']))

    scores = evaluate_model(model, X_test, y_test, checkpoint=name, verbose=verbose)
    return history, scores

In [9]:
def run_and_evaluate_cnn(name, X, y, verbose=0):
    """Train the CNN classifier on image arrays and score it on a held-out test split.

    Mirrors ``run_and_evaluate`` but builds the model with
    ``create_model_cnn`` and feeds training/validation data through
    batched ``tf.data`` pipelines.

    Args:
        name: Checkpoint name; the model is saved to
            ``./model_checkpoint/<name>.h5``.
        X: Sample array indexed by sample along the first axis.
        y: Labels, one per sample.
        verbose: Verbosity forwarded to Keras and to ``evaluate_model``.

    Returns:
        ``(history, scores)`` where ``scores`` is the tuple returned by
        ``evaluate_model``. Callers unpack two values, so the function must
        not return None as it did previously.
    """
    y = le.fit_transform(y)
    y = to_categorical(np.asarray(y))

    random_idx = np.random.permutation(len(y))
    X, y = X[random_idx], y[random_idx]

    X_train, X_val, X_test = split_data(X, VALIDATION_SPLIT)
    y_train, y_val, y_test = split_data(y, VALIDATION_SPLIT)

    oversample = BorderlineSMOTE(sampling_strategy='minority', random_state=SEED, kind='borderline-1')
    # The oversampler works on 2-D (n_samples, n_features) input, so each
    # sample is flattened for resampling and restored to its shape afterwards.
    sample_shape = tuple(X_train.shape[1:])
    X_flat = X_train.reshape(len(X_train), -1)
    X_flat, y_train = oversample.fit_resample(X_flat, y_train)
    X_train = X_flat.reshape((-1,) + sample_shape)

    train_set = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(BATCH_SIZE)
    val_set = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(BATCH_SIZE)

    model = create_model_cnn(sample_shape)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    checkpoint = ModelCheckpoint('./model_checkpoint/{}.h5'.format(name), save_best_only=True, verbose=verbose)

    # The datasets are already batched; Keras rejects an explicit
    # batch_size when the input is a tf.data.Dataset.
    history = model.fit(train_set, validation_data=val_set,
                        epochs=EPOCHS, verbose=verbose,
                        callbacks=[checkpoint, early_stopping])
    if verbose == 1:
        best_epoch = np.argmin(history.history['val_loss'])
        print('Checkpoint loaded at epoch:', best_epoch)

    return history, evaluate_model(model, X_test, y_test, checkpoint=name, verbose=verbose)

In [10]:
def style_dataframe(dataframe):
    return dataframe.style.highlight_max(subset=['Accuracy', 'F1-macro', 'F1-weighted'], props='color:lawngreen', axis=0)\
                          .highlight_min(subset=['Accuracy', 'F1-macro', 'F1-weighted'], props='color:tomato', axis=0)\
                          .highlight_min(subset=['Loss'], props='color:lawngreen', axis=0)\
                          .highlight_max(subset=['Loss'], props='color:tomato', axis=0)

def display_dataframes(dfs, names=None, index=False):
    """Render several tables side by side in the notebook output.

    Args:
        dfs: Iterable of objects exposing ``to_html()``; a ``pd.Series``
            is first converted to a one-column DataFrame.
        names: Optional captions, one per table, shown in a header row.
            Omitted or empty means no header row.
        index: Currently unused; kept for interface compatibility.
    """
    def to_df(x):
        return pd.DataFrame(x) if isinstance(x, pd.Series) else x

    rows = []
    if names:
        rows.append('<tr>' +
                    ''.join(f'<td style="text-align:center">{name}</td>' for name in names) +
                    '</tr>')
    rows.append('<tr>' +
                ''.join(f'<td style="vertical-align:top"> {to_df(df).to_html()}</td>'
                        for df in dfs) +
                '</tr>')
    html_str = '<table>' + ''.join(rows) + '</table>'

    # Only opening tags get the inline style. Replacing every occurrence of
    # "table" also rewrote the closing tags and any cell text containing it.
    html_str = html_str.replace('<table', '<table style="display:inline"')
    display_html(html_str, raw=True)

# Load data

In [11]:
# Load the three label sets (multimodal, text, image) of each MVSA subset.
(mvsa_single_multimodal_labels,
 mvsa_single_text_labels,
 mvsa_single_image_labels) = load_labels('../input/mvsa-features/labels/mvsa-single-labels.hdf5')
(mvsa_multiple_multimodal_labels,
 mvsa_multiple_text_labels,
 mvsa_multiple_image_labels) = load_labels('../input/mvsa-features/labels/mvsa-multiple-labels.hdf5')

# Merged label sets: MVSA-Single rows followed by MVSA-Multiple rows.
mvsa_multimodal_labels = merge_mvsa(mvsa_single_multimodal_labels, mvsa_multiple_multimodal_labels)
mvsa_text_labels = merge_mvsa(mvsa_single_text_labels, mvsa_multiple_text_labels)
mvsa_image_labels = merge_mvsa(mvsa_single_image_labels, mvsa_multiple_image_labels)

# The encoder is fitted on the merged multimodal labels to obtain the class count.
le = preprocessing.LabelEncoder().fit(mvsa_multimodal_labels)
NUM_CLASSES = len(le.classes_) # =3

In [12]:
# prepare all features data
feature_names = ['xception', 'vgg16', 'vgg19', 'resnet50', 'resnet101', 'resnet152', 'densenet121', 'densenet169', 'densenet201']

# Parallel lists: entry i of each list belongs to feature_names[i].
mvsa_single_features = []
mvsa_multiple_features = []
mvsa_features = []

for name in tqdm(feature_names):
    single_part, multiple_part = load_mvsa_feature(name)

    mvsa_single_features.append(single_part)
    mvsa_multiple_features.append(multiple_part)
    mvsa_features.append(merge_mvsa(single_part, multiple_part))

100%|██████████| 9/9 [00:17<00:00,  1.93s/it]


# Run models and display evaluation

In [13]:
# Seed all random number generators before any experiment runs.
reset_seeds()
# Maximum number of training epochs passed to model.fit.
EPOCHS = 100
# Mini-batch size used for training.
BATCH_SIZE = 128
# Fraction of the data used for validation, and again for test (see split_data).
VALIDATION_SPLIT = 0.1
# Patience (in epochs) of the EarlyStopping callback.
EARLY_STOPPING = 10
# Number of units of the LSTM layer in create_model_pretrained.
NUM_HIDDEN = 128

## With original image labels

In [14]:
# Train and score one model per feature set, using each subset's own image labels.
print('MVSA-Single: With original image labels')
mvsa_single_histories = []
mvsa_single_scores = []
for feature_name, features in zip(feature_names, mvsa_single_features):
    print('MVSA-Single:', feature_name)
    run_name = 'single-OL-' + feature_name
    if feature_name == 'cnn':
        history, score = run_and_evaluate_cnn(run_name, features, mvsa_single_image_labels, verbose=0)
    else:
        history, score = run_and_evaluate(run_name, features, mvsa_single_image_labels, verbose=0)
    mvsa_single_histories.append(history)
    mvsa_single_scores.append(score)
    print()
df_single_scores = pd.DataFrame(mvsa_single_scores, columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)

print('MVSA-Multiple: With original image labels')
mvsa_multiple_histories = []
mvsa_multiple_scores = []
for feature_name, features in zip(feature_names, mvsa_multiple_features):
    # The per-feature progress line is not printed for this subset.
    run_name = 'multiple-OL-' + feature_name
    if feature_name == 'cnn':
        history, score = run_and_evaluate_cnn(run_name, features, mvsa_multiple_image_labels, verbose=1)
    else:
        history, score = run_and_evaluate(run_name, features, mvsa_multiple_image_labels, verbose=0)
    mvsa_multiple_histories.append(history)
    mvsa_multiple_scores.append(score)
    print()
df_multiple_scores = pd.DataFrame(mvsa_multiple_scores, columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)

# Element-wise mean of the two subsets' score tables.
mvsa_average_scores = np.mean([mvsa_single_scores, mvsa_multiple_scores], axis=0)
df_average_scores = pd.DataFrame(mvsa_average_scores, columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)

MVSA-Single: With original image labels
MVSA-Single: xception


2022-07-16 19:42:57.452884: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-16 19:42:57.454530: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-16 19:42:57.455602: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-16 19:42:57.456862: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil


MVSA-Single: vgg16

MVSA-Single: vgg19

MVSA-Single: resnet50

MVSA-Single: resnet101

MVSA-Single: resnet152

MVSA-Single: densenet121

MVSA-Single: densenet169

MVSA-Single: densenet201

MVSA-Multiple: With original image labels



2022-07-16 19:45:26.191011: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 373161984 exceeds 10% of free system memory.
2022-07-16 19:45:26.575347: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 373161984 exceeds 10% of free system memory.





2022-07-16 19:46:08.609605: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 375963648 exceeds 10% of free system memory.
2022-07-16 19:46:08.990032: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 375963648 exceeds 10% of free system memory.











## With multimodal labels

In [15]:
# Train and score one model per feature set, using the multimodal labels.
print('MVSA-Single: With multimodal labels')
mvsa_single_histories2 = []
mvsa_single_scores2 = []
for feature_name, features in zip(feature_names, mvsa_single_features):
    print('MVSA-Single:', feature_name)
    history, score = run_and_evaluate('single-ML-' + feature_name, features, mvsa_single_multimodal_labels, verbose=0)
    mvsa_single_histories2.append(history)
    mvsa_single_scores2.append(score)
    print()
df_single_scores2 = pd.DataFrame(mvsa_single_scores2, columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)

print('MVSA-Multiple: With multimodal labels')
mvsa_multiple_histories2 = []
mvsa_multiple_scores2 = []
for feature_name, features in zip(feature_names, mvsa_multiple_features):
    print('MVSA-Multiple:', feature_name)
    history, score = run_and_evaluate('multiple-ML-' + feature_name, features, mvsa_multiple_multimodal_labels, verbose=0)
    mvsa_multiple_histories2.append(history)
    mvsa_multiple_scores2.append(score)
    print()
df_multiple_scores2 = pd.DataFrame(mvsa_multiple_scores2, columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)

# Element-wise mean of the two subsets' score tables.
mvsa_average_scores2 = np.mean([mvsa_single_scores2, mvsa_multiple_scores2], axis=0)
df_average_scores2 = pd.DataFrame(mvsa_average_scores2, columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)

MVSA-Single: With multimodal labels
MVSA-Single: xception

MVSA-Single: vgg16

MVSA-Single: vgg19

MVSA-Single: resnet50

MVSA-Single: resnet101

MVSA-Single: resnet152

MVSA-Single: densenet121

MVSA-Single: densenet169

MVSA-Single: densenet201

MVSA-Multiple: With multimodal labels
MVSA-Multiple: xception

MVSA-Multiple: vgg16


2022-07-16 19:51:22.180750: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 445956096 exceeds 10% of free system memory.



MVSA-Multiple: vgg19

MVSA-Multiple: resnet50

MVSA-Multiple: resnet101

MVSA-Multiple: resnet152

MVSA-Multiple: densenet121

MVSA-Multiple: densenet169

MVSA-Multiple: densenet201



## With merged MVSA data

In [16]:
# Train and score one model per feature set on the merged MVSA data,
# first with the image labels and then with the multimodal labels.
print('Both MVSA: With original image labels')
mvsa_histories3 = []
mvsa_scores3 = []
for feature_name, features in zip(feature_names, mvsa_features):
    print('Both MVSA:', feature_name)
    history, score = run_and_evaluate('merge-OL-' + feature_name, features, mvsa_image_labels, verbose=0)
    mvsa_histories3.append(history)
    mvsa_scores3.append(score)
    print()
df_scores3 = pd.DataFrame(mvsa_scores3, columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)

print('Both MVSA: With multimodal labels')
mvsa_histories4 = []
mvsa_scores4 = []
for feature_name, features in zip(feature_names, mvsa_features):
    print('Both MVSA:', feature_name)
    history, score = run_and_evaluate('merge-ML-' + feature_name, features, mvsa_multimodal_labels, verbose=0)
    mvsa_histories4.append(history)
    mvsa_scores4.append(score)
    print()
df_scores4 = pd.DataFrame(mvsa_scores4, columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'], index=feature_names)

Both MVSA: With original image labels
Both MVSA: xception

Both MVSA: vgg16

Both MVSA: vgg19

Both MVSA: resnet50

Both MVSA: resnet101

Both MVSA: resnet152

Both MVSA: densenet121

Both MVSA: densenet169

Both MVSA: densenet201

Both MVSA: With multimodal labels
Both MVSA: xception

Both MVSA: vgg16

Both MVSA: vgg19

Both MVSA: resnet50

Both MVSA: resnet101

Both MVSA: resnet152

Both MVSA: densenet121

Both MVSA: densenet169

Both MVSA: densenet201



# Display results

In [17]:
print('With original image labels\n')
# Style each score table, then show the three side by side.
original_label_tables = tuple(
    style_dataframe(frame)
    for frame in (df_single_scores, df_multiple_scores, df_average_scores)
)
display_dataframes(original_label_tables, names=['MVSA-Single', 'MVSA-Multiple', 'Average'])

With original image labels



Unnamed: 0_level_0,Loss,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Loss,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_2,Loss,Accuracy,F1-macro,F1-weighted
xception,0.953520,0.560976,0.531861,0.569564
vgg16,0.916867,0.596452,0.505293,0.580729
vgg19,0.978294,0.529933,0.498268,0.538547
resnet50,1.008507,0.498891,0.332415,0.416406
resnet101,0.982459,0.532151,0.442925,0.511399
resnet152,0.994798,0.538803,0.419391,0.499177
densenet121,0.868874,0.549889,0.513789,0.552958
densenet169,0.857228,0.636364,0.585635,0.638592
densenet201,0.959363,0.569845,0.543733,0.585588
xception,0.858131,0.571680,0.477453,0.575763

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.95352,0.560976,0.531861,0.569564
vgg16,0.916867,0.596452,0.505293,0.580729
vgg19,0.978294,0.529933,0.498268,0.538547
resnet50,1.008507,0.498891,0.332415,0.416406
resnet101,0.982459,0.532151,0.442925,0.511399
resnet152,0.994798,0.538803,0.419391,0.499177
densenet121,0.868874,0.549889,0.513789,0.552958
densenet169,0.857228,0.636364,0.585635,0.638592
densenet201,0.959363,0.569845,0.543733,0.585588

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.858131,0.57168,0.477453,0.575763
vgg16,0.871745,0.53349,0.437929,0.546178
vgg19,0.898001,0.554054,0.375552,0.499784
resnet50,0.90795,0.556404,0.421271,0.534963
resnet101,0.975784,0.5,0.389056,0.503855
resnet152,0.9246,0.541128,0.379076,0.533158
densenet121,0.890865,0.561692,0.48448,0.568501
densenet169,0.880656,0.545828,0.457345,0.559372
densenet201,0.864251,0.603408,0.477076,0.599595

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.905825,0.566328,0.504657,0.572663
vgg16,0.894306,0.564971,0.471611,0.563453
vgg19,0.938147,0.541994,0.43691,0.519165
resnet50,0.958228,0.527648,0.376843,0.475685
resnet101,0.979122,0.516075,0.415991,0.507627
resnet152,0.959699,0.539965,0.399234,0.516167
densenet121,0.87987,0.555791,0.499134,0.56073
densenet169,0.868942,0.591096,0.52149,0.598982
densenet201,0.911807,0.586626,0.510404,0.592591


In [18]:
print('With multimodal labels\n')
# Style each score table, then show the three side by side.
multimodal_label_tables = tuple(
    style_dataframe(frame)
    for frame in (df_single_scores2, df_multiple_scores2, df_average_scores2)
)
display_dataframes(multimodal_label_tables, names=['MVSA-Single', 'MVSA-Multiple', 'Average'])

With multimodal labels



Unnamed: 0_level_0,Loss,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Loss,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_2,Loss,Accuracy,F1-macro,F1-weighted
xception,0.898307,0.572062,0.500156,0.573365
vgg16,0.943572,0.538803,0.461145,0.539843
vgg19,0.926491,0.543237,0.45221,0.550175
resnet50,0.953522,0.532151,0.381141,0.5241
resnet101,0.957130,0.505543,0.407557,0.514297
resnet152,0.895749,0.605322,0.30476,0.521664
densenet121,0.911945,0.600887,0.520859,0.595529
densenet169,0.884795,0.576497,0.501874,0.585185
densenet201,0.880227,0.614191,0.514332,0.623527
xception,0.859723,0.572268,0.409153,0.572313

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.898307,0.572062,0.500156,0.573365
vgg16,0.943572,0.538803,0.461145,0.539843
vgg19,0.926491,0.543237,0.45221,0.550175
resnet50,0.953522,0.532151,0.381141,0.5241
resnet101,0.95713,0.505543,0.407557,0.514297
resnet152,0.895749,0.605322,0.30476,0.521664
densenet121,0.911945,0.600887,0.520859,0.595529
densenet169,0.884795,0.576497,0.501874,0.585185
densenet201,0.880227,0.614191,0.514332,0.623527

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.859723,0.572268,0.409153,0.572313
vgg16,0.969184,0.537603,0.395749,0.549945
vgg19,0.918293,0.515276,0.407444,0.541495
resnet50,0.927926,0.515276,0.39639,0.534212
resnet101,0.94674,0.493537,0.358663,0.509674
resnet152,0.949516,0.584606,0.342747,0.556335
densenet121,0.950572,0.558167,0.403344,0.575242
densenet169,0.922678,0.549941,0.426917,0.565618
densenet201,0.853365,0.576381,0.465418,0.595763

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.879015,0.572165,0.454655,0.572839
vgg16,0.956378,0.538203,0.428447,0.544894
vgg19,0.922392,0.529257,0.429827,0.545835
resnet50,0.940724,0.523713,0.388766,0.529156
resnet101,0.951935,0.49954,0.38311,0.511985
resnet152,0.922633,0.594964,0.323753,0.538999
densenet121,0.931259,0.579527,0.462102,0.585386
densenet169,0.903736,0.563219,0.464395,0.575401
densenet201,0.866796,0.595286,0.489875,0.609645


In [19]:
print('With both MVSA merged together\n')
# Style both score tables, then show them side by side.
merged_tables = tuple(style_dataframe(frame) for frame in (df_scores3, df_scores4))
display_dataframes(merged_tables, names=['Original image labels', 'Multimodal labels'])

With both MVSA merged together



Unnamed: 0_level_0,Loss,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Loss,Accuracy,F1-macro,F1-weighted
xception,0.936544,0.536925,0.486821,0.549433
vgg16,0.982316,0.503484,0.430298,0.518337
vgg19,0.952107,0.513237,0.438547,0.517521
resnet50,0.977985,0.527171,0.426778,0.528823
resnet101,1.011878,0.462146,0.388083,0.474739
resnet152,0.971296,0.561542,0.310676,0.460967
densenet121,0.889181,0.580121,0.495993,0.580945
densenet169,0.920011,0.548537,0.483797,0.561374
densenet201,0.923955,0.555504,0.45797,0.534378
xception,0.943975,0.546679,0.449757,0.5653

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.936544,0.536925,0.486821,0.549433
vgg16,0.982316,0.503484,0.430298,0.518337
vgg19,0.952107,0.513237,0.438547,0.517521
resnet50,0.977985,0.527171,0.426778,0.528823
resnet101,1.011878,0.462146,0.388083,0.474739
resnet152,0.971296,0.561542,0.310676,0.460967
densenet121,0.889181,0.580121,0.495993,0.580945
densenet169,0.920011,0.548537,0.483797,0.561374
densenet201,0.923955,0.555504,0.45797,0.534378

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.943975,0.546679,0.449757,0.5653
vgg16,0.955552,0.516953,0.404278,0.525018
vgg19,0.955785,0.519275,0.421734,0.540098
resnet50,1.004247,0.557362,0.399758,0.552644
resnet101,1.028642,0.442638,0.3674,0.470361
resnet152,0.990702,0.495123,0.383205,0.50649
densenet121,0.958433,0.541105,0.433392,0.556408
densenet169,0.891592,0.594519,0.464529,0.593099
densenet201,0.938151,0.535532,0.469568,0.562476


# Drafts

In [20]:
# # load separate
# mvsa_single_images, mvsa_multiple_images = load_mvsa_images()
# mvsa_single_xception, mvsa_multiple_xception = load_mvsa_feature('xception')
# mvsa_single_vgg16, mvsa_multiple_vgg16 = load_mvsa_feature('vgg16')
# mvsa_single_vgg19, mvsa_multiple_vgg19 = load_mvsa_feature('vgg19')
# mvsa_single_resnet50, mvsa_multiple_resnet50 = load_mvsa_feature('resnet50')
# mvsa_single_resnet101, mvsa_multiple_resnet101 = load_mvsa_feature('resnet101')
# mvsa_single_resnet152, mvsa_multiple_resnet152 = load_mvsa_feature('resnet152')
# mvsa_single_densenet121, mvsa_multiple_densenet121 = load_mvsa_feature('densenet121')
# mvsa_single_densenet169, mvsa_multiple_densenet169 = load_mvsa_feature('densenet169')
# mvsa_single_densenet201, mvsa_multiple_densenet201 = load_mvsa_feature('densenet201')

# # load merge
# mvsa_images = merge_mvsa(mvsa_single_images, mvsa_multiple_images)
# mvsa_xception = merge_mvsa(mvsa_single_xception, mvsa_multiple_xception)
# mvsa_vgg16 = merge_mvsa(mvsa_single_vgg16, mvsa_multiple_vgg16)
# mvsa_vgg19 = merge_mvsa(mvsa_single_vgg19, mvsa_multiple_vgg19)
# mvsa_resnet50 = merge_mvsa(mvsa_single_resnet50, mvsa_multiple_resnet50)
# mvsa_resnet101 = merge_mvsa(mvsa_single_resnet101, mvsa_multiple_resnet101)
# mvsa_resnet152 = merge_mvsa(mvsa_single_resnet152, mvsa_multiple_resnet152)
# mvsa_densenet121 = merge_mvsa(mvsa_single_densenet121, mvsa_multiple_densenet121)
# mvsa_densenet169 = merge_mvsa(mvsa_single_densenet169, mvsa_multiple_densenet169)
# mvsa_densenet201 = merge_mvsa(mvsa_single_densenet201, mvsa_multiple_densenet201)

# # prepare all features data
# feature_names = ['cnn', 'xception', 'vgg16', 'vgg19', 'resnet50', 'resnet101', 'resnet152', 'densenet121', 'densenet169', 'densenet201']

# mvsa_single_features = [mvsa_single_images,
#                         mvsa_single_xception,
#                         mvsa_single_vgg16, mvsa_single_vgg19, 
#                         mvsa_single_resnet50, mvsa_single_resnet101, mvsa_single_resnet152, 
#                         mvsa_single_densenet121, mvsa_single_densenet169, mvsa_single_densenet201]

# mvsa_multiple_features = [mvsa_multiple_images,
#                           mvsa_multiple_xception,
#                           mvsa_multiple_vgg16, mvsa_multiple_vgg19, 
#                           mvsa_multiple_resnet50, mvsa_multiple_resnet101, mvsa_multiple_resnet152, 
#                           mvsa_multiple_densenet121, mvsa_multiple_densenet169, mvsa_multiple_densenet201]

# mvsa_features = [mvsa_images,
#                  mvsa_xception,
#                  mvsa_vgg16, mvsa_vgg19, 
#                  mvsa_resnet50, mvsa_resnet101, mvsa_resnet152, 
#                  mvsa_densenet121, mvsa_densenet169, mvsa_densenet201]