<a href="https://www.kaggle.com/code/vincemarcs/mvsa-image-models?scriptVersionId=101000872" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
SEED = 61


import os
import re
import gc
import h5py
import torch
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
import tensorflow_addons as tfa

from tqdm import tqdm
from nltk import tokenize

from sklearn import preprocessing
from sklearn.decomposition import PCA
from IPython.display import display_html
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import BorderlineSMOTE

from keras import backend as K
from keras import initializers,regularizers,constraints
from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical
from keras.layers import Reshape, Input, Embedding, Flatten, Dense, Dropout, BatchNormalization, Activation #, merge
from keras.layers import TimeDistributed, LSTM, GRU, Bidirectional, Convolution1D, MaxPooling1D, MaxPool2D, Convolution2D
from keras.layers.core import RepeatVector, Reshape
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.models import Sequential, Model, load_model
from sklearn.model_selection import cross_val_score

def reset_seeds():
    """Re-seed the random number sources used by the notebook with ``SEED``.

    Exports ``PYTHONHASHSEED`` into ``os.environ`` and seeds NumPy's global
    generator, Python's ``random`` module and TensorFlow's global generator.
    """
    os.environ["PYTHONHASHSEED"] = str(SEED)
    for seed_fn in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_fn(SEED)

# from tensorflow.python.keras.layers import Layer, InputSpec, Lambda
# from tensorflow.keras import Model
# from attention import Attention_input1, Attention_input2
# from keras.optimizers import SGD, RMSprop, Adagrad

In [2]:
def read_hdf5(path):
    """Read every top-level dataset of an HDF5 file into memory.

    Args:
        path: Path of the HDF5 file to read.

    Returns:
        A list of ``(name, array)`` tuples, one per top-level key of the
        file. Datasets with ``object`` dtype are returned as NumPy arrays of
        UTF-8 decoded strings.
    """
    loaded_data = []

    # The context manager closes the file handle even when a read fails.
    # ``[:]`` copies each dataset into memory, so the returned arrays do not
    # depend on the file staying open.
    with h5py.File(path, 'r') as read_file:
        for name in read_file.keys():
            dataset = read_file[name][:]
            if dataset.dtype == np.dtype('object'):
                dataset = np.array([x.decode('UTF-8') for x in dataset])
            loaded_data.append((name, dataset))

    return loaded_data

def loadz(path):
    """Load the array stored under ``arr_0`` in a ``.npz`` archive.

    Args:
        path: Path of the ``.npz`` file.

    Returns:
        The array saved under the key ``arr_0``.
    """
    # ``np.load`` on an ``.npz`` keeps the archive open until it is closed;
    # the ``with`` block releases it once the array has been read.
    with np.load(path) as archive:
        return archive['arr_0']

In [3]:
def load_labels(path):
    """Load the three label arrays stored in an MVSA labels HDF5 file.

    Args:
        path: Path of the HDF5 file, read through ``read_hdf5``.

    Returns:
        A tuple ``(multimodal_labels, text_labels, image_labels)`` taken from
        the datasets named ``multimodal-labels``, ``text-labels`` and
        ``image-labels``.

    Raises:
        KeyError: If any of the three datasets is absent from the file.
    """
    datasets = dict(read_hdf5(path))
    required = ('multimodal-labels', 'text-labels', 'image-labels')

    # Fail with a message naming the missing datasets instead of an
    # UnboundLocalError at the return statement.
    missing = [key for key in required if key not in datasets]
    if missing:
        raise KeyError(f'{path} is missing label dataset(s): {missing}')

    return tuple(datasets[key] for key in required)

def load_mvsa_feature(feature_name, merge=False):
    """Load the pre-extracted feature arrays of one backbone for both MVSA sets.

    Reads ``mvsa-single-<feature_name>.npz`` and
    ``mvsa-multiple-<feature_name>.npz`` from
    ``../input/mvsa-features/<feature_name>`` via ``loadz``.

    Args:
        feature_name: Name of the feature folder / file suffix.
        merge: When truthy, return one array produced by ``merge_mvsa``.

    Returns:
        ``(mvsa_single, mvsa_multiple)``, or the merged array when ``merge``
        is truthy.
    """
    folder_path = os.path.join('../input/mvsa-features/', feature_name)
    mvsa_single = loadz(os.path.join(folder_path, f'mvsa-single-{feature_name}.npz'))
    mvsa_multiple = loadz(os.path.join(folder_path, f'mvsa-multiple-{feature_name}.npz'))

    # Plain truthiness test instead of comparing against True.
    if merge:
        return merge_mvsa(mvsa_single, mvsa_multiple)

    return mvsa_single, mvsa_multiple

def load_mvsa_images(merge=False):
    """Load the raw image arrays of MVSA-Single and MVSA-Multiple.

    Scans ``../input/mvsa-data`` for ``.npz`` files. A file whose second
    dash-separated token is ``single`` is taken as the MVSA-Single archive;
    any other dash-separated ``.npz`` file is taken as the MVSA-Multiple
    archive.

    Args:
        merge: When truthy, return one array produced by ``merge_mvsa``.

    Returns:
        ``(mvsa_single, mvsa_multiple)``, or the merged array when ``merge``
        is truthy.

    Raises:
        FileNotFoundError: If either archive cannot be found in the folder.
    """
    folder_path = '../input/mvsa-data'
    archive_paths = {}

    # ``os.listdir`` already yields bare file names.
    for file_name in os.listdir(folder_path):
        # ``splitext`` copes with names that have no dot or several dots.
        if os.path.splitext(file_name)[1] != '.npz':
            continue
        tokens = file_name.split('-')
        if len(tokens) < 2:
            # Not of the form ``<prefix>-<kind>...``; indexing would fail.
            continue
        kind = 'single' if tokens[1] == 'single' else 'multiple'
        archive_paths[kind] = os.path.join(folder_path, file_name)

    missing = [kind for kind in ('single', 'multiple') if kind not in archive_paths]
    if missing:
        raise FileNotFoundError(
            f'No .npz archive found in {folder_path} for: {", ".join(missing)}'
        )

    mvsa_single = loadz(archive_paths['single'])
    mvsa_multiple = loadz(archive_paths['multiple'])

    if merge:
        return merge_mvsa(mvsa_single, mvsa_multiple)

    return mvsa_single, mvsa_multiple

def merge_mvsa(mvsa_single, mvsa_multiple):
    """Concatenate the two arrays along the first axis, single set first."""
    return np.concatenate([mvsa_single, mvsa_multiple], axis=0)

In [4]:
def plot_metrics(history):
    """Plot train/validation curves for loss, accuracy and both F1 scores.

    Args:
        history: Object whose ``history`` attribute is a mapping holding the
            per-epoch series ``loss``, ``accuracy``, ``f1_macro``,
            ``f1_weighted`` and their ``val_``-prefixed counterparts.
    """
    # (history key, panel title, y-axis label) for each of the four panels.
    panels = (
        ('loss', 'LOSS', 'loss'),
        ('accuracy', 'ACCURACY', 'accuracy'),
        ('f1_macro', 'Macro F1-SCORE', 'f1-macro'),
        ('f1_weighted', 'Weighted F1-SCORE', 'f1-weighted'),
    )

    fig = plt.figure(figsize=(20, 5))
    for position, (metric, title, y_label) in enumerate(panels, start=1):
        fig.add_subplot(1, 4, position)
        plt.plot(history.history[metric])
        plt.plot(history.history['val_' + metric])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='best')

    plt.show()

In [5]:
# e.g. validation_split=0.1 -----> 8:1:1 ratio of train, val, test
def split_data(data, validation_split):
    """Split a sliceable sequence into contiguous train / val / test parts.

    The last ``int(validation_split * len(data))`` items form the test part,
    the same number of items before them form the validation part, and
    everything earlier is the training part.

    Args:
        data: Any object supporting ``len()`` and slicing.
        validation_split: Fraction of ``data`` used for validation and,
            separately, for test.

    Returns:
        A tuple ``(data_train, data_val, data_test)``.
    """
    total = len(data)
    num_val = int(validation_split * total)

    # Use explicit non-negative bounds. Negative-index slicing breaks when
    # num_val is 0: ``data[:-0]`` is empty and ``data[-0:]`` is everything,
    # which would hand the whole dataset to the test part.
    val_start = max(total - 2 * num_val, 0)
    test_start = max(total - num_val, 0)

    data_train = data[:val_start]
    data_val = data[val_start:test_start]
    data_test = data[test_start:]
    return data_train, data_val, data_test

# e.g. validation_split=0.1 -----> 8:1:1 ratio of train, val, test
def split_tf_data(data, validation_split):
    """Split a dataset exposing ``take``/``skip`` into train / val / test.

    The first ``int((1 - 2 * validation_split) * len(data))`` elements are
    the training part. Of the remainder, the first
    ``int(validation_split * len(data))`` elements are the test part and
    whatever follows them is the validation part.

    Args:
        data: Object supporting ``len()``, ``take(n)`` and ``skip(n)``.
        validation_split: Fraction used to size the held-out parts.

    Returns:
        A tuple ``(train_dataset, val_dataset, test_dataset)``.
    """
    total = len(data)
    train_size = int((1-validation_split*2) * total)
    holdout_size = int(validation_split * total)

    remainder = data.skip(train_size)
    return (
        data.take(train_size),
        remainder.skip(holdout_size),
        remainder.take(holdout_size),
    )

In [6]:
def create_model_pretrained(input_shape):
    """Build and compile a single softmax layer on top of precomputed features.

    Args:
        input_shape: Shape of one input sample, passed to ``Input``.

    Returns:
        A compiled Keras ``Model`` with ``NUM_CLASSES`` softmax outputs,
        Adam optimizer, categorical cross-entropy loss, and accuracy, macro
        F1 and weighted F1 as metrics.
    """
    metrics = ['accuracy'] + [
        tfa.metrics.F1Score(num_classes=NUM_CLASSES, average=average, name='f1_' + average)
        for average in ('macro', 'weighted')
    ]

    image_input = Input(shape=input_shape)
    outputs = Dense(NUM_CLASSES, activation='softmax')(image_input)

    model = Model(image_input, outputs)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=metrics)
    return model

def create_model_cnn(input_shape):
    """Build and compile a small CNN classifier.

    The network stacks three blocks of a 5x5 ReLU convolution (32, 64 and
    128 filters) followed by 2x2 max pooling, then flattens into a 64-unit
    ReLU layer and a ``NUM_CLASSES``-way softmax output.

    Args:
        input_shape: Shape of one input sample, passed to ``Input``.

    Returns:
        A compiled Keras ``Model`` using Adam, categorical cross-entropy,
        and accuracy, macro F1 and weighted F1 as metrics.
    """
    metrics = ['accuracy'] + [
        tfa.metrics.F1Score(num_classes=NUM_CLASSES, average=average, name='f1_' + average)
        for average in ('macro', 'weighted')
    ]

    image_inputs = Input(shape=input_shape)
    x = image_inputs
    for n_filters in (32, 64, 128):
        x = Convolution2D(n_filters, kernel_size=(5, 5), strides=(1, 1), activation='relu')(x)
        x = MaxPool2D(pool_size=(2, 2))(x)
    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    outputs = Dense(NUM_CLASSES, activation='softmax')(x)

    model = Model(image_inputs, outputs)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=metrics)
    return model

In [7]:
def evaluate_model(model, X_test, y_test, checkpoint=None, verbose=1):
    """Evaluate a model on test data and optionally print the four scores.

    Args:
        model: Model to evaluate; replaced by the model loaded from
            ``./model_checkpoint/<checkpoint>.h5`` when ``checkpoint`` is given.
        X_test: Test inputs forwarded to ``model.evaluate``.
        y_test: Test targets forwarded to ``model.evaluate``.
        checkpoint: Optional checkpoint name (file stem) to load instead of
            using ``model``.
        verbose: Forwarded to ``model.evaluate``; the scores are printed
            only when it equals 1.

    Returns:
        A tuple ``(loss, acc, f1_macro, f1_weighted)``.
    """
    if checkpoint is not None:
        model = load_model(f'./model_checkpoint/{checkpoint}.h5')

    # ``evaluate`` must yield exactly four values: loss plus three metrics.
    loss, acc, f1_macro, f1_weighted = model.evaluate(X_test, y_test, verbose=verbose)
    scores = (loss, acc, f1_macro, f1_weighted)

    if verbose == 1:
        labels = ('Loss:', 'Accuracy:', 'Macro F1-score:', 'Weighted F1-score:')
        for label, value in zip(labels, scores):
            print(label, value)

    return scores

In [8]:
def run_and_evaluate(name, X, y, verbose=0):
    """Train the softmax classifier on one feature set and score it on test data.

    Steps: encode ``y`` with the module-level ``le`` (which is re-fitted on
    ``y``) and one-hot it, shuffle ``X``/``y`` with NumPy's global RNG, split
    with ``split_data`` using ``VALIDATION_SPLIT``, oversample the training
    part with ``BorderlineSMOTE`` (borderline-2), fit the model built by
    ``create_model_pretrained`` with checkpointing and early stopping, then
    evaluate the saved checkpoint on the test part.

    Args:
        name: Checkpoint file stem under ``./model_checkpoint/``.
        X: Feature array, indexable by an index array.
        y: Raw labels aligned with ``X``.
        verbose: Verbosity forwarded to Keras; 1 also prints the epoch with
            the lowest validation loss.

    Returns:
        ``(history, scores)`` where ``scores`` is the tuple returned by
        ``evaluate_model``.
    """
    checkpoint_path = './model_checkpoint/{}.h5'.format(name)

    encoded = le.fit_transform(y)
    y = to_categorical(np.asarray(encoded))

    # One permutation applied to both arrays keeps samples and labels aligned.
    order = np.random.permutation(len(y))
    X = X[order]
    y = y[order]

    X_train, X_val, X_test = split_data(X, VALIDATION_SPLIT)
    y_train, y_val, y_test = split_data(y, VALIDATION_SPLIT)

    # Only the training part is oversampled; val/test keep their distribution.
    sampler = BorderlineSMOTE(sampling_strategy='minority', random_state=SEED, kind='borderline-2')
    X_train, y_train = sampler.fit_resample(X_train, y_train)

    model = create_model_pretrained(X_train.shape[1:])
    callbacks = [
        ModelCheckpoint(checkpoint_path, save_best_only=True, verbose=verbose),
        EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING),
    ]

    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        verbose=verbose,
        callbacks=callbacks,
    )
    if verbose == 1:
        print('Checkpoint loaded at epoch:', np.argmin(history.history['val_loss']))

    scores = evaluate_model(model, X_test, y_test, checkpoint=name, verbose=verbose)
    return history, scores

In [9]:
def run_and_evaluate_cnn(name, X, y, verbose=0):
    """Train the CNN on raw images and score it on held-out test data.

    Mirrors ``run_and_evaluate`` but builds the model with
    ``create_model_cnn`` and feeds training/validation data as batched
    ``tf.data`` datasets.

    Args:
        name: Checkpoint file stem under ``./model_checkpoint/``.
        X: Image array whose first axis indexes samples.
        y: Raw labels aligned with ``X``.
        verbose: Verbosity forwarded to Keras; 1 also prints the epoch with
            the lowest validation loss.

    Returns:
        ``(history, scores)`` where ``scores`` is the tuple returned by
        ``evaluate_model`` — the same contract as ``run_and_evaluate``, which
        callers rely on when unpacking the result.
    """
    y = le.fit_transform(y)
    y = to_categorical(np.asarray(y))

    # One permutation applied to both arrays keeps samples and labels aligned.
    random_idx = np.random.permutation(len(y))
    X, y = X[random_idx], y[random_idx]

    X_train, X_val, X_test = split_data(X, VALIDATION_SPLIT)
    y_train, y_val, y_test = split_data(y, VALIDATION_SPLIT)

    # The sampler only accepts 2-D (n_samples, n_features) input, so flatten
    # every image before resampling and restore the image shape afterwards.
    image_shape = X_train.shape[1:]
    oversample = BorderlineSMOTE(sampling_strategy='minority', random_state=SEED, kind='borderline-1')
    X_flat, y_train = oversample.fit_resample(X_train.reshape(len(X_train), -1), y_train)
    X_train = X_flat.reshape((-1,) + tuple(image_shape))

    train_set = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(BATCH_SIZE)
    val_set = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(BATCH_SIZE)

    model = create_model_cnn(X_train.shape[1:])
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=EARLY_STOPPING)
    checkpoint = ModelCheckpoint('./model_checkpoint/{}.h5'.format(name), save_best_only=True, verbose=verbose)

    # The datasets are already batched, so ``batch_size`` must not be passed.
    history = model.fit(train_set, validation_data=val_set,
                        epochs=EPOCHS, verbose=verbose,
                        callbacks=[checkpoint, early_stopping])
    if verbose == 1:
        best_epoch = np.argmin(history.history['val_loss'])
        print('Checkpoint loaded at epoch:', best_epoch)

    return history, evaluate_model(model, X_test, y_test, checkpoint=name, verbose=verbose)

In [10]:
def style_dataframe(dataframe):
    return dataframe.style.highlight_max(subset=['Accuracy', 'F1-macro', 'F1-weighted'], props='color:lawngreen', axis=0)\
                          .highlight_min(subset=['Accuracy', 'F1-macro', 'F1-weighted'], props='color:tomato', axis=0)\
                          .highlight_min(subset=['Loss'], props='color:lawngreen', axis=0)\
                          .highlight_max(subset=['Loss'], props='color:tomato', axis=0)

def display_dataframes(dfs, names=None, index=False):
    """Render several tables side by side in the notebook output.

    Args:
        dfs: Iterable of objects exposing ``to_html()``; ``pd.Series``
            entries are converted to a ``DataFrame`` first.
        names: Optional iterable of captions shown in a header row, one per
            entry of ``dfs``.
        index: Unused; kept so existing callers keep working.
    """
    def to_df(x):
        return pd.DataFrame(x) if isinstance(x, pd.Series) else x

    rows = []
    if names:
        rows.append(''.join(f'<td style="text-align:center">{name}</td>' for name in names))
    rows.append(''.join(f'<td style="vertical-align:top"> {to_df(df).to_html()}</td>'
                        for df in dfs))

    html_str = '<table>' + ''.join(f'<tr>{row}</tr>' for row in rows) + '</table>'
    # Only opening tags get the inline style. Replacing the bare word
    # "table" would also rewrite closing tags and any cell text containing it.
    html_str = html_str.replace('<table', '<table style="display:inline"')
    display_html(html_str, raw=True)

# Load data

In [11]:
# Multimodal, text and image labels of each MVSA subset.
(mvsa_single_multimodal_labels,
 mvsa_single_text_labels,
 mvsa_single_image_labels) = load_labels('../input/mvsa-features/labels/mvsa-single-labels.hdf5')
(mvsa_multiple_multimodal_labels,
 mvsa_multiple_text_labels,
 mvsa_multiple_image_labels) = load_labels('../input/mvsa-features/labels/mvsa-multiple-labels.hdf5')

# Same labels with MVSA-Single followed by MVSA-Multiple.
mvsa_multimodal_labels = merge_mvsa(
    mvsa_single_multimodal_labels, mvsa_multiple_multimodal_labels)
mvsa_text_labels = merge_mvsa(
    mvsa_single_text_labels, mvsa_multiple_text_labels)
mvsa_image_labels = merge_mvsa(
    mvsa_single_image_labels, mvsa_multiple_image_labels)

# ``fit`` returns the encoder itself, so it can be chained.
le = preprocessing.LabelEncoder().fit(mvsa_multimodal_labels)
NUM_CLASSES = len(le.classes_) # =3

In [12]:
# prepare all features data
# Backbones whose pre-extracted features are evaluated.
feature_names = ['xception', 'vgg16', 'vgg19', 'resnet50', 'resnet101', 'resnet152', 'densenet121', 'densenet169', 'densenet201']

# Parallel lists: entry i belongs to feature_names[i].
mvsa_single_features, mvsa_multiple_features, mvsa_features = [], [], []

for name in tqdm(feature_names):
    single_part, multiple_part = load_mvsa_feature(name)

    mvsa_single_features.append(single_part)
    mvsa_multiple_features.append(multiple_part)
    mvsa_features.append(merge_mvsa(single_part, multiple_part))

100%|██████████| 9/9 [00:25<00:00,  2.87s/it]


# Run models and display evaluation

In [13]:
# Re-seed the RNGs, then define the hyper-parameters read by the run_* helpers.
reset_seeds()
EPOCHS = 100  # passed as ``epochs`` to ``model.fit``
BATCH_SIZE = 128  # mini-batch size for ``model.fit`` / dataset batching
VALIDATION_SPLIT = 0.1  # fraction given to ``split_data`` for val and for test
EARLY_STOPPING = 10  # ``patience`` of the EarlyStopping callback

## With original image labels

In [14]:
# --- MVSA-Single, trained against the image-only labels ---
print('MVSA-Single: With original image labels')
mvsa_single_histories = []
mvsa_single_scores = []
for feature_name, features in zip(feature_names, mvsa_single_features):
    print('MVSA-Single:', feature_name)
    # Raw images ('cnn') go through the CNN pipeline, everything else through the softmax head.
    runner = run_and_evaluate_cnn if feature_name == 'cnn' else run_and_evaluate
    history, score = runner('single-OL-' + feature_name, features, mvsa_single_image_labels, verbose=0)
    mvsa_single_histories.append(history)
    mvsa_single_scores.append(score)
    print()
df_single_scores = pd.DataFrame(
    mvsa_single_scores,
    columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'],
    index=feature_names,
)

# --- MVSA-Multiple, trained against the image-only labels ---
print('MVSA-Multiple: With original image labels')
mvsa_multiple_histories = []
mvsa_multiple_scores = []
for feature_name, features in zip(feature_names, mvsa_multiple_features):
    if feature_name == 'cnn':
        history, score = run_and_evaluate_cnn('multiple-OL-' + feature_name, features, mvsa_multiple_image_labels, verbose=1)
    else:
        history, score = run_and_evaluate('multiple-OL-' + feature_name, features, mvsa_multiple_image_labels, verbose=0)
    mvsa_multiple_histories.append(history)
    mvsa_multiple_scores.append(score)
    print()
df_multiple_scores = pd.DataFrame(
    mvsa_multiple_scores,
    columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'],
    index=feature_names,
)

# Element-wise mean of the two score tables.
mvsa_average_scores = np.mean([mvsa_single_scores, mvsa_multiple_scores], axis=0)
df_average_scores = pd.DataFrame(
    mvsa_average_scores,
    columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'],
    index=feature_names,
)

MVSA-Single: With original image labels
MVSA-Single: xception


2022-07-16 17:50:47.672129: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-16 17:50:47.673311: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-16 17:50:47.673987: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-16 17:50:47.674848: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil


MVSA-Single: vgg16

MVSA-Single: vgg19

MVSA-Single: resnet50

MVSA-Single: resnet101

MVSA-Single: resnet152

MVSA-Single: densenet121

MVSA-Single: densenet169

MVSA-Single: densenet201

MVSA-Multiple: With original image labels



2022-07-16 17:52:00.292788: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 333414400 exceeds 10% of free system memory.
2022-07-16 17:52:00.636674: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 333414400 exceeds 10% of free system memory.





2022-07-16 17:52:15.503587: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 334757888 exceeds 10% of free system memory.
2022-07-16 17:52:15.847135: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 334757888 exceeds 10% of free system memory.











## With multimodal labels

In [15]:
# --- MVSA-Single, trained against the multimodal labels ---
print('MVSA-Single: With multimodal labels')
mvsa_single_histories2 = []
mvsa_single_scores2 = []
for feature_name, features in zip(feature_names, mvsa_single_features):
    print('MVSA-Single:', feature_name)
    history, score = run_and_evaluate(
        'single-ML-' + feature_name, features, mvsa_single_multimodal_labels, verbose=0)
    mvsa_single_histories2.append(history)
    mvsa_single_scores2.append(score)
    print()
df_single_scores2 = pd.DataFrame(
    mvsa_single_scores2,
    columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'],
    index=feature_names,
)

# --- MVSA-Multiple, trained against the multimodal labels ---
print('MVSA-Multiple: With multimodal labels')
mvsa_multiple_histories2 = []
mvsa_multiple_scores2 = []
for feature_name, features in zip(feature_names, mvsa_multiple_features):
    print('MVSA-Multiple:', feature_name)
    history, score = run_and_evaluate(
        'multiple-ML-' + feature_name, features, mvsa_multiple_multimodal_labels, verbose=0)
    mvsa_multiple_histories2.append(history)
    mvsa_multiple_scores2.append(score)
    print()
df_multiple_scores2 = pd.DataFrame(
    mvsa_multiple_scores2,
    columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'],
    index=feature_names,
)

# Element-wise mean of the two score tables.
mvsa_average_scores2 = np.mean([mvsa_single_scores2, mvsa_multiple_scores2], axis=0)
df_average_scores2 = pd.DataFrame(
    mvsa_average_scores2,
    columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'],
    index=feature_names,
)

MVSA-Single: With multimodal labels
MVSA-Single: xception

MVSA-Single: vgg16

MVSA-Single: vgg19

MVSA-Single: resnet50

MVSA-Single: resnet101

MVSA-Single: resnet152

MVSA-Single: densenet121

MVSA-Single: densenet169

MVSA-Single: densenet201

MVSA-Multiple: With multimodal labels
MVSA-Multiple: xception

MVSA-Multiple: vgg16


2022-07-16 17:54:59.653131: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 354746368 exceeds 10% of free system memory.



MVSA-Multiple: vgg19

MVSA-Multiple: resnet50

MVSA-Multiple: resnet101

MVSA-Multiple: resnet152

MVSA-Multiple: densenet121

MVSA-Multiple: densenet169

MVSA-Multiple: densenet201



## With merged MVSA data

In [16]:
# --- Merged MVSA data, trained against the image-only labels ---
print('Both MVSA: With original image labels')
mvsa_histories3 = []
mvsa_scores3 = []
for feature_name, features in zip(feature_names, mvsa_features):
    print('Both MVSA:', feature_name)
    history, score = run_and_evaluate(
        'merge-OL-' + feature_name, features, mvsa_image_labels, verbose=0)
    mvsa_histories3.append(history)
    mvsa_scores3.append(score)
    print()
df_scores3 = pd.DataFrame(
    mvsa_scores3,
    columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'],
    index=feature_names,
)

# --- Merged MVSA data, trained against the multimodal labels ---
print('Both MVSA: With multimodal labels')
mvsa_histories4 = []
mvsa_scores4 = []
for feature_name, features in zip(feature_names, mvsa_features):
    print('Both MVSA:', feature_name)
    history, score = run_and_evaluate(
        'merge-ML-' + feature_name, features, mvsa_multimodal_labels, verbose=0)
    mvsa_histories4.append(history)
    mvsa_scores4.append(score)
    print()
df_scores4 = pd.DataFrame(
    mvsa_scores4,
    columns=['Loss', 'Accuracy', 'F1-macro', 'F1-weighted'],
    index=feature_names,
)

Both MVSA: With original image labels
Both MVSA: xception

Both MVSA: vgg16

Both MVSA: vgg19

Both MVSA: resnet50

Both MVSA: resnet101

Both MVSA: resnet152

Both MVSA: densenet121

Both MVSA: densenet169

Both MVSA: densenet201

Both MVSA: With multimodal labels
Both MVSA: xception

Both MVSA: vgg16

Both MVSA: vgg19

Both MVSA: resnet50

Both MVSA: resnet101

Both MVSA: resnet152

Both MVSA: densenet121

Both MVSA: densenet169

Both MVSA: densenet201



# Display results

In [17]:
# Side-by-side score tables for the runs using the image-only labels.
print('With original image labels\n')
display_dataframes(
    tuple(style_dataframe(df) for df in (df_single_scores, df_multiple_scores, df_average_scores)),
    names=['MVSA-Single', 'MVSA-Multiple', 'Average'],
)

With original image labels



Unnamed: 0_level_0,Loss,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Loss,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_2,Loss,Accuracy,F1-macro,F1-weighted
xception,0.887988,0.607539,0.546979,0.601659
vgg16,0.903607,0.587583,0.491755,0.570818
vgg19,0.916898,0.558758,0.475969,0.539335
resnet50,1.018781,0.503326,0.353013,0.429299
resnet101,1.001084,0.541020,0.34843,0.454531
resnet152,1.013095,0.518847,0.363695,0.452422
densenet121,0.903301,0.572062,0.495933,0.552022
densenet169,0.859692,0.600887,0.524414,0.595031
densenet201,0.905453,0.596452,0.536464,0.595234
xception,0.914043,0.547591,0.452507,0.556152

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.887988,0.607539,0.546979,0.601659
vgg16,0.903607,0.587583,0.491755,0.570818
vgg19,0.916898,0.558758,0.475969,0.539335
resnet50,1.018781,0.503326,0.353013,0.429299
resnet101,1.001084,0.54102,0.34843,0.454531
resnet152,1.013095,0.518847,0.363695,0.452422
densenet121,0.903301,0.572062,0.495933,0.552022
densenet169,0.859692,0.600887,0.524414,0.595031
densenet201,0.905453,0.596452,0.536464,0.595234

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.914043,0.547591,0.452507,0.556152
vgg16,0.840471,0.605758,0.46606,0.59085
vgg19,0.93307,0.509988,0.412105,0.520945
resnet50,1.038456,0.482374,0.346481,0.46645
resnet101,1.02999,0.503525,0.332154,0.438396
resnet152,0.990647,0.516451,0.320346,0.452676
densenet121,0.970941,0.522914,0.424116,0.517904
densenet169,0.938201,0.525264,0.43756,0.541825
densenet201,0.94963,0.540541,0.426351,0.53827

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.901015,0.577565,0.499743,0.578905
vgg16,0.872039,0.596671,0.478908,0.580834
vgg19,0.924984,0.534373,0.444037,0.53014
resnet50,1.028618,0.49285,0.349747,0.447874
resnet101,1.015537,0.522273,0.340292,0.446464
resnet152,1.001871,0.517649,0.34202,0.452549
densenet121,0.937121,0.547488,0.460025,0.534963
densenet169,0.898946,0.563076,0.480987,0.568428
densenet201,0.927542,0.568496,0.481407,0.566752


In [18]:
# Side-by-side score tables for the runs using the multimodal labels.
print('With multimodal labels\n')
display_dataframes(
    tuple(style_dataframe(df) for df in (df_single_scores2, df_multiple_scores2, df_average_scores2)),
    names=['MVSA-Single', 'MVSA-Multiple', 'Average'],
)

With multimodal labels



Unnamed: 0_level_0,Loss,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Loss,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_2,Loss,Accuracy,F1-macro,F1-weighted
xception,0.909944,0.569845,0.509404,0.577527
vgg16,0.996586,0.467849,0.425536,0.479079
vgg19,0.885152,0.594235,0.47339,0.582586
resnet50,1.007847,0.549889,0.321148,0.457789
resnet101,1.052743,0.501109,0.301205,0.431831
resnet152,0.975433,0.587583,0.310481,0.493008
densenet121,1.015507,0.525499,0.424076,0.523508
densenet169,0.907787,0.598670,0.489359,0.591734
densenet201,0.956529,0.572062,0.473297,0.59172
xception,0.919657,0.584606,0.381185,0.559949

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.909944,0.569845,0.509404,0.577527
vgg16,0.996586,0.467849,0.425536,0.479079
vgg19,0.885152,0.594235,0.47339,0.582586
resnet50,1.007847,0.549889,0.321148,0.457789
resnet101,1.052743,0.501109,0.301205,0.431831
resnet152,0.975433,0.587583,0.310481,0.493008
densenet121,1.015507,0.525499,0.424076,0.523508
densenet169,0.907787,0.59867,0.489359,0.591734
densenet201,0.956529,0.572062,0.473297,0.59172

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.919657,0.584606,0.381185,0.559949
vgg16,0.86647,0.632785,0.387727,0.57838
vgg19,0.852905,0.612221,0.407279,0.592017
resnet50,1.024351,0.508813,0.318721,0.503505
resnet101,0.966605,0.648061,0.305231,0.542404
resnet152,1.043892,0.509988,0.305257,0.504502
densenet121,0.955472,0.567568,0.401657,0.573957
densenet169,0.933714,0.544066,0.406356,0.55392
densenet201,0.901157,0.582844,0.397892,0.576672

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.914801,0.577226,0.445294,0.568738
vgg16,0.931528,0.550317,0.406631,0.528729
vgg19,0.869029,0.603228,0.440335,0.587302
resnet50,1.016099,0.529351,0.319934,0.480647
resnet101,1.009674,0.574585,0.303218,0.487118
resnet152,1.009663,0.548786,0.307869,0.498755
densenet121,0.985489,0.546533,0.412866,0.548732
densenet169,0.92075,0.571368,0.447858,0.572827
densenet201,0.928843,0.577453,0.435594,0.584196


In [19]:
# Side-by-side score tables for the runs on the merged MVSA data.
print('With both MVSA merged together\n')
display_dataframes(
    tuple(style_dataframe(df) for df in (df_scores3, df_scores4)),
    names=['Original image labels', 'Multimodal labels'],
)

With both MVSA merged together



Unnamed: 0_level_0,Loss,Accuracy,F1-macro,F1-weighted
Unnamed: 0_level_1,Loss,Accuracy,F1-macro,F1-weighted
xception,0.961857,0.533674,0.455483,0.534095
vgg16,0.967555,0.533674,0.442044,0.538002
vgg19,0.942434,0.550395,0.430834,0.534109
resnet50,1.007776,0.522991,0.340862,0.457771
resnet101,1.027039,0.492801,0.350375,0.457364
resnet152,1.023788,0.508128,0.350771,0.470219
densenet121,0.932301,0.544357,0.46611,0.54997
densenet169,0.996813,0.513237,0.435398,0.523603
densenet201,0.923965,0.559684,0.500194,0.566983
xception,0.937073,0.574083,0.423385,0.565906

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.961857,0.533674,0.455483,0.534095
vgg16,0.967555,0.533674,0.442044,0.538002
vgg19,0.942434,0.550395,0.430834,0.534109
resnet50,1.007776,0.522991,0.340862,0.457771
resnet101,1.027039,0.492801,0.350375,0.457364
resnet152,1.023788,0.508128,0.350771,0.470219
densenet121,0.932301,0.544357,0.46611,0.54997
densenet169,0.996813,0.513237,0.435398,0.523603
densenet201,0.923965,0.559684,0.500194,0.566983

Unnamed: 0,Loss,Accuracy,F1-macro,F1-weighted
xception,0.937073,0.574083,0.423385,0.565906
vgg16,0.946262,0.583836,0.383249,0.53988
vgg19,0.943145,0.576405,0.393763,0.554823
resnet50,0.981454,0.579192,0.356707,0.53746
resnet101,0.98117,0.58523,0.350985,0.538101
resnet152,0.97403,0.61124,0.331921,0.52602
densenet121,0.952404,0.582443,0.392807,0.548511
densenet169,0.92457,0.584765,0.408633,0.558617
densenet201,0.910621,0.582908,0.440174,0.583103


# Drafts

In [20]:
# # load separate
# mvsa_single_images, mvsa_multiple_images = load_mvsa_images()
# mvsa_single_xception, mvsa_multiple_xception = load_mvsa_feature('xception')
# mvsa_single_vgg16, mvsa_multiple_vgg16 = load_mvsa_feature('vgg16')
# mvsa_single_vgg19, mvsa_multiple_vgg19 = load_mvsa_feature('vgg19')
# mvsa_single_resnet50, mvsa_multiple_resnet50 = load_mvsa_feature('resnet50')
# mvsa_single_resnet101, mvsa_multiple_resnet101 = load_mvsa_feature('resnet101')
# mvsa_single_resnet152, mvsa_multiple_resnet152 = load_mvsa_feature('resnet152')
# mvsa_single_densenet121, mvsa_multiple_densenet121 = load_mvsa_feature('densenet121')
# mvsa_single_densenet169, mvsa_multiple_densenet169 = load_mvsa_feature('densenet169')
# mvsa_single_densenet201, mvsa_multiple_densenet201 = load_mvsa_feature('densenet201')

# # load merge
# mvsa_images = merge_mvsa(mvsa_single_images, mvsa_multiple_images)
# mvsa_xception = merge_mvsa(mvsa_single_xception, mvsa_multiple_xception)
# mvsa_vgg16 = merge_mvsa(mvsa_single_vgg16, mvsa_multiple_vgg16)
# mvsa_vgg19 = merge_mvsa(mvsa_single_vgg19, mvsa_multiple_vgg19)
# mvsa_resnet50 = merge_mvsa(mvsa_single_resnet50, mvsa_multiple_resnet50)
# mvsa_resnet101 = merge_mvsa(mvsa_single_resnet101, mvsa_multiple_resnet101)
# mvsa_resnet152 = merge_mvsa(mvsa_single_resnet152, mvsa_multiple_resnet152)
# mvsa_densenet121 = merge_mvsa(mvsa_single_densenet121, mvsa_multiple_densenet121)
# mvsa_densenet169 = merge_mvsa(mvsa_single_densenet169, mvsa_multiple_densenet169)
# mvsa_densenet201 = merge_mvsa(mvsa_single_densenet201, mvsa_multiple_densenet201)

# # prepare all features data
# feature_names = ['cnn', 'xception', 'vgg16', 'vgg19', 'resnet50', 'resnet101', 'resnet152', 'densenet121', 'densenet169', 'densenet201']

# mvsa_single_features = [mvsa_single_images,
#                         mvsa_single_xception,
#                         mvsa_single_vgg16, mvsa_single_vgg19, 
#                         mvsa_single_resnet50, mvsa_single_resnet101, mvsa_single_resnet152, 
#                         mvsa_single_densenet121, mvsa_single_densenet169, mvsa_single_densenet201]

# mvsa_multiple_features = [mvsa_multiple_images,
#                           mvsa_multiple_xception,
#                           mvsa_multiple_vgg16, mvsa_multiple_vgg19, 
#                           mvsa_multiple_resnet50, mvsa_multiple_resnet101, mvsa_multiple_resnet152, 
#                           mvsa_multiple_densenet121, mvsa_multiple_densenet169, mvsa_multiple_densenet201]

# mvsa_features = [mvsa_images,
#                  mvsa_xception,
#                  mvsa_vgg16, mvsa_vgg19, 
#                  mvsa_resnet50, mvsa_resnet101, mvsa_resnet152, 
#                  mvsa_densenet121, mvsa_densenet169, mvsa_densenet201]