In [1]:
import os
import re
import gc
import cv2
import h5py
import torch
import string
import random
import numpy as np
import tensorflow as tf

from tqdm import tqdm
from nltk import tokenize
from sklearn import preprocessing

from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical
from keras.layers import Rescaling, Resizing
from keras import Input
from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from keras.applications.xception import Xception
from tensorflow.keras.applications import ResNet50, ResNet101, ResNet152
from tensorflow.keras.applications import DenseNet121, DenseNet169, DenseNet201
from transformers import BertTokenizer, BertForMaskedLM, BertModel
from tensorflow.keras import Model

In [2]:
# Spatial size (first two dimensions) and channel count that together form
# the `input_shape` of the image models built later in this notebook.
IMAGE_SIZE = (224, 224)
NUM_CHANNELS = 3

In [3]:
# Locations of the HDF5 files for the MVSA-Single and MVSA-Multiple datasets.
# `load_mvsa_data` additionally reads a sibling `<name>-images.npz` file from
# the same directory as each of these paths.
mvsa_single_path = '../input/mvsa-data/mvsa-single-4511.hdf5'
mvsa_multiple_path = '../input/mvsa-data/mvsa-multiple-17024.hdf5'

In [4]:
def read_hdf5(path):
    """Load every top-level entry of an HDF5 file into memory.

    Args:
        path: Location of the HDF5 file to read.

    Returns:
        A list of ``(name, array)`` tuples, one per key reported by the
        file, in the order the keys are reported. Arrays whose dtype is
        ``object`` are rebuilt as a NumPy array of decoded strings.
    """
    loaded_data = []

    # The context manager guarantees the file handle is released, even if
    # reading or decoding one of the datasets raises.
    with h5py.File(path, 'r') as read_file:
        for name in list(read_file.keys()):
            # `[:]` copies the dataset into memory, so the array stays valid
            # after the file has been closed.
            dataset = read_file[name][:]
            if dataset.dtype == np.dtype('object'):
                # Elements may arrive as `bytes` or as already-decoded `str`
                # depending on the h5py version; only `bytes` need decoding.
                dataset = np.array([
                    x.decode('UTF-8') if isinstance(x, bytes) else x
                    for x in dataset
                ])
            loaded_data.append((name, dataset))

    return loaded_data

In [5]:
def load_mvsa_data(path, multiple=False):
    """Load the texts, images and label arrays of one MVSA dataset.

    The HDF5 file at ``path`` supplies the texts and the three label arrays.
    The images are read from a sibling ``<stem>-images.npz`` file in the same
    directory, where ``<stem>`` is the HDF5 file name up to its first dot.

    Args:
        path: Path to the dataset's HDF5 file.
        multiple: Not used by this function; kept so that existing callers
            passing it continue to work.

    Returns:
        A tuple ``(texts, images, labels, text_labels, image_labels)`` where
        ``labels`` is the content of the ``'multimodal-labels'`` dataset.

    Raises:
        KeyError: If any of the required datasets is absent from the file.
    """
    # Pair names with arrays for direct lookup instead of scanning the list.
    datasets = dict(read_hdf5(path))

    required = ('texts', 'multimodal-labels', 'text-labels', 'image-labels')
    missing = [key for key in required if key not in datasets]
    if missing:
        # Fail early with a clear message rather than hitting an
        # UnboundLocalError at the return statement after loading the images.
        raise KeyError(
            'Missing dataset(s) %s in %s' % (', '.join(missing), path)
        )

    directory, filename = os.path.split(path)
    images_path = os.path.join(directory, filename.split('.')[0] + '-images.npz')
    images = loadz(images_path)

    return (
        datasets['texts'],
        images,
        datasets['multimodal-labels'],
        datasets['text-labels'],
        datasets['image-labels'],
    )

def loadz(path):
    """Return the first positional array stored in an ``.npz`` archive.

    Args:
        path: Path to an ``.npz`` file containing an ``'arr_0'`` entry, the
            name ``np.savez`` gives to its first positional array.

    Returns:
        The array stored under ``'arr_0'``.
    """
    # `np.load` on an .npz archive keeps the file open until it is closed;
    # the context manager releases the handle once the array has been read.
    with np.load(path) as archive:
        return archive['arr_0']

# Load raw data

In [6]:
# MVSA-Single: texts, images and the three label arrays.
(
    mvsa_single_texts,
    mvsa_single_images,
    mvsa_single_multimodal_labels,
    mvsa_single_text_labels,
    mvsa_single_image_labels,
) = load_mvsa_data(mvsa_single_path)
num_mvsa_single = len(mvsa_single_texts)

# MVSA-Multiple: same layout as above.
(
    mvsa_multiple_texts,
    mvsa_multiple_images,
    mvsa_multiple_multimodal_labels,
    mvsa_multiple_text_labels,
    mvsa_multiple_image_labels,
) = load_mvsa_data(mvsa_multiple_path)
num_mvsa_multiple = len(mvsa_multiple_texts)

# DenseNet169

In [7]:
# Instantiate DenseNet169 for inputs shaped (IMAGE_SIZE[0], IMAGE_SIZE[1], NUM_CHANNELS).
densenet169 = DenseNet169(input_shape=(*IMAGE_SIZE[:2], NUM_CHANNELS))
densenet169.trainable = False  # keep the pre-trained layers frozen

# Feature extractor: same input as the full network, but its output is taken
# from the second-to-last layer instead of the final one.
densenet169_last = Model(
    inputs=densenet169.input,
    outputs=densenet169.layers[-2].output,
)

2022-07-03 18:39:12.485935: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet169_weights_tf_dim_ordering_tf_kernels.h5


In [8]:
# Wrap the truncated DenseNet169 in a model that rescales its inputs first.
image_inputs = Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], NUM_CHANNELS))

# Multiply every input value by 1/255.
# NOTE(review): presumably the raw images hold 0-255 pixel values -- confirm.
# NOTE(review): Keras documents `densenet.preprocess_input` as the expected
# preprocessing for DenseNet; only a plain rescale is applied here -- confirm
# this is intended.
rescale_layer = Rescaling(scale=1.0 / 255.0)(image_inputs)

outputs = densenet169_last(rescale_layer)
model_densenet169 = Model(inputs=image_inputs, outputs=outputs)

In [9]:
# Run every MVSA-Single image through the DenseNet169 feature extractor.
print('MVSA-Single: Extracting DenseNet169 features of images')
mvsa_single_densenet169 = model_densenet169.predict(
    mvsa_single_images,
    verbose=1,
)
print(
    'DenseNet169 last hidden layer dimension:',
    mvsa_single_densenet169.shape[1],
)
print(
    'MVSA-Single with DenseNet169 last hidden layer:',
    mvsa_single_densenet169.shape,
)

# Round-trip check: persist the features, read them back from the resulting
# .npz file, and confirm the reloaded array matches element-wise.
np.savez('./mvsa-single-densenet169', mvsa_single_densenet169)
x = loadz('./mvsa-single-densenet169.npz')
print((x == mvsa_single_densenet169).all())

MVSA-Single: Extracting DenseNet169 features of images


2022-07-03 18:39:20.561942: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


DenseNet169 last hidden layer dimension: 1664
MVSA-Single with DenseNet169 last hidden layer: (4511, 1664)
True


In [10]:
# Run every MVSA-Multiple image through the DenseNet169 feature extractor.
print('MVSA-Multiple: Extracting DenseNet169 features of images')
mvsa_multiple_densenet169 = model_densenet169.predict(
    mvsa_multiple_images,
    verbose=1,
)
print(
    'DenseNet169 last hidden layer dimension:',
    mvsa_multiple_densenet169.shape[1],
)
print(
    'MVSA-Multiple with DenseNet169 last hidden layer:',
    mvsa_multiple_densenet169.shape,
)

# Round-trip check: persist the features, read them back from the resulting
# .npz file, and confirm the reloaded array matches element-wise.
np.savez('./mvsa-multiple-densenet169', mvsa_multiple_densenet169)
x = loadz('./mvsa-multiple-densenet169.npz')
print((x == mvsa_multiple_densenet169).all())

MVSA-Multiple: Extracting DenseNet169 features of images
DenseNet169 last hidden layer dimension: 1664
MVSA-Multiple with DenseNet169 last hidden layer: (17024, 1664)
True
