In [1]:
# !pip install --upgrade tensorflow-gpu

In [2]:
import os
import sys
import re
import imageio
import unicodedata
import ast
import itertools

import pandas as pd
import numpy as np
import tensorflow as tf

from PIL import UnidentifiedImageError
from xml.dom import minidom
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
plt.style.use('dark_background')

In [3]:
# Load from dynamic kernels code
sys.path.insert(0, '/project/dynamic-kernels')

from src.layers import CADenseAdd, CADenseMul
from src.optimizers import SVDAdam, SVDSGD
# from src.models import CAEncoderLayer, PositionalEncodingLayer, EncoderLayer
from src.models.utils import wrap_model
# from src.callbacks import ReduceLROPlateau

### Data

Context tags data

In [4]:
# Amount of tag chunks
n_chunks = 83
# Get tags from chunks
tags = pd.concat([
    pd.read_csv(f'tags/chunk_{chunk}', index_col=0).rename({'link': 'tags'}, axis=1) 
    for chunk in range(n_chunks)
])
# set index of tags to index of urls
tags.index = tags.index.astype(str)
# Groupby index and join tags
tags = tags.groupby(level=0).tags.apply(list)
# Evaluate str to list
tags = tags.map(lambda l: ast.literal_eval(l[0]))
# Show examples
tags.head(2)

10005385    [arizona, desert, cholla, cactus, teddybear, b...
10005386    [arizona, desert, sonoran, clouds, mountain, s...
Name: tags, dtype: object

Original Flickr urls

In [5]:
# Get urls for indices
urls = pd.read_csv('data/gps_urls.txt', header=None, delimiter=' ', names=['link'])
# # Groupby index and join links
urls = urls.groupby(level=0).link.apply(list)
# Show examples
urls.head(1)

43398    [https://www.flickr.com/photos/george/43398/]
Name: link, dtype: object

GPS coordinates

In [6]:
# Read coordinates
coordinates = pd.read_csv('data/photo2gps.txt', header=None, delimiter=' ', names=['longitude', 'latitude'], index_col=0)
# Split index
coordinates.index = coordinates.index.map(lambda s: tuple(s.split('/')))
# Create class label
coordinates['class'] = coordinates.index.get_level_values(0)
# Set index
coordinates.index = coordinates.index.get_level_values(1).map(lambda i: i[:-4])
# Show examples
coordinates.head(2)

Unnamed: 0,longitude,latitude,class
1001224523,31.349944,-105.92899,airplane
10030517043,43.123688,-77.623395,airplane


Class labels

In [7]:
# Group classes by image
labels = coordinates.groupby(level=0)['class'].apply(list)
# Get samples with only one label
labels = labels[labels.apply(len) == 1]
# Show example
labels.head(2)

10000481333    [bikini]
10005385       [cactus]
Name: class, dtype: object

Class selection

In [8]:
# Classes data directory
data_directory = 'data/classes'
# Select classes
animal_classes = [
    'ant', 
    'cow',
    'deer',
    'dolphin',  
    'horse', 
    'jellyfish',
    'lizard', 
    'lobster', 
    'sheep', 
    'whale'
]
transport_classes = [
    'airplane',
    'boat', 
    'canoe', 
    'chevrolet', 
    'corvette', 
    'ferrari', 
    'ford',
    'helicopter',
    'honda', 
    'jeep', 
    'locomotive', 
    'metro', 
    'mustang', 
    'ship', 
    'taxi', 
    'toyota', 
    'tractor',
    'trailer', 
    'volkswagen', 
    'yacht'
]
sport_classes = [
    'baseball', 
    'climbing',
    'fishing', 
    'golf', 
    'hockey', 
    'ski', 
    'surfing'
]
structure_classes = [
    'alcatraz',
    'bridge', 
    'casino', 
    'castle', 
    'cemetery',
    'greenhouse', 
    'monument', 
    'patio', 
    'pier', 
    'ranch', 
    'skyscraper', 
    'stadium', 
    'temple', 
    'tent', 
    'tower',
    'tunnel'
]
nature_classes = [
    'cactus', 
    'cave', 
    'cliff', 
    'falls', 
    'foliage', 
    'forest', 
    'lake',
    'lightning', 
    'meadow', 
    'mountain', 
    'river', 
    'rocks', 
    'sea', 
    'snow',
    'valley',
]
classes_select = [
#     'alley', 
#     'aquarium', 
#     'asian',
#     'autumn',
#     'band',
#     'beach', 
#     'bikini', 
#     'blond', 
#     'brick', 
#     'buildings',
#     'carnival',
#     'cattle' 
#     'cigar', 
#     'city',
#     'coast', 
#     'disneyland', 
#     'fireworks', 
#     'fog', 
#     'fountain', 
#     'graffiti', 
#     'highway',
#     'ipod', 
#     'pot', 
#     'museum',
#     'nail',
#     'ocean', 
#     'railroad', 
#     'scale', 
#     'sculpture',
#     'shore', 
#     'storm', 
#     'wave', 
]
# classes_select = animal_classes
# classes_select = structure_classes
# classes_select = sport_classes
# classes_select = nature_classes
classes_select = animal_classes + sport_classes + structure_classes + nature_classes

In [9]:
n_classes = len(classes_select)

In [10]:
# Read images from directories
photos, labels_ = zip(*[
    (image, class_directory) for class_directory in classes_select
    for image in os.listdir(os.path.join(data_directory, class_directory)) if image.endswith('jpg')
])
# Show example & amount of images
photos[0], len(photos)

('1031487825.jpg', 43159)

In [11]:
# Photo ids with no suffix
photos_no_suffix = [p[:-4] for p in photos]
# Select photos for which labels are available
photos_no_suffix = list(set(labels.index) & set(photos_no_suffix))
# Select photos that have tags
photos_no_suffix = list(set(tags.index) & set(photos_no_suffix))
# Photo ids with sufix
photos = [p + '.jpg' for p in photos_no_suffix]
len(photos)

15157

In [12]:
# Select tags and photos
tags = tags.loc[photos_no_suffix]
labels = labels.loc[photos_no_suffix]

In [13]:
def remove_tags(l, selection):
    return [x for x in l if x not in selection]

In [14]:
# Remove tags that contain class label
tags = tags.map(lambda t: remove_tags(t, classes_select))
# Show example
tags.head(2)

226061308                                             [water]
1501113307    [underground, boat, pillar, cavern, bonneterre]
Name: tags, dtype: object

### Load word embeddings for context

In [15]:
# Dimension of glove to be used
glove_dim = 300
# Path to glove embeddings
path_to_glove_file = f"../glove/glove.6B.{glove_dim}d.txt"
# Make dictionary with glove embeddings
embeddings_index = {}
# Fill dictionary
with open(path_to_glove_file) as f:
    for line in f:
        word, coefs = line.split(maxsplit=1)
        coefs = np.fromstring(coefs, "f", sep=" ")
        embeddings_index[word] = coefs

print("Found %s word vectors." % len(embeddings_index))

Found 400000 word vectors.


### Build context embeddings from context titles

Wanna construct context embeddings using SIF with word embeddings.

$v_s = \frac{1}{|s|} \sum_{{v_w} \in \mathcal{S}} \frac{a}{a + p(w)} v_w$

With $a$ a parameter and $p(w)$ the estimated word frequency in the corpus

In [16]:
# Sif formula for a sentence as list of words
def SIF(s, a, p):
    return np.mean([embeddings_index.get(w, np.zeros(glove_dim, dtype=np.float32)) * a / (a + p[w]) for w in s], axis=0)

# Normalization function for words
def normalize(input_str):
    nfkd_form = unicodedata.normalize('NFKD', input_str)
    only_ascii = nfkd_form.encode('ASCII', 'ignore')
    return re.sub(r'[0-9]', '', only_ascii.lower().decode('utf-8'))

def preprocess_title(title):
    return [normalize(w) for tag in title for word in re.split(r'[^\w]', tag) for w in word.split('_')]

def preprocess(image):
    image = tf.keras.applications.mobilenet.preprocess_input(image)
    image = tf.image.resize(image, (resize_shape[0], resize_shape[1]))
    return image

In [17]:
# Get word count from context titles
words = [word for words in tags for word in words]
# get unique words and counts
uniques, counts = np.unique(words, return_counts=True)
# frequencies dictionary
frequencies = dict(zip(uniques, counts/counts.sum()))

In [18]:
# Make datframe with classes and tags
df = pd.concat([tags, labels.map(lambda l: l[0])], axis=1)
# Remove no context words rows
df = df.loc[df.tags.map(len) != 0]
# Show example
df.head(20)

Unnamed: 0,tags,class
226061308,[water],tower
1501113307,"[underground, boat, pillar, cavern, bonneterre]",lake
448422155,"[vermont, covered, brattleboro, windham, dumme...",bridge
341870914,"[chicago, aquarium, illinois, shedd]",jellyfish
2109355749,"[nyc, usa, ny, newyork, reflection, building, ...",skyscraper
194955368,"[signs, sign, oregon, no, fairview, passiveagr...",rocks
401650530,"[sunset, water, point, vanishingpoint, vanishi...",bridge
111096353,"[austin, flag, universityoftexas]",tower
861115696,"[usa, sandwich, collegepark]",deer
413093354,"[windmill, texas, canvas, oil, land, top20texas]",ranch


### Build dataset from generator

### Load images from folders

In [19]:
def get_image_from_id(item, image_id):
    path = f'data/classes/{item}/{image_id}.jpg'
    # return imageio.imread(path)
    return tf.keras.utils.load_img(path)

In [20]:
def build_generator(df):
    def generator():
        for PHOTO_ID, row in df.iterrows():
            context_tags = row['tags']
            item = row['class']
            try:
                x = get_image_from_id(item, PHOTO_ID)
            except UnidentifiedImageError:
                continue
            x = tf.keras.preprocessing.image.img_to_array(x)
            x = preprocess(x).numpy()
            c = SIF(context_tags, a, frequencies)
            y = (np.array(classes_select) == item).astype(np.int32)
            yield (x, c), y
    return generator

In [21]:
# train, val and test split
def split_df(df):
    train_idx = []
    val_idx = []
    test_idx = []
    for g in df.groupby('class')['class']:
        idx = g[1].sample(frac=1, random_state=42).index
        train_idx.extend(idx[:int(0.8*idx.size)].to_list())
        val_idx.extend(idx[int(0.8*idx.size) : int((0.8 * 0.5 + 0.5)* idx.size)])
        test_idx.extend(idx[int((0.8 * 0.5 + 0.5)* idx.size) : ])
    train_df = df.loc[train_idx]
    val_df = df.loc[test_idx]
    test_df = df.loc[test_idx]
    return train_df, test_df, val_df

# split in train test and val with stratisfied
train_df, test_df, val_df = split_df(df)
# show train example
train_df.head(2)

Unnamed: 0,tags,class
481326326,"[sanfrancisco, california, usa, island, prison...",alcatraz
51087958,"[2005, sanfrancisco, october, airshow, blueang...",alcatraz


### Storage strategy

In [22]:
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [23]:
from tensorflow.python.client import device_lib
def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']
gpus = get_available_gpus()
print(gpus)
central_storage_strategy = tf.distribute.experimental.CentralStorageStrategy(compute_devices=[gpus[0]])

['/device:GPU:0']
INFO:tensorflow:ParameterServerStrategy (CentralStorageStrategy if you are using a single machine) with compute_devices = ['/job:localhost/replica:0/task:0/device:GPU:0'], variable_device = '/job:localhost/replica:0/task:0/device:GPU:0'


2022-01-31 21:58:13.970986: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-01-31 21:58:15.782695: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /device:GPU:0 with 10228 MB memory:  -> device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:5e:00.0, compute capability: 6.1
2022-01-31 21:58:15.838411: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10228 MB memory:  -> device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:5e:00.0, compute capability: 6.1


In [24]:
with central_storage_strategy.scope():
    # SIF parameter
    a = 10e-4
    # Image scale parameter
    image_scale = 1

    # Shape parameters
    resize_shape = (224 * image_scale, 224 * image_scale)
    input_shape = (resize_shape[0], resize_shape[1], 3)

    # Batch size
    batch_size = 32

    # Train dataset
    train_ds = tf.data.Dataset.from_generator(
        build_generator(train_df), 
        output_shapes=((input_shape, [glove_dim]), [n_classes]),
        output_types=((tf.float32, tf.float32), tf.int32)
    ).batch(batch_size, drop_remainder=True).cache()

    # Validation dataset
    val_ds = tf.data.Dataset.from_generator(
        build_generator(val_df), 
        output_shapes=((input_shape, [glove_dim]), [n_classes]),
        output_types=((tf.float32, tf.float32), tf.int32)
    ).batch(batch_size, drop_remainder=True).cache()

    # Test dataset
    test_ds = tf.data.Dataset.from_generator(
        build_generator(test_df), 
        output_shapes=((input_shape, [glove_dim]), [n_classes]),
        output_types=((tf.float32, tf.float32), tf.int32)
    ).batch(batch_size, drop_remainder=True).cache()

In [25]:
# Disable AutoShard.
options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA
options.experimental_optimization.apply_default_optimizations = False
train_ds= train_ds.with_options(options)
val_ds = val_ds.with_options(options)
test_ds = test_ds.with_options(options)

In [26]:
import tqdm
for (x, c), y in tqdm.tqdm_notebook(train_ds):
    batch_size = x.shape[0]
    input_shape = tuple(x.shape[1:])
    glove_dim = c.shape[-1]
    n_classes = y.shape[-1]
    break
print(x.shape, c.shape, y.shape)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for (x, c), y in tqdm.tqdm_notebook(train_ds):


0it [00:00, ?it/s]

(32, 224, 224, 3) (32, 300) (32, 48)


2022-01-31 21:58:17.210146: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


### Network parameters & loss

In [27]:
with central_storage_strategy.scope():
    # Learning parameters
    learning_rate = 10e-6/2
    nu = 10e-6/2
    epochs = 50
    decays = 3
    verbose = 1
    rank_factor = 0.5

    # ResNet50 trainable 
    trainable = False

    # MLP parameters
    n_layers = 3

    # Loss function
    loss_fn = tf.keras.losses.CategoricalCrossentropy()
    # Metric function
    metric_fn = tf.keras.metrics.CategoricalAccuracy()

    # Weights for resnet
    weights = 'imagenet'

    # Early stopping
    ES = tf.keras.callbacks.EarlyStopping('val_categorical_accuracy', patience=3)
    # Reduce learning rate on plateau
    # RLOP_lr = ReduceLROnPlateau(monitor="val_loss", attributes=['learning_rate'], factor=0.5, patience=5, verbose=1)
    # RLOP_lr_nu = ReduceLROnPlateau(monitor="val_loss", attributes=['learning_rate', 'nu'], factor=0.5, patience=5, verbose=1)
    RLOP = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=5, verbose=1)

In [28]:
def save_history(history, name):
    # convert the history.history dict to a pandas DataFrame:     
    hist_df = pd.DataFrame(history) if isinstance(history, dict) else pd.DataFrame(history.history)

    # save to csv
    hist_csv_file = f'{name}_history.csv'
    with open(hist_csv_file, mode='w') as f:
        hist_df.to_csv(f)
        
def history_list_to_dict(l):
    return {k: itertools.chain.from_iterable([dic.history[k] for dic in l]) for k in l[0].history}

In [29]:
with central_storage_strategy.scope():
    # Image net pretrained efficientnet
    ImageNet = tf.keras.applications.ResNet50V2(
        include_top=False,
        weights=weights,
        input_shape=input_shape,
        pooling='max'
    )
    # Make efficientnet not trainable
    for layer in ImageNet.layers:
        layer.trainable = False

### Additive context-aware MLP

In [30]:
with central_storage_strategy.scope():
    # context model
    context_inputs = context_hidden = tf.keras.layers.Input(shape=(glove_dim))
    #Context hidden layers
    for context_layer_idx in range(n_layers):
        context_hiddens = tf.keras.layers.Dense(context_inputs.shape[-1], 'relu')(context_hidden)
    # context output
    context_outputs = context_hidden
    # Context model build
    context_model = tf.keras.models.Model(context_inputs, context_outputs)
    # Image input layer
    inputs = tf.keras.layers.Input(shape=input_shape)
    # Context embedding input layer
    contexts = tf.keras.layers.Input(shape=(glove_dim))
    # Apply context model
    contexts = context_model(contexts)
    # ResNet 50 features
    features = ImageNet(inputs)
    # Get rank such that number of parameters are identical
    add_rank = int(glove_dim + (features.shape[-1]**2)/(2*features.shape[-1] + glove_dim))
    # Number of CA MLP layers
    for layer_idx in range(n_layers):
        con = tf.keras.layers.Dense(int(add_rank * rank_factor), 'relu')(contexts)
        features = CADenseAdd(features.shape[-1], int(add_rank * rank_factor), activation='relu', use_bias=False)([features, contexts])
    # Output layer
    outputs = tf.keras.layers.Dense(n_classes, 'softmax')(features)

In [31]:
add_history_list = []
with central_storage_strategy.scope():
    # Build model
    add_model = tf.keras.models.Model((inputs, contexts), outputs)
    # Wrop model
    add_model = wrap_model(add_model)
    for decay in range(decays):
        # Build optimizer
        add_optimizer = SVDAdam(add_model, context_model, learning_rate/2**decay, learning_rate/2**decay)
        # Compile model
        add_model.compile(add_optimizer, tf.keras.losses.CategoricalCrossentropy(), metric_fn)
        # train
        add_history_list.append(
            add_model.fit(train_ds, epochs=epochs, verbose=verbose, validation_data=val_ds, callbacks=[]))

Epoch 1/50


2022-01-31 22:00:46.784181: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8101
2022-01-31 22:00:48.325372: I tensorflow/core/util/cuda_solvers.cc:179] Creating GpuSolver handles for stream 0x5603601e4100


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/

In [32]:
add_history = history_list_to_dict(add_history_list)
save_history(add_history, f'histories/context_aware_{n_layers}_add')

### Multiplicative context-aware MLP

In [None]:
with central_storage_strategy.scope():
    # Image input layer
    inputs = tf.keras.layers.Input(shape=input_shape)
    # Context embedding input layer
    contexts = tf.keras.layers.Input(shape=(glove_dim))
    # ResNet 50 features
    features = ImageNet(inputs)
    # Get rank such that number of parameters are identical
    mul_rank = int(features.shape[-1] * (features.shape[-1] + glove_dim)/(2*features.shape[-1] + glove_dim))
    # Number of CA MLP layers
    for layer_idx in range(n_layers):
        features = CADenseMul(features.shape[-1], int(mul_rank * rank_factor), activation='elu', use_bias=False)([features, contexts])
    # Output layer
    outputs = tf.keras.layers.Dense(n_classes, 'softmax')(features)

In [None]:
mul_history_list = []
with central_storage_strategy.scope():
    # Build model
    mul_model = tf.keras.models.Model((inputs, contexts), outputs)
    # Wrop model
    mul_model = wrap_model(mul_model)
    for decay in range(decays):
        # Build optimizer
        mul_optimizer = SVDAdam(mul_model, None, learning_rate/2**decay, learning_rate/2**decay)
        # Compile model
        mul_model.compile(mul_optimizer, tf.keras.losses.CategoricalCrossentropy(), metric_fn)
        # train
        mul_history_list.append(
            mul_model.fit(train_ds, epochs=epochs, verbose=verbose, validation_data=val_ds, callbacks=[]))

In [None]:
mul_history = history_list_to_dict(mul_history_list)
save_history(mul_history, f'histories/context_aware_{n_layers}_mul')

### Regular MLP


In [None]:
with central_storage_strategy.scope():
    # Image input layer
    inputs = tf.keras.layers.Input(shape=input_shape)
    # Context embedding input layer
    contexts = tf.keras.layers.Input(shape=(glove_dim))
    # ResNet 50 features
    features = ImageNet(inputs)
    # Number of MLP layers
    for layer_idx in range(n_layers):
        features = tf.keras.layers.Dense(features.shape[-1], 'elu')(features)
    # Output layer
    outputs = tf.keras.layers.Dense(n_classes, 'softmax')(features)

In [None]:
reg_history_list = []
with central_storage_strategy.scope():
    # Build model
    reg_model = tf.keras.models.Model((inputs, contexts), outputs)
    for decay in range(decays):
        # Build optimizer
        reg_optimizer = tf.keras.optimizers.Adam(learning_rate/2**decay)
        # Compile model
        reg_model.compile(reg_optimizer, tf.keras.losses.CategoricalCrossentropy(), metric_fn)
        # train
        reg_history_list.append(
            reg_model.fit(train_ds, epochs=epochs, verbose=verbose, validation_data=val_ds, callbacks=[]))

In [None]:
reg_history = history_list_to_dict(reg_history_list)
save_history(reg_history, f'histories/regular_{n_layers}')

### Concatenated MLP

In [None]:
with central_storage_strategy.scope():
    # Image input layer
    inputs = tf.keras.layers.Input(shape=input_shape)
    # Context embedding input layer
    contexts = tf.keras.layers.Input(shape=(glove_dim))
    # ResNet 50 features
    features = ImageNet(inputs)
    # Concatenate features and context over features dimension
    features = tf.concat([features, contexts], axis=-1)
    # Number of MLP layers
    for layer_idx in range(n_layers):
        features = tf.keras.layers.Dense(features.shape[-1], 'elu')(features)
    # Output layer
    outputs = tf.keras.layers.Dense(n_classes, 'softmax')(features)

In [None]:
con_history_list = []
with central_storage_strategy.scope():
    # Build model
    con_model = tf.keras.models.Model((inputs, contexts), outputs)
    # Untrainable layers
    for layer in ImageNet.layers:
        layer.trainable = False
    for decay in range(decays):
        # Build optimizer
        con_optimizer = tf.keras.optimizers.Adam(learning_rate/2**decay)
        # Compile model
        con_model.compile(con_optimizer, tf.keras.losses.CategoricalCrossentropy(), metric_fn)
        # train
        con_history_list.append(
            con_model.fit(train_ds, epochs=epochs, verbose=verbose, validation_data=val_ds, callbacks=[]))

In [None]:
con_history = history_list_to_dict(con_history_list)
save_history(con_history, f'histories/concatenated_{n_layers}')

### Gated MLP

In [None]:
with central_storage_strategy.scope():
    # Image input layer
    inputs = tf.keras.layers.Input(shape=input_shape)
    # Context embedding input layer
    contexts = tf.keras.layers.Input(shape=(glove_dim))
    # ResNet 50 features
    features = ImageNet(inputs)
    # Number of MLP layers
    for layer_idx in range(n_layers):
        features = tf.keras.layers.Dense(features.shape[-1], 'elu')(features)
    # Calculate gate values
    gates = tf.keras.layers.Dense(features.shape[-1], activation='sigmoid')(contexts)
    # Gate features
    features = gates * features
    # Output layer
    outputs = tf.keras.layers.Dense(n_classes, 'softmax')(features)

In [None]:
gate_history_list = []
with central_storage_strategy.scope():
    # Build model
    gate_model = tf.keras.models.Model((inputs, contexts), outputs)
    # Untrainable layers
    for layer in ImageNet.layers:
        layer.trainable = False
    for decay in range(decays):
        # Build optimizer
        gate_optimizer = tf.keras.optimizers.Adam(learning_rate/2**decay)
        # Compile model
        gate_model.compile(gate_optimizer, tf.keras.losses.CategoricalCrossentropy(), metric_fn)
        # train
        gate_history_list.append(
            gate_model.fit(train_ds, epochs=epochs, verbose=verbose, validation_data=val_ds, callbacks=[]))

In [None]:
gate_history = history_list_to_dict(gate_history_list)
save_history(gate_history, f'histories/gated_{n_layers}')

### Only tags

In [None]:
with central_storage_strategy.scope():
    # Image input layer
    inputs = tf.keras.layers.Input(shape=input_shape)
    # Context embedding input layer
    contexts = tf.keras.layers.Input(shape=(glove_dim))
    # # ResNet 50 features
    # features = ImageNet(inputs)
    features = contexts
    # Number of CA MLP layers
    for layer_idx in range(n_layers):
        features = tf.keras.layers.Dense(glove_dim, 'elu')(features)
    # Output layer
    outputs = tf.keras.layers.Dense(n_classes, 'softmax')(features)

In [None]:
tag_history_list = []
with central_storage_strategy.scope():
    tag_model = tf.keras.models.Model((inputs, contexts), outputs)
    for decay in range(decays):
        tag_optimizer = tf.keras.optimizers.Adam(learning_rate*5/2**decay)
        tag_model.compile(tag_optimizer, tf.keras.losses.CategoricalCrossentropy(), metric_fn)
        tag_history_list.append(
            tag_model.fit(train_ds, validation_data = val_ds, epochs=epochs, verbose=verbose, callbacks=[]))

In [None]:
tag_history = history_list_to_dict(tag_history_list)
save_history(tag_history, f'histories/only_tags_{n_layers}')