# Fine-tuning Feed-Forward Networks

In [1]:
import os

from datetime import datetime

import numpy as np
import pandas as pd
import tensorflow as tf
from keras import optimizers, backend as K
from keras.callbacks import TerminateOnNaN, EarlyStopping, ReduceLROnPlateau, TensorBoard, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from sklearn.utils.class_weight import compute_class_weight
from sacred import Experiment, utils as sacred_utils

from connoisseur import get_preprocess_fn
from connoisseur.datasets import load_pickle_data
from connoisseur.datasets.painter_by_numbers import load_multiple_outputs
from connoisseur.utils.image import MultipleOutputsDirectorySequence

import matplotlib.pyplot as plt

%matplotlib inline

Using TensorFlow backend.


In [2]:
use_gram_matrix = False
pooling = 'avg'


def _classification_head(name, units, weight):
    """Describe one softmax classification output.

    The short keys are the ones read by the rest of the notebook:
    'n' output/layer name, 'u' number of units, 'a' activation,
    'l' loss, 'm' metrics and 'w' loss weight.
    """
    return {
        'n': name,
        'u': units,
        'a': 'softmax',
        'l': 'categorical_crossentropy',
        # A fresh list per head, so the heads never share a metrics object.
        'm': ['categorical_accuracy', 'top_k_categorical_accuracy'],
        'w': weight,
    }


# One entry per model output: three classification heads plus a single-unit
# linear head for the date, trained with MSE and monitored with MAE.
outputs_meta = [
    _classification_head('artist', 1584, .5),
    _classification_head('style', 135, .2),
    _classification_head('genre', 42, .2),
    {'n': 'date', 'u': 1, 'a': 'linear', 'l': 'mse', 'm': 'mae', 'w': .1},
]

## Reading Training Labels

In [3]:
# Directory handed to `load_pickle_data`.
data_dir = "/work/datasets/patches/299-balanced-inceptionrnv2-299"
# Load the 'train' and 'valid' phases; `data` is indexed by phase name later on.
# NOTE(review): presumably this unpickles files from disk - only point it at
# trusted data.
data = load_pickle_data(data_dir, phases=['train', 'valid'])

In [4]:
# CSV file passed to `load_multiple_outputs` together with the head descriptions.
train_info = '/datasets/pbn/train_info.csv'
# `outputs` is iterated with `.items()` further down (output name -> indexable
# targets) and `name_map` is used there to turn a painting name into a row index.
outputs, name_map = load_multiple_outputs(train_info, outputs_meta, encode='onehot')

unknown year ific
unknown year rain
unknown year rver


In [5]:
# Shuffle switches for the two phases.
# NOTE(review): no cell visible in this part of the notebook reads these flags;
# the shuffling done after the features are selected is unconditional.
train_shuffle = True
valid_shuffle = True

In [6]:
# The first element of the train entry is keyed by layer name; list those keys.
print('layers available:', data['train'][0].keys())

layers available: dict_keys(['global_average_pooling2d_1'])


In [7]:
layer_name = 'global_average_pooling2d_1'

# Each phase entry unpacks into (features keyed by layer, <ignored>, sample names);
# only the features of `layer_name` are kept.
x_train, _, names_train = data['train']
x_valid, _, names_valid = data['valid']
x_train = x_train[layer_name]
x_valid = x_valid[layer_name]
print('x-train, x-valid shape:', x_train.shape, x_valid.shape)

# Shuffle features and names with the same permutation so they stay aligned.
# `p` is deliberately left at cell level: a later cell deletes it.
p = np.random.permutation(len(x_train))
x_train = x_train[p]
names_train = names_train[p]

p = np.random.permutation(len(x_valid))
x_valid = x_valid[p]
names_valid = names_valid[p]

x-train, x-valid shape: (3192922, 1536) (1577778, 1536)


In [8]:
# Unbind the raw phase dictionary and the last permutation; the arrays used
# from here on are bound to x_train/x_valid and names_train/names_valid.
del data, p

In [9]:
# Build, for each phase, a dict {output name: targets aligned with that phase's samples}.
ys = []
for names in (names_train, names_valid):
    # Keep the base file name up to (not including) its last '-' separated
    # chunk; the result is the key looked up in `name_map`.
    # NOTE(review): presumably that last chunk is the patch index - confirm.
    names = ['-'.join(os.path.basename(n).split('-')[:-1]) for n in names]
    # Build the index array once, instead of letting NumPy re-convert a long
    # Python list for every output. The explicit integer dtype keeps an empty
    # phase indexable.
    indices = np.asarray([name_map[n] for n in names], dtype=np.intp)
    ys.append({o: v[indices] for o, v in outputs.items()})

y_train, y_valid = ys

In [None]:
# Categorical outputs whose class distribution is plotted, one panel each.
groups = ['artist', 'style', 'genre']

for y in (y_train, y_valid):
    # One figure per phase.
    f = plt.figure(figsize=(16, 6))

    for position, group in enumerate(groups, start=1):
        # Collapse the one-hot rows to class ids, then count each id.
        class_ids = np.argmax(y[group], axis=1)
        l, c = np.unique(class_ids, return_counts=True)

        print('unique values:', len(l))

        ax = f.add_subplot(1, 3, position)
        ax.bar(l, c)
        ax.set_title(group)

## Defining Limb Network

In [12]:
# Device on which the model is built and compiled (assigned once; the original
# cell repeated this assignment).
device = '/gpu:0'

# NOTE(review): `weights`, `last_base_layer` and `ckpt_file` are not read by any
# cell visible in this part of the notebook; kept in case later cells use them.
weights = 'imagenet'
last_base_layer = None
use_gram_matrix = False
pooling = 'avg'
ckpt_file = 'weights.hdf5'

# Input shape of the network and the unit counts of its hidden dense layers.
shape = [1536]
dense_layers = [2048, 2048]

# Keyword arguments for the optimizer and the dropout rate used after each
# hidden dense layer.
opt_params = {'lr': .001}
dropout_p = 0.2

In [13]:
from keras import Model, Input
from keras.layers import Flatten, Lambda, Dense, Dropout

def _resolve_output_specs(classes, predictions_activation, predictions_name):
    """Normalize the head arguments into a list of (units, activation, name)."""
    if not isinstance(classes, (list, tuple)):
        # Single head: all three arguments are taken as scalars.
        return [(classes, predictions_activation, predictions_name)]

    n_outputs = len(classes)

    # A scalar activation applies to every head. Without this, zip() would
    # iterate over the characters of a string such as 'softmax'.
    if not isinstance(predictions_activation, (list, tuple)):
        predictions_activation = [predictions_activation] * n_outputs

    if not isinstance(predictions_name, (list, tuple)):
        if predictions_name is None or n_outputs == 1:
            predictions_name = [predictions_name] * n_outputs
        else:
            # Layer names must differ, so a shared base name gets an index.
            predictions_name = ['%s_%i' % (predictions_name, i)
                                for i in range(n_outputs)]

    if not (len(predictions_activation) == len(predictions_name) == n_outputs):
        # zip() would silently drop the surplus heads; fail loudly instead.
        raise ValueError(
            'classes, predictions_activation and predictions_name must have '
            'the same length, got %i, %i and %i'
            % (n_outputs, len(predictions_activation), len(predictions_name)))

    return list(zip(classes, predictions_activation, predictions_name))


def build_meta_limb(shape, dropout_p=.5,
                    classes=1000, use_gram_matrix=False,
                    dense_layers=(),
                    include_top=True,
                    predictions_activation='softmax',
                    predictions_name='predictions', model_name=None):
    """Build a dense network with one or more output heads.

    :param shape: shape passed to the Keras `Input` layer.
    :param dropout_p: rate of the `Dropout` placed after each hidden layer.
    :param classes: number of units of the output layer, or a list/tuple with
        one entry per output head.
    :param use_gram_matrix: if True, the input goes through a `Lambda` layer
        wrapping `gram_matrix` before anything else.
    :param dense_layers: unit counts of the hidden ReLU layers ('fc0', 'fc1', ...).
    :param include_top: if False, no flatten/dense/output layer is added and
        the model simply outputs the (optionally Gram-transformed) input.
    :param predictions_activation: activation of the output layer(s); a single
        value is shared by all heads when `classes` is a list/tuple.
    :param predictions_name: name of the output layer(s); with several heads a
        single string is expanded to '<name>_<index>'.
    :param model_name: name given to the returned model.
    :return: a Keras `Model` from the input to the list of outputs.
    :raises ValueError: if `classes`, `predictions_activation` and
        `predictions_name` are sequences of different lengths.
    """
    # Resolve (and validate) the heads before creating any layer.
    heads = (_resolve_output_specs(classes, predictions_activation,
                                   predictions_name)
             if include_top else None)

    x = Input(shape=shape)

    if use_gram_matrix:
        # NOTE(review): `gram_matrix` is neither defined nor imported in the
        # visible part of this notebook; this branch raises NameError unless
        # it is brought into scope first.
        sizes = K.get_variable_shape(x)
        k = sizes[-1]
        y = Lambda(gram_matrix, arguments=dict(norm_by_channels=False),
                   name='gram', output_shape=[k, k])(x)
    else:
        y = x

    if include_top:
        # Dense layers are applied to a flat vector per sample.
        if K.ndim(y) > 2:
            y = Flatten(name='flatten')(y)

        for l_id, n_units in enumerate(dense_layers):
            y = Dense(n_units, activation='relu', name='fc%i' % l_id)(y)
            y = Dropout(dropout_p)(y)

        # Every head branches off the same last hidden representation.
        outputs = [Dense(u, activation=a, name=n)(y) for u, a, n in heads]
    else:
        outputs = [y]

    return Model(inputs=x, outputs=outputs, name=model_name)

In [14]:
with tf.device(device):
    print('building...')

    # Split the head descriptions into the parallel lists the builder expects.
    head_units = [meta['u'] for meta in outputs_meta]
    head_names = [meta['n'] for meta in outputs_meta]
    head_activations = [meta['a'] for meta in outputs_meta]

    model = build_meta_limb(
        shape,
        dropout_p=dropout_p,
        use_gram_matrix=use_gram_matrix,
        include_top=True,
        dense_layers=dense_layers,
        classes=head_units,
        predictions_name=head_names,
        predictions_activation=head_activations,
    )

    model.summary()

building...
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 1536)         0                                            
__________________________________________________________________________________________________
fc0 (Dense)                     (None, 2048)         3147776     input_1[0][0]                    
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 2048)         0           fc0[0][0]                        
__________________________________________________________________________________________________
fc1 (Dense)                     (None, 2048)         4196352     dropout_1[0][0]                  
_________________________________________________________________________________________________

In [16]:
with tf.device(device):
    # Losses, metrics and loss weights are all keyed by output (layer) name.
    model.compile(
        optimizer=optimizers.Adam(**opt_params),
        loss={o['n']: o['l'] for o in outputs_meta},
        metrics={o['n']: o['m'] for o in outputs_meta},
        loss_weights={o['n']: o['w'] for o in outputs_meta},
    )

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


## Training Model

In [17]:
# Training schedule.
epochs = 500
initial_epoch = 0
batch_size = 4096

# NOTE(review): the four settings below are not passed to `model.fit` in the
# training cell of this notebook.
steps_per_epoch = 500
validation_steps = None

workers = 8
use_multiprocessing = True

# The learning rate is reduced after a third of the early-stopping patience.
early_stop_patience = 100
reduce_lr_patience = early_stop_patience // 3

class_weight = None

# Checkpoint file stamped with today's date; the report directory reuses the
# checkpoint name without its extension.
ckpt = 'meta-balanced-mo-{}.h5'.format(datetime.now().date())
report_dir = '/work/painter-by-numbers/' + os.path.splitext(ckpt)[0]

In [18]:
# Path of a weights file to continue from; leave as None (or any falsy value)
# to keep the freshly initialised model.
resuming_from = None
if resuming_from:
    print('re-loading weights...')
    model.load_weights(resuming_from)

In [None]:
# Train on the in-memory arrays. A manual interrupt is reported instead of
# propagating, so the kernel keeps the partially trained model.
try:
    print('training from epoch %i...' % initial_epoch)
    model.fit(x_train, y_train,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(x_valid, y_valid),
              initial_epoch=initial_epoch,
              verbose=2,
              class_weight=class_weight,
              callbacks=[
                  TerminateOnNaN(),
                  EarlyStopping(patience=early_stop_patience),
                  ReduceLROnPlateau(min_lr=1e-10, patience=reduce_lr_patience),
                  TensorBoard(report_dir, batch_size=batch_size),
                  # Only the best model seen so far is written to `ckpt`.
                  ModelCheckpoint(ckpt,
                                  save_best_only=True,
                                  verbose=1)
              ])
except KeyboardInterrupt:
    print('interrupted by user')
else:
    print('done')
finally:
    # `model.history` may not exist yet if fit() failed before training
    # started. Reading it unguarded here would raise AttributeError and hide
    # the original exception.
    print('train history:',
          getattr(getattr(model, 'history', None), 'history', None))

training from epoch 0...
Train on 3192922 samples, validate on 1577778 samples
Epoch 1/500
Epoch 00001: val_loss improved from inf to 3.39281, saving model to meta-balanced-mo-2018-10-18.h5
 - 232s - loss: 2.6289 - artist_loss: 3.4762 - style_loss: 2.3677 - genre_loss: 1.6810 - date_loss: 0.8106 - artist_categorical_accuracy: 0.3593 - artist_top_k_categorical_accuracy: 0.5285 - style_categorical_accuracy: 0.3331 - style_top_k_categorical_accuracy: 0.7143 - genre_categorical_accuracy: 0.5084 - genre_top_k_categorical_accuracy: 0.8466 - date_mean_absolute_error: 0.5876 - val_loss: 3.3928 - val_artist_loss: 5.0111 - val_style_loss: 2.4516 - val_genre_loss: 1.6529 - val_date_loss: 0.6635 - val_artist_categorical_accuracy: 0.2165 - val_artist_top_k_categorical_accuracy: 0.3876 - val_style_categorical_accuracy: 0.3323 - val_style_top_k_categorical_accuracy: 0.7207 - val_genre_categorical_accuracy: 0.5220 - val_genre_top_k_categorical_accuracy: 0.8521 - val_date_mean_absolute_error: 0.5341
Ep