In [1]:
import sys
import tensorflow as tf
import os
from matplotlib import pyplot as plt
from random import randint
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam, SGD
import numpy as np
import pandas as pd
import pathlib
import shutil
import tempfile

if not os.path.exists('helpers'):
    !git clone https://github.com/solanyn/head-pose-estimation
    !mv head-pose-estimation/helpers .
    !rm -r head-pose-estimation
sys.path.append('helpers')

from helpers.data import *
from helpers.model import *
from helpers.plot import *

print("Tensorflow version is:", tf.__version__)
assert tf.__version__[0] == '2'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
AUTOTUNE = tf.data.experimental.AUTOTUNE

models = []

colab = True
if colab:
    print("Running on Google Colab (Importing data from Google Drive)")
else:
    print("Running locally (Data available locally)")

ModuleNotFoundError: ignored

In [None]:
logdir = pathlib.Path(tempfile.mkdtemp())/"tensorboard_logs"
shutil.rmtree(logdir, ignore_errors=True)

# Load the TensorBoard notebook extension
%load_ext tensorboard

# Open an embedded TensorBoard viewer
%tensorboard --logdir {logdir}/models

In [None]:
# On Colab, pull the dataset archive from Google Drive into the working directory.
if colab:
    from google.colab import drive
    drive.mount('/content/drive')

    # Copy the archive locally, unpack it, then delete the archive.
    !cp /content/drive/'My Drive'/colab/head-pose-estimation/data.zip .
    !unzip -o -q data.zip
    !rm data.zip
    # NOTE(review): modified_data.zip is presumably extracted from data.zip above — confirm.
    !unzip -o -q modified_data.zip
    !rm modified_data.zip

In [None]:
# Directory prefix that is prepended to a table's `filename` value when an image is read.
data_dir = './modified_data/'

# Label tables for the training and test (submission) images, read in that order.
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in ('./train_data.csv', './test_data.csv')
)

In [None]:
# Column dtypes of the test table.
test_data.dtypes

In [None]:
# Column dtypes of the training table.
train_data.dtypes

In [None]:
# Look at a random image in the training set and the associated labels.
# random.randint includes BOTH endpoints, so the upper bound must be len - 1;
# len(train_data) itself is one past the last row of the freshly read table.
index = randint(0, len(train_data) - 1)
fig = plt.figure()
im = plt.imread(data_dir + train_data['filename'][index])
plt.imshow(im)

print("Tilt: {}\nPan: {}\nSize: {}\nTotal images: {}".format(train_data["tilt"][index], train_data["pan"][index], im.shape, len(train_data)))

In [None]:
# Are our labels categorical or numerical?
# String copies of the labels are stored as extra columns on train_data.
train_data['pan_str'] = train_data['pan'].astype(str)
train_data['tilt_str'] = train_data['tilt'].astype(str)

# Each bar chart is titled after the column it actually shows
# (the titles were previously swapped between the two charts).
train_data['pan_str'].value_counts().plot(kind='bar')
plt.title('Pan Class Counts')
plt.show()
train_data['tilt_str'].value_counts().plot(kind='bar')
plt.title('Tilt Class Counts')
plt.show()

In [None]:
# Definitely categorical. Some imbalances in both classes more apparent in the pan classes.

# Number of distinct label values per task (NaN, if present, counts as one value).
num_tilt_classes = train_data['tilt'].nunique(dropna=False)
num_pan_classes = train_data['pan'].nunique(dropna=False)

In [None]:
BATCH_SIZE = 128
# Fixed seed so the train/validation partition is identical on every run.
SPLIT_SEED = 42

# Hold out 20% of the labelled rows for validation.
train_data_df, val_data_df = train_test_split(train_data, test_size=0.2, random_state=SPLIT_SEED)

# Row counts, used to bound the tf.data pipelines and derive steps per epoch.
TRAIN_DATA_LEN = len(train_data_df)
VAL_DATA_LEN = len(val_data_df)
TEST_DATA_LEN = len(test_data)

In [None]:
# Tensorflow Datasets
def _labelled_dataset(generator_fn, num_classes):
    """Wrap a generator factory as a tf.data.Dataset of float32 (image, label) pairs.

    Elements are declared as image (1, 64, 64, 3) and label (1, num_classes).
    """
    return tf.data.Dataset.from_generator(
        generator_fn,
        output_types=(tf.float32, tf.float32),
        output_shapes=((1, 64, 64, 3), (1, num_classes)),
    )


tilt_train_dataset = _labelled_dataset(make_tilt_train_generator, num_tilt_classes)
pan_train_dataset = _labelled_dataset(make_pan_train_generator, num_pan_classes)
tilt_val_dataset = _labelled_dataset(make_tilt_val_generator, num_tilt_classes)
pan_val_dataset = _labelled_dataset(make_pan_val_generator, num_pan_classes)

# The test generator is wrapped without declared element shapes.
test_dataset = tf.data.Dataset.from_generator(
    make_test_generator,
    output_types=(tf.float32, tf.float32),
)

In [None]:
# Sanity check: print element shapes for the first five items of each training dataset.
for dataset in (tilt_train_dataset, pan_train_dataset):
    for image, label in dataset.take(5):
        print(image.shape, label.shape)

In [None]:
def _make_batches(dataset, num_examples, transform, shuffle=False, drop_remainder=False):
    """Build a cached, mapped and batched input pipeline.

    Args:
        dataset: source tf.data.Dataset.
        num_examples: number of elements taken from `dataset`; also used as the
            shuffle buffer size so the shuffle covers every taken element.
        transform: per-element map function (`convert` or `augment`).
        shuffle: reshuffle the cached elements on each iteration.
        drop_remainder: drop the final batch when it is smaller than BATCH_SIZE.

    Returns:
        A tf.data.Dataset of batches of size BATCH_SIZE.
    """
    # cache() sits before shuffle()/map() so the source is read only once while
    # shuffling and the mapped transform are still re-applied on every pass.
    pipeline = dataset.take(num_examples).cache()
    if shuffle:
        pipeline = pipeline.shuffle(num_examples)
    return (
        pipeline
        .map(transform, num_parallel_calls=AUTOTUNE)
        .batch(BATCH_SIZE, drop_remainder=drop_remainder)
        .prefetch(AUTOTUNE)  # overlap input preparation with the training step
    )


# No aug
tilt_train_batches = _make_batches(tilt_train_dataset, TRAIN_DATA_LEN, convert, shuffle=True, drop_remainder=True)
pan_train_batches = _make_batches(pan_train_dataset, TRAIN_DATA_LEN, convert, shuffle=True, drop_remainder=True)

# Aug
aug_tilt_train_batches = _make_batches(tilt_train_dataset, TRAIN_DATA_LEN, augment, shuffle=True, drop_remainder=True)
aug_pan_train_batches = _make_batches(pan_train_dataset, TRAIN_DATA_LEN, augment, shuffle=True, drop_remainder=True)

# For final model
# NOTE(review): these take TRAIN+VAL elements from the *train* generators — confirm
# those generators actually yield the validation rows as well.
FULL_DATA_LEN = TRAIN_DATA_LEN + VAL_DATA_LEN
full_tilt_train_batches = _make_batches(tilt_train_dataset, FULL_DATA_LEN, augment, shuffle=True, drop_remainder=True)
full_pan_train_batches = _make_batches(pan_train_dataset, FULL_DATA_LEN, augment, shuffle=True, drop_remainder=True)

# Validation
# The last partial batch is kept: dropping it would silently exclude up to
# BATCH_SIZE - 1 samples from every validation metric (and leave nothing to
# validate on when VAL_DATA_LEN < BATCH_SIZE).
tilt_val_batches = _make_batches(tilt_val_dataset, VAL_DATA_LEN, convert)
pan_val_batches = _make_batches(pan_val_dataset, VAL_DATA_LEN, convert)

# Submission set
test_batches = _make_batches(test_dataset, TEST_DATA_LEN, convert)

In [None]:
# Show the first image of ten training batches together with its tilt class.
plt.figure(figsize=(10,5))
for i, (image, label) in enumerate(tilt_train_batches.shuffle(100).take(10), start=1):
    plt.subplot(2, 5, i)
    plt.imshow(image[0, :])
    # The models are trained with categorical cross-entropy, so labels are one-hot;
    # argmax recovers the class index. Indexing element [0] only showed the first
    # component of the one-hot vector, not the class.
    plt.title("Tilt class: " + str(np.argmax(label[0].numpy())))

In [None]:
# Upper bound on training epochs for every experiment below.
epochs = 100

# Number of full training batches per epoch.
STEPS_PER_EPOCH = TRAIN_DATA_LEN // BATCH_SIZE

# Adam: initial learning rate with inverse-time decay,
# lr(step) = lr_a / (1 + decay_rate * step / decay_steps).
lr_a = 0.001
lr_schedule_a = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=lr_a,
    decay_steps=STEPS_PER_EPOCH * 1000,
    decay_rate=1,
    staircase=False,
)

optimizer = Adam(learning_rate=lr_schedule_a)

An alternate optimisation method using Nesterov's momentum in stochastic gradient descent.

In [None]:
# Initial learning rate for SGD.
lr_s = 0.1

# Exponential-decay schedule; defined here but not passed to the optimizer below.
lr_schedule_s = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-2,
    decay_steps=10000,
    decay_rate=0.9)

# `nesterov` is a boolean switch and `momentum` is the coefficient. Passing
# nesterov=0.9 left momentum at its default of 0, so no momentum was applied.
sgd = SGD(learning_rate=lr_s, momentum=0.9, nesterov=True)


# Model Building

A small test to check that a small ResNet model works. Nesterov-momentum SGD and Adam optimisers are also compared.

In [None]:
%%time
s_histories_tilt = {}
s_histories_pan = {}

small_res_net_tilt = get_resnet_model("tilt", [64,], [1,])
small_res_net_tilt.compile(
    optimizer=optimizer, 
    loss='categorical_crossentropy',
    metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
    'accuracy']
)

small_res_net_pan = get_resnet_model("pan", [64,], [1,])
small_res_net_pan.compile(
    optimizer=optimizer, 
    loss='categorical_crossentropy',
    metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
    'accuracy']
)

small_res_net_tilt_sgd = get_resnet_model("tilt", [64,], [1,])
small_res_net_tilt_sgd.compile(
    optimizer=sgd, 
    loss='categorical_crossentropy',
    metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
    'accuracy']
)

small_res_net_pan_sgd = get_resnet_model("pan", [64,], [1,])
small_res_net_pan_sgd.compile(
    optimizer=sgd, 
    loss='categorical_crossentropy',
    metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
    'accuracy']
)

s_histories_tilt['tilt_small_resnet_adam'] = small_res_net_tilt.fit(tilt_train_batches,
                                              epochs=epochs, 
                                              validation_data=tilt_val_batches,
                                              verbose=0, 
                                              callbacks=get_callbacks('models/tilt_small_resnet') 
                                              )
s_histories_pan['pan_small_resnet_adam'] = small_res_net_pan.fit(pan_train_batches,
                                              epochs=epochs, 
                                              validation_data=pan_val_batches,
                                              verbose=0, 
                                              callbacks=get_callbacks('models/pan_small_resnet') 
                                              )

s_histories_tilt['tilt_small_resnet_sgd'] = small_res_net_tilt_sgd.fit(tilt_train_batches,
                                              epochs=epochs, 
                                              validation_data=tilt_val_batches,
                                              verbose=0, 
                                              callbacks=get_callbacks('models/tilt_small_resnet') 
                                              )
s_histories_pan['pan_small_resnet_sgd'] = small_res_net_pan_sgd.fit(pan_train_batches,
                                              epochs=epochs, 
                                              validation_data=pan_val_batches,
                                              verbose=0, 
                                              callbacks=get_callbacks('models/pan_small_resnet') 
                                              )

In [None]:
# Cross-entropy curves for both tasks (the tilt histories were previously plotted twice).
plotter(s_histories_tilt, ylim=[0.0, 2], metric = 'CategoricalCrossentropy')
plotter(s_histories_pan, ylim=[0.0, 2], metric = 'CategoricalCrossentropy')

In [None]:
# Accuracy curves for the small-ResNet runs, tilt first and then pan.
for histories in (s_histories_tilt, s_histories_pan):
    plotter(histories, metric='accuracy', ylim=[0.0, 1])

## Augmentation

In [None]:
%%time
small_res_net_tilt_aug = get_resnet_model("tilt", [64,], [1,])
small_res_net_tilt_aug.compile(
    optimizer=optimizer, 
    loss='categorical_crossentropy',
    metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
    'accuracy']
)

small_res_net_pan_aug = get_resnet_model("pan", [64,], [1,])
small_res_net_pan_aug.compile(
    optimizer=optimizer, 
    loss='categorical_crossentropy',
    metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
    'accuracy']
)

s_histories_tilt['tilt_small_resnet_aug'] = small_res_net_tilt_aug.fit(aug_tilt_train_batches,
                                              epochs=epochs, 
                                              validation_data=tilt_val_batches,
                                              verbose=0, 
                                              callbacks=get_callbacks('models/tilt_small_resnet_aug', early_stop=False) 
                                              )
s_histories_pan['pan_small_resnet_aug'] = small_res_net_pan_aug.fit(aug_pan_train_batches,
                                              epochs=epochs, 
                                              validation_data=pan_val_batches,
                                              verbose=0, 
                                              callbacks=get_callbacks('models/pan_small_resnet_aug', early_stop=False) 
                                              )

In [None]:
# Tilt cross-entropy for every small-ResNet run recorded so far.
plotter(s_histories_tilt, metric='CategoricalCrossentropy', ylim=[0.0, 2])

In [None]:
# Pan accuracy for every small-ResNet run recorded so far.
plotter(s_histories_pan, metric='accuracy', ylim=[0.0, 1])

The models learn slowly, and the curves suggest they will converge to a poor loss and accuracy. We can increase the complexity of the model to remedy this.

## A baseline model

In [None]:
%%time

l_histories_tilt = {
    'tilt_small_resnet_aug': s_histories_tilt['tilt_small_resnet_aug'],
    }

l_histories_pan = {
    'pan_small_resnet_aug': s_histories_pan['pan_small_resnet_aug'],
}

large_res_net_tilt = get_resnet_model("tilt", [64, 128, 256], [3, 3, 3])
large_res_net_tilt.compile(
    optimizer=optimizer, 
    loss='categorical_crossentropy',
    metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
    'accuracy']
)

large_res_net_pan = get_resnet_model("pan", [64, 128, 256], [3, 3, 3])
large_res_net_pan.compile(
    optimizer=optimizer, 
    loss='categorical_crossentropy',
    metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
    'accuracy']
)

l_histories_tilt['tilt_large_resnet'] = large_res_net_tilt.fit(aug_tilt_train_batches,
                                              epochs=epochs, 
                                              validation_data=tilt_val_batches,
                                              verbose=0, 
                                              callbacks=get_callbacks('models/tilt_large_resnet') 
                                              )
l_histories_pan['pan_large_resnet'] = large_res_net_pan.fit(aug_pan_train_batches,
                                              epochs=epochs, 
                                              validation_data=pan_val_batches,
                                              verbose=0, 
                                              callbacks=get_callbacks('models/pan_large_resnet') 
                                              )

In [None]:
# Cross-entropy curves: small augmented ResNet vs. the larger baseline, tilt then pan.
for histories in (l_histories_tilt, l_histories_pan):
    plotter(histories, metric='CategoricalCrossentropy', ylim=[0.0, 2])

In [None]:
# Accuracy curves: small augmented ResNet vs. the larger baseline, tilt then pan.
for histories in (l_histories_tilt, l_histories_pan):
    plotter(histories, metric='accuracy', ylim=[0.0, 1])

A larger model helps a lot but the validation accuracy looks like it can be improved. Might be some overfitting in the pan model.

## Adding depth to the model

In [None]:
%%time
larger_histories_tilt = {
    'tilt_large_resnet': l_histories_tilt['tilt_large_resnet']
}

larger_histories_pan = {
    'pan_large_resnet': l_histories_pan['pan_large_resnet']
}

larger_bottleneck_resnet_tilt = get_resnet_model("tilt", [64, 128, 256, 512], [3, 4, 6, 3])
larger_bottleneck_resnet_tilt.compile(
    optimizer=optimizer, 
    loss='categorical_crossentropy',
    metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
    'accuracy']
)

larger_bottleneck_resnet_pan = get_resnet_model("pan", [64, 128, 256, 512], [3, 4, 6, 3])
larger_bottleneck_resnet_pan.compile(
    optimizer=optimizer, 
    loss='categorical_crossentropy',
    metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
    'accuracy']
)

larger_histories_tilt['larger_resnet_tilt'] = larger_bottleneck_resnet_tilt.fit(aug_tilt_train_batches, 
                                                          epochs=epochs, 
                                                          validation_data=tilt_val_batches,
                                                          verbose=0, 
                                                          callbacks=get_callbacks('models/larger_resnet_bottleneck_tilt') )

larger_histories_pan['larger_resnet_pan'] = larger_bottleneck_resnet_pan.fit(aug_pan_train_batches, 
                                                          epochs=epochs, 
                                                          validation_data=pan_val_batches,
                                                          verbose=0, 
                                                          callbacks=get_callbacks('models/larger_resnet_bottleneck_pan') )

In [None]:
# Cross-entropy curves: baseline vs. the deeper network, tilt then pan.
for histories in (larger_histories_tilt, larger_histories_pan):
    plotter(histories, metric='CategoricalCrossentropy', ylim=[0.0, 3])

In [None]:
# Accuracy curves: baseline vs. the deeper network, one figure per task.
for histories in (larger_histories_tilt, larger_histories_pan):
    plotter(histories, metric='accuracy', ylim=[0.0, 1])
    plt.show()

The models now learn faster, but the validation loss and accuracy are unstable from epoch to epoch. A model using a modified residual block can help with the stability.

## Bottleneck and Preactivation Residual Blocks

In [None]:
%%time
mod_histories_tilt = {
    'tilt_resnet34': larger_histories_tilt['larger_resnet_tilt']
    }
mod_histories_pan = {
    'tilt_resnet34': larger_histories_pan['larger_resnet_pan']
}

bottleneck_resnet_tilt = get_bottleneck_resnet_model("tilt", [64, 128, 256], [3, 3, 3])
bottleneck_resnet_tilt.compile(
    optimizer=optimizer, 
    loss='categorical_crossentropy',
    metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
    'accuracy']
)

bottleneck_resnet_pan = get_bottleneck_resnet_model("pan", [64, 128, 256], [3, 3, 3])
bottleneck_resnet_pan.compile(
    optimizer=optimizer, 
    loss='categorical_crossentropy',
    metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
    'accuracy']
)

mod_histories_tilt['tilt_bottleneck_resnet20'] = bottleneck_resnet_tilt.fit(aug_tilt_train_batches,
                                              epochs=epochs, 
                                              validation_data=tilt_val_batches,
                                              verbose=0, 
                                              callbacks=get_callbacks('models/tilt_large_resnet') 
                                              )
mod_histories_pan['pan_bottleneck_resnet20'] = bottleneck_resnet_pan.fit(aug_pan_train_batches,
                                              epochs=epochs, 
                                              validation_data=pan_val_batches,
                                              verbose=0, 
                                              callbacks=get_callbacks('models/pan_large_resnet') 
                                              )

In [None]:
# Cross-entropy curves: deeper ResNet vs. bottleneck ResNet, one figure per task.
for histories in (mod_histories_tilt, mod_histories_pan):
    plotter(histories, metric='CategoricalCrossentropy', ylim=[0.0, 2])
    plt.show()

In [None]:
# Accuracy curves: deeper ResNet vs. bottleneck ResNet, one figure per task.
for histories in (mod_histories_tilt, mod_histories_pan):
    plotter(histories, metric='accuracy', ylim=[0.0, 1])
    plt.show()

Using preactivation and the bottleneck structure in the network stabilises the validation loss and accuracy considerably.

## Dropout

Dropout was initially thought to have a negative effect on training efficiency in ResNet, but Zagoruyko and Komodakis (2016, *Wide Residual Networks*) proposed an alternative implementation of the network which included dropout after each convolution layer in residual blocks. The dropout within residual blocks is explored in the following cells.

In [None]:
%%time
drop_histories = {}
drop_histories_tilt = {
    'tilt_bottleneck_resnet20': mod_histories_tilt['tilt_bottleneck_resnet20'],
}

drop_histories_pan = {
    'pan_bottleneck_resnet20': mod_histories_pan['pan_bottleneck_resnet20']
}

bottleneck_res_net_tilt_drop = get_bottleneck_resnet_model("tilt", [64, 128, 256], [3, 3, 3], fdropout=True)
bottleneck_res_net_tilt_drop.compile(
    optimizer=optimizer, 
    loss='categorical_crossentropy',
    metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
    'accuracy']
  )

bottleneck_res_net_pan_drop = get_bottleneck_resnet_model("pan", [64, 128, 256], [3, 3, 3], fdropout=True)
bottleneck_res_net_pan_drop.compile(
    optimizer=optimizer, 
    loss='categorical_crossentropy',
    metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
    'accuracy']
  )

drop_histories_tilt['tilt_bottleneck_resnet20_dropout'] = bottleneck_res_net_tilt_drop.fit(aug_tilt_train_batches, 
                                                          epochs=epochs, 
                                                          validation_data=tilt_val_batches,
                                                          verbose=0, 
                                                          callbacks=get_callbacks('models/resnet_bottleneck_tilt_dropout') )

drop_histories_pan['pan_bottleneck_resnet20_dropout'] = bottleneck_res_net_pan_drop.fit(aug_pan_train_batches, 
                                                          epochs=epochs, 
                                                          validation_data=pan_val_batches,
                                                          verbose=0, 
                                                          callbacks=get_callbacks('models/resnet_bottleneck_pan_dropout') )

In [None]:
# Cross-entropy curves with and without dropout, one figure per task.
for histories in (drop_histories_tilt, drop_histories_pan):
    plotter(histories, metric='CategoricalCrossentropy', ylim=[0.0, 2])
    plt.show()

In [None]:
# Accuracy curves with and without dropout, one figure per task.
for histories in (drop_histories_tilt, drop_histories_pan):
    plotter(histories, metric='accuracy', ylim=[0.0, 1])
    plt.show()

Using dropout after convolutional layers provides little benefit in this case.

## Regularisation

In [None]:
%%time
h_histories = {}
lambda_vals = [0.1, 0.01, 0.001, 0.0001, 0.00001]

for reg_lambda in lambda_vals:
    bottleneck_res_net_tilt_reg_ = get_bottleneck_resnet_model("tilt", [64, 128, 256], [3, 3, 3], reg_lambda=reg_lambda)
    bottleneck_res_net_tilt_reg_.compile(
        optimizer=optimizer, 
        loss='categorical_crossentropy',
        metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
        'accuracy']
    )

    bottleneck_res_net_pan_reg_ = get_bottleneck_resnet_model("pan", [64, 128, 256], [3, 3, 3], reg_lambda=reg_lambda)
    bottleneck_res_net_pan_reg_.compile(
        optimizer=optimizer, 
        loss='categorical_crossentropy',
        metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
        'accuracy']
    )

    h_histories['resnet_bottleneck_tilt_reg'+ '_h' + str(reg_lambda)] = bottleneck_res_net_tilt_reg_.fit(aug_tilt_train_batches, 
                                                          epochs=epochs, 
                                                          validation_data=tilt_val_batches,
                                                          verbose=0, 
                                                          callbacks=get_callbacks('models/resnet_bottleneck_tilt_reg'+ '_h1' + str(reg_lambda)) )


    h_histories['resnet_bottleneck_pan_reg'+ '_h' + str(reg_lambda)] = bottleneck_res_net_pan_reg_.fit(aug_pan_train_batches, 
                                                          epochs=epochs, 
                                                          validation_data=pan_val_batches,
                                                          verbose=0, 
                                                          callbacks=get_callbacks('models/resnet_bottleneck_pan_reg'+ '_h1' + str(reg_lambda)) )


In [None]:
# Final-epoch train/validation cross-entropy of the tilt models for each lambda.
plt.figure(figsize=(10,5))
metric = 'CategoricalCrossentropy'
l_train = list()
l_val = list()

for reg_lambda in lambda_vals:
    run_history = h_histories['resnet_bottleneck_tilt_reg'+ '_h' + str(reg_lambda)].history
    l_train.append(run_history[metric][-1])
    l_val.append(run_history['val_' + metric][-1])

plt.plot(lambda_vals, l_train, 'ro', label='Train')
# These points come from the 'val_' history, so they are validation results, not test results.
plt.plot(lambda_vals, l_val, 'bs', label='Validation')
# The lambdas span several orders of magnitude; a linear axis would squash them together.
plt.xscale('log')

plt.xlabel('Lambda', fontsize=14)
plt.ylabel('CategoricalCrossentropy', fontsize=14)
plt.legend()
plt.show()

In [None]:
# Pick the lambda with the lowest validation loss currently held in l_val.
best_tilt_position = int(np.argmin(l_val))
lambda_tilt = lambda_vals[best_tilt_position]
print(lambda_tilt)

In [None]:
# Final-epoch train/validation cross-entropy of the pan models for each lambda.
plt.figure(figsize=(10,5))
metric = 'CategoricalCrossentropy'
l_train = list()
l_val = list()

for reg_lambda in lambda_vals:
    run_history = h_histories['resnet_bottleneck_pan_reg'+ '_h' + str(reg_lambda)].history
    l_train.append(run_history[metric][-1])
    l_val.append(run_history['val_' + metric][-1])

plt.plot(lambda_vals, l_train, 'ro', label='Train')
# These points come from the 'val_' history, so they are validation results, not test results.
plt.plot(lambda_vals, l_val, 'bs', label='Validation')
# The lambdas span several orders of magnitude; a linear axis would squash them together.
plt.xscale('log')

plt.xlabel('Lambda', fontsize=14)
plt.ylabel('CategoricalCrossentropy', fontsize=14)
plt.legend()
plt.show()

In [None]:
# Pick the lambda with the lowest validation loss currently held in l_val.
best_pan_position = int(np.argmin(l_val))
lambda_pan = lambda_vals[best_pan_position]
print(lambda_pan)

Increasing the regularisation strength in the layers results in higher losses and thus we use a minimal amount of regularisation in the layers.

In [None]:
# Train the chosen architecture with the selected lambdas, still validating on the hold-out split.
final_histories_tilt = {}
final_histories_pan = {}

final_model_tilt = get_bottleneck_resnet_model("tilt", [64, 128, 256], [3, 3, 3], reg_lambda=lambda_tilt)
final_model_pan = get_bottleneck_resnet_model("pan", [64, 128, 256], [3, 3, 3], reg_lambda=lambda_pan)

# Each model gets a fresh optimizer cloned from the configured Adam. A shared instance
# would carry its step counter (and learning-rate schedule position) over from earlier
# runs, and recent Keras versions reject an optimizer built for another model.
for net in (final_model_tilt, final_model_pan):
    net.compile(
        optimizer=type(optimizer).from_config(optimizer.get_config()),
        loss='categorical_crossentropy',
        metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
                 'accuracy'],
    )

final_histories_tilt['tilt_final'] = final_model_tilt.fit(
    aug_tilt_train_batches,
    epochs=epochs,
    validation_data=tilt_val_batches,
    verbose=0,
    callbacks=get_callbacks('models/tilt_final'),
)
final_histories_pan['pan_final'] = final_model_pan.fit(
    aug_pan_train_batches,
    epochs=epochs,
    validation_data=pan_val_batches,
    verbose=0,
    callbacks=get_callbacks('models/pan_final'),
)

In [None]:
# Cross-entropy curves of the final configuration, one figure per task.
for histories in (final_histories_tilt, final_histories_pan):
    plotter(histories, metric='CategoricalCrossentropy', ylim=[0.0, 2])
    plt.show()

In [None]:
# Accuracy curves of the final configuration, one figure per task.
for histories in (final_histories_tilt, final_histories_pan):
    plotter(histories, metric='accuracy', ylim=[0.0, 1])
    plt.show()

In [None]:
# Report evaluate() results on the validation batches, tilt model first and then pan.
for final_model, val_batches in (
    (final_model_tilt, tilt_val_batches),
    (final_model_pan, pan_val_batches),
):
    print(final_model.evaluate(val_batches))

## Finalising 

In [None]:
# Rebuild both final models from scratch and refit them on the `full_*` pipelines,
# with no validation set and no callbacks.
final_model_tilt = get_bottleneck_resnet_model("tilt", [64, 128, 256], [3, 3, 3], reg_lambda=lambda_tilt)
final_model_pan = get_bottleneck_resnet_model("pan", [64, 128, 256], [3, 3, 3], reg_lambda=lambda_pan)

# Each model gets a fresh optimizer cloned from the configured Adam. A shared instance
# would carry its step counter (and learning-rate schedule position) over from earlier
# runs, and recent Keras versions reject an optimizer built for another model.
for net in (final_model_tilt, final_model_pan):
    net.compile(
        optimizer=type(optimizer).from_config(optimizer.get_config()),
        loss='categorical_crossentropy',
        metrics=[tf.keras.losses.CategoricalCrossentropy(from_logits=False, name='CategoricalCrossentropy'),
                 'accuracy'],
    )

final_model_tilt.fit(
    full_tilt_train_batches,
    epochs=epochs,
    verbose=0,
)
final_model_pan.fit(
    full_pan_train_batches,
    epochs=epochs,
    verbose=0,
)

In [None]:
# Invert each generator's class_indices mapping: class index -> class name.
tilt_classes = {
    class_index: class_name
    for class_name, class_index in make_tilt_train_generator().class_indices.items()
}
pan_classes = {
    class_index: class_name
    for class_name, class_index in make_pan_train_generator().class_indices.items()
}

In [None]:
# Run both final models over the test generator.
# NOTE(review): this feeds the raw generator output to predict(), whereas training used
# tf.data pipelines mapped through `convert`/`augment`; `test_batches` is defined earlier
# but not used here. Confirm the generator applies equivalent preprocessing and does not
# shuffle, since predictions are later paired with test_data rows by position.
test = make_test_generator()
tilt_preds = final_model_tilt.predict(test)
pan_preds = final_model_pan.predict(test)

In [None]:
# Map each prediction's highest-scoring class index to its tilt class name.
tilts = [tilt_classes[np.argmax(scores)] for scores in tilt_preds]

In [None]:
# Map each prediction's highest-scoring class index to its pan class name.
pans = [pan_classes[np.argmax(scores)] for scores in pan_preds]

In [None]:
# Grid of the first 15 test images, each followed by its tilt prediction bar chart.
num_rows = 5
num_cols = 3
num_images = num_rows*num_cols
grid_cols = 2*num_cols  # every image occupies two panels: picture + value array
plt.figure(figsize=(2*grid_cols, 2*num_rows))
# NOTE(review): the pan grid cell calls plot_image / plot_value_array with an extra
# leading index argument; confirm against helpers.plot which call shape is correct.
for i in range(num_images):
    image_panel = 2*i + 1
    plt.subplot(num_rows, grid_cols, image_panel)
    plot_image(tilt_preds[i], test_data['filename'][i], tilt_classes)
    plt.subplot(num_rows, grid_cols, image_panel + 1)
    plot_value_array(tilt_preds[i], tilt_classes)
    _ = plt.xticks(range(len(tilt_classes)), tilt_classes.values(), rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Grid of the first 15 test images, each followed by its pan prediction bar chart.
num_rows = 5
num_cols = 3
num_images = num_rows*num_cols
grid_cols = 2*num_cols  # every image occupies two panels: picture + value array
plt.figure(figsize=(2*grid_cols, 2*num_rows))
# NOTE(review): the tilt grid cell calls plot_image / plot_value_array WITHOUT the
# leading index argument used here; confirm against helpers.plot which is correct.
for i in range(num_images):
    image_panel = 2*i + 1
    plt.subplot(num_rows, grid_cols, image_panel)
    plot_image(i, pan_preds[i], test_data['filename'][i], pan_classes)
    plt.subplot(num_rows, grid_cols, image_panel + 1)
    plot_value_array(i, pan_preds[i], pan_classes)
    _ = plt.xticks(range(len(pan_classes)), pan_classes.values(), rotation=45)
plt.tight_layout()
plt.show()