## Load images from record

Here the data is loaded from TFRecords instead of directly from the file system directory.

`inline_augment_images` takes the images in the provided directory and augments them in place, removing the old files of the `interim` directory.

`inline_augment_images` also returns a `list` of `dictionaries` which contain information about the images.
This allows an arbitrary number of labels to be saved with each image without having to parse the file name or similar.

`encode_record` takes the respective `data_list` and creates TFRecords which are then loaded later.

This methodology promises to be faster because necessary preprocessing like augmentation and decoding of the image is already done.

Each loaded example of the record is a tensor ready to be put into the training algorithm, with parallel calls and prefetching of data for future steps embedded.

In [1]:
from os.path import join

# Data directories of the pipeline: the raw source data, the interim copy
# that gets augmented, and the processed TFRecords.
raw, interim, processed = (
    join('data', stage) for stage in ('raw', 'interim', 'processed')
)

from src.training_env import reset_and_populate

# Rebuild the interim directory from the raw data.
# NOTE(review): [400,100,0] presumably are the sample counts for the
# train/validation/test splits -- confirm against src.training_env.
reset_and_populate(raw, interim, [400,100,0])

from src.image_handling import encode_record, inline_augment_images

# Augment each split inside the interim directory; the returned lists
# describe the resulting images and are what gets encoded below.
train_images = inline_augment_images(join(interim, 'train'), target_size=(32, 32))
validation_images = inline_augment_images(join(interim, 'validation'), target_size=(32, 32))

# Write one record per split into the processed directory; they are read
# back later as 'train.tfrecord' and 'validation.tfrecord'.
# NOTE(review): ['n', 'o', 'x'] presumably are the class names that map to
# the integer labels -- confirm against src.image_handling.
encode_record(train_images, ['n', 'o', 'x'], processed, 'train')
encode_record(validation_images, ['n', 'o', 'x'], processed, 'validation')

In [2]:
import tensorflow as tf
import numpy as np
import pathlib

# Schema used to parse one serialized example: the image is stored as a
# serialized tensor (scalar string) and the label as a scalar int64.
example_featue_description = dict(
    image=tf.io.FixedLenFeature([], tf.string),
    label=tf.io.FixedLenFeature([], tf.int64),
)

def decode_record(record_path, batch_size, shuffle_buffer_size=1000,
                  image_shape=(32, 32, 1)):
    """Build a batched, endlessly repeating dataset from a TFRecord file.

    Args:
        record_path: Path of the TFRecord file to read.
        batch_size: Number of examples per emitted batch.
        shuffle_buffer_size: Size of the buffer used to shuffle examples.
        image_shape: Static shape assigned to every decoded image. It must
            match the shape the images were encoded with; the default
            matches the 32x32 single-channel images used in this notebook.

    Returns:
        A ``tf.data.Dataset`` yielding ``(image, label)`` batches. Because
        the dataset repeats indefinitely, consumers have to bound it
        themselves (e.g. with ``steps_per_epoch``).
    """
    def decode_example(example):
        features = tf.io.parse_single_example(example, example_featue_description)
        image = tf.io.parse_tensor(features['image'], tf.float32)
        # parse_tensor cannot know the static shape, so it is restored here
        # for the layers that consume the images downstream.
        image.set_shape(image_shape)
        return image, features['label']

    autotune = tf.data.experimental.AUTOTUNE

    # cache() sits after map() so decoding is done only once, and before
    # shuffle() so every pass over the data is shuffled anew.
    return (tf.data.TFRecordDataset(record_path)
            .map(decode_example, num_parallel_calls=autotune)
            .cache()
            .shuffle(shuffle_buffer_size)
            .repeat()
            .batch(batch_size)
            .prefetch(buffer_size=autotune))

# Training batches of 20 and validation batches of 10, read back from the
# records in the processed directory.
train_generator = decode_record(
    record_path=join(processed, 'train.tfrecord'), batch_size=20)
validation_generator = decode_record(
    record_path=join(processed, 'validation.tfrecord'), batch_size=10)

In [3]:
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import SGD, RMSprop

def create_model(hp):
    """Build and compile a dense classifier from tuner hyperparameters.

    The network flattens a 32x32x1 image, applies ``num_layers`` hidden
    ReLU layers and ends in a 3-way softmax output.

    Args:
        hp: The kerastuner hyperparameter container to sample values from.

    Returns:
        The compiled Sequential model, reporting the ``acc`` metric.
    """
    model = models.Sequential()
    # Invariant Input
    model.add(layers.Flatten(input_shape=(32, 32, 1)))
    # Add n hidden layers. 'num_nodes' is requested under one name for all
    # of them, so the hidden layers are expected to share a single width.
    for _ in range(hp.Int('num_layers', 2, 5)):
        model.add(layers.Dense(
            units=hp.Int('num_nodes', 16, 64, 4),
            activation='relu'))
    # Invariant Output
    model.add(layers.Dense(3, 'softmax'))
    # Select any Optimizer
    # NOTE(review): the momentum range has an upper bound of 1 -- confirm
    # that a momentum of exactly 1.0 is meant to be part of the search.
    optimizers = {
        'sgd': SGD(
            # 'learning_rate' is the supported argument name; 'lr' is a
            # deprecated alias that newer Keras releases reject.
            learning_rate=hp.Choice('learning_rate', [0.001, 0.003, 0.007, 0.01]),
            momentum=hp.Float('momentum', 0.5, 1, 0.1),
            nesterov=hp.Boolean('nesterov'))
    }
    # Compile Model
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=optimizers[hp.Choice('optimizer', ['sgd'])],
        metrics=['acc'])

    return model

In [4]:
from datetime import datetime
from tensorboard.plugins.hparams import api
from kerastuner import RandomSearch
from tensorflow import summary
from tensorflow.keras.callbacks import TensorBoard

class customTuner(RandomSearch):
    """Random-search tuner that writes extra TensorBoard data per trial.

    On top of ``RandomSearch`` it attaches a TensorBoard callback to every
    trial, logs the trial's hyperparameters through the HParams plugin and
    writes a histogram of every model weight after each epoch.
    """

    def _train_summary_dir(self, trial):
        """Return the directory the extra summaries of ``trial`` go to.

        This is meant to be the directory where the TensorBoard callback
        puts the training metrics. Only 'execution0' is addressed, i.e. the
        first execution of a trial.
        """
        return join(
            self.get_trial_dir(trial.trial_id),
            trial.trial_id, 'execution0', 'train')

    def run_trial(self, trial, *args, **kwargs):
        """Run one trial with a TensorBoard callback added to its callbacks.

        Returns whatever the parent implementation returns, so tuner
        versions that report results through the return value keep working.
        """
        callbacks = kwargs.pop('callbacks', [])
        # Work on copies so the caller's callback objects are not reused
        # across trials.
        callbacks = self._deepcopy_callbacks(callbacks)
        callbacks.append(TensorBoard(self.get_trial_dir(trial.trial_id)))
        kwargs['callbacks'] = callbacks
        return super().run_trial(trial, *args, **kwargs)

    def on_trial_end(self, trial):
        """Log the trial's hyperparameters and print the search progress."""
        # put the hparams where the metrics of tensorboard are
        with summary.create_file_writer(self._train_summary_dir(trial)).as_default():
            api.hparams(trial.hyperparameters.values, trial_id=trial.trial_id)
        print(datetime.now().strftime("%Y-%m-%dT%H-%M-%S"))
        print('Remaining Trials: ' + str(self.remaining_trials))
        super().on_trial_end(trial)

    def on_epoch_end(self, trial, model, epoch, logs):
        """Write a histogram of every model weight for the finished epoch."""
        # put the data where the metrics of tensorboard are
        with summary.create_file_writer(self._train_summary_dir(trial)).as_default():
            for weight in model.weights:
                summary.histogram(weight.name, data=weight, step=epoch)
        super().on_epoch_end(trial, model, epoch, logs)

In [5]:
from kerastuner import HyperParameters

# Every run gets its own timestamped project folder below the log directory.
log_dir = 'trial2_logs'
timestamp = datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
hp = HyperParameters()

# NOTE(review): the objective 'acc' is the training accuracy, not a
# validation metric -- confirm this is the intended selection criterion.
tuner = customTuner(
    create_model,
    objective='acc',
    max_trials=5,
    executions_per_trial=1,
    hyperparameters=hp,
    directory=log_dir,
    project_name=timestamp)

tuner.search_space_summary()

In [6]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop a trial early once 'loss' has not improved for 3 epochs.
# NOTE(review): 'loss' is the training loss; confirm that 'val_loss' is not
# what was intended, given that validation data is supplied.
callbacks = [EarlyStopping(patience=3, monitor='loss')]

# The generators repeat endlessly, so the length of an epoch and of a
# validation pass is fixed through the step counts.
tuner.search(
    train_generator,
    epochs=30,
    steps_per_epoch=30,
    validation_data=validation_generator,
    validation_steps=30,
    callbacks=callbacks,
    verbose=0)

2019-11-18T00-02-03
Remaining Trials: 4
2019-11-18T00-02-10
Remaining Trials: 3
2019-11-18T00-02-20
Remaining Trials: 2
2019-11-18T00-02-28
Remaining Trials: 1
2019-11-18T00-02-41
Remaining Trials: 0
INFO:tensorflow:Oracle triggered exit


In [7]:
# Show the tuner's summary of the finished search.
tuner.results_summary()