In [1]:
from __future__ import print_function
import sys
import time
import numpy as np
from math import sqrt
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
import tensorflow as tf
import pandas as pd
import gc
from sklearn.model_selection import TimeSeriesSplit, train_test_split
import random

In [2]:
def delete_model(model):
    """
    Clear a tensorflow model from memory & garbage collector.

    Note that ``del model`` only drops this function's local reference; a
    caller that still holds the model must release its own reference before
    the object can actually be collected.

    The default graph is reset through ``tf.reset_default_graph`` when that
    attribute exists (TF 1.x) and through ``tf.compat.v1.reset_default_graph``
    otherwise, so the helper does not fail with AttributeError on TF 2.x.

    :param model: Tensorflow model to remove.
    :return: None
    """
    # Memory handling
    del model  # Drop the local reference to the model

    # Locate the graph reset function for the installed TensorFlow version.
    reset_default_graph = getattr(tf, 'reset_default_graph', None)
    if reset_default_graph is None:
        reset_default_graph = tf.compat.v1.reset_default_graph

    reset_default_graph()
    tf.keras.backend.clear_session()
    gc.collect()

In [3]:
def get_random_model():
    """
    Sample one random candidate from the search space.

    Reads the module-level ``lb`` / ``ub`` lists (lower / upper bound per
    position) and draws each position independently, in index order:

    * 0      batch_size                    (integer, inclusive bounds)
    * 1, 2   epoch_size, optimizer         (integer, inclusive bounds)
    * 3-5    units                         (integer, inclusive bounds)
    * 6-8    dropout                       (float)
    * 9-11   recurrent_dropout             (float)
    * 12-14  gaussian noise std            (float)
    * 15-20  six on/off flags              (integer, inclusive bounds)

    :return: list with 21 sampled values.
    """
    def draw_ints(first, last):
        # random.randint includes both end points.
        return [random.randint(lb[pos], ub[pos]) for pos in range(first, last)]

    def draw_floats(first, last):
        return [random.uniform(lb[pos], ub[pos]) for pos in range(first, last)]

    candidate = draw_ints(0, 6)        # batch_size, epoch_size, optimizer, units
    candidate += draw_floats(6, 15)    # dropout, recurrent_dropout, gaussian noise std
    candidate += draw_ints(15, 21)     # on/off flags
    return candidate


In [4]:
# Model search space: one (lower, upper) pair per position of a candidate.
# TODO: Add weights initializer search: https://keras.io/initializers/
# Alternative unit ranges left commented out by the author:
#   (1023, 1024), (1023, 1024), (1023, 1024)
#   (32, 512), (32, 196), (32, 384)
# NOTE(review): the last two groups are labelled "gaussian_noise" then
# "batch normalization" here, while train_model reads indices 15-17 as
# batch-normalization flags and 18-20 as gaussian-noise flags. Both groups
# share the (0, 1) range, so sampling is unaffected - confirm the intended labels.
bounds = (
    [(7, 1 * 31)]             # batch_size (~ #days: week, month, year)  # TODO: reduced batch size to try avoiding OOM
    + [(350, 600), (0, 4)]    # epoch_size, optimizer (upper bound 5 was commented out)
    + [(64, 512)] * 3         # units
    + [(0.01, 0.25)] * 3      # dropout
    + [(0.01, 0.25)] * 3      # recurrent_dropout
    + [(0.01, 1)] * 3         # gaussian noise std
    + [(0, 1)] * 3            # gaussian_noise
    + [(0, 1)] * 3            # batch normalization
)

# Lower bounds: first element of every pair, in position order.
lb = [pair[0] for pair in bounds]

# Upper bounds: second element of every pair, in position order.
ub = [pair[1] for pair in bounds]


In [24]:
def _layer_regularizers(chance_draws, strength_draws, first_row, chance):
    """
    Choose the optional l1_l2 regularizers of a single recurrent layer.

    Rows ``first_row``, ``first_row + 1`` and ``first_row + 2`` of both random
    arrays belong to the activity, bias and kernel regularizer (in that
    order). A regularizer is created only when its chance draw is below
    ``chance``; its l1 and l2 factors are both taken from that same row.

    :param chance_draws: 1-D array of uniform draws, one per regularizer slot.
    :param strength_draws: 2-D array; column 0 is the l1 factor, column 1 the l2 factor.
    :param first_row: Row of the layer's first slot in both arrays.
    :param chance: Threshold below which a regularizer is enabled.
    :return: dict (possibly empty) of keyword arguments for the layer constructor.
    """
    slot_names = ('activity_regularizer', 'bias_regularizer', 'kernel_regularizer')
    chosen = {}
    for offset, slot_name in enumerate(slot_names):
        row = first_row + offset
        if chance_draws[row] < chance:
            chosen[slot_name] = tf.keras.regularizers.l1_l2(
                strength_draws[row, 0], strength_draws[row, 1])
    return chosen


def _add_core_layer(model, layer_kind, layer_kwargs):
    """
    Append one recurrent layer to ``model``.

    ``layer_kind`` selects the layer: 0 = LSTM, 1 = bidirectional LSTM,
    2 = GRU, 3 = bidirectional GRU. Any other value falls back to a
    bidirectional LSTM.

    :param model: Sequential model being assembled.
    :param layer_kind: Integer code of the layer type.
    :param layer_kwargs: Keyword arguments passed to the LSTM/GRU constructor.
    :return: None
    """
    if layer_kind == 0:
        model.add(tf.keras.layers.LSTM(**layer_kwargs))
    elif layer_kind == 2:
        model.add(tf.keras.layers.GRU(**layer_kwargs))
    elif layer_kind == 3:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(**layer_kwargs)))
    else:  # kind 1 and every unknown kind
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(**layer_kwargs)))


def train_model(x, *args):
    """
    Build and compile the 3-layer recurrent model encoded by ``x``.

    Training is currently disabled: the function only checks that the sampled
    architecture can be constructed and compiled, prints a ``.`` as progress
    marker and returns 0.

    Layout of ``x`` (21 values):

    * 0, 1   batch_size, epoch_size (only needed by the disabled fit step)
    * 2      index into the optimizer name list
    * 3-5    units of recurrent layer 1-3
    * 6-8    dropout of layer 1-3
    * 9-11   recurrent_dropout of layer 1-3
    * 12-14  gaussian noise stddev after layer 1-3
    * 15-17  batch-normalization flags after layer 1-3
    * 18-20  gaussian-noise flags after layer 1-3

    Positions 6-14 are used as given; every other position is rounded to the
    nearest integer first.

    The layer type of each of the three recurrent layers is read from the
    function attribute ``train_model.z`` (see ``_add_core_layer`` for the
    codes), which the caller must set before calling.

    Each recurrent layer gets a fresh keyword dict, so a regularizer randomly
    enabled for one layer is not carried over to the following layers.

    :param x: Sequence of 21 hyper-parameter values as described above.
    :param args: Ignored.
    :return: 0
    """
    # Continuous hyper-parameters have to be read before the rounding below.
    dropouts = [x[6], x[7], x[8]]
    recurrent_dropouts = [x[9], x[10], x[11]]
    noise_stddevs = [x[12], x[13], x[14]]

    genes = np.rint(x).astype(np.int32)
    optimizers = ['adadelta', 'adagrad', 'nadam', 'adamax',
                  'adam', 'amsgrad']  # , 'rmsprop', 'sgd'] # Avoid loss NaNs, by removing rmsprop & sgd
    optimizer = optimizers[genes[2]]
    units = [genes[3], genes[4], genes[5]]
    use_batch_normalization = [genes[15], genes[16], genes[17]]
    use_gaussian_noise = [genes[18], genes[19], genes[20]]

    # TODO: (Baldwin) phenotypic plasticity, using random uniform.
    min_regularizer = 0.0
    max_regularizer = 0.01
    regularizer_chance = 0.1
    # One chance draw and one (l1, l2) pair per regularizer slot: 3 layers x 3 slots.
    regularizer_chance_randoms = np.random.rand(9)
    core_layers_randoms = train_model.z  # layer type codes supplied by the caller
    l1_l2_randoms = np.random.uniform(low=min_regularizer, high=max_regularizer, size=(9, 2))

    try:
        model = tf.keras.models.Sequential()
        for layer_idx in range(3):
            layer_kwargs = {
                'units': units[layer_idx],
                'dropout': dropouts[layer_idx],
                'recurrent_dropout': recurrent_dropouts[layer_idx],
                # Only the last recurrent layer collapses the sequence, so the
                # Dense head receives a single vector per sample.
                'return_sequences': layer_idx < 2,
                'implementation': 2,
            }
            # Local mutation: each regularizer is enabled independently per layer.
            layer_kwargs.update(_layer_regularizers(
                regularizer_chance_randoms, l1_l2_randoms, 3 * layer_idx, regularizer_chance))
            _add_core_layer(model, core_layers_randoms[layer_idx], layer_kwargs)

            # NOTE(review): both optional layers are added when their flag
            # rounds to 0 (flag < 0.5) - confirm this polarity is intended.
            if use_gaussian_noise[layer_idx] < 0.5:
                model.add(tf.keras.layers.GaussianNoise(noise_stddevs[layer_idx]))
            if use_batch_normalization[layer_idx] < 0.5:
                model.add(tf.keras.layers.BatchNormalization())

        # TODO: shape - the output width is hard-coded instead of y_data.shape[1].
        model.add(tf.keras.layers.Dense(16))

        if optimizer == 'amsgrad':  # Adam variant: amsgrad (boolean), "On the Convergence of Adam and Beyond".
            model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Adam(amsgrad=True))
        else:
            model.compile(loss='mean_squared_error', optimizer=optimizer)

        # TODO: training is disabled in this notebook. When re-enabled it should call
        #   model.fit(x_data[train], y_data[train], verbose=verbosity,
        #             batch_size=genes[0], epochs=genes[1],
        #             validation_data=(x_data[validation], y_data[validation]),
        #             callbacks=[EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto'),
        #                        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, mode='auto',
        #                                          cooldown=1, verbose=1),
        #                        TerminateOnNaN()])
        # and return sys.float_info.max when fitting fails.
    finally:
        # The model is not returned, so clear the Keras global state here;
        # otherwise it accumulates across the many models the caller builds.
        tf.keras.backend.clear_session()

    print('.', end='')
    return 0

In [26]:
# Smoke test: build a randomly sampled model for every combination of the
# three core layer types (4 codes each), repeated 100 times.
# These function attributes do not change between iterations, so set them once.
args = {}
args["modelLabel"] = "test"
train_model.counter = 1
train_model.label = "test"
train_model.folds = 1
train_model.data_manipulation = args

# np.ndindex walks (repeat, i, j, k) with the last index varying fastest, i.e.
# in the same order as four nested range() loops. A single flat loop lets the
# `break` below stop the whole sweep instead of only the innermost loop.
for repeat, i, j, k in np.ndindex(100, 4, 4, 4):
    z = np.array([i, j, k])
    train_model.z = z  # layer type codes read by train_model

    x = get_random_model()
    # `*args` unpacks the dict's keys as extra positional arguments.
    result = train_model(x, *args)
    if result != 0:
        print("Invalid model")
        break

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

KeyboardInterrupt: 