In [None]:
from __future__ import print_function
import sys
import time
import numpy as np
from math import sqrt
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
import tensorflow as tf
import pandas as pd
import gc
from sklearn.model_selection import TimeSeriesSplit, train_test_split
import random

In [None]:
def delete_model(model):
    """
    Clear a tensorflow model from memory & garbage collector.
    :param model: Tensorflow model to remove.
    :return:
    """
    # Memory handling
    del model  # Manually delete model
    tf.reset_default_graph()
    tf.keras.backend.clear_session()
    gc.collect()

In [8]:
def get_random_model():
    return [random.randint(lb[0], ub[0]),  # batch_size
             random.randint(lb[1], ub[1]), random.randint(lb[2], ub[2]),  # epoch_size, optimizer
             random.randint(lb[3], ub[3]), random.randint(lb[4], ub[4]), random.randint(lb[5], ub[5]),  # units
             random.uniform(lb[6], ub[6]), random.uniform(lb[7], ub[7]), random.uniform(lb[8], ub[8]),  # dropout
             random.uniform(lb[9], ub[9]), random.uniform(lb[10], ub[10]), random.uniform(lb[11], ub[11]),  # recurrent_dropout
             random.uniform(lb[12], ub[12]), random.uniform(lb[13], ub[13]), random.uniform(lb[14], ub[14]),  # gaussian noise std
             random.randint(lb[15], ub[15]), random.randint(lb[16], ub[16]), random.randint(lb[17], ub[17]),  # gaussian_noise
             random.randint(lb[18], ub[18]), random.randint(lb[19], ub[19]), random.randint(lb[20], ub[20]),  # batch normalization
             random.randint(lb[21], ub[21]), random.randint(lb[22], ub[22]), random.randint(lb[23], ub[23]),  # base layer types
             random.randint(lb[24], ub[24]), random.randint(lb[25], ub[25]), random.randint(lb[26], ub[26])]  # layer initializers, normal/uniform he/lecun

In [None]:
bounds = [(7, 1 * 31),  # batch_size (~ #days: week, month, year)
          (350, 600),  # epoch_size
          (0, 4),  # optimizer
          (64, 512),  # units
          (64, 512),
          (64, 512),
          (0.01, 0.25),  # dropout
          (0.01, 0.25),
          (0.01, 0.25),
          (0.01, 0.25),  # recurrent_dropout
          (0.01, 0.25),
          (0.01, 0.25),
          (0.1, 0.5),  # gaussian noise std
          (0.1, 0.5),
          (0.1, 0.5),
          (0, 1),  # batch normalization layers
          (0, 1),
          (0, 1),
          (0, 1),  # gaussian noise layer layers
          (0, 1),
          (0, 1),
          (0, 5),  # base layer types (plain/bidirectional: LSTM, GRU, Simple RNN)
          (0, 5),
          (0, 5),
          (0, 9),  # layer initializers, normal/uniform he/lecun,...
          (0, 9),
          (0, 9)]

# Model Search Space bounds
lb, ub = zip(*bounds)
lb = list(lb)  # Lower Bounds
ub = list(ub)  # Upper Bounds


In [None]:
def train_model1(x, *args):
#     train_model.counter += 1
#     modelLabel = train_model.label
#     modelFolds = train_model.folds
#     data_manipulation = train_model.data_manipulation
#     rank = data_manipulation["rank"]
#     master = data_manipulation["master"]
#     directory = data_manipulation["directory"]
#     filePrefix = data_manipulation["filePrefix"]
#     island = data_manipulation["island"]
#     verbosity = data_manipulation["verbose"]
#     multi_gpu = data_manipulation["multi_gpu"]
#     store_plots = data_manipulation["storePlots"]

#     x_data, y_data = args

    # if island == "bh" or island == "sg":  # TODO: un-normalize data
    #     print("bounds ", data_manipulation["bounds"])
    #     print("x ", x)
    #     for i in range(len(x)):
    #         x[i] = x[i] * (data_manipulation["bounds"][i][1] - data_manipulation["bounds"][i][0]) \
    #                + data_manipulation["bounds"][i][0]
    #     x = np.array(x)
    #     print("un-normalized x ", x)

    # x = [32.269684115953126, 478.4579158867764, 2.4914987273745344, 291.55476719406147, 32.0, 512.0, 0.0812481431483004,
    #      0.01, 0.1445004524623349, 0.22335740221774894, 0.03443050512961357, 0.05488258021289669, 1.0,
    #      0.620275664519184, 0.34191582396595566, 0.9436131979280933, 0.4991752935129543, 0.4678261851228459, 0.0,
    #      0.355287972380982, 0.0]  # TODO: Temp set the same model to benchmark a specific DNN

    full_model_parameters = np.array(x.copy())
#     if data_manipulation["fp16"]:
#         full_model_parameters.astype(np.float32, casting='unsafe')  # TODO: temp test speed of keras with fp16

#     print("\n=============\n")
#     print("--- Rank {}: {} iteration {} using: {}".format(rank, modelLabel, train_model.counter, x[6:15]))

    dropout1 = x[6]
    dropout2 = x[7]
    dropout3 = x[8]
    recurrent_dropout1 = x[9]
    recurrent_dropout2 = x[10]
    recurrent_dropout3 = x[11]

    # Gaussian noise
    noise_stddev1 = x[12]
    noise_stddev2 = x[13]
    noise_stddev3 = x[14]

    x = np.rint(x).astype(np.int32)
    optimizers = ['adadelta', 'adagrad', 'nadam', 'adamax',
                  'adam', 'amsgrad']  # , 'rmsprop', 'sgd'] # Avoid loss NaNs, by removing rmsprop & sgd
    batch_size = x[0]
    epoch_size = x[1]
    optimizer = optimizers[x[2]]
    units1 = x[3]
    units2 = x[4]
    units3 = x[5]

    # Batch normalization
    use_batch_normalization1 = x[15]
    use_batch_normalization2 = x[16]
    use_batch_normalization3 = x[17]
    use_gaussian_noise1 = x[18]
    use_gaussian_noise2 = x[19]
    use_gaussian_noise3 = x[20]

#     print("--- Rank {}: batch_size: {}, epoch_size: {} Optimizer: {}, LSTM Unit sizes: {} "
#           "Batch Normalization/Gaussian Noise: {}"
#           .format(rank, x[0], x[1], optimizers[x[2]], x[3:6], x[15:21]))

#     x_data, x_data_holdout = x_data[:-365], x_data[-365:]
#     y_data, y_data_holdout = y_data[:-365], y_data[-365:]

#     totalFolds = modelFolds
#     timeSeriesCrossValidation = TimeSeriesSplit(n_splits=totalFolds)
    # timeSeriesCrossValidation = KFold(n_splits=totalFolds)

    smape_scores = []
    mse_scores = []
    train_mse_scores = []
    # dev_mse_scores = []
    current_fold = 0

    # TODO: (Baldwin) phenotypic plasticity, using random uniform.
    min_regularizer = 0.0
    max_regularizer = 0.01
    regularizer_chance = 0.1
    regularizer_chance_randoms = np.random.rand(9)
#     core_layers_randoms = np.random.randint(4, size=5)  # TODO: Dense, LSTM, BiLSTM, GRU, BiGRU
    core_layers_randoms = train_model.z
    layer_initializer_genes = train_model.m
    layer_initializers = ['he_normal', 'lecun_normal', 'glorot_normal', 'random_normal', 'truncated_normal',
                      'he_uniform', 'lecun_uniform', 'random_uniform',
                      'zeros', 'ones']

    l1_l2_randoms = np.random.uniform(low=min_regularizer, high=max_regularizer, size=(9, 2))
    
    
    model = tf.keras.models.Sequential()
    lstm_kwargs = {'units': units1, 'dropout': dropout1, 'recurrent_dropout': recurrent_dropout1,
                   'return_sequences': True,
                   'implementation': 2,
                   # 'kernel_regularizer': l2(0.01),
                   # 'activity_regularizer': l2(0.01),
                   # 'bias_regularizer': l2(0.01)    # TODO: test with kernel, activity, bias regularizers
                   }
    # Local mutation
    if regularizer_chance_randoms[0] < regularizer_chance:
        lstm_kwargs['activity_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[0, 0], l1_l2_randoms[0, 1])
    if regularizer_chance_randoms[1] < regularizer_chance:
        lstm_kwargs['bias_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[1, 0], l1_l2_randoms[2, 1])
    if regularizer_chance_randoms[2] < regularizer_chance:
        lstm_kwargs['kernel_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[2, 0], l1_l2_randoms[0, 1])

    # 1st base layer
    # model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(**lstm_kwargs), input_shape=(x_data.shape[1], x_data.shape[2])))  # input_shape: rows: n, timestep: 1, features: m
    lstm_kwargs['kernel_initializer'] = layer_initializers[layer_initializer_genes[0]]  # TODO: layer initializer
    if core_layers_randoms[0] == 0:
        model.add(tf.keras.layers.LSTM(**lstm_kwargs))
    elif core_layers_randoms[0] == 1:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(**lstm_kwargs)))
    elif core_layers_randoms[0] == 2:
        model.add(tf.keras.layers.GRU(**lstm_kwargs))
    elif core_layers_randoms[0] == 3:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(**lstm_kwargs)))
    else:
#         model.add(tf.keras.layers.Dense(units1))
        model.add(tf.keras.layers.Dense(units1,
                                        activity_regularizer=tf.keras.regularizers.l1_l2(l1_l2_randoms[3, 0],
                                                                                         l1_l2_randoms[3, 1]),
                                        bias_regularizer=tf.keras.regularizers.l1_l2(l1_l2_randoms[4, 0],
                                                                                     l1_l2_randoms[4, 1]),
                                        kernel_regularizer=tf.keras.regularizers.l1_l2(l1_l2_randoms[5, 0],
                                                                                       l1_l2_randoms[5, 1])))

    # 2nd base layer
    if use_gaussian_noise1 < 0.5:
        model.add(tf.keras.layers.GaussianNoise(noise_stddev1))
    if use_batch_normalization1 < 0.5:
        model.add(tf.keras.layers.BatchNormalization())

    lstm_kwargs['kernel_initializer'] = layer_initializers[layer_initializer_genes[1]]  # TODO: layer initializer
    lstm_kwargs['units'] = units2
    lstm_kwargs['dropout'] = dropout2
    lstm_kwargs['recurrent_dropout'] = recurrent_dropout2
    # TODO: Local mutation
    if regularizer_chance_randoms[3] < regularizer_chance:
        lstm_kwargs['activity_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[3, 0], l1_l2_randoms[3, 1])
    if regularizer_chance_randoms[4] < regularizer_chance:
        lstm_kwargs['bias_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[4, 0], l1_l2_randoms[4, 1])
    if regularizer_chance_randoms[5] < regularizer_chance:
        lstm_kwargs['kernel_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[5, 0], l1_l2_randoms[5, 1])
    # 2nd base layer
    if core_layers_randoms[1] == 0:
        model.add(tf.keras.layers.LSTM(**lstm_kwargs))
    elif core_layers_randoms[1] == 1:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(**lstm_kwargs)))
    elif core_layers_randoms[1] == 2:
        model.add(tf.keras.layers.GRU(**lstm_kwargs))
    elif core_layers_randoms[1] == 3:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(**lstm_kwargs)))
    else:
#         model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(**lstm_kwargs)))
        model.add(tf.keras.layers.Dense(units2,
                                        activity_regularizer=tf.keras.regularizers.l1_l2(l1_l2_randoms[3, 0],
                                                                                         l1_l2_randoms[3, 1]),
                                        bias_regularizer=tf.keras.regularizers.l1_l2(l1_l2_randoms[4, 0],
                                                                                     l1_l2_randoms[4, 1]),
                                        kernel_regularizer=tf.keras.regularizers.l1_l2(l1_l2_randoms[5, 0],
                                                                                       l1_l2_randoms[5, 1])))

    if use_gaussian_noise2 < 0.5:
        model.add(tf.keras.layers.GaussianNoise(noise_stddev2))
    if use_batch_normalization2 < 0.5:
        model.add(tf.keras.layers.BatchNormalization())

    # 3rd base layer
    lstm_kwargs['kernel_initializer'] = layer_initializers[layer_initializer_genes[2]]  # TODO: layer initializer
    lstm_kwargs['units'] = units3
    lstm_kwargs['dropout'] = dropout3
    lstm_kwargs['recurrent_dropout'] = recurrent_dropout3
    lstm_kwargs['return_sequences'] = False  # Last layer should return sequences
    # TODO: Local mutation
    if regularizer_chance_randoms[6] < regularizer_chance:
        lstm_kwargs['activity_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[6, 0], l1_l2_randoms[6, 1])
    if regularizer_chance_randoms[7] < regularizer_chance:
        lstm_kwargs['bias_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[7, 0], l1_l2_randoms[7, 1])
    if regularizer_chance_randoms[8] < regularizer_chance:
        lstm_kwargs['kernel_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[8, 0], l1_l2_randoms[8, 1])
#     model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(**lstm_kwargs)))
    
#     model.add(tf.keras.layers.Dense(units3))
    
    if core_layers_randoms[2] == 0:
        model.add(tf.keras.layers.LSTM(**lstm_kwargs))
    elif core_layers_randoms[2] == 1:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(**lstm_kwargs)))
    elif core_layers_randoms[2] == 2:
        model.add(tf.keras.layers.GRU(**lstm_kwargs))
    elif core_layers_randoms[2] == 3:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(**lstm_kwargs)))
    else:
#         model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(**lstm_kwargs)))
        model.add(tf.keras.layers.Dense(units3,
                                        activity_regularizer=tf.keras.regularizers.l1_l2(l1_l2_randoms[3, 0],
                                                                                         l1_l2_randoms[3, 1]),
                                        bias_regularizer=tf.keras.regularizers.l1_l2(l1_l2_randoms[4, 0],
                                                                                     l1_l2_randoms[4, 1]),
                                        kernel_regularizer=tf.keras.regularizers.l1_l2(l1_l2_randoms[5, 0],
                                                                                       l1_l2_randoms[5, 1])))
    if use_gaussian_noise3 < 0.5:
        model.add(tf.keras.layers.GaussianNoise(noise_stddev3))
    if use_batch_normalization3 < 0.5:
        model.add(tf.keras.layers.BatchNormalization())

    # model.add(tf.keras.layers.Dense(y_data.shape[1], activation=random.choice(
    #     ["tanh", "softmax", "elu", "selu", "softplus", "relu", "softsign", "hard_sigmoid",
    #      "linear"])))  # TODO: test with 2 extra dense layers
#     model.add(tf.keras.layers.Dense(y_data.shape[1]))  # TODO: shape
#     if multi_gpu:
#         model = tf.keras.utils.multi_gpu_model(model, gpus=2)

    if optimizer == 'amsgrad':  # Adam variant: amsgrad (boolean), "On the Convergence of Adam and Beyond".
        model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Adam(amsgrad=True))
    else:
        model.compile(loss='mean_squared_error', optimizer=optimizer)

    current_fold += 1  # TODO: train, trainValidation, validation
#     print("--- Rank {}: Current Fold: {}/{}".format(rank, current_fold, totalFolds))

    early_stop = [
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto'),
        tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, mode='auto',
                                             cooldown=1, verbose=1),
        tf.keras.callbacks.TerminateOnNaN()
    ]

#     try:
#         history = model.fit(x_data[train], y_data[train],
#                             verbose=verbosity,
#                             batch_size=batch_size,
#                             epochs=epoch_size,
#                             validation_data=(x_data[validation], y_data[validation]),
#                             callbacks=early_stop)
#     except ValueError:
# #         print("--- Rank {}: Value Error exception: Model fit exception. Trying again...".format(rank))
#         history = model.fit(x_data[train], y_data[train],
#                             verbose=verbosity,
#                             batch_size=batch_size,
#                             epochs=epoch_size,
#                             validation_data=(x_data[validation], y_data[validation]),
#                             callbacks=early_stop)
#     except:
# #         print("--- Rank {}: Exception: Returning max float value for this iteration.".format(rank))
#         print("--- Exception: Returning max float value for this iteration.")
#         delete_model(model)

#         return sys.float_info.max

    print('.', end='')
    return model

In [None]:
def train_model(x, *args):
    """
    Train a deep learning model.
    :param x: Model phenotype.
    :param args: Data (inputs and expected).
    :return: Average validation Mean Squared Error.
    """

    # TODO: Used for rapid distributed island testing
    # import random
    # rand_number = random.uniform(150, 500)
    # if np.isnan(np.array(x)).any() or np.array(x).size == 0:
    #     train_model.one_nan = True
    # if not train_model.one_nan:
    #     print("x: ", x)
    # else:
    #     print("\n================\nNans detected\n================\n")
    # return rand_number

#     startTime = time.time()  # training time per model

#     train_model.counter += 1
#     modelLabel = train_model.label
#     modelFolds = train_model.folds
#     data_manipulation = train_model.data_manipulation
#     rank = data_manipulation["rank"]
#     master = data_manipulation["master"]
#     directory = data_manipulation["directory"]
#     filePrefix = data_manipulation["filePrefix"]
#     island = data_manipulation["island"]
#     verbosity = data_manipulation["verbose"]
#     multi_gpu = data_manipulation["multi_gpu"]
#     store_plots = data_manipulation["storePlots"]

#     x_data, y_data = args

#     print("=== TODO: Test network blocks (LSTM only for now) ===")
    # x2 = np.array([31.0, 402.80111162405194, 1.9058202160101727, 487.6506286543307, 124.26215489827942, 512.0, 0.241744517820298,
    #  0.25, 0.12677851439487847, 0.23147568997273035, 0.01, 0.19396586046669612, 1.0, 0.6535668275388125,
    #  0.16500668136007904, 0.999225537577359, 0.0, 0.20307441174041735, 1.0, 1.0, 0.0, 0.0, 0.5635281795259502,
    #  1.4141248802054807, 4.763734792829404, 3.0683379620449647, 5.267796469977627])  # TODO: Temp set the same model to benchmark a specific DNN
    # x[12:15] = x2[12:15]  # TODO: Tested: All ~(12:19). With adamax (index: 2) -> Fail. With gaussNoise & batchNorm -> Fail
    # TODO: Test: da x3
    # x[3:6] = np.array([8, 8, 8])  # TODO: test small for da

    full_model_parameters = np.array(x.copy())
#     if data_manipulation["fp16"]:
#         full_model_parameters.astype(np.float32, casting='unsafe')  # TODO: temp test speed of keras with fp16

#     print("\n=============\n")
#     print("--- Rank {}: {} iteration {} using: {}".format(rank, modelLabel, train_model.counter, x[6:15]))

    dropout1 = x[6]
    dropout2 = x[7]
    dropout3 = x[8]
    recurrent_dropout1 = x[9]
    recurrent_dropout2 = x[10]
    recurrent_dropout3 = x[11]

    # Gaussian noise std
    noise_stddev1 = x[12]
    noise_stddev2 = x[13]
    noise_stddev3 = x[14]

    x = np.rint(x).astype(np.int32)
    optimizers = ['nadam', 'amsgrad', 'adagrad', 'adadelta', 'adam',
                  'nadam']  # Avoid loss NaNs, by removing rmsprop, sgd, adamax. TODO: ftrl: needs lr param

    batch_size = x[0]
    epoch_size = x[1]
    optimizer = optimizers[x[2]]
    units1 = x[3]
    units2 = x[4]
    units3 = x[5]

    # Use Batch normalization?
    use_batch_normalization1 = x[15]
    use_batch_normalization2 = x[16]
    use_batch_normalization3 = x[17]

    # Use gaussian noise?
    use_gaussian_noise1 = x[18]
    use_gaussian_noise2 = x[19]
    use_gaussian_noise3 = x[20]

#     core_layers_genes = np.around(x[21:24], decimals=0).astype(int)  # TODO: plain/bidirectional: LSTM, GRU, SimpleRNN
    core_layers_genes = train_model.z
    layer_initializer_genes = train_model.m

    layer_types = ['LSTM', 'BiLSTM', 'GRU', 'BiGRU', 'SimpleRNN', 'BiSimpleRNN']
#     print("--- Rank {}: Layer Types: {}->{}->{}"
#           .format(rank, layer_types[core_layers_genes[0]], layer_types[core_layers_genes[1]],
#                   layer_types[core_layers_genes[2]]))

#     print("--- Rank {}: batch_size: {}, epoch_size: {} Optimizer: {}, Unit sizes: {} "
#           "Batch Normalization/Gaussian Noise: {}"
#           .format(rank, x[0], x[1], optimizers[x[2]], x[3:6], x[15:21]))

#     layer_initializer_genes = np.around(x[24:27], decimals=0).astype(int)  # layer initializers, normal/uniform he/lecun #  TODO: layer initializers
    layer_initializers = ['he_normal', 'lecun_normal', 'glorot_normal', 'random_normal', 'truncated_normal',
                          'he_uniform', 'lecun_uniform', 'random_uniform', 'zeros', 'ones']
#     print("--- Rank {}: Layer initializers: {}->{}->{}"
#           .format(rank, layer_initializers[layer_initializer_genes[0]], layer_initializers[layer_initializer_genes[1]],
#                   layer_initializers[layer_initializer_genes[2]]))

#     x_data, x_data_holdout = x_data[:-365], x_data[-365:]
#     y_data, y_data_holdout = y_data[:-365], y_data[-365:]

#     totalFolds = modelFolds
#     timeSeriesCrossValidation = TimeSeriesSplit(n_splits=totalFolds)
    # timeSeriesCrossValidation = KFold(n_splits=totalFolds)

    smape_scores = []
    mse_scores = []
    train_mse_scores = []
    # dev_mse_scores = []
    current_fold = 0

    # TODO: (Baldwin) phenotypic plasticity, using random uniform.
    min_regularizer = 0.0
    max_regularizer = 0.01
    regularizer_chance = 0.1
    regularizer_chance_randoms = np.random.rand(9)
    
    l1_l2_randoms = np.random.uniform(low=min_regularizer, high=max_regularizer, size=(9, 2))

#     for train, validation in timeSeriesCrossValidation.split(x_data, y_data):  # TODO: test train/dev/validation
# for train, validation_full in timeSeriesCrossValidation.split(x_data, y_data):  # TODO: Nested CV?

#     train, validation = reduce_time_series_validation_fold_size(train, validation)

    # dev, validation = train_test_split(validation_full, test_size=0.1, shuffle=False)  # TODO: 50-50 for dev/val

    # create model
    model = tf.keras.models.Sequential()
    lstm_kwargs = {'units': units1, 'dropout': dropout1, 'recurrent_dropout': recurrent_dropout1,
                   'return_sequences': True,
                   'implementation': 2,
                   # 'kernel_regularizer': l2(0.01),
                   # 'activity_regularizer': l2(0.01),
                   # 'bias_regularizer': l2(0.01)
                   }
    # Local mutation
    if regularizer_chance_randoms[0] < regularizer_chance:
        lstm_kwargs['activity_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[0, 0], l1_l2_randoms[0, 1])
    if regularizer_chance_randoms[1] < regularizer_chance:
        lstm_kwargs['bias_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[1, 0], l1_l2_randoms[2, 1])
    if regularizer_chance_randoms[2] < regularizer_chance:
        lstm_kwargs['kernel_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[2, 0], l1_l2_randoms[0, 1])

    # 1st base layer
    lstm_kwargs['kernel_initializer'] = layer_initializers[layer_initializer_genes[0]]  # TODO: layer initializer

#     model.add(tf.keras.layers.LSTM(**lstm_kwargs))
    
    n_timesteps,n_features = 2, 64
    #ValueError: Negative dimension size caused by subtracting 3
    #from 1 for 'conv1d/conv1d' (op: 'Conv2D') with input shapes: [?,1,1,16], [1,3,16,64].  
#     n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]
    
    model.add(tf.keras.layers.Conv1D(
        filters=64, kernel_size=3,
        activation='relu', input_shape=(n_timesteps,n_features)))
#     model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.MaxPooling1D(pool_size=2))
    model.add(tf.keras.layers.Flatten())
    
    if core_layers_genes[2] == 0:
        model.add(tf.keras.layers.LSTM(**lstm_kwargs))
    elif core_layers_genes[2] == 1:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(**lstm_kwargs)))
    elif core_layers_genes[2] == 2:
        model.add(tf.keras.layers.GRU(**lstm_kwargs))
    elif core_layers_genes[2] == 3:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(**lstm_kwargs)))
    elif core_layers_genes[2] == 4:
        model.add(tf.keras.layers.SimpleRNN(**lstm_kwargs))
    else:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(**lstm_kwargs)))
    if use_gaussian_noise1 < 0.5:
        model.add(tf.keras.layers.GaussianNoise(noise_stddev1))
    if use_batch_normalization1 < 0.5:
        model.add(tf.keras.layers.BatchNormalization())

    # 2nd base layer
    lstm_kwargs['kernel_initializer'] = layer_initializers[layer_initializer_genes[1]]  # TODO: layer initializer
    lstm_kwargs['units'] = units2
    lstm_kwargs['dropout'] = dropout2
    lstm_kwargs['recurrent_dropout'] = recurrent_dropout2
    # TODO: Local mutation
    if regularizer_chance_randoms[3] < regularizer_chance:
        lstm_kwargs['activity_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[3, 0], l1_l2_randoms[3, 1])
    if regularizer_chance_randoms[4] < regularizer_chance:
        lstm_kwargs['bias_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[4, 0], l1_l2_randoms[4, 1])
    if regularizer_chance_randoms[5] < regularizer_chance:
        lstm_kwargs['kernel_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[5, 0], l1_l2_randoms[5, 1])
    # 2nd base layer
    if core_layers_genes[2] == 0:
        model.add(tf.keras.layers.LSTM(**lstm_kwargs))
    elif core_layers_genes[2] == 1:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(**lstm_kwargs)))
    elif core_layers_genes[2] == 2:
        model.add(tf.keras.layers.GRU(**lstm_kwargs))
    elif core_layers_genes[2] == 3:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(**lstm_kwargs)))
    elif core_layers_genes[2] == 4:
        model.add(tf.keras.layers.SimpleRNN(**lstm_kwargs))
    else:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(**lstm_kwargs)))
    if use_gaussian_noise2 < 0.5:
        model.add(tf.keras.layers.GaussianNoise(noise_stddev2))
    if use_batch_normalization2 < 0.5:
        model.add(tf.keras.layers.BatchNormalization())

    # 3rd base layer
    lstm_kwargs['kernel_initializer'] = layer_initializers[layer_initializer_genes[2]]  # TODO: layer initializer
    lstm_kwargs['units'] = units3
    lstm_kwargs['dropout'] = dropout3
    lstm_kwargs['recurrent_dropout'] = recurrent_dropout3
    lstm_kwargs['return_sequences'] = False  # Last layer should return sequences
    # TODO: Local mutation
    if regularizer_chance_randoms[6] < regularizer_chance:
        lstm_kwargs['activity_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[6, 0], l1_l2_randoms[6, 1])
    if regularizer_chance_randoms[7] < regularizer_chance:
        lstm_kwargs['bias_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[7, 0], l1_l2_randoms[7, 1])
    if regularizer_chance_randoms[8] < regularizer_chance:
        lstm_kwargs['kernel_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[8, 0], l1_l2_randoms[8, 1])
    if core_layers_genes[2] == 0:
        model.add(tf.keras.layers.LSTM(**lstm_kwargs))
    elif core_layers_genes[2] == 1:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(**lstm_kwargs)))
    elif core_layers_genes[2] == 2:
        model.add(tf.keras.layers.GRU(**lstm_kwargs))
    elif core_layers_genes[2] == 3:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(**lstm_kwargs)))
    elif core_layers_genes[2] == 4:
        model.add(tf.keras.layers.SimpleRNN(**lstm_kwargs))
    else:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(**lstm_kwargs)))

    if use_gaussian_noise3 < 0.5:
        model.add(tf.keras.layers.GaussianNoise(noise_stddev3))
    if use_batch_normalization3 < 0.5:
        model.add(tf.keras.layers.BatchNormalization())

    # model.add(tf.keras.layers.Dense(y_data.shape[1], activation=random.choice(
    #     ["tanh", "softmax", "elu", "selu", "softplus", "relu", "softsign", "hard_sigmoid",
    #      "linear"])))  # TODO: test with 2 extra dense layers
    #TODO: test dense
    denseCount = 16
#     model.add(tf.keras.layers.Dense(y_data.shape[1]))
    model.add(tf.keras.layers.Dense(denseCount))
#     if multi_gpu:
#         model = tf.keras.utils.multi_gpu_model(model, gpus=2)

    if optimizer == 'amsgrad':  # Adam variant: amsgrad (boolean), "On the Convergence of Adam and Beyond".
        model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Adam(amsgrad=True))
    else:
        model.compile(loss='mean_squared_error', optimizer=optimizer)

    current_fold += 1  # TODO: train, trainValidation, validation
#     print("--- Rank {}: Current Fold: {}/{}".format(rank, current_fold, totalFolds))

    early_stop = [
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto'),
        tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, mode='auto',
                                             cooldown=1, verbose=1),
        tf.keras.callbacks.TerminateOnNaN()
    ]

    # try:  # TODO: Use dev set
    #     history = model.fit(x_data[train], y_data[train],
    #                         verbose=verbosity,
    #                         batch_size=batch_size,
    #                         epochs=epoch_size,
    #                         validation_data=(x_data[dev], y_data[dev]),
    #                         callbacks=early_stop)
    # except ValueError:
    #     print("--- Rank {}: Value Error exception: Model fit exception. Trying again...".format(rank))
    #     history = model.fit(x_data[train], y_data[train],
    #                         verbose=verbosity,
    #                         batch_size=batch_size,
    #                         epochs=epoch_size,
    #                         validation_data=(x_data[dev], y_data[dev]),
    #                         callbacks=early_stop)
#     try:
#         history = model.fit(x_data[train], y_data[train],
#                             verbose=verbosity,
#                             batch_size=batch_size,
#                             epochs=epoch_size,
#                             validation_data=(x_data[validation], y_data[validation]),
#                             callbacks=early_stop)
#     except ValueError:
#         print("--- Rank {}: Value Error exception: Model fit exception. Trying again...".format(rank))
#         history = model.fit(x_data[train], y_data[train],
#                             verbose=verbosity,
#                             batch_size=batch_size,
#                             epochs=epoch_size,
#                             validation_data=(x_data[validation], y_data[validation]),
#                             callbacks=early_stop)
#     except:
#         print("--- Rank {}: Exception: Returning max float value for this iteration.".format(rank))
#         delete_model(model)

#         return sys.float_info.max

    print('=', end='')
    return model

In [None]:
def train_model(x, *args):  
    full_model_parameters = np.array(x.copy())
    dropout1 = x[6]
    dropout2 = x[7]
    dropout3 = x[8]
    recurrent_dropout1 = x[9]
    recurrent_dropout2 = x[10]
    recurrent_dropout3 = x[11]
    noise_stddev1 = x[12]
    noise_stddev2 = x[13]
    noise_stddev3 = x[14]
    x = np.rint(x).astype(np.int32)
    optimizers = ['nadam', 'amsgrad', 'adagrad', 'adadelta', 'adam',
                  'nadam']  # Avoid loss NaNs, by removing rmsprop, sgd, adamax. TODO: ftrl: needs lr param
    batch_size = x[0]
    epoch_size = x[1]
    optimizer = optimizers[x[2]]
    units1 = x[3]
    units2 = x[4]
    units3 = x[5]
    use_batch_normalization1 = x[15]
    use_batch_normalization2 = x[16]
    use_batch_normalization3 = x[17]
    use_gaussian_noise1 = x[18]
    use_gaussian_noise2 = x[19]
    use_gaussian_noise3 = x[20]
    core_layers_genes = train_model.z
    layer_initializer_genes = train_model.m
    layer_types = ['LSTM', 'BiLSTM', 'GRU', 'BiGRU', 'SimpleRNN', 'BiSimpleRNN']
    layer_initializers = ['he_normal', 'lecun_normal', 'glorot_normal', 'random_normal', 'truncated_normal',
                          'he_uniform', 'lecun_uniform', 'random_uniform', 'zeros', 'ones']
    smape_scores = []
    mse_scores = []
    train_mse_scores = []
    current_fold = 0
    min_regularizer = 0.0
    max_regularizer = 0.01
    regularizer_chance = 0.1
    regularizer_chance_randoms = np.random.rand(9)
    l1_l2_randoms = np.random.uniform(low=min_regularizer, high=max_regularizer, size=(9, 2))
    model = tf.keras.models.Sequential()
    lstm_kwargs = {'units': units1, 'dropout': dropout1, 'recurrent_dropout': recurrent_dropout1,
                   'return_sequences': True,
                   'implementation': 2,
                   }
    if regularizer_chance_randoms[0] < regularizer_chance:
        lstm_kwargs['activity_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[0, 0], l1_l2_randoms[0, 1])
    if regularizer_chance_randoms[1] < regularizer_chance:
        lstm_kwargs['bias_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[1, 0], l1_l2_randoms[2, 1])
    if regularizer_chance_randoms[2] < regularizer_chance:
        lstm_kwargs['kernel_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[2, 0], l1_l2_randoms[0, 1])
    lstm_kwargs['kernel_initializer'] = layer_initializers[layer_initializer_genes[0]]  # TODO: layer initializer

    n_timesteps,n_features = 2, 64
    model.add(tf.keras.layers.Conv1D(
        filters=64, kernel_size=3,
        activation='relu', input_shape=(n_timesteps,n_features)))
#     model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.MaxPooling1D(pool_size=2))
    model.add(tf.keras.layers.Flatten())
    
    if core_layers_genes[2] == 0:
        model.add(tf.keras.layers.LSTM(**lstm_kwargs))
    elif core_layers_genes[2] == 1:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(**lstm_kwargs)))
    elif core_layers_genes[2] == 2:
        model.add(tf.keras.layers.GRU(**lstm_kwargs))
    elif core_layers_genes[2] == 3:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(**lstm_kwargs)))
    elif core_layers_genes[2] == 4:
        model.add(tf.keras.layers.SimpleRNN(**lstm_kwargs))
    else:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(**lstm_kwargs)))
    if use_gaussian_noise1 < 0.5:
        model.add(tf.keras.layers.GaussianNoise(noise_stddev1))
    if use_batch_normalization1 < 0.5:
        model.add(tf.keras.layers.BatchNormalization())

    # 2nd base layer
    lstm_kwargs['kernel_initializer'] = layer_initializers[layer_initializer_genes[1]]  # TODO: layer initializer
    lstm_kwargs['units'] = units2
    lstm_kwargs['dropout'] = dropout2
    lstm_kwargs['recurrent_dropout'] = recurrent_dropout2
    # TODO: Local mutation
    if regularizer_chance_randoms[3] < regularizer_chance:
        lstm_kwargs['activity_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[3, 0], l1_l2_randoms[3, 1])
    if regularizer_chance_randoms[4] < regularizer_chance:
        lstm_kwargs['bias_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[4, 0], l1_l2_randoms[4, 1])
    if regularizer_chance_randoms[5] < regularizer_chance:
        lstm_kwargs['kernel_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[5, 0], l1_l2_randoms[5, 1])
    # 2nd base layer
    if core_layers_genes[2] == 0:
        model.add(tf.keras.layers.LSTM(**lstm_kwargs))
    elif core_layers_genes[2] == 1:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(**lstm_kwargs)))
    elif core_layers_genes[2] == 2:
        model.add(tf.keras.layers.GRU(**lstm_kwargs))
    elif core_layers_genes[2] == 3:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(**lstm_kwargs)))
    elif core_layers_genes[2] == 4:
        model.add(tf.keras.layers.SimpleRNN(**lstm_kwargs))
    else:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(**lstm_kwargs)))
    if use_gaussian_noise2 < 0.5:
        model.add(tf.keras.layers.GaussianNoise(noise_stddev2))
    if use_batch_normalization2 < 0.5:
        model.add(tf.keras.layers.BatchNormalization())

    # 3rd base layer
    lstm_kwargs['kernel_initializer'] = layer_initializers[layer_initializer_genes[2]]  # TODO: layer initializer
    lstm_kwargs['units'] = units3
    lstm_kwargs['dropout'] = dropout3
    lstm_kwargs['recurrent_dropout'] = recurrent_dropout3
    lstm_kwargs['return_sequences'] = False  # Last layer should return sequences
    # TODO: Local mutation
    if regularizer_chance_randoms[6] < regularizer_chance:
        lstm_kwargs['activity_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[6, 0], l1_l2_randoms[6, 1])
    if regularizer_chance_randoms[7] < regularizer_chance:
        lstm_kwargs['bias_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[7, 0], l1_l2_randoms[7, 1])
    if regularizer_chance_randoms[8] < regularizer_chance:
        lstm_kwargs['kernel_regularizer'] = tf.keras.regularizers.l1_l2(
            l1_l2_randoms[8, 0], l1_l2_randoms[8, 1])
    if core_layers_genes[2] == 0:
        model.add(tf.keras.layers.LSTM(**lstm_kwargs))
    elif core_layers_genes[2] == 1:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(**lstm_kwargs)))
    elif core_layers_genes[2] == 2:
        model.add(tf.keras.layers.GRU(**lstm_kwargs))
    elif core_layers_genes[2] == 3:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(**lstm_kwargs)))
    elif core_layers_genes[2] == 4:
        model.add(tf.keras.layers.SimpleRNN(**lstm_kwargs))
    else:
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(**lstm_kwargs)))

    if use_gaussian_noise3 < 0.5:
        model.add(tf.keras.layers.GaussianNoise(noise_stddev3))
    if use_batch_normalization3 < 0.5:
        model.add(tf.keras.layers.BatchNormalization())

    # model.add(tf.keras.layers.Dense(y_data.shape[1], activation=random.choice(
    #     ["tanh", "softmax", "elu", "selu", "softplus", "relu", "softsign", "hard_sigmoid",
    #      "linear"])))  # TODO: test with 2 extra dense layers
    #TODO: test dense
    denseCount = 16
#     model.add(tf.keras.layers.Dense(y_data.shape[1]))
    model.add(tf.keras.layers.Dense(denseCount))
#     if multi_gpu:
#         model = tf.keras.utils.multi_gpu_model(model, gpus=2)

    if optimizer == 'amsgrad':  # Adam variant: amsgrad (boolean), "On the Convergence of Adam and Beyond".
        model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Adam(amsgrad=True))
    else:
        model.compile(loss='mean_squared_error', optimizer=optimizer)

    current_fold += 1  # TODO: train, trainValidation, validation
#     print("--- Rank {}: Current Fold: {}/{}".format(rank, current_fold, totalFolds))

    early_stop = [
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto'),
        tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, mode='auto',
                                             cooldown=1, verbose=1),
        tf.keras.callbacks.TerminateOnNaN()
    ]


    print('=', end='')
    return model

In [None]:
result2 = None
for l in range(1):
    for i in range(0, 4):
        for j in range(0, 4):
            for k in range(0, 4):
#                 z = np.array([i, j, k])

                x = get_random_model()
                args = {}
                args["modelLabel"] = "test"
                train_model.counter = 1
                train_model.label = "test"
                train_model.folds = 1
                train_model.data_manipulation = args    
#                 train_model.z = np.array([0, 0, 0])
                train_model.m = np.array([0, 0, 0])
                train_model.z = np.array([i, j, k])

#                 result2 = train_model1(x, *args)
                result2 = train_model(x, *args)
#                 if result2 != 0:
#                     print("Invalid model")
#                     break
print("DONE")

Sequence classification with LSTM:

In [79]:
# Multiple Inputs
import os
import tensorflow as tf
from tensorflow.keras.utils import plot_model
# from tensorflow.keras.models import Model
# from tensorflow.keras.layers import Input, Conv2D, Reshape, Dropout, TimeDistributed
from tensorflow.keras.layers import Dense, Flatten
# from tensorflow.keras.layers import MaxPooling2D, MaxPooling3D, AveragePooling3D, MaxPooling1D
# from tensorflow.keras.layers import BatchNormalization, Bidirectional
# from tensorflow.keras.layers import concatenate
from tensorflow.keras.layers import LSTM
# from tensorflow.keras.layers import ConvLSTM2D
# from tensorflow.keras.layers import Conv3D, Conv1D
from tensorflow.keras.models import Sequential
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # These lines should be called asap, after the os import
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # Use CPU only by default
os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # gtx 970
# from keras.models import Sequential
# from keras.layers import LSTM, Dense
import numpy as np

In [80]:
data_dim = 16
timesteps = 8
num_classes = 10
# expected input data shape: (batch_size, timesteps, data_dim)
model = Sequential()
model.add(LSTM(16, return_sequences=True))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

x_train = np.random.random((100, timesteps, data_dim))
y_train = np.random.random((100, num_classes))
x_val = np.random.random((10, timesteps, data_dim))
y_val = np.random.random((10, num_classes))

model.fit(x_train, y_train, batch_size=64, epochs=5, validation_data=(x_val, y_val))
print(score)

Train on 100 samples, validate on 10 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.24249929547309876, 0.12]


LSTM + CONV1d

In [101]:
from tensorflow.keras.layers import MaxPooling1D, Dropout, GlobalAveragePooling1D
from tensorflow.keras.layers import Conv1D, Reshape
data_dim = 16
timesteps = 8 #8
num_classes = 10
epochs = 5
x_train = np.random.random((1000, timesteps, data_dim))
y_train = np.random.random((1000, num_classes))
x_val = np.random.random((100, timesteps, data_dim))
y_val = np.random.random((100, num_classes))
x_test = np.random.random((100, timesteps, data_dim))
y_test = np.random.random((100, num_classes))


model = Sequential()
model.add(Conv1D(64, 3, activation='relu'))))
model.add(MaxPooling1D(3))
model.add(LSTM(16, return_sequences=True))'))
model.add(LSTM(32))
model.add(Dense(10, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='nadam', metrics=['accuracy'])
model.fit(x_train, y_train, verbose=1, batch_size=64, epochs=epochs, validation_data=(x_val, y_val))
score = model.evaluate(x_test, y_test, batch_size=16)
print(score)

Train on 1000 samples, validate on 100 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.24579177379608155, 0.15]


In [152]:
from tensorflow.keras.layers import MaxPooling1D, Dropout, GlobalAveragePooling1D
from tensorflow.keras.layers import Conv1D, Reshape
data_dim = 16
timesteps = 8 #8
num_classes = 10
epochs = 5
x_train = np.random.random((1000, timesteps, data_dim))
y_train = np.random.random((1000, num_classes))
x_val = np.random.random((100, timesteps, data_dim))
y_val = np.random.random((100, num_classes))
x_test = np.random.random((100, timesteps, data_dim))
y_test = np.random.random((100, num_classes))

model = Sequential()
model.add(Conv1D(64, 2, activation='relu'))
model.add(MaxPooling1D(2))
model.add(Conv1D(64, 2, activation='relu'))
model.add(MaxPooling1D(2))

model.add(LSTM(16, return_sequences=True))

# model.add(Conv1D(timesteps, 2, activation='relu'))
model.add(LSTM(32))
model.add(Dense(10, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='nadam', metrics=['accuracy'])
model.fit(x_train, y_train, verbose=1, batch_size=64, epochs=epochs, validation_data=(x_val, y_val))
score = model.evaluate(x_test, y_test, batch_size=16)
print(score)

Train on 1000 samples, validate on 100 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.23940041780471802, 0.05]


In [157]:
from tensorflow.keras.layers import MaxPooling1D, Dropout, GlobalAveragePooling1D
from tensorflow.keras.layers import Conv1D, Reshape
data_dim = 16
timesteps = 3 #8
num_classes = 10
epochs = 5
x_train = np.random.random((1000, timesteps, data_dim))
y_train = np.random.random((1000, num_classes))
x_val = np.random.random((100, timesteps, data_dim))
y_val = np.random.random((100, num_classes))
x_test = np.random.random((100, timesteps, data_dim))
y_test = np.random.random((100, num_classes))

model = Sequential()
model.add(Conv1D(64, 2, activation='relu'))
model.add(Conv1D(64, 2, activation='relu'))
model.add(MaxPooling1D(2))

model.add(LSTM(16, return_sequences=True))

# model.add(Conv1D(timesteps, 2, activation='relu'))
model.add(LSTM(32))
model.add(Dense(10, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='nadam', metrics=['accuracy'])
model.fit(x_train, y_train, verbose=1, batch_size=64, epochs=epochs, validation_data=(x_val, y_val))
score = model.evaluate(x_test, y_test, batch_size=16)
print(score)

Train on 1000 samples, validate on 100 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.23915619552135467, 0.07]


In [57]:
model = Sequential()
model.add(LSTM(16, return_sequences=True))
#                ,
#                input_shape=(timesteps, data_dim))
#          )  # returns a sequence of vectors of dimension 32
model.add(LSTM(32))  # return a single vector of dimension 32
model.add(Dense(10, activation='softmax'))
model.compile(loss='mse',
              optimizer='nadam',
              metrics=['accuracy'])
model.fit(x_train, y_train, verbose=1,
          batch_size=64, epochs=epochs,
          validation_data=(x_val, y_val))
score = model.evaluate(x_test, y_test, batch_size=16)
print(score)

Train on 1000 samples, validate on 100 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.24002591967582704, 0.11]


# Testing local search

In [1]:
import scipy.optimize

In [2]:
def func1(x):
    return x[0] **2

In [3]:
scipy.optimize.minimize(x0 = [4, 2, 5], fun=func1, method="L-BFGS-B", bounds=[(-50, 11), (0, 1), (0, 2)])

      fun: 2.529512644585829e-17
 hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
      jac: array([-5.88521106e-11,  0.00000000e+00,  0.00000000e+00])
  message: b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'
     nfev: 16
      nit: 2
   status: 0
  success: True
        x: array([-5.02942606e-09,  1.00000000e+00,  2.00000000e+00])

In [4]:
scipy.optimize.minimize(x0 = [4, 2, 5], fun=func1, method="TNC", bounds=[(-50, 11), (0, 1), (0, 2)])

     fun: 2.447953341786534e-17
     jac: array([1.98953592e-08, 0.00000000e+00, 0.00000000e+00])
 message: 'Converged (|f_n-f_(n-1)| ~= 0)'
    nfev: 7
     nit: 3
  status: 1
 success: True
       x: array([4.9476796e-09, 1.0000000e+00, 2.0000000e+00])

In [5]:
scipy.optimize.minimize(x0 = [4, 2, 5], fun=func1, method="SLSQP", bounds=[(-50, 11), (0, 1), (0, 2)])

     fun: 0.0
     jac: array([1.49011612e-08, 0.00000000e+00, 0.00000000e+00])
 message: 'Optimization terminated successfully.'
    nfev: 11
     nit: 2
    njev: 2
  status: 0
 success: True
       x: array([0., 1., 2.])

In [6]:
scipy.optimize.minimize(x0 = [4, 2, 5], fun=func1, method="trust-constr", bounds=[(-50, 11), (0, 1), (0, 2)])



 barrier_parameter: 2.048000000000001e-09
 barrier_tolerance: 2.048000000000001e-09
          cg_niter: 59
      cg_stop_cond: 4
            constr: [array([1.30622094e-10, 5.71539216e-01, 1.09904140e+00])]
       constr_nfev: [0]
       constr_nhev: [0]
       constr_njev: [0]
    constr_penalty: 1.0
  constr_violation: 0.0
    execution_time: 0.18509197235107422
               fun: 1.706213144861233e-20
              grad: array([1.51624054e-08, 0.00000000e+00, 0.00000000e+00])
               jac: [<3x3 sparse matrix of type '<class 'numpy.float64'>'
	with 3 stored elements in Compressed Sparse Row format>]
   lagrangian_grad: array([1.51761342e-08, 1.25843779e-10, 1.33893790e-10])
           message: '`xtol` termination condition is satisfied.'
            method: 'tr_interior_point'
              nfev: 308
              nhev: 0
               nit: 61
             niter: 61
              njev: 0
        optimality: 1.5176134150145295e-08
            status: 2
           success: Tru

In [7]:
scipy.optimize.minimize(x0 =get_random_model(), fun=func1, method="L-BFGS-B", bounds=bounds)

NameError: name 'get_random_model' is not defined

In [None]:
scipy.optimize.minimize(x0 = [4, 2, 5], fun=func1, method="TNC", bounds=[(-50, 11), (0, 1), (0, 2)])

In [None]:
scipy.optimize.minimize(x0 =get_random_model(), fun=func1, method="TNC", bounds=bounds)

In [None]:
scipy.optimize.minimize(x0 = [4, 2, 5], fun=func1, method="SLSQP", bounds=[(-50, 11), (0, 1), (0, 2)])

In [None]:
scipy.optimize.minimize(x0 =get_random_model(), fun=func1, method="SLSQP", bounds=bounds)

## Testing weight training stochasticity

In [1]:
import tensorflow as tf
mnist = tf.keras.datasets.mnist

(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(512, activation=tf.nn.relu),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test)

W0813 23:05:30.996449 17416 deprecation.py:506] From C:\Users\temp3rr0r\Anaconda3\envs\tensorflowLast\lib\site-packages\tensorflow\python\ops\init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Epoch 1/5


InternalError: 2 root error(s) found.
  (0) Internal: Blas GEMM launch failed : a.shape=(32, 784), b.shape=(784, 512), m=32, n=512, k=784
	 [[{{node dense/MatMul}}]]
	 [[metrics/acc/Identity/_61]]
  (1) Internal: Blas GEMM launch failed : a.shape=(32, 784), b.shape=(784, 512), m=32, n=512, k=784
	 [[{{node dense/MatMul}}]]
0 successful operations.
0 derived errors ignored.