In [None]:
import logging

import numpy as np
import tensorflow as tf
import adabelief
import talos

# Tag of the network variant; it is embedded in the results CSV name and in saved-model names below.
neural_network='relu_dropout'
# Path passed to ModelCheckpoint and load_model for the best checkpoint of this variant.
neural_network_file=f'best_{neural_network}'

In [None]:
# Fetch MNIST as (images, integer labels) pairs for the train and test splits.
(train_data, train_labels), (test_data, test_labels) = tf.keras.datasets.mnist.load_data()

# Flatten every image to a 784-element vector and scale the pixels by 1/255.
train_data = train_data.reshape(train_data.shape[0], 784) / 255.0
test_data  = test_data.reshape(test_data.shape[0], 784) / 255.0

# One-hot encode the labels over the 10 digit classes.
train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10, dtype='float32')
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10, dtype='float32')

In [None]:
class MyEarlyStopping(tf.keras.callbacks.Callback):
    """Early-stopping callback driven by a fixed baseline for the monitored metric.

    After every epoch the monitored value is compared with ``baseline``
    (``>`` when the metric name contains ``'acc'``, ``<`` otherwise).  An epoch
    that beats the baseline resets the patience counter; any other epoch
    increments it, and training is stopped once the counter reaches
    ``baseline_epochs``.  An epoch that sets a new best value restarts the
    counter before the baseline check is applied.

    Args:
        monitor: Key looked up in the per-epoch ``logs`` dict.
        baseline: Threshold the monitored value has to beat.
        baseline_epochs: Counter value at which training is stopped.
        restore_best_weights: If true, the weights recorded at the best epoch
            are put back into the model when training stops or ends.
    """

    def __init__(self,
                 monitor='val_accuracy',
                 baseline=0.9,
                 baseline_epochs=1,
                 restore_best_weights=True):
        super().__init__()

        self.monitor = monitor
        self.baseline = baseline
        self.baseline_epochs = baseline_epochs
        self.wait = 0
        self.stopped_epoch = 0
        self.restore_best_weights = restore_best_weights
        self.best_weights = None

        # Accuracy-like metrics are maximised, everything else is minimised.
        if 'acc' in self.monitor:
            self.monitor_op = np.greater
        else:
            self.monitor_op = np.less

    def on_train_begin(self, logs=None):
        """Reset all per-run state so the callback instance can be reused."""
        self.wait = 0
        self.stopped_epoch = 0
        # np.inf (lower case) is the spelling that survives NumPy 2.0.
        self.best = np.inf if self.monitor_op is np.less else -np.inf
        self.best_weights = None

    def on_epoch_end(self, epoch, logs=None):
        """Track the best weights and stop once the baseline is missed too often."""
        current = self.get_monitor_value(logs)
        if current is None:
            return
        # Make sure there is always something to restore, even if no epoch
        # ever improves on the initial ``best``.
        if self.restore_best_weights and self.best_weights is None:
            self.best_weights = self.model.get_weights()
        if self._is_improvement(current, self.best):
            self.best = current
            # A new best restarts the counter; the baseline check below may
            # still count this epoch as the first one missing the baseline.
            self.wait = 0
            if self.restore_best_weights:
                self.best_weights = self.model.get_weights()
        if self._is_improvement(current, self.baseline):
            self.wait = 0
        else:
            self.wait += 1
        if self.wait >= self.baseline_epochs:
            self.stopped_epoch = epoch
            self.model.stop_training = True
            if self.restore_best_weights and self.best_weights is not None:
                self.model.set_weights(self.best_weights)

    def on_train_end(self, logs=None):
        """Restore the best recorded weights, whether or not training stopped early."""
        if self.restore_best_weights and self.best_weights is not None:
            self.model.set_weights(self.best_weights)

    def get_monitor_value(self, logs):
        """Return ``logs[self.monitor]``, or None (with a warning) if it is absent."""
        logs = logs or {}
        monitor_value = logs.get(self.monitor)
        if monitor_value is None:
            logging.warning('Early stopping conditioned on metric `%s` '
                            'which is not available. Available metrics are: %s',
                            self.monitor, ','.join(list(logs.keys())))
        return monitor_value

    def _is_improvement(self, monitor_value, reference_value):
        """Return whether ``monitor_value`` beats ``reference_value`` under ``monitor_op``."""
        return self.monitor_op(monitor_value, reference_value)

In [None]:
# Hyper-parameter grid for the talos scan.  Every 'optimizer' entry is a
# callable that takes the learning rate and returns an optimizer instance.
# The order of the entries matters: the results cell numbers them by position.
p = {
    'optimizer': [
        # NOTE(review): AdaBeliefOptimizer comes from a third-party module whose
        # signature is not visible here, so its `lr=` keyword is left untouched.
        lambda lr: adabelief.AdaBeliefOptimizer(lr=lr, amsgrad=True),
        lambda lr: adabelief.AdaBeliefOptimizer(lr=lr, amsgrad=False),
        # tf.keras optimizers: `learning_rate` is the supported keyword; `lr`
        # is a deprecated alias.
        lambda lr: tf.keras.optimizers.Adam(learning_rate=lr, amsgrad=True),
        lambda lr: tf.keras.optimizers.Adam(learning_rate=lr, amsgrad=False),
        tf.keras.optimizers.Adadelta,
        tf.keras.optimizers.Adagrad,
        tf.keras.optimizers.Adamax,
        tf.keras.optimizers.Nadam,
        lambda lr: tf.keras.optimizers.RMSprop(learning_rate=lr, centered=True),
        lambda lr: tf.keras.optimizers.RMSprop(learning_rate=lr, centered=False),
        lambda lr: tf.keras.optimizers.SGD(learning_rate=lr, nesterov=True),
        lambda lr: tf.keras.optimizers.SGD(learning_rate=lr, nesterov=False),
    ],
    'batch_size': [10, 15, 20, 25, 30, 35, 40],
    'units': [250],
    'lr': [0.001, 0.01, 0.1, 1, 2, 5],
    'dropout': [0.2, 0.3, 0.4, 0.5]
}

def mnist_model(train_data, train_labels, _, __, params):
    """Build, train and score one MNIST classifier for a single scan round.

    The third and fourth positional arguments are ignored; validation always
    uses the module-level ``test_data`` / ``test_labels``.

    Args:
        train_data: Training inputs, one 784-element row per sample.
        train_labels: One-hot training targets.
        _: Unused.
        __: Unused.
        params: Dict with the keys ``'dropout'``, ``'units'``, ``'optimizer'``
            (callable taking the learning rate), ``'lr'`` and ``'batch_size'``.

    Returns:
        ``(history, model)``.  The ``loss``, ``accuracy``, ``val_loss`` and
        ``val_accuracy`` entries of ``history.history`` are replaced by
        single-element lists holding the metrics of the final model, i.e.
        after the early-stopping callback has restored its best weights.
    """
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dropout(params['dropout'], input_shape=(784,)),
        tf.keras.layers.Dense(params['units'], activation='linear'),
        tf.keras.layers.ReLU(),
        tf.keras.layers.Dense(params['units'], activation='linear'),
        tf.keras.layers.ReLU(),
        # The network outputs logits; the softmax is applied inside the loss.
        tf.keras.layers.Dense(10, activation='linear')
    ])
    optimizer = params['optimizer'](params['lr'])
    model.compile(optimizer=optimizer, loss=tf.nn.softmax_cross_entropy_with_logits, metrics=['accuracy'])
    early_stopping = MyEarlyStopping(monitor='val_accuracy', baseline=0.6, baseline_epochs=1, restore_best_weights=True)
    # `use_multiprocessing` only applies to generator / Sequence inputs and
    # `validation_split=0.0` is the default, so neither is passed for these
    # in-memory arrays.
    history = model.fit(
        train_data, train_labels,
        validation_data=(test_data, test_labels),
        batch_size=params['batch_size'],
        epochs=30,
        callbacks=[early_stopping],
        verbose=0,
    )
    loss, accuracy = model.evaluate(train_data, train_labels, verbose=0)
    val_loss, val_accuracy = model.evaluate(test_data, test_labels, verbose=0)
    # Report the metrics of the final (restored) weights instead of the
    # per-epoch curves.
    history.history['loss'], history.history['accuracy'] = [loss], [accuracy]
    history.history['val_loss'], history.history['val_accuracy'] = [val_loss], [val_accuracy]
    return history, model

# Run the scan over grid `p`. val_split=0. is passed because mnist_model ignores
# the validation arguments it receives and validates on the module-level test set.
t = talos.Scan(train_data, train_labels, model=mnist_model, params=p, experiment_name='mnist_experiment', val_split=0., save_weights=False)

In [None]:
import pandas as pd
from copy import deepcopy

# CSV file holding the scan results for this network variant.
file = f"{neural_network}_file.csv"

# sort_values returns a new frame, so t.data itself is left untouched.
df = t.data.sort_values('val_accuracy', ascending=False)

# Round-trip through CSV so that the `optimizer` column can be matched against
# str(opt) below.  index_col=0 reads the saved index back as the index, which
# avoids a stray "Unnamed: 0" column and a doubled index in the final file.
df.to_csv(file)
df = pd.read_csv(file, index_col=0)

# Replace each optimizer's repr with "<optimizer name>_<position in the grid>".
for i, opt in enumerate(p['optimizer'], 1):
    df.loc[df.optimizer == str(opt), 'optimizer'] = opt(1)._name + '_' + str(i)

df.to_csv(file)

In [None]:
# Show the first ten rows; df was sorted by val_accuracy in descending order.
df.head(10)

In [None]:
from time import perf_counter

# One hand-picked configuration per optimizer variant for the repeated-run
# benchmark.  'optimizer' is a callable taking the learning rate; 'name' is
# used in progress output and in saved-model directory names.
# tf.keras optimizers receive `learning_rate=` (the supported keyword; `lr` is
# a deprecated alias).
# NOTE(review): AdaBeliefOptimizer is third-party and its signature is not
# visible here, so its `lr=` keyword is left untouched.
params_list = [
    {'batch_size': 20, 'dropout': 0.3, 'lr': 0.1, 'optimizer': tf.keras.optimizers.Adagrad, 'name': 'Adagrad'},
    {'batch_size': 40, 'dropout': 0.3, 'lr': 0.1, 'optimizer': lambda lr: tf.keras.optimizers.SGD(learning_rate=lr, nesterov=False), 'name': 'SGD'},
    {'batch_size': 40, 'dropout': 0.3, 'lr': 0.1, 'optimizer': lambda lr: tf.keras.optimizers.SGD(learning_rate=lr, nesterov=True), 'name': 'SGD_nesterov'},
    {'batch_size': 30, 'dropout': 0.3, 'lr': 0.001, 'optimizer': lambda lr: adabelief.AdaBeliefOptimizer(lr=lr, amsgrad=True), 'name': 'AdaBelief_amsgrad'},
    {'batch_size': 20, 'dropout': 0.2, 'lr': 0.01, 'optimizer': tf.keras.optimizers.Adamax, 'name': 'Adamax'},
    {'batch_size': 40, 'dropout': 0.2, 'lr': 2, 'optimizer': tf.keras.optimizers.Adadelta, 'name': 'Adadelta'},
    {'batch_size': 25, 'dropout': 0.3, 'lr': 0.001, 'optimizer': lambda lr: tf.keras.optimizers.Adam(learning_rate=lr, amsgrad=True), 'name': 'Adam_amsgrad'},
    {'batch_size': 35, 'dropout': 0.2, 'lr': 0.001, 'optimizer': lambda lr: tf.keras.optimizers.Adam(learning_rate=lr, amsgrad=False), 'name': 'Adam'},
    {'batch_size': 40, 'dropout': 0.4, 'lr': 0.001, 'optimizer': lambda lr: adabelief.AdaBeliefOptimizer(lr=lr, amsgrad=False), 'name': 'AdaBelief'},
    {'batch_size': 40, 'dropout': 0.2, 'lr': 0.001, 'optimizer': tf.keras.optimizers.Nadam, 'name': 'Nadam'},
    {'batch_size': 40, 'dropout': 0.2, 'lr': 0.001, 'optimizer': lambda lr: tf.keras.optimizers.RMSprop(learning_rate=lr, centered=False), 'name': 'RMSprop'},
    {'batch_size': 40, 'dropout': 0.3, 'lr': 0.001, 'optimizer': lambda lr: tf.keras.optimizers.RMSprop(learning_rate=lr, centered=True), 'name': 'RMSprop_centered'},
]

def train_model(params):
    """Train one fresh classifier for 50 epochs and report its final accuracies.

    Uses the module-level ``train_data`` / ``train_labels`` for training and
    ``test_data`` / ``test_labels`` for validation and scoring.

    Args:
        params: Dict with the keys ``'dropout'``, ``'optimizer'`` (callable
            taking the learning rate), ``'lr'`` and ``'batch_size'``.

    Returns:
        ``(accuracy, val_accuracy, model)``: accuracy on the training set,
        accuracy on the test set, and the trained model.
    """
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dropout(params['dropout'], input_shape=(784,)),
        tf.keras.layers.Dense(250, activation='linear'),
        tf.keras.layers.ReLU(),
        tf.keras.layers.Dense(250, activation='linear'),
        tf.keras.layers.ReLU(),
        # The network outputs logits; the softmax is applied inside the loss.
        tf.keras.layers.Dense(10, activation='linear')
    ])
    optimizer = params['optimizer'](params['lr'])
    model.compile(optimizer=optimizer, loss=tf.nn.softmax_cross_entropy_with_logits, metrics=['accuracy'])
    # `use_multiprocessing` only applies to generator / Sequence inputs and
    # `validation_split=0.0` is the default, so neither is passed for these
    # in-memory arrays.
    model.fit(
        train_data, train_labels,
        validation_data=(test_data, test_labels),
        batch_size=params['batch_size'],
        epochs=50,
        verbose=0,
    )
    _, accuracy = model.evaluate(train_data, train_labels, verbose=0)
    _, val_accuracy = model.evaluate(test_data, test_labels, verbose=0)
    return accuracy, val_accuracy, model

# Number of independent training runs per configuration.
tests = 10
for config_no, params in enumerate(params_list, 1):
    print(f"{config_no}/{len(params_list)}. {params['name']}, lr={params['lr']}")
    accuracy_list = []
    val_accuracy_list = []
    max_accuracy = 0
    max_val_accuracy = 0
    s = 0  # accumulated wall-clock training time over all runs
    best_model = None
    # A separate name for the run counter keeps it from clobbering the
    # configuration counter of the outer loop.
    for run_no in range(1, tests + 1):
        start = perf_counter()
        accuracy, val_accuracy, model = train_model(params)
        stop = perf_counter()
        # Rank runs by test accuracy, breaking ties on training accuracy.  The
        # first run is always accepted so best_model cannot stay None.
        if best_model is None or (max_val_accuracy, max_accuracy) < (val_accuracy, accuracy):
            best_model = model
            max_accuracy = accuracy
            max_val_accuracy = val_accuracy
        s += stop - start
        accuracy_list.append(accuracy)
        val_accuracy_list.append(val_accuracy)
        print(f"{run_no}/{tests} | accuracy: {accuracy} | val_accuracy: {val_accuracy} | time: {stop - start} s.")
    s /= tests
    if best_model is not None:
        # The best run's scores were already measured by train_model on the
        # same data, so they are reused instead of evaluating the model again.
        tf.keras.models.save_model(best_model, f"{neural_network}_{params['name']}_{params['lr']}_{max_accuracy:g}_{max_val_accuracy:g}")
    print(f"accuracy, val_accuracy={list(zip(accuracy_list, val_accuracy_list))}")
    print(f"mean time={s} s.")

In [None]:
# Make sure the inputs are flat 784-element rows; this is a no-op when the
# arrays already have that shape.
train_data = train_data.reshape(train_data.shape[0], 784)
test_data  = test_data.reshape(test_data.shape[0], 784)

# Final network: input dropout, two 250-unit ReLU blocks and a 10-way logit layer.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dropout(0.4, input_shape=(784,)),
    tf.keras.layers.Dense(250, activation='linear'),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Dense(250, activation='linear'),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Dense(10, activation='linear')
])

# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
model.compile(
    optimizer=tf.keras.optimizers.Adadelta(learning_rate=5),
    loss=tf.nn.softmax_cross_entropy_with_logits,
    metrics=['accuracy'],
)

In [None]:
# Print the summary of the model built above.
model.summary()

In [None]:
# Render a diagram of the model, including tensor shapes and layer names.
tf.keras.utils.plot_model(
    model, 
    show_shapes=True, 
    show_layer_names=True,
    expand_nested=True,
    dpi = 60
)

In [None]:
# Save the full model (not just weights) to neural_network_file whenever
# val_accuracy reaches a new maximum.
modelcheckpoint_best = tf.keras.callbacks.ModelCheckpoint(
    neural_network_file,
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    save_weights_only=False,
)
# `use_multiprocessing` only applies to generator / Sequence inputs and
# `validation_split=0.0` is the default, so neither is passed for these
# in-memory arrays.
history = model.fit(
    train_data, train_labels,
    validation_data=(test_data, test_labels),
    batch_size=500,
    epochs=30,
    callbacks=[modelcheckpoint_best],
)

In [None]:
# Reload the checkpoint written to neural_network_file. custom_objects registers
# AdaBeliefOptimizer under its class name, presumably so a checkpoint compiled
# with that optimizer can be deserialized.
model = tf.keras.models.load_model(neural_network_file, custom_objects={'AdaBeliefOptimizer': adabelief.AdaBeliefOptimizer})

In [None]:
# Evaluate the reloaded model on both splits, with a blank line between the reports.
for position, (title, inputs, targets) in enumerate([
    ("Train evaluate", train_data, train_labels),
    ("Test evaluate", test_data, test_labels),
]):
    if position > 0:
        print()
    print(title)
    evaluation = model.evaluate(inputs, targets)
    print(evaluation)