In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from functools import partial

  from ._conv import register_converters as _register_converters


In [23]:
n_inputs = 28*28
n_hidden1 = 100
n_hidden2 = 100
n_hidden3 = 100
n_hidden4 = 100
n_hidden5 = 100
n_outputs = 5

# Let's define the placeholders for the inputs and the targets
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

In [24]:
# Let's create the DNN
he_init = tf.contrib.layers.variance_scaling_initializer()
my_dense_layer = partial(
                    tf.layers.dense, activation=tf.nn.elu, 
                    kernel_initializer=he_init)

with tf.name_scope("dnn"):
    hidden1 = my_dense_layer(X, n_hidden1, name="hidden1")
    hidden2 = my_dense_layer(hidden1, n_hidden2, name="hidden2")
    hidden3 = my_dense_layer(hidden2, n_hidden3, name="hidden3")
    hidden4 = my_dense_layer(hidden3, n_hidden4, name="hidden4")
    hidden5 = my_dense_layer(hidden4, n_hidden5, name="hidden5")
    logits = my_dense_layer(hidden5, n_outputs, activation=None, name="outputs")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")
    
learning_rate = 0.01

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss, name="training_op")

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y , 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [8]:
mnist = input_data.read_data_sets("/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [5]:
X_train1 = mnist.train.images[mnist.train.labels < 5]
y_train1 = mnist.train.labels[mnist.train.labels < 5]
X_valid1 = mnist.validation.images[mnist.validation.labels < 5]
y_valid1 = mnist.validation.labels[mnist.validation.labels < 5]
X_test1 = mnist.test.images[mnist.test.labels < 5]
y_test1 = mnist.test.labels[mnist.test.labels < 5]

In [6]:
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train1))
        for rnd_indices in np.array_split(rnd_idx, len(X_train1) // batch_size):
            X_batch, y_batch = X_train1[rnd_indices], y_train1[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Calculate loss and acc on the validation set to do early stopping
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid1, y: y_valid1})
        if loss_val < best_loss:
            save_path = saver.save(sess, "./my_mnist_model_0_to_4.ckpt")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    saver.restore(sess, "./my_mnist_model_0_to_4.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_test1, y: y_test1})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

0	Validation loss: 0.181033	Best loss: 0.181033	Accuracy: 95.27%
1	Validation loss: 0.125367	Best loss: 0.125367	Accuracy: 97.19%
2	Validation loss: 0.258047	Best loss: 0.125367	Accuracy: 97.30%
3	Validation loss: 0.137535	Best loss: 0.125367	Accuracy: 97.07%
4	Validation loss: 0.120659	Best loss: 0.120659	Accuracy: 97.11%
5	Validation loss: 0.467715	Best loss: 0.120659	Accuracy: 75.76%
6	Validation loss: 0.246886	Best loss: 0.120659	Accuracy: 95.97%
7	Validation loss: 0.234324	Best loss: 0.120659	Accuracy: 97.15%
8	Validation loss: 0.136194	Best loss: 0.120659	Accuracy: 97.58%
9	Validation loss: 0.360426	Best loss: 0.120659	Accuracy: 96.87%
10	Validation loss: 0.769926	Best loss: 0.120659	Accuracy: 74.39%
11	Validation loss: 0.742954	Best loss: 0.120659	Accuracy: 78.81%
12	Validation loss: 0.542269	Best loss: 0.120659	Accuracy: 77.37%
13	Validation loss: 0.487362	Best loss: 0.120659	Accuracy: 79.20%
14	Validation loss: 0.510532	Best loss: 0.120659	Accuracy: 79.24%
15	Validation loss: 

<h1>DNNClassifier</h1>

In [19]:
import numpy as np
import tensorflow as tf
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.exceptions import NotFittedError

class DNNClassifier(BaseEstimator, ClassifierMixin):
        
    def __init__(self, n_hidden_layers=5, n_neurons=100, optimizer_class=tf.train.AdamOptimizer,
                 learning_rate=0.01, batch_size=20, activation=tf.nn.elu, initializer=he_init,
                 batch_norm_momentum=None, dropout_rate=None, random_state=None):
        """Initialize the DNNClassifier by simply storing all the hyperparameters."""
        self.n_hidden_layers = n_hidden_layers
        self.n_neurons = n_neurons
        self.optimizer_class = optimizer_class
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.activation = activation
        self.initializer = initializer
        self.batch_norm_momentum = batch_norm_momentum
        self.dropout_rate = dropout_rate
        self.random_state = random_state
        self._session = None

    def _dnn(self, inputs):
        """Build the hidden layers, with support for batch normalization and dropout."""
        for layer in range(self.n_hidden_layers):
            if self.dropout_rate:
                inputs = tf.layers.dropout(inputs, self.dropout_rate, training=self._training)
            inputs = tf.layers.dense(inputs, self.n_neurons,
                                     kernel_initializer=self.initializer,
                                     name="hidden%d" % (layer + 1))
            if self.batch_norm_momentum:
                inputs = tf.layers.batch_normalization(inputs, momentum=self.batch_norm_momentum,
                                                       training=self._training)
            inputs = self.activation(inputs, name="hidden%d_out" % (layer + 1))
        return inputs

    def _build_graph(self, n_inputs, n_outputs):
        """Build the same model as earlier"""
        if self.random_state is not None:
            tf.set_random_seed(self.random_state)
            np.random.seed(self.random_state)

        X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
        y = tf.placeholder(tf.int32, shape=(None), name="y")

        if self.batch_norm_momentum or self.dropout_rate:
            self._training = tf.placeholder_with_default(False, shape=(), name='training')
        else:
            self._training = None

        dnn_outputs = self._dnn(X)

        logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=he_init, name="logits")
        Y_proba = tf.nn.softmax(logits, name="Y_proba")

        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                                  logits=logits)
        loss = tf.reduce_mean(xentropy, name="loss")

        optimizer = self.optimizer_class(learning_rate=self.learning_rate)
        training_op = optimizer.minimize(loss)

        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        # Make the important operations available easily through instance variables
        self._X, self._y = X, y
        self._Y_proba, self._loss = Y_proba, loss
        self._training_op, self._accuracy = training_op, accuracy
        self._init, self._saver = init, saver

    def close_session(self):
        if self._session:
            self._session.close()

    def _get_model_params(self):
        """Get all variable values (used for early stopping, faster than saving to disk)"""
        with self._graph.as_default():
            gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        return {gvar.op.name: value for gvar, value in zip(gvars, self._session.run(gvars))}

    def _restore_model_params(self, model_params):
        """Set all variables to the given values (for early stopping, faster than loading from disk)"""
        gvar_names = list(model_params.keys())
        assign_ops = {gvar_name: self._graph.get_operation_by_name(gvar_name + "/Assign")
                      for gvar_name in gvar_names}
        init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
        feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
        self._session.run(assign_ops, feed_dict=feed_dict)

    def fit(self, X, y, n_epochs=100, X_valid=None, y_valid=None):
        """Fit the model to the training set. If X_valid and y_valid are provided, use early stopping."""
        self.close_session()

        # infer n_inputs and n_outputs from the training set.
        n_inputs = X.shape[1]
        self.classes_ = np.unique(y)
        n_outputs = len(self.classes_)
        
        # Translate the labels vector to a vector of sorted class indices, containing
        # integers from 0 to n_outputs - 1.
        # For example, if y is equal to [8, 8, 9, 5, 7, 6, 6, 6], then the sorted class
        # labels (self.classes_) will be equal to [5, 6, 7, 8, 9], and the labels vector
        # will be translated to [3, 3, 4, 0, 2, 1, 1, 1]
        self.class_to_index_ = {label: index
                                for index, label in enumerate(self.classes_)}
        y = np.array([self.class_to_index_[label]
                      for label in y], dtype=np.int32)
        
        self._graph = tf.Graph()
        with self._graph.as_default():
            self._build_graph(n_inputs, n_outputs)
            # extra ops for batch normalization
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        # needed in case of early stopping
        max_checks_without_progress = 20
        checks_without_progress = 0
        best_loss = np.infty
        best_params = None
        
        # Now train the model!
        self._session = tf.Session(graph=self._graph)
        with self._session.as_default() as sess:
            self._init.run()
            for epoch in range(n_epochs):
                rnd_idx = np.random.permutation(len(X))
                for rnd_indices in np.array_split(rnd_idx, len(X) // self.batch_size):
                    X_batch, y_batch = X[rnd_indices], y[rnd_indices]
                    feed_dict = {self._X: X_batch, self._y: y_batch}
                    if self._training is not None:
                        feed_dict[self._training] = True
                    sess.run(self._training_op, feed_dict=feed_dict)
                    if extra_update_ops:
                        sess.run(extra_update_ops, feed_dict=feed_dict)
                if X_valid is not None and y_valid is not None:
                    loss_val, acc_val = sess.run([self._loss, self._accuracy],
                                                 feed_dict={self._X: X_valid,
                                                            self._y: y_valid})
                    if loss_val < best_loss:
                        best_params = self._get_model_params()
                        best_loss = loss_val
                        checks_without_progress = 0
                    else:
                        checks_without_progress += 1
                    print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_val, best_loss, acc_val * 100))
                    if checks_without_progress > max_checks_without_progress:
                        print("Early stopping!")
                        break
                else:
                    loss_train, acc_train = sess.run([self._loss, self._accuracy],
                                                     feed_dict={self._X: X_batch,
                                                                self._y: y_batch})
                    print("{}\tLast training batch loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_train, acc_train * 100))
            # If we used early stopping then rollback to the best model found
            if best_params:
                self._restore_model_params(best_params)
            return self

    def predict_proba(self, X):
        if not self._session:
            raise NotFittedError("This %s instance is not fitted yet" % self.__class__.__name__)
        with self._session.as_default() as sess:
            return self._Y_proba.eval(feed_dict={self._X: X})

    def predict(self, X):
        class_indices = np.argmax(self.predict_proba(X), axis=1)
        return np.array([[self.classes_[class_index]]
                         for class_index in class_indices], np.int32)

    def save(self, path):
        self._saver.save(self._session, path)

In [13]:
dnn_clf = DNNClassifier(random_state=42)
dnn_clf.fit(X_train1, y_train1, n_epochs=1000, X_valid=X_valid1, y_valid=y_valid1)

0	Validation loss: 0.190826	Best loss: 0.190826	Accuracy: 96.64%
1	Validation loss: 1.689649	Best loss: 0.190826	Accuracy: 18.73%
2	Validation loss: 1.660114	Best loss: 0.190826	Accuracy: 20.91%
3	Validation loss: 1.778077	Best loss: 0.190826	Accuracy: 22.01%
4	Validation loss: 1.667106	Best loss: 0.190826	Accuracy: 22.01%
5	Validation loss: 1.654532	Best loss: 0.190826	Accuracy: 22.01%
6	Validation loss: 1.680933	Best loss: 0.190826	Accuracy: 18.73%
7	Validation loss: 1.779077	Best loss: 0.190826	Accuracy: 22.01%
8	Validation loss: 1.699482	Best loss: 0.190826	Accuracy: 19.27%
9	Validation loss: 1.767771	Best loss: 0.190826	Accuracy: 20.91%
10	Validation loss: 1.629350	Best loss: 0.190826	Accuracy: 22.01%
11	Validation loss: 1.812643	Best loss: 0.190826	Accuracy: 22.01%
12	Validation loss: 1.675939	Best loss: 0.190826	Accuracy: 18.73%
13	Validation loss: 1.633259	Best loss: 0.190826	Accuracy: 20.91%
14	Validation loss: 1.652904	Best loss: 0.190826	Accuracy: 20.91%
15	Validation loss: 

DNNClassifier(activation=<function elu at 0x000001A7BE2E2268>,
       batch_norm_momentum=None, batch_size=20, dropout_rate=None,
       initializer=<function variance_scaling_initializer.<locals>._initializer at 0x000001A7BC93C730>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42)

In [14]:
from sklearn.metrics import accuracy_score

y_pred = dnn_clf.predict(X_test1)
accuracy_score(y_test1, y_pred)

0.9708114419147694

In [15]:
from sklearn.model_selection import RandomizedSearchCV

def leaky_relu(alpha=0.01):
    def parametrized_leaky_relu(z, name=None):
        return tf.maximum(alpha * z, z, name=name)
    return parametrized_leaky_relu

param_distribs = {
    "n_neurons": [10, 30, 50, 70, 90, 100, 120, 140, 160],
    "batch_size": [10, 50, 100, 500],
    "learning_rate": [0.01, 0.02, 0.05, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha=0.01), leaky_relu(alpha=0.1)],
    # you could also try exploring different numbers of hidden layers, different optimizers, etc.
    #"n_hidden_layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    #"optimizer_class": [tf.train.AdamOptimizer, partial(tf.train.MomentumOptimizer, momentum=0.95)],
}

rnd_search = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,
                                fit_params={"X_valid": X_valid1, "y_valid": y_valid1, "n_epochs": 1000},
                                random_state=42, verbose=2)
rnd_search.fit(X_train1, y_train1)

Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] n_neurons=10, batch_size=100, activation=<function elu at 0x000001A7BE2E2268>, learning_rate=0.05 
0	Validation loss: 0.131135	Best loss: 0.131135	Accuracy: 96.36%
1	Validation loss: 0.130872	Best loss: 0.130872	Accuracy: 96.17%
2	Validation loss: 0.153402	Best loss: 0.130872	Accuracy: 96.36%
3	Validation loss: 0.145776	Best loss: 0.130872	Accuracy: 96.76%
4	Validation loss: 0.132235	Best loss: 0.130872	Accuracy: 96.87%
5	Validation loss: 0.140970	Best loss: 0.130872	Accuracy: 96.99%
6	Validation loss: 0.150057	Best loss: 0.130872	Accuracy: 96.25%
7	Validation loss: 0.138547	Best loss: 0.130872	Accuracy: 96.64%
8	Validation loss: 0.129539	Best loss: 0.129539	Accuracy: 96.05%
9	Validation loss: 0.131695	Best loss: 0.129539	Accuracy: 96.44%
10	Validation loss: 0.135688	Best loss: 0.129539	Accuracy: 96.68%
11	Validation loss: 1.787319	Best loss: 0.129539	Accuracy: 19.12%
12	Validation loss: 1.630504	Best loss: 0.129539	Acc

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   12.2s remaining:    0.0s


0	Validation loss: 0.183487	Best loss: 0.183487	Accuracy: 95.93%
1	Validation loss: 0.106714	Best loss: 0.106714	Accuracy: 96.99%
2	Validation loss: 0.150361	Best loss: 0.106714	Accuracy: 95.78%
3	Validation loss: 0.127387	Best loss: 0.106714	Accuracy: 96.64%
4	Validation loss: 0.109397	Best loss: 0.106714	Accuracy: 96.99%
5	Validation loss: 0.132421	Best loss: 0.106714	Accuracy: 96.29%
6	Validation loss: 0.125783	Best loss: 0.106714	Accuracy: 96.44%
7	Validation loss: 0.242102	Best loss: 0.106714	Accuracy: 94.88%
8	Validation loss: 1.612336	Best loss: 0.106714	Accuracy: 19.27%
9	Validation loss: 1.618384	Best loss: 0.106714	Accuracy: 22.01%
10	Validation loss: 1.614465	Best loss: 0.106714	Accuracy: 19.27%
11	Validation loss: 1.625938	Best loss: 0.106714	Accuracy: 22.01%
12	Validation loss: 1.624183	Best loss: 0.106714	Accuracy: 20.91%
13	Validation loss: 1.636243	Best loss: 0.106714	Accuracy: 19.08%
14	Validation loss: 1.623178	Best loss: 0.106714	Accuracy: 22.01%
15	Validation loss: 

37	Validation loss: 0.096842	Best loss: 0.060442	Accuracy: 98.01%
38	Validation loss: 0.063650	Best loss: 0.060442	Accuracy: 98.51%
39	Validation loss: 0.075109	Best loss: 0.060442	Accuracy: 98.67%
40	Validation loss: 0.070305	Best loss: 0.060442	Accuracy: 98.48%
41	Validation loss: 0.074671	Best loss: 0.060442	Accuracy: 98.71%
42	Validation loss: 0.100321	Best loss: 0.060442	Accuracy: 98.48%
43	Validation loss: 0.093451	Best loss: 0.060442	Accuracy: 98.67%
44	Validation loss: 0.094709	Best loss: 0.060442	Accuracy: 98.16%
45	Validation loss: 0.098850	Best loss: 0.060442	Accuracy: 98.63%
46	Validation loss: 0.093627	Best loss: 0.060442	Accuracy: 98.67%
47	Validation loss: 0.099696	Best loss: 0.060442	Accuracy: 98.32%
48	Validation loss: 0.099620	Best loss: 0.060442	Accuracy: 98.48%
Early stopping!
[CV]  n_neurons=30, batch_size=500, activation=<function relu at 0x000001A7BE30C048>, learning_rate=0.02, total=  20.5s
[CV] n_neurons=30, batch_size=500, activation=<function relu at 0x000001

1	Validation loss: 9.133509	Best loss: 2.372510	Accuracy: 54.18%
2	Validation loss: 3.197797	Best loss: 2.372510	Accuracy: 71.66%
3	Validation loss: 3.479365	Best loss: 2.372510	Accuracy: 72.56%
4	Validation loss: 2.982099	Best loss: 2.372510	Accuracy: 81.04%
5	Validation loss: 1.101210	Best loss: 1.101210	Accuracy: 87.41%
6	Validation loss: 1.374337	Best loss: 1.101210	Accuracy: 83.89%
7	Validation loss: 1.475405	Best loss: 1.101210	Accuracy: 82.92%
8	Validation loss: 470.632080	Best loss: 1.101210	Accuracy: 31.74%
9	Validation loss: 170.168671	Best loss: 1.101210	Accuracy: 57.66%
10	Validation loss: 278.015991	Best loss: 1.101210	Accuracy: 61.18%
11	Validation loss: 120.567238	Best loss: 1.101210	Accuracy: 70.17%
12	Validation loss: 161.904739	Best loss: 1.101210	Accuracy: 72.01%
13	Validation loss: 69.559654	Best loss: 1.101210	Accuracy: 79.24%
14	Validation loss: 91.122765	Best loss: 1.101210	Accuracy: 77.83%
15	Validation loss: 70.470764	Best loss: 1.101210	Accuracy: 85.18%
16	Val

0	Validation loss: 0.107352	Best loss: 0.107352	Accuracy: 96.72%
1	Validation loss: 0.067582	Best loss: 0.067582	Accuracy: 98.08%
2	Validation loss: 0.070102	Best loss: 0.067582	Accuracy: 97.89%
3	Validation loss: 0.048823	Best loss: 0.048823	Accuracy: 98.36%
4	Validation loss: 0.057367	Best loss: 0.048823	Accuracy: 98.20%
5	Validation loss: 0.053407	Best loss: 0.048823	Accuracy: 98.36%
6	Validation loss: 0.046155	Best loss: 0.046155	Accuracy: 98.75%
7	Validation loss: 0.054474	Best loss: 0.046155	Accuracy: 98.67%
8	Validation loss: 0.053990	Best loss: 0.046155	Accuracy: 98.44%
9	Validation loss: 0.049635	Best loss: 0.046155	Accuracy: 98.51%
10	Validation loss: 0.072630	Best loss: 0.046155	Accuracy: 98.28%
11	Validation loss: 0.051085	Best loss: 0.046155	Accuracy: 98.71%
12	Validation loss: 0.050371	Best loss: 0.046155	Accuracy: 98.83%
13	Validation loss: 0.044364	Best loss: 0.044364	Accuracy: 98.87%
14	Validation loss: 0.065085	Best loss: 0.044364	Accuracy: 98.71%
15	Validation loss: 

9	Validation loss: 0.061988	Best loss: 0.048897	Accuracy: 98.20%
10	Validation loss: 0.063151	Best loss: 0.048897	Accuracy: 98.59%
11	Validation loss: 0.071210	Best loss: 0.048897	Accuracy: 98.16%
12	Validation loss: 0.054979	Best loss: 0.048897	Accuracy: 98.71%
13	Validation loss: 0.054100	Best loss: 0.048897	Accuracy: 98.75%
14	Validation loss: 0.051875	Best loss: 0.048897	Accuracy: 98.75%
15	Validation loss: 0.056787	Best loss: 0.048897	Accuracy: 98.63%
16	Validation loss: 0.044779	Best loss: 0.044779	Accuracy: 98.63%
17	Validation loss: 0.049773	Best loss: 0.044779	Accuracy: 98.98%
18	Validation loss: 0.061082	Best loss: 0.044779	Accuracy: 98.67%
19	Validation loss: 0.051344	Best loss: 0.044779	Accuracy: 98.67%
20	Validation loss: 0.059503	Best loss: 0.044779	Accuracy: 98.67%
21	Validation loss: 0.044567	Best loss: 0.044567	Accuracy: 98.87%
22	Validation loss: 0.051911	Best loss: 0.044567	Accuracy: 98.91%
23	Validation loss: 0.083118	Best loss: 0.044567	Accuracy: 98.67%
24	Validati

41	Validation loss: 0.072957	Best loss: 0.038141	Accuracy: 98.91%
42	Validation loss: 0.073438	Best loss: 0.038141	Accuracy: 98.94%
Early stopping!
[CV]  n_neurons=90, batch_size=500, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001A7C66F6158>, learning_rate=0.01, total=  37.0s
[CV] n_neurons=140, batch_size=500, activation=<function elu at 0x000001A7BE2E2268>, learning_rate=0.01 
0	Validation loss: 0.140913	Best loss: 0.140913	Accuracy: 95.39%
1	Validation loss: 0.082842	Best loss: 0.082842	Accuracy: 97.62%
2	Validation loss: 0.086206	Best loss: 0.082842	Accuracy: 97.38%
3	Validation loss: 0.073511	Best loss: 0.073511	Accuracy: 97.54%
4	Validation loss: 0.062357	Best loss: 0.062357	Accuracy: 98.05%
5	Validation loss: 0.055234	Best loss: 0.055234	Accuracy: 98.36%
6	Validation loss: 0.058129	Best loss: 0.055234	Accuracy: 98.28%
7	Validation loss: 0.058522	Best loss: 0.055234	Accuracy: 98.36%
8	Validation loss: 0.072047	Best loss: 0.055234	Accuracy: 98.05%
9	V

2	Validation loss: 1.619831	Best loss: 1.619831	Accuracy: 19.27%
3	Validation loss: 1.616520	Best loss: 1.616520	Accuracy: 19.27%
4	Validation loss: 1.654958	Best loss: 1.616520	Accuracy: 22.01%
5	Validation loss: 1.621479	Best loss: 1.616520	Accuracy: 22.01%
6	Validation loss: 1.632693	Best loss: 1.616520	Accuracy: 19.27%
7	Validation loss: 1.614858	Best loss: 1.614858	Accuracy: 19.27%
8	Validation loss: 1.642606	Best loss: 1.614858	Accuracy: 19.27%
9	Validation loss: 1.639819	Best loss: 1.614858	Accuracy: 19.27%
10	Validation loss: 1.615375	Best loss: 1.614858	Accuracy: 22.01%
11	Validation loss: 1.630571	Best loss: 1.614858	Accuracy: 22.01%
12	Validation loss: 1.636737	Best loss: 1.614858	Accuracy: 19.08%
13	Validation loss: 1.613103	Best loss: 1.613103	Accuracy: 19.27%
14	Validation loss: 1.629263	Best loss: 1.613103	Accuracy: 19.08%
15	Validation loss: 1.617149	Best loss: 1.613103	Accuracy: 22.01%
16	Validation loss: 1.626923	Best loss: 1.613103	Accuracy: 18.73%
17	Validation loss

19	Validation loss: 1.619096	Best loss: 1.612441	Accuracy: 22.01%
20	Validation loss: 1.609111	Best loss: 1.609111	Accuracy: 22.01%
21	Validation loss: 1.689011	Best loss: 1.609111	Accuracy: 22.01%
22	Validation loss: 1.618893	Best loss: 1.609111	Accuracy: 18.73%
23	Validation loss: 1.638928	Best loss: 1.609111	Accuracy: 22.01%
24	Validation loss: 1.660613	Best loss: 1.609111	Accuracy: 20.91%
25	Validation loss: 1.619334	Best loss: 1.609111	Accuracy: 19.27%
26	Validation loss: 1.611323	Best loss: 1.609111	Accuracy: 22.01%
27	Validation loss: 1.614304	Best loss: 1.609111	Accuracy: 20.91%
28	Validation loss: 1.628118	Best loss: 1.609111	Accuracy: 19.08%
29	Validation loss: 1.610351	Best loss: 1.609111	Accuracy: 20.91%
30	Validation loss: 1.626932	Best loss: 1.609111	Accuracy: 20.91%
31	Validation loss: 1.633272	Best loss: 1.609111	Accuracy: 18.73%
32	Validation loss: 1.609066	Best loss: 1.609066	Accuracy: 22.01%
33	Validation loss: 1.610652	Best loss: 1.609066	Accuracy: 19.08%
34	Validat

8	Validation loss: 0.101037	Best loss: 0.073866	Accuracy: 98.24%
9	Validation loss: 0.080386	Best loss: 0.073866	Accuracy: 98.24%
10	Validation loss: 0.088669	Best loss: 0.073866	Accuracy: 97.81%
11	Validation loss: 0.082566	Best loss: 0.073866	Accuracy: 97.62%
12	Validation loss: 0.109005	Best loss: 0.073866	Accuracy: 97.34%
13	Validation loss: 0.084519	Best loss: 0.073866	Accuracy: 98.08%
14	Validation loss: 0.104962	Best loss: 0.073866	Accuracy: 97.58%
15	Validation loss: 0.084852	Best loss: 0.073866	Accuracy: 97.97%
16	Validation loss: 0.086790	Best loss: 0.073866	Accuracy: 97.97%
17	Validation loss: 0.084387	Best loss: 0.073866	Accuracy: 97.89%
18	Validation loss: 0.086400	Best loss: 0.073866	Accuracy: 98.12%
19	Validation loss: 0.092807	Best loss: 0.073866	Accuracy: 98.16%
20	Validation loss: 0.087880	Best loss: 0.073866	Accuracy: 98.01%
21	Validation loss: 0.097622	Best loss: 0.073866	Accuracy: 97.89%
22	Validation loss: 0.085171	Best loss: 0.073866	Accuracy: 98.08%
23	Validatio

18	Validation loss: 0.135112	Best loss: 0.064808	Accuracy: 97.34%
19	Validation loss: 0.116741	Best loss: 0.064808	Accuracy: 97.73%
20	Validation loss: 0.244508	Best loss: 0.064808	Accuracy: 98.24%
21	Validation loss: 0.157297	Best loss: 0.064808	Accuracy: 97.81%
22	Validation loss: 0.121554	Best loss: 0.064808	Accuracy: 98.16%
23	Validation loss: 0.154864	Best loss: 0.064808	Accuracy: 98.71%
24	Validation loss: 0.195426	Best loss: 0.064808	Accuracy: 97.46%
25	Validation loss: 0.214001	Best loss: 0.064808	Accuracy: 98.87%
26	Validation loss: 0.260616	Best loss: 0.064808	Accuracy: 98.20%
Early stopping!
[CV]  n_neurons=50, batch_size=50, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001A7C66F69D8>, learning_rate=0.01, total=  31.1s
[CV] n_neurons=50, batch_size=50, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001A7C66F69D8>, learning_rate=0.01 
0	Validation loss: 0.082086	Best loss: 0.082086	Accuracy: 97.89%
1	Validation loss: 0.088

10	Validation loss: 332.792786	Best loss: 1.779191	Accuracy: 94.80%
11	Validation loss: 38853.316406	Best loss: 1.779191	Accuracy: 97.11%
12	Validation loss: 184045.453125	Best loss: 1.779191	Accuracy: 96.83%
13	Validation loss: 152851.156250	Best loss: 1.779191	Accuracy: 96.25%
14	Validation loss: 4360.989258	Best loss: 1.779191	Accuracy: 97.46%
15	Validation loss: 5734.652344	Best loss: 1.779191	Accuracy: 96.79%
16	Validation loss: 14958.007812	Best loss: 1.779191	Accuracy: 81.86%
17	Validation loss: 992.584045	Best loss: 1.779191	Accuracy: 97.85%
18	Validation loss: 7236.906250	Best loss: 1.779191	Accuracy: 97.11%
19	Validation loss: 4666.975586	Best loss: 1.779191	Accuracy: 98.16%
20	Validation loss: 4732.598145	Best loss: 1.779191	Accuracy: 97.65%
21	Validation loss: 6202.767578	Best loss: 1.779191	Accuracy: 97.11%
Early stopping!
[CV]  n_neurons=120, batch_size=10, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001A7C66F69D8>, learning_rate=0.02, total= 

39	Validation loss: 0.158407	Best loss: 0.114138	Accuracy: 96.68%
40	Validation loss: 0.202634	Best loss: 0.114138	Accuracy: 95.78%
41	Validation loss: 0.126115	Best loss: 0.114138	Accuracy: 96.79%
42	Validation loss: 0.140091	Best loss: 0.114138	Accuracy: 96.95%
Early stopping!
[CV]  n_neurons=50, batch_size=500, activation=<function elu at 0x000001A7BE2E2268>, learning_rate=0.1, total=  24.6s
[CV] n_neurons=50, batch_size=500, activation=<function elu at 0x000001A7BE2E2268>, learning_rate=0.1 
0	Validation loss: 0.870433	Best loss: 0.870433	Accuracy: 68.57%
1	Validation loss: 0.394312	Best loss: 0.394312	Accuracy: 86.67%
2	Validation loss: 0.266387	Best loss: 0.266387	Accuracy: 92.34%
3	Validation loss: 0.203651	Best loss: 0.203651	Accuracy: 94.14%
4	Validation loss: 0.174143	Best loss: 0.174143	Accuracy: 95.07%
5	Validation loss: 0.159555	Best loss: 0.159555	Accuracy: 95.82%
6	Validation loss: 0.160493	Best loss: 0.159555	Accuracy: 95.90%
7	Validation loss: 0.148725	Best loss: 0.148

0	Validation loss: 1259.743408	Best loss: 1259.743408	Accuracy: 20.17%
1	Validation loss: 96.390083	Best loss: 96.390083	Accuracy: 57.00%
2	Validation loss: 17.642414	Best loss: 17.642414	Accuracy: 74.16%
3	Validation loss: 7.617156	Best loss: 7.617156	Accuracy: 80.14%
4	Validation loss: 13011.362305	Best loss: 7.617156	Accuracy: 42.57%
5	Validation loss: 3004.953369	Best loss: 7.617156	Accuracy: 68.14%
6	Validation loss: 2943.410889	Best loss: 7.617156	Accuracy: 65.68%
7	Validation loss: 3154.546387	Best loss: 7.617156	Accuracy: 68.80%
8	Validation loss: 3968.066162	Best loss: 7.617156	Accuracy: 62.04%
9	Validation loss: 1418.524780	Best loss: 7.617156	Accuracy: 72.83%
10	Validation loss: 10261.757812	Best loss: 7.617156	Accuracy: 56.37%
11	Validation loss: 1404.711182	Best loss: 7.617156	Accuracy: 81.39%
12	Validation loss: 2874.049316	Best loss: 7.617156	Accuracy: 73.34%
13	Validation loss: 1350.321899	Best loss: 7.617156	Accuracy: 69.98%
14	Validation loss: 532769.875000	Best loss:

3	Validation loss: 0.285138	Best loss: 0.239165	Accuracy: 94.76%
4	Validation loss: 0.176472	Best loss: 0.176472	Accuracy: 96.01%
5	Validation loss: 0.324212	Best loss: 0.176472	Accuracy: 95.58%
6	Validation loss: 0.184572	Best loss: 0.176472	Accuracy: 96.68%
7	Validation loss: 0.285719	Best loss: 0.176472	Accuracy: 95.58%
8	Validation loss: 0.241373	Best loss: 0.176472	Accuracy: 95.50%
9	Validation loss: 0.205716	Best loss: 0.176472	Accuracy: 97.07%
10	Validation loss: 0.172299	Best loss: 0.172299	Accuracy: 97.54%
11	Validation loss: 0.194180	Best loss: 0.172299	Accuracy: 97.22%
12	Validation loss: 0.242742	Best loss: 0.172299	Accuracy: 95.43%
13	Validation loss: 0.461739	Best loss: 0.172299	Accuracy: 97.30%
14	Validation loss: 0.228085	Best loss: 0.172299	Accuracy: 96.72%
15	Validation loss: 0.163901	Best loss: 0.163901	Accuracy: 97.42%
16	Validation loss: 0.182091	Best loss: 0.163901	Accuracy: 97.65%
17	Validation loss: 0.221363	Best loss: 0.163901	Accuracy: 97.58%
18	Validation los

8	Validation loss: 1.700592	Best loss: 1.700592	Accuracy: 19.27%
9	Validation loss: 2.334473	Best loss: 1.700592	Accuracy: 22.01%
10	Validation loss: 1.853945	Best loss: 1.700592	Accuracy: 18.73%
11	Validation loss: 1.842176	Best loss: 1.700592	Accuracy: 19.27%
12	Validation loss: 2.091004	Best loss: 1.700592	Accuracy: 20.91%
13	Validation loss: 3.280352	Best loss: 1.700592	Accuracy: 19.08%
14	Validation loss: 2.147494	Best loss: 1.700592	Accuracy: 19.08%
15	Validation loss: 1.816312	Best loss: 1.700592	Accuracy: 19.27%
16	Validation loss: 1.802170	Best loss: 1.700592	Accuracy: 18.73%
17	Validation loss: 2.435369	Best loss: 1.700592	Accuracy: 20.91%
18	Validation loss: 1.970473	Best loss: 1.700592	Accuracy: 19.27%
19	Validation loss: 2.368722	Best loss: 1.700592	Accuracy: 22.01%
20	Validation loss: 2.211048	Best loss: 1.700592	Accuracy: 19.27%
21	Validation loss: 2.549920	Best loss: 1.700592	Accuracy: 22.01%
22	Validation loss: 1.927104	Best loss: 1.700592	Accuracy: 19.27%
23	Validatio

2	Validation loss: 0.092433	Best loss: 0.092433	Accuracy: 97.42%
3	Validation loss: 0.067391	Best loss: 0.067391	Accuracy: 97.97%
4	Validation loss: 0.058411	Best loss: 0.058411	Accuracy: 98.16%
5	Validation loss: 0.059778	Best loss: 0.058411	Accuracy: 98.28%
6	Validation loss: 0.055561	Best loss: 0.055561	Accuracy: 98.44%
7	Validation loss: 0.047203	Best loss: 0.047203	Accuracy: 98.44%
8	Validation loss: 0.060032	Best loss: 0.047203	Accuracy: 98.08%
9	Validation loss: 0.046201	Best loss: 0.046201	Accuracy: 98.67%
10	Validation loss: 0.055269	Best loss: 0.046201	Accuracy: 98.51%
11	Validation loss: 0.051800	Best loss: 0.046201	Accuracy: 98.91%
12	Validation loss: 0.054688	Best loss: 0.046201	Accuracy: 98.91%
13	Validation loss: 0.065163	Best loss: 0.046201	Accuracy: 98.32%
14	Validation loss: 0.066003	Best loss: 0.046201	Accuracy: 98.83%
15	Validation loss: 0.056981	Best loss: 0.046201	Accuracy: 98.67%
16	Validation loss: 0.054378	Best loss: 0.046201	Accuracy: 98.63%
17	Validation loss

12	Validation loss: 1.526649	Best loss: 0.110374	Accuracy: 90.85%
13	Validation loss: 1.954679	Best loss: 0.110374	Accuracy: 90.54%
14	Validation loss: 2.019754	Best loss: 0.110374	Accuracy: 90.38%
15	Validation loss: 2.482432	Best loss: 0.110374	Accuracy: 91.83%
16	Validation loss: 2.355162	Best loss: 0.110374	Accuracy: 93.16%
17	Validation loss: 1.972294	Best loss: 0.110374	Accuracy: 92.77%
18	Validation loss: 2.100486	Best loss: 0.110374	Accuracy: 93.20%
19	Validation loss: 2.607072	Best loss: 0.110374	Accuracy: 87.53%
20	Validation loss: 1.775062	Best loss: 0.110374	Accuracy: 93.51%
21	Validation loss: 1.657023	Best loss: 0.110374	Accuracy: 93.20%
Early stopping!
[CV]  n_neurons=50, batch_size=100, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001A7C66F69D8>, learning_rate=0.05, total=  18.5s
[CV] n_neurons=50, batch_size=100, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001A7C66F69D8>, learning_rate=0.05 
0	Validation loss: 0.

23	Validation loss: 0.762173	Best loss: 0.155837	Accuracy: 58.80%
24	Validation loss: 0.699977	Best loss: 0.155837	Accuracy: 57.54%
25	Validation loss: 0.428770	Best loss: 0.155837	Accuracy: 78.93%
Early stopping!
[CV]  n_neurons=10, batch_size=10, activation=<function elu at 0x000001A7BE2E2268>, learning_rate=0.02, total=  47.3s
[CV] n_neurons=10, batch_size=10, activation=<function elu at 0x000001A7BE2E2268>, learning_rate=0.02 
0	Validation loss: 0.229509	Best loss: 0.229509	Accuracy: 93.90%
1	Validation loss: 0.168935	Best loss: 0.168935	Accuracy: 95.43%
2	Validation loss: 0.198185	Best loss: 0.168935	Accuracy: 94.53%
3	Validation loss: 0.155462	Best loss: 0.155462	Accuracy: 96.79%
4	Validation loss: 0.162245	Best loss: 0.155462	Accuracy: 96.09%
5	Validation loss: 0.156478	Best loss: 0.155462	Accuracy: 96.56%
6	Validation loss: 0.175575	Best loss: 0.155462	Accuracy: 96.44%
7	Validation loss: 0.136318	Best loss: 0.136318	Accuracy: 97.19%
8	Validation loss: 0.233459	Best loss: 0.1363

20	Validation loss: 0.102346	Best loss: 0.090823	Accuracy: 97.97%
21	Validation loss: 0.115663	Best loss: 0.090823	Accuracy: 97.97%
22	Validation loss: 0.134390	Best loss: 0.090823	Accuracy: 97.62%
23	Validation loss: 0.120632	Best loss: 0.090823	Accuracy: 97.62%
24	Validation loss: 0.095852	Best loss: 0.090823	Accuracy: 98.16%
25	Validation loss: 0.125111	Best loss: 0.090823	Accuracy: 98.01%
26	Validation loss: 0.120941	Best loss: 0.090823	Accuracy: 98.12%
27	Validation loss: 0.175437	Best loss: 0.090823	Accuracy: 97.22%
28	Validation loss: 0.127602	Best loss: 0.090823	Accuracy: 98.05%
29	Validation loss: 0.150040	Best loss: 0.090823	Accuracy: 97.89%
30	Validation loss: 0.149179	Best loss: 0.090823	Accuracy: 97.97%
31	Validation loss: 0.157277	Best loss: 0.090823	Accuracy: 97.85%
32	Validation loss: 0.155046	Best loss: 0.090823	Accuracy: 97.65%
33	Validation loss: 0.223248	Best loss: 0.090823	Accuracy: 97.03%
34	Validation loss: 0.165429	Best loss: 0.090823	Accuracy: 97.73%
35	Validat

26	Validation loss: 0.349922	Best loss: 0.348468	Accuracy: 88.90%
27	Validation loss: 0.365997	Best loss: 0.348468	Accuracy: 88.74%
28	Validation loss: 0.342024	Best loss: 0.342024	Accuracy: 89.52%
29	Validation loss: 0.347647	Best loss: 0.342024	Accuracy: 90.03%
30	Validation loss: 0.393049	Best loss: 0.342024	Accuracy: 88.27%
31	Validation loss: 0.363485	Best loss: 0.342024	Accuracy: 90.19%
32	Validation loss: 0.346115	Best loss: 0.342024	Accuracy: 90.23%
33	Validation loss: 0.417754	Best loss: 0.342024	Accuracy: 85.81%
34	Validation loss: 0.345079	Best loss: 0.342024	Accuracy: 89.60%
35	Validation loss: 0.347902	Best loss: 0.342024	Accuracy: 89.48%
36	Validation loss: 0.401176	Best loss: 0.342024	Accuracy: 88.58%
37	Validation loss: 0.377315	Best loss: 0.342024	Accuracy: 88.39%
38	Validation loss: 0.590922	Best loss: 0.342024	Accuracy: 85.30%
39	Validation loss: 0.466982	Best loss: 0.342024	Accuracy: 87.37%
40	Validation loss: 0.459692	Best loss: 0.342024	Accuracy: 83.50%
41	Validat

29	Validation loss: 0.108948	Best loss: 0.094675	Accuracy: 97.34%
30	Validation loss: 0.112125	Best loss: 0.094675	Accuracy: 96.87%
31	Validation loss: 0.126527	Best loss: 0.094675	Accuracy: 97.22%
32	Validation loss: 0.106123	Best loss: 0.094675	Accuracy: 97.30%
33	Validation loss: 0.118133	Best loss: 0.094675	Accuracy: 97.03%
34	Validation loss: 0.124480	Best loss: 0.094675	Accuracy: 97.19%
35	Validation loss: 0.127079	Best loss: 0.094675	Accuracy: 97.26%
36	Validation loss: 0.106997	Best loss: 0.094675	Accuracy: 97.46%
37	Validation loss: 0.120236	Best loss: 0.094675	Accuracy: 97.15%
38	Validation loss: 0.104174	Best loss: 0.094675	Accuracy: 97.22%
Early stopping!
[CV]  n_neurons=10, batch_size=500, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001A7C66F6158>, learning_rate=0.02, total=  12.8s
[CV] n_neurons=10, batch_size=500, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001A7C66F6158>, learning_rate=0.02 
0	Validation loss: 0.

2	Validation loss: 0.090065	Best loss: 0.083387	Accuracy: 97.50%
3	Validation loss: 0.068677	Best loss: 0.068677	Accuracy: 98.24%
4	Validation loss: 0.083533	Best loss: 0.068677	Accuracy: 97.77%
5	Validation loss: 0.087899	Best loss: 0.068677	Accuracy: 98.05%
6	Validation loss: 0.076249	Best loss: 0.068677	Accuracy: 98.12%
7	Validation loss: 0.105916	Best loss: 0.068677	Accuracy: 97.77%
8	Validation loss: 0.208663	Best loss: 0.068677	Accuracy: 97.11%
9	Validation loss: 1.655652	Best loss: 0.068677	Accuracy: 19.08%
10	Validation loss: 1.609612	Best loss: 0.068677	Accuracy: 22.01%
11	Validation loss: 1.626743	Best loss: 0.068677	Accuracy: 19.27%
12	Validation loss: 1.664769	Best loss: 0.068677	Accuracy: 20.91%
13	Validation loss: 1.650111	Best loss: 0.068677	Accuracy: 19.08%
14	Validation loss: 1.631884	Best loss: 0.068677	Accuracy: 19.08%
15	Validation loss: 1.645576	Best loss: 0.068677	Accuracy: 22.01%
16	Validation loss: 1.685346	Best loss: 0.068677	Accuracy: 18.73%
17	Validation loss

8	Validation loss: 2.946320	Best loss: 2.076442	Accuracy: 19.08%
9	Validation loss: 2.138835	Best loss: 2.076442	Accuracy: 19.27%
10	Validation loss: 2.189744	Best loss: 2.076442	Accuracy: 19.27%
11	Validation loss: 3.824192	Best loss: 2.076442	Accuracy: 19.08%
12	Validation loss: 2.287050	Best loss: 2.076442	Accuracy: 22.01%
13	Validation loss: 2.921958	Best loss: 2.076442	Accuracy: 19.27%
14	Validation loss: 3.607187	Best loss: 2.076442	Accuracy: 18.73%
15	Validation loss: 1.990910	Best loss: 1.990910	Accuracy: 19.08%
16	Validation loss: 2.753008	Best loss: 1.990910	Accuracy: 22.01%
17	Validation loss: 3.354759	Best loss: 1.990910	Accuracy: 19.08%
18	Validation loss: 3.922930	Best loss: 1.990910	Accuracy: 22.01%
19	Validation loss: 3.308564	Best loss: 1.990910	Accuracy: 22.01%
20	Validation loss: 3.115769	Best loss: 1.990910	Accuracy: 20.91%
21	Validation loss: 2.433603	Best loss: 1.990910	Accuracy: 20.91%
22	Validation loss: 2.151388	Best loss: 1.990910	Accuracy: 22.01%
23	Validatio

15	Validation loss: 2.251834	Best loss: 1.776087	Accuracy: 19.27%
16	Validation loss: 2.265980	Best loss: 1.776087	Accuracy: 20.91%
17	Validation loss: 2.629085	Best loss: 1.776087	Accuracy: 20.91%
18	Validation loss: 2.920799	Best loss: 1.776087	Accuracy: 19.27%
19	Validation loss: 2.067248	Best loss: 1.776087	Accuracy: 22.01%
20	Validation loss: 2.164310	Best loss: 1.776087	Accuracy: 20.91%
21	Validation loss: 3.457067	Best loss: 1.776087	Accuracy: 22.01%
22	Validation loss: 2.324863	Best loss: 1.776087	Accuracy: 22.01%
23	Validation loss: 3.668049	Best loss: 1.776087	Accuracy: 19.08%
24	Validation loss: 2.673012	Best loss: 1.776087	Accuracy: 19.08%
25	Validation loss: 2.491214	Best loss: 1.776087	Accuracy: 22.01%
26	Validation loss: 2.664450	Best loss: 1.776087	Accuracy: 19.27%
27	Validation loss: 1.825970	Best loss: 1.776087	Accuracy: 20.91%
28	Validation loss: 2.968775	Best loss: 1.776087	Accuracy: 19.27%
29	Validation loss: 2.958214	Best loss: 1.776087	Accuracy: 20.91%
Early stop

11	Validation loss: 2.352865	Best loss: 0.337535	Accuracy: 77.37%
12	Validation loss: 1.502918	Best loss: 0.337535	Accuracy: 83.82%
13	Validation loss: 1.355175	Best loss: 0.337535	Accuracy: 81.94%
14	Validation loss: 2.377819	Best loss: 0.337535	Accuracy: 77.21%
15	Validation loss: 0.920967	Best loss: 0.337535	Accuracy: 85.73%
16	Validation loss: 0.851882	Best loss: 0.337535	Accuracy: 85.14%
17	Validation loss: 1.043197	Best loss: 0.337535	Accuracy: 81.35%
18	Validation loss: 0.878488	Best loss: 0.337535	Accuracy: 83.42%
19	Validation loss: 1.551096	Best loss: 0.337535	Accuracy: 76.97%
20	Validation loss: 0.811408	Best loss: 0.337535	Accuracy: 84.83%
21	Validation loss: 0.648286	Best loss: 0.337535	Accuracy: 86.59%
22	Validation loss: 0.615781	Best loss: 0.337535	Accuracy: 85.81%
23	Validation loss: 2.176587	Best loss: 0.337535	Accuracy: 60.75%
Early stopping!
[CV]  n_neurons=50, batch_size=500, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001A7C66F6158>, l

30	Validation loss: 0.107060	Best loss: 0.058871	Accuracy: 98.55%
31	Validation loss: 0.079433	Best loss: 0.058871	Accuracy: 98.67%
32	Validation loss: 0.080235	Best loss: 0.058871	Accuracy: 98.71%
Early stopping!
[CV]  n_neurons=140, batch_size=500, activation=<function elu at 0x000001A7BE2E2268>, learning_rate=0.02, total=  42.8s
[CV] n_neurons=140, batch_size=500, activation=<function elu at 0x000001A7BE2E2268>, learning_rate=0.02 
0	Validation loss: 0.197095	Best loss: 0.197095	Accuracy: 94.18%
1	Validation loss: 0.106369	Best loss: 0.106369	Accuracy: 96.40%
2	Validation loss: 0.091567	Best loss: 0.091567	Accuracy: 97.07%
3	Validation loss: 0.074419	Best loss: 0.074419	Accuracy: 97.81%
4	Validation loss: 0.068379	Best loss: 0.068379	Accuracy: 97.93%
5	Validation loss: 0.061854	Best loss: 0.061854	Accuracy: 98.16%
6	Validation loss: 0.055008	Best loss: 0.055008	Accuracy: 98.20%
7	Validation loss: 0.054343	Best loss: 0.054343	Accuracy: 98.40%
8	Validation loss: 0.081942	Best loss: 0.

16	Validation loss: 0.099642	Best loss: 0.066748	Accuracy: 97.81%
17	Validation loss: 0.086891	Best loss: 0.066748	Accuracy: 98.05%
18	Validation loss: 0.121513	Best loss: 0.066748	Accuracy: 97.42%
19	Validation loss: 0.085458	Best loss: 0.066748	Accuracy: 98.05%
20	Validation loss: 0.096431	Best loss: 0.066748	Accuracy: 98.16%
21	Validation loss: 0.134715	Best loss: 0.066748	Accuracy: 97.15%
22	Validation loss: 0.096339	Best loss: 0.066748	Accuracy: 97.85%
23	Validation loss: 0.081181	Best loss: 0.066748	Accuracy: 98.28%
24	Validation loss: 0.084465	Best loss: 0.066748	Accuracy: 98.16%
25	Validation loss: 0.073502	Best loss: 0.066748	Accuracy: 98.36%
26	Validation loss: 0.095363	Best loss: 0.066748	Accuracy: 98.16%
27	Validation loss: 0.090096	Best loss: 0.066748	Accuracy: 98.55%
28	Validation loss: 0.119113	Best loss: 0.066748	Accuracy: 97.89%
29	Validation loss: 0.106815	Best loss: 0.066748	Accuracy: 98.51%
30	Validation loss: 0.124527	Best loss: 0.066748	Accuracy: 98.05%
31	Validat

21	Validation loss: 0.398110	Best loss: 0.065525	Accuracy: 97.58%
22	Validation loss: 0.762818	Best loss: 0.065525	Accuracy: 95.27%
23	Validation loss: 0.185959	Best loss: 0.065525	Accuracy: 97.42%
Early stopping!
[CV]  n_neurons=70, batch_size=100, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001A7C66F69D8>, learning_rate=0.02, total=  25.0s
[CV] n_neurons=70, batch_size=100, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001A7C66F69D8>, learning_rate=0.02 
0	Validation loss: 0.111403	Best loss: 0.111403	Accuracy: 96.99%
1	Validation loss: 0.116951	Best loss: 0.111403	Accuracy: 96.91%
2	Validation loss: 0.108808	Best loss: 0.108808	Accuracy: 97.15%
3	Validation loss: 0.086976	Best loss: 0.086976	Accuracy: 97.93%
4	Validation loss: 1.611509	Best loss: 0.086976	Accuracy: 91.48%
5	Validation loss: 0.698746	Best loss: 0.086976	Accuracy: 93.82%
6	Validation loss: 0.458561	Best loss: 0.086976	Accuracy: 94.76%
7	Validation loss: 0.327412	

6	Validation loss: 201.397980	Best loss: 22.839771	Accuracy: 93.32%
7	Validation loss: 28.380657	Best loss: 22.839771	Accuracy: 95.97%
8	Validation loss: 25.409012	Best loss: 22.839771	Accuracy: 96.64%
9	Validation loss: 28.664434	Best loss: 22.839771	Accuracy: 95.11%
10	Validation loss: 63.238846	Best loss: 22.839771	Accuracy: 92.73%
11	Validation loss: 27.404522	Best loss: 22.839771	Accuracy: 96.17%
12	Validation loss: 23.256517	Best loss: 22.839771	Accuracy: 96.01%
13	Validation loss: 9.556460	Best loss: 9.556460	Accuracy: 97.46%
14	Validation loss: 8.471555	Best loss: 8.471555	Accuracy: 97.81%
15	Validation loss: 9.082010	Best loss: 8.471555	Accuracy: 97.97%
16	Validation loss: 15.228566	Best loss: 8.471555	Accuracy: 97.03%
17	Validation loss: 6.536887	Best loss: 6.536887	Accuracy: 97.77%
18	Validation loss: 9.936001	Best loss: 6.536887	Accuracy: 97.11%
19	Validation loss: 1179332.875000	Best loss: 6.536887	Accuracy: 34.87%
20	Validation loss: 1275983.000000	Best loss: 6.536887	Acc

8	Validation loss: 0.164080	Best loss: 0.057505	Accuracy: 96.91%
9	Validation loss: 0.121735	Best loss: 0.057505	Accuracy: 97.50%
10	Validation loss: 0.121198	Best loss: 0.057505	Accuracy: 97.77%
11	Validation loss: 0.097516	Best loss: 0.057505	Accuracy: 97.77%
12	Validation loss: 0.120414	Best loss: 0.057505	Accuracy: 97.81%
13	Validation loss: 0.092679	Best loss: 0.057505	Accuracy: 97.85%
14	Validation loss: 0.098198	Best loss: 0.057505	Accuracy: 97.85%
15	Validation loss: 0.071263	Best loss: 0.057505	Accuracy: 98.16%
16	Validation loss: 0.106703	Best loss: 0.057505	Accuracy: 97.69%
17	Validation loss: 0.163186	Best loss: 0.057505	Accuracy: 97.07%
18	Validation loss: 1.661205	Best loss: 0.057505	Accuracy: 23.69%
19	Validation loss: 1.633599	Best loss: 0.057505	Accuracy: 20.91%
20	Validation loss: 1.645523	Best loss: 0.057505	Accuracy: 20.91%
21	Validation loss: 1.638539	Best loss: 0.057505	Accuracy: 19.27%
22	Validation loss: 1.628914	Best loss: 0.057505	Accuracy: 19.27%
23	Validatio

11	Validation loss: 0.111674	Best loss: 0.111674	Accuracy: 96.52%
12	Validation loss: 0.120103	Best loss: 0.111674	Accuracy: 96.44%
13	Validation loss: 0.116540	Best loss: 0.111674	Accuracy: 96.68%
14	Validation loss: 0.131577	Best loss: 0.111674	Accuracy: 96.05%
15	Validation loss: 0.114561	Best loss: 0.111674	Accuracy: 96.44%
16	Validation loss: 0.122875	Best loss: 0.111674	Accuracy: 96.68%
17	Validation loss: 0.126301	Best loss: 0.111674	Accuracy: 95.90%
18	Validation loss: 0.121655	Best loss: 0.111674	Accuracy: 96.40%
19	Validation loss: 0.126490	Best loss: 0.111674	Accuracy: 96.68%
20	Validation loss: 0.123501	Best loss: 0.111674	Accuracy: 96.44%
21	Validation loss: 0.142561	Best loss: 0.111674	Accuracy: 96.25%
22	Validation loss: 0.121936	Best loss: 0.111674	Accuracy: 96.56%
23	Validation loss: 0.124475	Best loss: 0.111674	Accuracy: 96.48%
24	Validation loss: 0.136242	Best loss: 0.111674	Accuracy: 96.25%
25	Validation loss: 0.113483	Best loss: 0.111674	Accuracy: 96.52%
26	Validat

3	Validation loss: 0.204894	Best loss: 0.183075	Accuracy: 94.92%
4	Validation loss: 0.638365	Best loss: 0.183075	Accuracy: 78.66%
5	Validation loss: 0.463306	Best loss: 0.183075	Accuracy: 77.44%
6	Validation loss: 0.739119	Best loss: 0.183075	Accuracy: 67.47%
7	Validation loss: 0.836412	Best loss: 0.183075	Accuracy: 60.24%
8	Validation loss: 0.971388	Best loss: 0.183075	Accuracy: 52.42%
9	Validation loss: 0.706860	Best loss: 0.183075	Accuracy: 60.79%
10	Validation loss: 0.815490	Best loss: 0.183075	Accuracy: 59.23%
11	Validation loss: 0.792384	Best loss: 0.183075	Accuracy: 59.93%
12	Validation loss: 0.931413	Best loss: 0.183075	Accuracy: 53.71%
13	Validation loss: 1.600673	Best loss: 0.183075	Accuracy: 20.41%
14	Validation loss: 1.312802	Best loss: 0.183075	Accuracy: 36.98%
15	Validation loss: 1.314196	Best loss: 0.183075	Accuracy: 36.94%
16	Validation loss: 1.159288	Best loss: 0.183075	Accuracy: 42.34%
17	Validation loss: 1.207461	Best loss: 0.183075	Accuracy: 37.92%
18	Validation los

20	Validation loss: 0.173646	Best loss: 0.048215	Accuracy: 97.77%
21	Validation loss: 0.103537	Best loss: 0.048215	Accuracy: 97.69%
22	Validation loss: 0.120076	Best loss: 0.048215	Accuracy: 98.55%
23	Validation loss: 0.466013	Best loss: 0.048215	Accuracy: 98.55%
24	Validation loss: 0.139663	Best loss: 0.048215	Accuracy: 97.15%
25	Validation loss: 0.212927	Best loss: 0.048215	Accuracy: 97.93%
26	Validation loss: 0.159866	Best loss: 0.048215	Accuracy: 98.20%
Early stopping!
[CV]  n_neurons=120, batch_size=100, activation=<function relu at 0x000001A7BE30C048>, learning_rate=0.01, total=  34.8s
[CV] n_neurons=120, batch_size=10, activation=<function relu at 0x000001A7BE30C048>, learning_rate=0.01 
0	Validation loss: 0.177730	Best loss: 0.177730	Accuracy: 94.53%
1	Validation loss: 0.155086	Best loss: 0.155086	Accuracy: 95.58%
2	Validation loss: 0.289644	Best loss: 0.155086	Accuracy: 90.62%
3	Validation loss: 0.230656	Best loss: 0.155086	Accuracy: 95.00%
4	Validation loss: 0.167033	Best los

23	Validation loss: 0.251154	Best loss: 0.251154	Accuracy: 96.79%
24	Validation loss: 0.357798	Best loss: 0.251154	Accuracy: 95.97%
25	Validation loss: 0.296905	Best loss: 0.251154	Accuracy: 96.79%
26	Validation loss: 0.343756	Best loss: 0.251154	Accuracy: 95.70%
27	Validation loss: 0.273392	Best loss: 0.251154	Accuracy: 95.54%
28	Validation loss: 0.251059	Best loss: 0.251059	Accuracy: 96.95%
29	Validation loss: 0.499325	Best loss: 0.251059	Accuracy: 95.90%
30	Validation loss: 0.384826	Best loss: 0.251059	Accuracy: 95.27%
31	Validation loss: 0.303310	Best loss: 0.251059	Accuracy: 96.99%
32	Validation loss: 0.288117	Best loss: 0.251059	Accuracy: 96.64%
33	Validation loss: 0.308343	Best loss: 0.251059	Accuracy: 97.03%
34	Validation loss: 0.370919	Best loss: 0.251059	Accuracy: 96.21%
35	Validation loss: 0.296865	Best loss: 0.251059	Accuracy: 96.56%
36	Validation loss: 0.290316	Best loss: 0.251059	Accuracy: 96.79%
37	Validation loss: 0.357526	Best loss: 0.251059	Accuracy: 95.86%
38	Validat

42	Validation loss: 0.925552	Best loss: 0.728538	Accuracy: 96.25%
43	Validation loss: 0.890564	Best loss: 0.728538	Accuracy: 96.76%
44	Validation loss: 0.853202	Best loss: 0.728538	Accuracy: 96.01%
45	Validation loss: 0.928082	Best loss: 0.728538	Accuracy: 96.68%
46	Validation loss: 0.839204	Best loss: 0.728538	Accuracy: 96.48%
47	Validation loss: 1.013823	Best loss: 0.728538	Accuracy: 95.23%
48	Validation loss: 0.787675	Best loss: 0.728538	Accuracy: 96.64%
49	Validation loss: 0.802847	Best loss: 0.728538	Accuracy: 95.93%
50	Validation loss: 0.824373	Best loss: 0.728538	Accuracy: 97.03%
51	Validation loss: 1.106146	Best loss: 0.728538	Accuracy: 96.25%
52	Validation loss: 0.677037	Best loss: 0.677037	Accuracy: 97.15%
53	Validation loss: 0.854502	Best loss: 0.677037	Accuracy: 96.68%
54	Validation loss: 1.007826	Best loss: 0.677037	Accuracy: 96.99%
55	Validation loss: 0.911254	Best loss: 0.677037	Accuracy: 96.91%
56	Validation loss: 0.823436	Best loss: 0.677037	Accuracy: 97.26%
57	Validat

9	Validation loss: 0.050394	Best loss: 0.050394	Accuracy: 98.83%
10	Validation loss: 0.071967	Best loss: 0.050394	Accuracy: 98.44%
11	Validation loss: 0.071422	Best loss: 0.050394	Accuracy: 98.79%
12	Validation loss: 0.057855	Best loss: 0.050394	Accuracy: 98.59%
13	Validation loss: 0.049854	Best loss: 0.049854	Accuracy: 98.83%
14	Validation loss: 0.058492	Best loss: 0.049854	Accuracy: 98.87%
15	Validation loss: 0.061254	Best loss: 0.049854	Accuracy: 98.59%
16	Validation loss: 0.065606	Best loss: 0.049854	Accuracy: 98.63%
17	Validation loss: 0.062952	Best loss: 0.049854	Accuracy: 98.91%
18	Validation loss: 0.066961	Best loss: 0.049854	Accuracy: 98.71%
19	Validation loss: 0.069544	Best loss: 0.049854	Accuracy: 98.12%
20	Validation loss: 0.097761	Best loss: 0.049854	Accuracy: 98.48%
21	Validation loss: 0.071329	Best loss: 0.049854	Accuracy: 98.67%
22	Validation loss: 0.077133	Best loss: 0.049854	Accuracy: 98.59%
23	Validation loss: 0.089784	Best loss: 0.049854	Accuracy: 98.63%
24	Validati

5	Validation loss: 1.676018	Best loss: 1.315558	Accuracy: 22.01%
6	Validation loss: 1.621794	Best loss: 1.315558	Accuracy: 19.27%
7	Validation loss: 1.678816	Best loss: 1.315558	Accuracy: 18.73%
8	Validation loss: 1.846662	Best loss: 1.315558	Accuracy: 20.91%
9	Validation loss: 1.905608	Best loss: 1.315558	Accuracy: 22.01%
10	Validation loss: 1.684334	Best loss: 1.315558	Accuracy: 22.01%
11	Validation loss: 1.647433	Best loss: 1.315558	Accuracy: 20.91%
12	Validation loss: 1.853975	Best loss: 1.315558	Accuracy: 20.91%
13	Validation loss: 1.669575	Best loss: 1.315558	Accuracy: 22.01%
14	Validation loss: 1.703588	Best loss: 1.315558	Accuracy: 19.27%
15	Validation loss: 1.998330	Best loss: 1.315558	Accuracy: 19.08%
16	Validation loss: 1.656304	Best loss: 1.315558	Accuracy: 20.91%
17	Validation loss: 1.947683	Best loss: 1.315558	Accuracy: 20.91%
18	Validation loss: 2.143590	Best loss: 1.315558	Accuracy: 20.91%
19	Validation loss: 1.900221	Best loss: 1.315558	Accuracy: 20.91%
20	Validation l

2	Validation loss: 874.859375	Best loss: 0.144977	Accuracy: 20.84%
3	Validation loss: 36.771667	Best loss: 0.144977	Accuracy: 36.55%
4	Validation loss: 5.345474	Best loss: 0.144977	Accuracy: 69.86%
5	Validation loss: 35.652065	Best loss: 0.144977	Accuracy: 69.43%
6	Validation loss: 25.374514	Best loss: 0.144977	Accuracy: 73.30%
7	Validation loss: 34.498878	Best loss: 0.144977	Accuracy: 65.87%
8	Validation loss: 114.895729	Best loss: 0.144977	Accuracy: 75.80%
9	Validation loss: 13.905932	Best loss: 0.144977	Accuracy: 79.87%
10	Validation loss: 19.104145	Best loss: 0.144977	Accuracy: 81.16%
11	Validation loss: 17.086853	Best loss: 0.144977	Accuracy: 78.58%
12	Validation loss: 8.967341	Best loss: 0.144977	Accuracy: 88.74%
13	Validation loss: 5.616719	Best loss: 0.144977	Accuracy: 89.60%
14	Validation loss: 9.283744	Best loss: 0.144977	Accuracy: 78.38%
15	Validation loss: 7.535301	Best loss: 0.144977	Accuracy: 85.69%
16	Validation loss: 10.759251	Best loss: 0.144977	Accuracy: 87.69%
17	Val

12	Validation loss: 1.912340	Best loss: 1.666488	Accuracy: 18.73%
13	Validation loss: 2.118673	Best loss: 1.666488	Accuracy: 19.08%
14	Validation loss: 2.516825	Best loss: 1.666488	Accuracy: 20.91%
15	Validation loss: 1.914148	Best loss: 1.666488	Accuracy: 22.01%
16	Validation loss: 2.225626	Best loss: 1.666488	Accuracy: 18.73%
17	Validation loss: 1.658228	Best loss: 1.658228	Accuracy: 20.91%
18	Validation loss: 2.221774	Best loss: 1.658228	Accuracy: 19.08%
19	Validation loss: 2.665047	Best loss: 1.658228	Accuracy: 22.01%
20	Validation loss: 2.008376	Best loss: 1.658228	Accuracy: 22.01%
21	Validation loss: 2.444493	Best loss: 1.658228	Accuracy: 20.91%
22	Validation loss: 2.119733	Best loss: 1.658228	Accuracy: 22.01%
23	Validation loss: 2.245312	Best loss: 1.658228	Accuracy: 19.27%
24	Validation loss: 1.920231	Best loss: 1.658228	Accuracy: 20.91%
25	Validation loss: 1.993325	Best loss: 1.658228	Accuracy: 20.91%
26	Validation loss: 1.718368	Best loss: 1.658228	Accuracy: 19.08%
27	Validat

38	Validation loss: 0.062871	Best loss: 0.045197	Accuracy: 98.79%
39	Validation loss: 0.059217	Best loss: 0.045197	Accuracy: 99.02%
40	Validation loss: 0.054708	Best loss: 0.045197	Accuracy: 99.06%
Early stopping!
[CV]  n_neurons=50, batch_size=500, activation=<function relu at 0x000001A7BE30C048>, learning_rate=0.01, total=  22.5s
[CV] n_neurons=50, batch_size=500, activation=<function relu at 0x000001A7BE30C048>, learning_rate=0.01 
0	Validation loss: 0.091367	Best loss: 0.091367	Accuracy: 97.34%
1	Validation loss: 0.061174	Best loss: 0.061174	Accuracy: 98.32%
2	Validation loss: 0.056769	Best loss: 0.056769	Accuracy: 98.32%
3	Validation loss: 0.060861	Best loss: 0.056769	Accuracy: 98.01%
4	Validation loss: 0.055006	Best loss: 0.055006	Accuracy: 98.44%
5	Validation loss: 0.066205	Best loss: 0.055006	Accuracy: 98.32%
6	Validation loss: 0.048693	Best loss: 0.048693	Accuracy: 98.63%
7	Validation loss: 0.059344	Best loss: 0.048693	Accuracy: 98.44%
8	Validation loss: 0.052244	Best loss: 0.

12	Validation loss: 1.730028	Best loss: 1.621925	Accuracy: 19.08%
13	Validation loss: 1.821192	Best loss: 1.621925	Accuracy: 22.01%
14	Validation loss: 1.741654	Best loss: 1.621925	Accuracy: 18.73%
15	Validation loss: 1.742154	Best loss: 1.621925	Accuracy: 18.73%
16	Validation loss: 1.830311	Best loss: 1.621925	Accuracy: 22.01%
17	Validation loss: 2.668368	Best loss: 1.621925	Accuracy: 19.27%
18	Validation loss: 1.795995	Best loss: 1.621925	Accuracy: 22.01%
19	Validation loss: 1.760214	Best loss: 1.621925	Accuracy: 19.27%
20	Validation loss: 1.771631	Best loss: 1.621925	Accuracy: 20.91%
21	Validation loss: 1.793955	Best loss: 1.621925	Accuracy: 22.01%
22	Validation loss: 2.183897	Best loss: 1.621925	Accuracy: 19.27%
23	Validation loss: 1.770351	Best loss: 1.621925	Accuracy: 19.27%
Early stopping!
[CV]  n_neurons=70, batch_size=50, activation=<function elu at 0x000001A7BE2E2268>, learning_rate=0.1, total=  32.0s
[CV] n_neurons=70, batch_size=50, activation=<function elu at 0x000001A7BE2

0	Validation loss: 1.258626	Best loss: 1.258626	Accuracy: 37.53%
1	Validation loss: 1.374313	Best loss: 1.258626	Accuracy: 40.03%
2	Validation loss: 1.149034	Best loss: 1.149034	Accuracy: 40.19%
3	Validation loss: 1.152328	Best loss: 1.149034	Accuracy: 40.15%
4	Validation loss: 1.211809	Best loss: 1.149034	Accuracy: 38.74%
5	Validation loss: 1.159806	Best loss: 1.149034	Accuracy: 40.23%
6	Validation loss: 1.152898	Best loss: 1.149034	Accuracy: 40.54%
7	Validation loss: 1.142614	Best loss: 1.142614	Accuracy: 40.58%
8	Validation loss: 1.150664	Best loss: 1.142614	Accuracy: 39.91%
9	Validation loss: 1.206614	Best loss: 1.142614	Accuracy: 39.44%
10	Validation loss: 1.159095	Best loss: 1.142614	Accuracy: 40.15%
11	Validation loss: 1.198097	Best loss: 1.142614	Accuracy: 39.80%
12	Validation loss: 1.378122	Best loss: 1.142614	Accuracy: 41.91%
13	Validation loss: 1.174155	Best loss: 1.142614	Accuracy: 40.42%
14	Validation loss: 1.177303	Best loss: 1.142614	Accuracy: 39.44%
15	Validation loss: 

13	Validation loss: 0.388907	Best loss: 0.267604	Accuracy: 95.90%
14	Validation loss: 0.276283	Best loss: 0.267604	Accuracy: 96.13%
15	Validation loss: 0.272073	Best loss: 0.267604	Accuracy: 95.90%
16	Validation loss: 0.313487	Best loss: 0.267604	Accuracy: 95.19%
17	Validation loss: 0.217006	Best loss: 0.217006	Accuracy: 96.64%
18	Validation loss: 0.299807	Best loss: 0.217006	Accuracy: 96.44%
19	Validation loss: 0.252067	Best loss: 0.217006	Accuracy: 96.83%
20	Validation loss: 0.219029	Best loss: 0.217006	Accuracy: 96.17%
21	Validation loss: 0.196291	Best loss: 0.196291	Accuracy: 96.95%
22	Validation loss: 0.188056	Best loss: 0.188056	Accuracy: 97.15%
23	Validation loss: 0.173780	Best loss: 0.173780	Accuracy: 97.46%
24	Validation loss: 0.175268	Best loss: 0.173780	Accuracy: 97.30%
25	Validation loss: 0.183506	Best loss: 0.173780	Accuracy: 97.26%
26	Validation loss: 0.165146	Best loss: 0.165146	Accuracy: 97.42%
27	Validation loss: 0.169769	Best loss: 0.165146	Accuracy: 97.38%
28	Validat

58	Validation loss: 0.289582	Best loss: 0.200900	Accuracy: 95.82%
59	Validation loss: 0.241095	Best loss: 0.200900	Accuracy: 96.48%
60	Validation loss: 1.861024	Best loss: 0.200900	Accuracy: 93.75%
61	Validation loss: 0.376959	Best loss: 0.200900	Accuracy: 97.22%
62	Validation loss: 0.288130	Best loss: 0.200900	Accuracy: 97.38%
63	Validation loss: 0.263565	Best loss: 0.200900	Accuracy: 96.91%
64	Validation loss: 0.290306	Best loss: 0.200900	Accuracy: 95.82%
65	Validation loss: 0.218606	Best loss: 0.200900	Accuracy: 96.99%
66	Validation loss: 0.227780	Best loss: 0.200900	Accuracy: 97.58%
67	Validation loss: 0.181837	Best loss: 0.181837	Accuracy: 97.50%
68	Validation loss: 0.205703	Best loss: 0.181837	Accuracy: 97.07%
69	Validation loss: 0.181706	Best loss: 0.181706	Accuracy: 97.65%
70	Validation loss: 0.214718	Best loss: 0.181706	Accuracy: 96.36%
71	Validation loss: 0.172101	Best loss: 0.172101	Accuracy: 97.54%
72	Validation loss: 0.170280	Best loss: 0.170280	Accuracy: 97.38%
73	Validat

[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed: 126.3min finished


0	Validation loss: 0.061664	Best loss: 0.061664	Accuracy: 98.36%
1	Validation loss: 0.055720	Best loss: 0.055720	Accuracy: 98.20%
2	Validation loss: 0.038473	Best loss: 0.038473	Accuracy: 98.59%
3	Validation loss: 0.047277	Best loss: 0.038473	Accuracy: 98.51%
4	Validation loss: 0.050905	Best loss: 0.038473	Accuracy: 98.63%
5	Validation loss: 0.080570	Best loss: 0.038473	Accuracy: 97.97%
6	Validation loss: 0.041476	Best loss: 0.038473	Accuracy: 98.83%
7	Validation loss: 0.044341	Best loss: 0.038473	Accuracy: 98.63%
8	Validation loss: 0.037654	Best loss: 0.037654	Accuracy: 98.87%
9	Validation loss: 0.054451	Best loss: 0.037654	Accuracy: 98.67%
10	Validation loss: 0.032014	Best loss: 0.032014	Accuracy: 99.26%
11	Validation loss: 0.058825	Best loss: 0.032014	Accuracy: 98.79%
12	Validation loss: 0.030225	Best loss: 0.030225	Accuracy: 99.18%
13	Validation loss: 0.051989	Best loss: 0.030225	Accuracy: 98.75%
14	Validation loss: 0.078669	Best loss: 0.030225	Accuracy: 98.48%
15	Validation loss: 

RandomizedSearchCV(cv=None, error_score='raise',
          estimator=DNNClassifier(activation=<function elu at 0x000001A7BE2E2268>,
       batch_norm_momentum=None, batch_size=20, dropout_rate=None,
       initializer=<function variance_scaling_initializer.<locals>._initializer at 0x000001A7BC93C730>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42),
          fit_params={'n_epochs': 1000, 'X_valid': array([[0., 0., ..., 0., 0.],
       [0., 0., ..., 0., 0.],
       ...,
       [0., 0., ..., 0., 0.],
       [0., 0., ..., 0., 0.]], dtype=float32), 'y_valid': array([0, 4, ..., 1, 2], dtype=uint8)},
          iid=True, n_iter=50, n_jobs=1,
          param_distributions={'n_neurons': [10, 30, 50, 70, 90, 100, 120, 140, 160], 'batch_size': [10, 50, 100, 500], 'activation': [<function relu at 0x000001A7BE30C048>, <function elu at 0x000001A7BE2E2268>, <function leaky_relu.<local

In [16]:
rnd_search.best_params_

{'activation': <function tensorflow.python.ops.gen_nn_ops.relu>,
 'batch_size': 500,
 'learning_rate': 0.01,
 'n_neurons': 50}

In [17]:
y_pred = rnd_search.predict(X_test1)
accuracy_score(y_test1, y_pred)

0.989492119089317

In [18]:
rnd_search.best_estimator_.save("./my_best_mnist_model_0_to_4")

In [20]:
# Let's train the best model found, once again, to see how fast it converges
dnn_clf = DNNClassifier(activation=leaky_relu(alpha=0.1), batch_size=500, learning_rate=0.01,
                        n_neurons=50, random_state=42)
dnn_clf.fit(X_train1, y_train1, n_epochs=1000, X_valid=X_valid1, y_valid=y_valid1)

0	Validation loss: 0.072400	Best loss: 0.072400	Accuracy: 97.62%
1	Validation loss: 0.067381	Best loss: 0.067381	Accuracy: 97.93%
2	Validation loss: 0.050353	Best loss: 0.050353	Accuracy: 98.40%
3	Validation loss: 0.057750	Best loss: 0.050353	Accuracy: 98.28%
4	Validation loss: 0.052084	Best loss: 0.050353	Accuracy: 98.51%
5	Validation loss: 0.048122	Best loss: 0.048122	Accuracy: 98.59%
6	Validation loss: 0.053389	Best loss: 0.048122	Accuracy: 98.67%
7	Validation loss: 0.046709	Best loss: 0.046709	Accuracy: 98.91%
8	Validation loss: 0.051118	Best loss: 0.046709	Accuracy: 98.71%
9	Validation loss: 0.049088	Best loss: 0.046709	Accuracy: 98.87%
10	Validation loss: 0.064235	Best loss: 0.046709	Accuracy: 98.36%
11	Validation loss: 0.045773	Best loss: 0.045773	Accuracy: 98.79%
12	Validation loss: 0.052007	Best loss: 0.045773	Accuracy: 98.79%
13	Validation loss: 0.054095	Best loss: 0.045773	Accuracy: 98.87%
14	Validation loss: 0.051559	Best loss: 0.045773	Accuracy: 98.87%
15	Validation loss: 

DNNClassifier(activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001A7C7909D08>,
       batch_norm_momentum=None, batch_size=500, dropout_rate=None,
       initializer=<function variance_scaling_initializer.<locals>._initializer at 0x000001A7BC93C730>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=50,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42)

In [30]:
y_pred = dnn_clf.predict(X_test1)
accuracy_score(y_test1, y_pred)

# Here the accuracy is different because I put leaky_relu in the training instead of relu as the rnd_search_best_params says.
# However, the accuracy is better...Wtf?!

0.9920217941233703

In [23]:
# Let's try to add Batch Normalization
dnn_clf_bn = DNNClassifier(activation=leaky_relu(alpha=0.1), batch_size=500, learning_rate=0.01,
                           n_neurons=50, random_state=42,
                           batch_norm_momentum=0.95)
dnn_clf_bn.fit(X_train1, y_train1, n_epochs=1000, X_valid=X_valid1, y_valid=y_valid1)

0	Validation loss: 0.057204	Best loss: 0.057204	Accuracy: 98.24%
1	Validation loss: 0.046775	Best loss: 0.046775	Accuracy: 98.20%
2	Validation loss: 0.038639	Best loss: 0.038639	Accuracy: 98.75%
3	Validation loss: 0.039907	Best loss: 0.038639	Accuracy: 98.75%
4	Validation loss: 0.045205	Best loss: 0.038639	Accuracy: 98.59%
5	Validation loss: 0.034912	Best loss: 0.034912	Accuracy: 98.94%
6	Validation loss: 0.032480	Best loss: 0.032480	Accuracy: 99.02%
7	Validation loss: 0.045129	Best loss: 0.032480	Accuracy: 98.98%
8	Validation loss: 0.044904	Best loss: 0.032480	Accuracy: 98.87%
9	Validation loss: 0.031486	Best loss: 0.031486	Accuracy: 99.10%
10	Validation loss: 0.049647	Best loss: 0.031486	Accuracy: 98.79%
11	Validation loss: 0.045901	Best loss: 0.031486	Accuracy: 98.91%
12	Validation loss: 0.043318	Best loss: 0.031486	Accuracy: 98.79%
13	Validation loss: 0.040476	Best loss: 0.031486	Accuracy: 99.02%
14	Validation loss: 0.043031	Best loss: 0.031486	Accuracy: 99.30%
15	Validation loss: 

DNNClassifier(activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001A7C7909C80>,
       batch_norm_momentum=0.95, batch_size=500, dropout_rate=None,
       initializer=<function variance_scaling_initializer.<locals>._initializer at 0x000001A7BC93C730>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=50,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42)

In [24]:
y_pred = dnn_clf_bn.predict(X_test1)
accuracy_score(y_test1, y_pred)

0.9902704806382565

In [25]:
# Mmm, Batch Normalization did not improve the accuracy. We should try to do another tuning for hyperparameters with BN
# and try again.
# ...

In [26]:
# Now let's go back to our previous model and see how well perform on the training set
y_pred = dnn_clf.predict(X_train1)
accuracy_score(y_train1, y_pred)

0.9992153505956203

In [28]:
# Much better than the test set, so probably it is overfitting the training set. Let's try using dropout
dnn_clf_dropout = DNNClassifier(activation=leaky_relu(alpha=0.1), batch_size=500, learning_rate=0.01,
                                n_neurons=50, random_state=42,
                                dropout_rate=0.5)
dnn_clf_dropout.fit(X_train1, y_train1, n_epochs=1000, X_valid=X_valid1, y_valid=y_valid1)

0	Validation loss: 0.502725	Best loss: 0.502725	Accuracy: 84.83%
1	Validation loss: 0.174035	Best loss: 0.174035	Accuracy: 94.72%
2	Validation loss: 0.132589	Best loss: 0.132589	Accuracy: 96.05%
3	Validation loss: 0.122869	Best loss: 0.122869	Accuracy: 96.64%
4	Validation loss: 0.114765	Best loss: 0.114765	Accuracy: 96.87%
5	Validation loss: 0.105026	Best loss: 0.105026	Accuracy: 97.46%
6	Validation loss: 0.096043	Best loss: 0.096043	Accuracy: 97.19%
7	Validation loss: 0.103087	Best loss: 0.096043	Accuracy: 97.62%
8	Validation loss: 0.093458	Best loss: 0.093458	Accuracy: 97.30%
9	Validation loss: 0.096742	Best loss: 0.093458	Accuracy: 97.46%
10	Validation loss: 0.098123	Best loss: 0.093458	Accuracy: 97.58%
11	Validation loss: 0.097413	Best loss: 0.093458	Accuracy: 97.38%
12	Validation loss: 0.089873	Best loss: 0.089873	Accuracy: 97.69%
13	Validation loss: 0.089845	Best loss: 0.089845	Accuracy: 97.62%
14	Validation loss: 0.082723	Best loss: 0.082723	Accuracy: 97.73%
15	Validation loss: 

DNNClassifier(activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x000001A7D0CEEC80>,
       batch_norm_momentum=None, batch_size=500, dropout_rate=0.5,
       initializer=<function variance_scaling_initializer.<locals>._initializer at 0x000001A7BC93C730>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=50,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42)

In [29]:
y_pred = dnn_clf_dropout.predict(X_test1)
accuracy_score(y_test1, y_pred)

0.9854057209573847

In [31]:
# Dropout doesn't seem to help. As said before, we could try to tune the network with dropout and see what we got.
# ...

<h1>Transfer Learning</h1>

<p>Let's try to reuse the previous model on digits from 5 to 9, using only 100 images per digit!</p>

In [3]:
restore_saver = tf.train.import_meta_graph("./my_best_mnist_model_0_to_4.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
loss = tf.get_default_graph().get_tensor_by_name("loss:0")
Y_proba = tf.get_default_graph().get_tensor_by_name("Y_proba:0")
logits = Y_proba.op.inputs[0]
accuracy = tf.get_default_graph().get_tensor_by_name("accuracy:0")

In [4]:
learning_rate = 0.01

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
# Freeze all the hidden layers
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

In [5]:
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

init = tf.global_variables_initializer()
five_frozen_saver = tf.train.Saver()

In [9]:
X_train2_full = mnist.train.images[mnist.train.labels >= 5]
y_train2_full = mnist.train.labels[mnist.train.labels >= 5] - 5
X_valid2_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid2_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5
X_test2 = mnist.test.images[mnist.test.labels >= 5]
y_test2 = mnist.test.labels[mnist.test.labels >= 5] - 5

In [10]:
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

In [11]:
X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

In [11]:
import time

n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./my_best_mnist_model_0_to_4")
    for var in output_layer_vars:
        var.initializer.run()

    t0 = time.time()
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            save_path = five_frozen_saver.save(sess, "./my_mnist_model_5_to_9_five_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_five_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./my_best_mnist_model_0_to_4
0	Validation loss: 1.076381	Best loss: 1.076381	Accuracy: 58.00%
1	Validation loss: 0.995478	Best loss: 0.995478	Accuracy: 55.33%
2	Validation loss: 0.954706	Best loss: 0.954706	Accuracy: 64.67%
3	Validation loss: 0.941471	Best loss: 0.941471	Accuracy: 66.67%
4	Validation loss: 0.930525	Best loss: 0.930525	Accuracy: 66.67%
5	Validation loss: 0.912695	Best loss: 0.912695	Accuracy: 71.33%
6	Validation loss: 0.920844	Best loss: 0.912695	Accuracy: 65.33%
7	Validation loss: 0.890324	Best loss: 0.890324	Accuracy: 69.33%
8	Validation loss: 0.916895	Best loss: 0.890324	Accuracy: 64.67%
9	Validation loss: 0.965269	Best loss: 0.890324	Accuracy: 62.67%
10	Validation loss: 0.875946	Best loss: 0.875946	Accuracy: 67.33%
11	Validation loss: 0.953211	Best loss: 0.875946	Accuracy: 65.33%
12	Validation loss: 0.894069	Best loss: 0.875946	Accuracy: 66.67%
13	Validation loss: 0.890609	Best loss: 0.875946	Accuracy: 64.67%
14	Validation l

<p>As we can see, not so good...But of course, we're using 100 images per digit and we only changed the output layer.</p>

In [4]:
# Let's try to reuse only 4 hidden layers instead of 5
n_outputs = 5

restore_saver = tf.train.import_meta_graph("./my_best_mnist_model_0_to_4.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")

hidden4_out = tf.get_default_graph().get_tensor_by_name("hidden4_out:0")
logits = tf.layers.dense(hidden4_out, n_outputs, kernel_initializer=he_init, name="new_logits")
Y_proba = tf.nn.softmax(logits)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

In [5]:
learning_rate = 0.01

output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

init = tf.global_variables_initializer()
four_frozen_saver = tf.train.Saver()

In [12]:
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./my_best_mnist_model_0_to_4")
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            save_path = four_frozen_saver.save(sess, "./my_mnist_model_5_to_9_four_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    four_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_four_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./my_best_mnist_model_0_to_4
0	Validation loss: 1.303004	Best loss: 1.303004	Accuracy: 52.00%
1	Validation loss: 1.046397	Best loss: 1.046397	Accuracy: 54.00%
2	Validation loss: 0.990561	Best loss: 0.990561	Accuracy: 57.33%
3	Validation loss: 0.943077	Best loss: 0.943077	Accuracy: 61.33%
4	Validation loss: 0.924572	Best loss: 0.924572	Accuracy: 61.33%
5	Validation loss: 0.889116	Best loss: 0.889116	Accuracy: 63.33%
6	Validation loss: 0.896398	Best loss: 0.889116	Accuracy: 64.00%
7	Validation loss: 0.858603	Best loss: 0.858603	Accuracy: 66.67%
8	Validation loss: 0.873803	Best loss: 0.858603	Accuracy: 69.33%
9	Validation loss: 0.841726	Best loss: 0.841726	Accuracy: 68.00%
10	Validation loss: 0.862488	Best loss: 0.841726	Accuracy: 67.33%
11	Validation loss: 0.821854	Best loss: 0.821854	Accuracy: 70.67%
12	Validation loss: 0.809741	Best loss: 0.809741	Accuracy: 71.33%
13	Validation loss: 0.810555	Best loss: 0.809741	Accuracy: 70.00%
14	Validation l

<p>Well, a bit better...</p>

In [14]:
# Let's try now to unfreeze the last two layers
learning_rate = 0.01

unfrozen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="hidden[34]|new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam3")
training_op = optimizer.minimize(loss, var_list=unfrozen_vars)

init = tf.global_variables_initializer()
two_frozen_saver = tf.train.Saver()

In [15]:
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    four_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_four_frozen")
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            save_path = two_frozen_saver.save(sess, "./my_mnist_model_5_to_9_two_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    two_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_two_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./my_mnist_model_5_to_9_four_frozen
0	Validation loss: 0.853785	Best loss: 0.853785	Accuracy: 72.67%
1	Validation loss: 0.870564	Best loss: 0.853785	Accuracy: 67.33%
2	Validation loss: 0.652311	Best loss: 0.652311	Accuracy: 76.00%
3	Validation loss: 0.698115	Best loss: 0.652311	Accuracy: 78.00%
4	Validation loss: 0.690634	Best loss: 0.652311	Accuracy: 75.33%
5	Validation loss: 0.768487	Best loss: 0.652311	Accuracy: 77.33%
6	Validation loss: 0.848543	Best loss: 0.652311	Accuracy: 80.00%
7	Validation loss: 0.697138	Best loss: 0.652311	Accuracy: 83.33%
8	Validation loss: 0.712297	Best loss: 0.652311	Accuracy: 84.67%
9	Validation loss: 0.779837	Best loss: 0.652311	Accuracy: 80.00%
10	Validation loss: 0.833671	Best loss: 0.652311	Accuracy: 81.33%
11	Validation loss: 0.803134	Best loss: 0.652311	Accuracy: 82.00%
12	Validation loss: 0.954865	Best loss: 0.652311	Accuracy: 78.67%
13	Validation loss: 0.921988	Best loss: 0.652311	Accuracy: 80.00%
14	Valid

<p>Not bad...And what if we unfreeze all the layers?</p>

In [16]:
learning_rate = 0.01

optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam4")
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
no_frozen_saver = tf.train.Saver()

In [17]:
n_epochs = 1000
batch_size = 20

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()
    two_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_two_frozen")
        
    for epoch in range(n_epochs):
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            save_path = no_frozen_saver.save(sess, "./my_mnist_model_5_to_9_no_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    no_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_no_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

INFO:tensorflow:Restoring parameters from ./my_mnist_model_5_to_9_two_frozen
0	Validation loss: 0.753302	Best loss: 0.753302	Accuracy: 79.33%
1	Validation loss: 0.518203	Best loss: 0.518203	Accuracy: 87.33%
2	Validation loss: 0.449906	Best loss: 0.449906	Accuracy: 89.33%
3	Validation loss: 0.387903	Best loss: 0.387903	Accuracy: 92.00%
4	Validation loss: 0.649829	Best loss: 0.387903	Accuracy: 88.00%
5	Validation loss: 0.645186	Best loss: 0.387903	Accuracy: 91.33%
6	Validation loss: 1.225373	Best loss: 0.387903	Accuracy: 88.67%
7	Validation loss: 1.132690	Best loss: 0.387903	Accuracy: 88.67%
8	Validation loss: 0.816616	Best loss: 0.387903	Accuracy: 90.00%
9	Validation loss: 0.769069	Best loss: 0.387903	Accuracy: 90.67%
10	Validation loss: 0.990865	Best loss: 0.387903	Accuracy: 89.33%
11	Validation loss: 1.153069	Best loss: 0.387903	Accuracy: 89.33%
12	Validation loss: 1.028986	Best loss: 0.387903	Accuracy: 90.00%
13	Validation loss: 1.057120	Best loss: 0.387903	Accuracy: 92.00%
14	Valida

In [20]:
# Let's compare this result with a DNN trained from scratch
dnn_clf_5_to_9 = DNNClassifier(n_hidden_layers=4, random_state=42)
dnn_clf_5_to_9.fit(X_train2, y_train2, n_epochs=1000, X_valid=X_valid2, y_valid=y_valid2)

0	Validation loss: 0.803556	Best loss: 0.803556	Accuracy: 71.33%
1	Validation loss: 0.966740	Best loss: 0.803556	Accuracy: 85.33%
2	Validation loss: 1.158966	Best loss: 0.803556	Accuracy: 78.00%
3	Validation loss: 0.615953	Best loss: 0.615953	Accuracy: 88.00%
4	Validation loss: 0.612615	Best loss: 0.612615	Accuracy: 92.00%
5	Validation loss: 0.686583	Best loss: 0.612615	Accuracy: 89.33%
6	Validation loss: 0.804757	Best loss: 0.612615	Accuracy: 89.33%
7	Validation loss: 0.748284	Best loss: 0.612615	Accuracy: 88.00%
8	Validation loss: 0.947906	Best loss: 0.612615	Accuracy: 84.00%
9	Validation loss: 1.652254	Best loss: 0.612615	Accuracy: 89.33%
10	Validation loss: 0.982898	Best loss: 0.612615	Accuracy: 92.67%
11	Validation loss: 1.143990	Best loss: 0.612615	Accuracy: 90.00%
12	Validation loss: 1.167457	Best loss: 0.612615	Accuracy: 92.00%
13	Validation loss: 1.120672	Best loss: 0.612615	Accuracy: 92.00%
14	Validation loss: 1.773683	Best loss: 0.612615	Accuracy: 82.00%
15	Validation loss: 

DNNClassifier(activation=<function elu at 0x000001D68D09B1E0>,
       batch_norm_momentum=None, batch_size=20, dropout_rate=None,
       initializer=<function variance_scaling_initializer.<locals>._initializer at 0x000001D68DFD1268>,
       learning_rate=0.01, n_hidden_layers=4, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42)

In [22]:
from sklearn.metrics import accuracy_score

y_pred = dnn_clf_5_to_9.predict(X_test2)
accuracy_score(y_test2, y_pred)

0.8996091339230611

<p>Unfortunately in this case transfer learning did not help too much.</p>