In [1]:
import numpy as np

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
    
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

Xavier and He Initialization

In [2]:
import tensorflow as tf

In [3]:
n_inputs = 28 * 28  # MNIST dataset
n_hidden1 = 300

X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = "X")

In [4]:
he_init = tf.variance_scaling_initializer()
hidden1 = tf.layers.dense(X, n_hidden1, activation = tf.nn.relu, kernel_initializer = he_init, name = "hidden1")

Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Colocations handled automatically by placer.


Nonsaturating Activation Functions

Leaky ReLU implementation in TensorFlow

In [5]:
X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = "X")

In [6]:
def leaky_relu(z, name=None):
    return tf.maximum(0.01 * z, z, name = name)

hidden1 = tf.layers.dense(X, n_hidden1, activation = leaky_relu, name = "hidden1")

ValueError: Variable hidden1/kernel already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:

  File "C:\Users\manog\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1801, in __init__
    self._traceback = tf_stack.extract_stack()
  File "C:\Users\manog\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3300, in create_op
    op_def=op_def)
  File "C:\Users\manog\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)


ELU implementation in TensorFlow

In [7]:
X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = "X")

In [8]:
hidden1 = tf.layers.dense(X, n_hidden1, activation = tf.nn.elu, name = "hidden1")

ValueError: Variable hidden1/kernel already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:

  File "C:\Users\manog\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1801, in __init__
    self._traceback = tf_stack.extract_stack()
  File "C:\Users\manog\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3300, in create_op
    op_def=op_def)
  File "C:\Users\manog\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)


Batch Normalization

In order to run batch norm just before each hidden layer's activation function, we apply the ELU activation function manually, right after the batch norm layer.

In [9]:
import tensorflow as tf

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = "X")

training = tf.placeholder_with_default(False, shape = (), name = 'training')

hidden_1 = tf.layers.dense(X, n_hidden1, name = "hidden_1")
bn1 = tf.layers.batch_normalization(hidden_1, training = training, momentum = 0.9)
bn1_act = tf.nn.elu(bn1)

hidden_2 = tf.layers.dense(bn1_act, n_hidden2, name = "hidden_2")
bn2 = tf.layers.batch_normalization(hidden_2, training = training, momentum = 0.9)
bn2_act = tf.nn.elu(bn2)

logits_before_bn = tf.layers.dense(bn2_act, n_outputs, name = "outputs")
logits = tf.layers.batch_normalization(logits_before_bn, training = training, momentum = 0.9)

Instructions for updating:
Use keras.layers.batch_normalization instead.


In [10]:
X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = "X")
training = tf.placeholder_with_default(False, shape = (), name = 'training')

To avoid repeating the same parameters over and over again, we will use Python's partial() function.

In [11]:
from functools import partial

my_batch_norm_layer = partial(tf.layers.batch_normalization, training = training, momentum = 0.9)

Hidden1 = tf.layers.dense(X, n_hidden1, name="Hidden1")
bn1 = my_batch_norm_layer(Hidden1)
bn1_act = tf.nn.elu(bn1)
Hidden2 = tf.layers.dense(bn1_act, n_hidden2, name="Hidden2")
bn2 = my_batch_norm_layer(Hidden2)
bn2_act = tf.nn.elu(bn2)
logits_before_bn = tf.layers.dense(bn2_act, n_outputs, name="output")
logits = my_batch_norm_layer(logits_before_bn)

Now, we build a neural net for MNIST, using the ELU activation function and Batch Normalization at each layer.

In [12]:
reset_graph()

batch_norm_momentum = 0.9

X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = "X")
y = tf.placeholder(tf.int32, shape = (None), name = "y")
training = tf.placeholder_with_default(False, shape = (), name = 'training')

with tf.name_scope("dnn"):
    he_init = tf.variance_scaling_initializer()

    my_batch_norm_layer = partial(tf.layers.batch_normalization, training = training, momentum = batch_norm_momentum)

    my_dense_layer = partial(tf.layers.dense, kernel_initializer = he_init)

    Hidden1 = my_dense_layer(X, n_hidden1, name = "Hidden1")
    bn1 = tf.nn.elu(my_batch_norm_layer(Hidden1))
    Hidden2 = my_dense_layer(bn1, n_hidden2, name = "Hidden2")
    bn2 = tf.nn.elu(my_batch_norm_layer(Hidden2))
    logits_before_bn = my_dense_layer(bn2, n_outputs, name = "output")
    logits = my_batch_norm_layer(logits_before_bn)

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
    loss = tf.reduce_mean(xentropy, name = "loss")

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.01)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.cast(tf.nn.in_top_k(logits, y, 1), tf.float32)
    accuracy = tf.reduce_mean(correct)
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

Instructions for updating:
Use tf.cast instead.


In [13]:
n_epochs = 20
batch_size = 200

In [14]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [15]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [16]:
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run([training_op, extra_update_ops], feed_dict = {training: True, X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict = {X: X_valid, y: y_valid})
        print(epoch, "Validation accuracy:", accuracy_val)

    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Validation accuracy: 0.8952
1 Validation accuracy: 0.9202
2 Validation accuracy: 0.9318
3 Validation accuracy: 0.9422
4 Validation accuracy: 0.9468
5 Validation accuracy: 0.954
6 Validation accuracy: 0.9568
7 Validation accuracy: 0.96
8 Validation accuracy: 0.962
9 Validation accuracy: 0.9638
10 Validation accuracy: 0.9662
11 Validation accuracy: 0.9682
12 Validation accuracy: 0.9672
13 Validation accuracy: 0.9696
14 Validation accuracy: 0.9706
15 Validation accuracy: 0.9704
16 Validation accuracy: 0.9718
17 Validation accuracy: 0.9726
18 Validation accuracy: 0.9738
19 Validation accuracy: 0.9742


Gradient Clipping

Now, we create a simple neural net for MNIST and add gradient clipping.

In [17]:
reset_graph()

n_inputs = 28*28  # MNIST
n_hidden1 = 300
n_hidden2 = 50
n_hidden3 = 50
n_hidden4 = 50
n_hidden5 = 50
n_outputs = 10

X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = "X")
y = tf.placeholder(tf.int32, shape = (None), name = "y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation = tf.nn.relu, name = "hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation = tf.nn.relu, name = "hidden2")
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation = tf.nn.relu, name = "hidden3")
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation = tf.nn.relu, name = "hidden4")
    hidden5 = tf.layers.dense(hidden4, n_hidden5, activation = tf.nn.relu, name = "hidden5")
    logits = tf.layers.dense(hidden5, n_outputs, name = "outputs")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
    loss = tf.reduce_mean(xentropy, name = "loss")

In [18]:
learning_rate = 0.01

Now, we apply gradient clipping. For this, we need to get the gradients, use the clip_by_value() function to clip them, then apply them.

In [19]:
threshold = 1.0

optimizer = tf.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)
capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var) for grad, var in grads_and_vars]
training_op = optimizer.apply_gradients(capped_gvs)

In [20]:
with tf.name_scope("eval"):
    correct = tf.cast(tf.nn.in_top_k(logits, y, 1), tf.float32)
    accuracy = tf.reduce_mean(correct, name="accuracy")

In [21]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [22]:
n_epochs = 20
batch_size = 200

In [23]:
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict = {X: X_valid, y: y_valid})
        print(epoch, "Validation accuracy:", accuracy_val)

    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Validation accuracy: 0.2878
1 Validation accuracy: 0.7942
2 Validation accuracy: 0.8796
3 Validation accuracy: 0.9064
4 Validation accuracy: 0.9162
5 Validation accuracy: 0.922
6 Validation accuracy: 0.929
7 Validation accuracy: 0.9356
8 Validation accuracy: 0.9382
9 Validation accuracy: 0.9418
10 Validation accuracy: 0.9458
11 Validation accuracy: 0.9472
12 Validation accuracy: 0.9476
13 Validation accuracy: 0.9536
14 Validation accuracy: 0.9568
15 Validation accuracy: 0.9566
16 Validation accuracy: 0.9576
17 Validation accuracy: 0.9592
18 Validation accuracy: 0.9622
19 Validation accuracy: 0.9612


Faster Optimizers

Momentum optimization

In [24]:
optimizer = tf.train.MomentumOptimizer(learning_rate = learning_rate, momentum = 0.9)

Nesterov Accelerated Gradient

In [25]:
optimizer = tf.train.MomentumOptimizer(learning_rate = learning_rate, momentum = 0.9, use_nesterov = True)

AdaGrad optimization

In [26]:
optimizer = tf.train.AdagradOptimizer(learning_rate = learning_rate)

RMSProp optimization

In [27]:
optimizer = tf.train.RMSPropOptimizer(learning_rate = learning_rate, momentum = 0.9, decay = 0.9, epsilon = 1e-10)

Adam optimization

In [28]:
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)

Learning Rate Scheduling

In [29]:
reset_graph()

n_inputs = 28*28  # MNIST
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = "X")
y = tf.placeholder(tf.int32, shape = (None), name = "y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation = tf.nn.relu, name = "hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation = tf.nn.relu, name = "hidden2")
    logits = tf.layers.dense(hidden2, n_outputs, name = "outputs")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
    loss = tf.reduce_mean(xentropy, name = "loss")

with tf.name_scope("eval"):
    correct = tf.cast(tf.nn.in_top_k(logits, y, 1), tf.float32)
    accuracy = tf.reduce_mean(correct, name="accuracy")

In [30]:
with tf.name_scope("train"):
    initial_learning_rate = 0.1
    decay_steps = 10000
    decay_rate = 1/10
    global_step = tf.Variable(0, trainable = False, name = "global_step")
    learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step, decay_steps, decay_rate)
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum = 0.9)
    training_op = optimizer.minimize(loss, global_step = global_step)

In [31]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [32]:
n_epochs = 10
batch_size = 50

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict = {X: X_valid, y: y_valid})
        print(epoch, "Validation accuracy:", accuracy_val)

    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Validation accuracy: 0.9656
1 Validation accuracy: 0.9724
2 Validation accuracy: 0.9746
3 Validation accuracy: 0.9812
4 Validation accuracy: 0.9824
5 Validation accuracy: 0.9846
6 Validation accuracy: 0.984
7 Validation accuracy: 0.9848
8 Validation accuracy: 0.9846
9 Validation accuracy: 0.9842


Avoiding Overfitting Through Regularization

$\ell_1$ and $\ell_2$ regularization

Now, we will implement $\ell_1$ regularization manually. First, we create the model, as usual (with just one hidden layer this time, for simplicity).

In [33]:
reset_graph()

n_inputs = 28*28  # MNIST
n_hidden1 = 300
n_outputs = 10

X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = "X")
y = tf.placeholder(tf.int32, shape = (None), name = "y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation = tf.nn.relu, name = "hidden1")
    logits = tf.layers.dense(hidden1, n_outputs, name = "outputs")

Next, we get a handle on the layer weights, and we compute the total loss, which is equal to the sum of the usual cross entropy loss and the $\ell_1$ loss (i.e., the absolute values of the weights).

In [34]:
W1 = tf.get_default_graph().get_tensor_by_name("hidden1/kernel:0")
W2 = tf.get_default_graph().get_tensor_by_name("outputs/kernel:0")

scale = 0.001    #l1 regularization hyperparameter

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
    base_loss = tf.reduce_mean(xentropy, name = "avg_xentropy")
    reg_losses = tf.reduce_sum(tf.abs(W1)) + tf.reduce_sum(tf.abs(W2))
    loss = tf.add(base_loss, scale * reg_losses, name = "loss")

In [35]:
with tf.name_scope("eval"):
    correct = tf.cast(tf.nn.in_top_k(logits, y, 1), tf.float32)
    accuracy = tf.reduce_mean(correct, name="accuracy")

learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [36]:
n_epochs = 20
batch_size = 200

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict = {X: X_valid, y: y_valid})
        print(epoch, "Validation accuracy:", accuracy_val)

    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Validation accuracy: 0.831
1 Validation accuracy: 0.871
2 Validation accuracy: 0.8838
3 Validation accuracy: 0.8934
4 Validation accuracy: 0.8966
5 Validation accuracy: 0.8988
6 Validation accuracy: 0.9016
7 Validation accuracy: 0.9044
8 Validation accuracy: 0.9058
9 Validation accuracy: 0.906
10 Validation accuracy: 0.9068
11 Validation accuracy: 0.9054
12 Validation accuracy: 0.907
13 Validation accuracy: 0.9084
14 Validation accuracy: 0.9088
15 Validation accuracy: 0.9064
16 Validation accuracy: 0.9066
17 Validation accuracy: 0.9066
18 Validation accuracy: 0.9066
19 Validation accuracy: 0.9052


Dropout

In [37]:
reset_graph()

X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = "X")
y = tf.placeholder(tf.int32, shape = (None), name = "y")

In [38]:
training = tf.placeholder_with_default(False, shape = (), name = 'training')

dropout_rate = 0.5     #equal to (1 - keep_prob)
X_drop = tf.layers.dropout(X, dropout_rate, training = training)

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X_drop, n_hidden1, activation = tf.nn.relu, name = "hidden1")
    hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training = training)
    hidden2 = tf.layers.dense(hidden1_drop, n_hidden2, activation = tf.nn.relu, name = "hidden2")
    hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training = training)
    logits = tf.layers.dense(hidden2_drop, n_outputs, name = "outputs")

Instructions for updating:
Use keras.layers.dropout instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [39]:
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
    loss = tf.reduce_mean(xentropy, name = "loss")

with tf.name_scope("train"):
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum = 0.9)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.cast(tf.nn.in_top_k(logits, y, 1), tf.float32)
    accuracy = tf.reduce_mean(correct)
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [40]:
n_epochs = 20
batch_size = 50

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch, training: True})
        accuracy_val = accuracy.eval(feed_dict = {X: X_valid, y: y_valid})
        print(epoch, "Validation accuracy:", accuracy_val)

    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Validation accuracy: 0.9264
1 Validation accuracy: 0.9464
2 Validation accuracy: 0.9518
3 Validation accuracy: 0.9554
4 Validation accuracy: 0.9592
5 Validation accuracy: 0.963
6 Validation accuracy: 0.9618
7 Validation accuracy: 0.965
8 Validation accuracy: 0.971
9 Validation accuracy: 0.9686
10 Validation accuracy: 0.9706
11 Validation accuracy: 0.9714
12 Validation accuracy: 0.9692
13 Validation accuracy: 0.9712
14 Validation accuracy: 0.9724
15 Validation accuracy: 0.9704
16 Validation accuracy: 0.9728
17 Validation accuracy: 0.973
18 Validation accuracy: 0.973
19 Validation accuracy: 0.976


Max-Norm Regularization

In [41]:
def max_norm_regularizer(threshold, axes = 1, name = "max_norm", collection = "max_norm"):
    def max_norm(weights):
        clipped = tf.clip_by_norm(weights, clip_norm = threshold, axes = axes)
        clip_weights = tf.assign(weights, clipped, name = name)
        tf.add_to_collection(collection, clip_weights)
        return None     #there is no regularization loss term
    return max_norm

Then we can call this function to get a max norm regularizer (with the threshold we want). When we create a hidden layer, we can pass this regularizer to the kernel_regularizer argument.

In [42]:
reset_graph()

n_inputs = 28*28
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

learning_rate = 0.01
momentum = 0.9

X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = "X")
y = tf.placeholder(tf.int32, shape= (None), name = "y")

In [43]:
max_norm_reg = max_norm_regularizer(threshold = 1.0)

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation = tf.nn.relu, kernel_regularizer = max_norm_reg, name = "hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation = tf.nn.relu, kernel_regularizer = max_norm_reg, name = "hidden2")
    logits = tf.layers.dense(hidden2, n_outputs, name = "outputs")

In [44]:
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
    loss = tf.reduce_mean(xentropy, name = "loss")

with tf.name_scope("train"):
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.cast(tf.nn.in_top_k(logits, y, 1), tf.float32)
    accuracy = tf.reduce_mean(correct)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [45]:
n_epochs = 20
batch_size = 50

In [46]:
clip_all_weights = tf.get_collection("max_norm")

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
            sess.run(clip_all_weights)
        acc_valid = accuracy.eval(feed_dict = {X: X_valid, y: y_valid})
        print(epoch, "Validation accuracy:", acc_valid)

    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Validation accuracy: 0.9558
1 Validation accuracy: 0.97
2 Validation accuracy: 0.9732
3 Validation accuracy: 0.9756
4 Validation accuracy: 0.9766
5 Validation accuracy: 0.9782
6 Validation accuracy: 0.9808
7 Validation accuracy: 0.9806
8 Validation accuracy: 0.9814
9 Validation accuracy: 0.9812
10 Validation accuracy: 0.9816
11 Validation accuracy: 0.9816
12 Validation accuracy: 0.9812
13 Validation accuracy: 0.9826
14 Validation accuracy: 0.9818
15 Validation accuracy: 0.9816
16 Validation accuracy: 0.9822
17 Validation accuracy: 0.9828
18 Validation accuracy: 0.9824
19 Validation accuracy: 0.983
