https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/udacity/3_regularization.ipynb

# Deep Learning

# Assignment 3

Previously in 2_fullyconnected.ipynb, you trained a logistic regression and a neural network model.

The goal of this assignment is to explore regularization techniques.

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
#from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle

First reload the data we generated in 1_notmnist.ipynb.

In [2]:
pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
  save = pickle.load(f)
  train_dataset = save['train_dataset']
  train_labels = save['train_labels']
  valid_dataset = save['valid_dataset']
  valid_labels = save['valid_labels']
  test_dataset = save['test_dataset']
  test_labels = save['test_labels']
  del save  # hint to help gc free up memory
  print('Training set', train_dataset.shape, train_labels.shape)
  print('Validation set', valid_dataset.shape, valid_labels.shape)
  print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


Reformat into a shape that's more adapted to the models we're going to train:
- data as a flat matrix,
- labels as float 1-hot encodings.

In [3]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

# Problem 1

Introduce and tune L2 regularization for both logistic and neural network models. Remember that L2 amounts to adding a penalty on the norm of the weights to the loss. In TensorFlow, you can compute the L2 loss for a tensor t using nn.l2_loss(t). The right amount of regularization should improve your validation / test accuracy.

In [4]:
image_size = 28
num_labels = 10

In [5]:
from sklearn.linear_model import LogisticRegression

n = 5000
C_array = [10,1,0.1,0.001]

for C in C_array:
    print ("C: %s" % C)
    model = LogisticRegression(penalty='l2', C=C, multi_class='multinomial', solver='newton-cg')
    train_x = train_dataset[0:n].reshape(n, (image_size*image_size))
    train_y = train_labels[0:n]
    test_x = test_dataset.reshape(len(test_dataset), (image_size*image_size))
    test_y = test_labels
    model.fit(train_x, train_y)
    print ("Training error (%s samples):" % n, model.score(train_x, train_y))
    print ("Test error", model.score(test_x, test_y))
    print (" ")

C: 10
Training error (5000 samples): 0.9976
Test error 0.8174
 
C: 1
Training error (5000 samples): 0.9742
Test error 0.8453
 
C: 0.1
Training error (5000 samples): 0.9138
Test error 0.8734
 
C: 0.001
Training error (5000 samples): 0.8132
Test error 0.8681
 


In [6]:
def reformat(dataset, labels):
  dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
  # Map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...]
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 784) (200000, 10)
Validation set (10000, 784) (10000, 10)
Test set (10000, 784) (10000, 10)


In [7]:
batch_size = 128
num_hidden_units = 1024

graph = tf.Graph()
with graph.as_default():

    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Variables.
    w1 = tf.Variable(tf.truncated_normal([image_size * image_size, 1024]))
    b1 = tf.Variable(tf.zeros([num_hidden_units]))
    w2 = tf.Variable(tf.truncated_normal([num_hidden_units, num_labels]))
    b2 = tf.Variable(tf.zeros([num_labels]))

    # Training computation.
    layer_1 = tf.matmul(tf_train_dataset, w1) + b1
    hidden_layer = tf.nn.relu(layer_1)
    layer_2 = tf.matmul(hidden_layer, w2) + b2
    
    l2_penalty = 1e-3
    unregularized_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=layer_2))
    l2_loss = l2_penalty*(tf.nn.l2_loss(w1) + tf.nn.l2_loss(w2))
    loss = unregularized_loss + l2_loss

    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(layer_2)
    valid_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_valid_dataset, w1) + b1), w2) + b2)
    test_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_test_dataset, w1) + b1), w2) + b2)

In [8]:
num_steps = 3001

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(num_steps):
    # Pick an offset within the training data, which has been randomized.
    # Note: we could use better randomization across epochs.
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    # Generate a minibatch.
    batch_data = train_dataset[offset:(offset + batch_size), :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    # Prepare a dictionary telling the session where to feed the minibatch.
    # The key of the dictionary is the placeholder node of the graph to be fed,
    # and the value is the numpy array to feed to it.
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 500 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
      print("Validation accuracy: %.1f%%" % accuracy(
        valid_prediction.eval(), valid_labels))
  print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 800.405334
Minibatch accuracy: 6.2%
Validation accuracy: 35.0%
Minibatch loss at step 500: 198.564819
Minibatch accuracy: 80.5%
Validation accuracy: 80.8%
Minibatch loss at step 1000: 117.065598
Minibatch accuracy: 78.1%
Validation accuracy: 81.9%
Minibatch loss at step 1500: 68.562645
Minibatch accuracy: 89.8%
Validation accuracy: 83.4%
Minibatch loss at step 2000: 41.306641
Minibatch accuracy: 93.0%
Validation accuracy: 84.8%
Minibatch loss at step 2500: 25.302708
Minibatch accuracy: 87.5%
Validation accuracy: 85.8%
Minibatch loss at step 3000: 15.562697
Minibatch accuracy: 85.2%
Validation accuracy: 86.7%
Test accuracy: 93.2%


# Problem 2

Let's demonstrate an extreme case of overfitting. Restrict your training data to just a few batches. What happens?

In [9]:
batch_size = 128
num_hidden_units = 1024

graph = tf.Graph()
with graph.as_default():

    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Variables.
    w1 = tf.Variable(tf.truncated_normal([image_size * image_size, 1024]))
    b1 = tf.Variable(tf.zeros([num_hidden_units]))
    w2 = tf.Variable(tf.truncated_normal([num_hidden_units, num_labels]))
    b2 = tf.Variable(tf.zeros([num_labels]))

    # Training computation.
    layer_1 = tf.matmul(tf_train_dataset, w1) + b1
    hidden_layer = tf.nn.relu(layer_1)
    layer_2 = tf.matmul(hidden_layer, w2) + b2
    
    l2_penalty = 1e-3
    unregularized_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=layer_2))
    l2_loss = l2_penalty*(tf.nn.l2_loss(w1) + tf.nn.l2_loss(w2))
    loss = unregularized_loss + l2_loss

    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(layer_2)
    valid_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_valid_dataset, w1) + b1), w2) + b2)
    test_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_test_dataset, w1) + b1), w2) + b2)

In [10]:
num_steps = 101
num_batches = 3

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(num_steps):
    # Pick an offset within the training data, which has been randomized.
    # Note: we could use better randomization across epochs.
    # offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    offset = step % num_batches
    # Generate a minibatch.
    batch_data = train_dataset[offset:(offset + batch_size), :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    # Prepare a dictionary telling the session where to feed the minibatch.
    # The key of the dictionary is the placeholder node of the graph to be fed,
    # and the value is the numpy array to feed to it.
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 2 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
      print("Validation accuracy: %.1f%%" % accuracy(
        valid_prediction.eval(), valid_labels))
  print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 709.333191
Minibatch accuracy: 7.0%
Validation accuracy: 25.0%
Minibatch loss at step 2: 1410.632690
Minibatch accuracy: 39.8%
Validation accuracy: 34.5%
Minibatch loss at step 4: 703.539795
Minibatch accuracy: 47.7%
Validation accuracy: 44.5%
Minibatch loss at step 6: 346.122101
Minibatch accuracy: 89.8%
Validation accuracy: 63.1%
Minibatch loss at step 8: 313.623413
Minibatch accuracy: 99.2%
Validation accuracy: 63.7%
Minibatch loss at step 10: 311.292450
Minibatch accuracy: 100.0%
Validation accuracy: 63.7%
Minibatch loss at step 12: 310.670105
Minibatch accuracy: 100.0%
Validation accuracy: 63.7%
Minibatch loss at step 14: 310.049561
Minibatch accuracy: 100.0%
Validation accuracy: 63.7%
Minibatch loss at step 16: 309.429840
Minibatch accuracy: 100.0%
Validation accuracy: 63.7%
Minibatch loss at step 18: 308.811554
Minibatch accuracy: 100.0%
Validation accuracy: 63.7%
Minibatch loss at step 20: 308.193726
Minibatch accuracy: 100.0%
Validation ac

# Problem 3

Introduce Dropout on the hidden layer of the neural network. Remember: Dropout should only be introduced during training, not evaluation, otherwise your evaluation results would be stochastic as well. TensorFlow provides nn.dropout() for that, but you have to make sure it's only inserted during training.

In [11]:
batch_size = 128
num_hidden_units = 1024

graph = tf.Graph()
with graph.as_default():

    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    keep_prob = tf.placeholder(tf.float32)

    # Variables.
    w1 = tf.Variable(tf.truncated_normal([image_size * image_size, 1024]))
    b1 = tf.Variable(tf.zeros([num_hidden_units]))
    w2 = tf.Variable(tf.truncated_normal([num_hidden_units, num_labels]))
    b2 = tf.Variable(tf.zeros([num_labels]))

    # Training computation.
    layer_1 = tf.matmul(tf_train_dataset, w1) + b1
    hidden_layer = tf.nn.relu(layer_1)
    hidden_layer_with_dropout = tf.nn.dropout(hidden_layer, keep_prob)
    layer_2 = tf.matmul(hidden_layer_with_dropout, w2) + b2
    
    l2_penalty = 1e-3
    unregularized_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=layer_2))
    l2_loss = l2_penalty*(tf.nn.l2_loss(w1) + tf.nn.l2_loss(w2))
    loss = unregularized_loss + l2_loss

    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(layer_2)
    valid_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_valid_dataset, w1) + b1), w2) + b2)
    test_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_test_dataset, w1) + b1), w2) + b2)

In [12]:
num_steps = 3001

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(num_steps):
    # Pick an offset within the training data, which has been randomized.
    # Note: we could use better randomization across epochs.
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    # Generate a minibatch.
    batch_data = train_dataset[offset:(offset + batch_size), :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    # Prepare a dictionary telling the session where to feed the minibatch.
    # The key of the dictionary is the placeholder node of the graph to be fed,
    # and the value is the numpy array to feed to it.
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels, keep_prob : 0.5}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 500 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
      print("Validation accuracy: %.1f%%" % accuracy(
        valid_prediction.eval(), valid_labels))
  print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 826.076172
Minibatch accuracy: 8.6%
Validation accuracy: 32.9%
Minibatch loss at step 500: 208.884918
Minibatch accuracy: 78.9%
Validation accuracy: 80.0%
Minibatch loss at step 1000: 119.736115
Minibatch accuracy: 76.6%
Validation accuracy: 80.8%
Minibatch loss at step 1500: 70.128761
Minibatch accuracy: 81.2%
Validation accuracy: 81.8%
Minibatch loss at step 2000: 41.356827
Minibatch accuracy: 85.2%
Validation accuracy: 83.4%
Minibatch loss at step 2500: 25.253700
Minibatch accuracy: 79.7%
Validation accuracy: 84.1%
Minibatch loss at step 3000: 15.514812
Minibatch accuracy: 86.7%
Validation accuracy: 84.9%
Test accuracy: 91.8%


What happens to our extreme overfitting case?

In [13]:
num_steps = 101
num_batches = 3

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(num_steps):
    # Pick an offset within the training data, which has been randomized.
    # Note: we could use better randomization across epochs.
    # offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    offset = step % num_batches
    # Generate a minibatch.
    batch_data = train_dataset[offset:(offset + batch_size), :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    # Prepare a dictionary telling the session where to feed the minibatch.
    # The key of the dictionary is the placeholder node of the graph to be fed,
    # and the value is the numpy array to feed to it.
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels, keep_prob : 0.5}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 2 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
      print("Validation accuracy: %.1f%%" % accuracy(
        valid_prediction.eval(), valid_labels))
  print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 801.023071
Minibatch accuracy: 8.6%
Validation accuracy: 30.1%
Minibatch loss at step 2: 1221.412964
Minibatch accuracy: 43.8%
Validation accuracy: 40.8%
Minibatch loss at step 4: 663.739807
Minibatch accuracy: 57.8%
Validation accuracy: 53.1%
Minibatch loss at step 6: 360.492706
Minibatch accuracy: 82.0%
Validation accuracy: 64.6%
Minibatch loss at step 8: 317.308533
Minibatch accuracy: 95.3%
Validation accuracy: 66.8%
Minibatch loss at step 10: 321.388733
Minibatch accuracy: 94.5%
Validation accuracy: 65.0%
Minibatch loss at step 12: 318.195404
Minibatch accuracy: 96.1%
Validation accuracy: 66.5%
Minibatch loss at step 14: 326.480316
Minibatch accuracy: 96.9%
Validation accuracy: 67.9%
Minibatch loss at step 16: 313.340485
Minibatch accuracy: 96.9%
Validation accuracy: 65.7%
Minibatch loss at step 18: 311.364746
Minibatch accuracy: 98.4%
Validation accuracy: 66.9%
Minibatch loss at step 20: 317.171783
Minibatch accuracy: 97.7%
Validation accuracy

# Problem 4

Try to get the best performance you can using a multi-layer model! The best reported test accuracy using a deep network is 97.1%.

One avenue you can explore is to add multiple layers.

Another one is to use learning rate decay:

global_step = tf.Variable(0)  # count the number of steps taken.

learning_rate = tf.train.exponential_decay(0.5, global_step, ...)

optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

In [14]:
batch_size = 128
num_hidden_layer_1_units = 1024
num_hidden_layer_2_units = 512

graph = tf.Graph()
with graph.as_default():

    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Variables.
    w1 = tf.Variable(tf.truncated_normal([image_size * image_size, num_hidden_layer_1_units], stddev=np.sqrt(2.0 / (image_size * image_size))))
    b1 = tf.Variable(tf.zeros([num_hidden_layer_1_units]))
    w2 = tf.Variable(tf.truncated_normal([num_hidden_layer_1_units, num_hidden_layer_2_units], stddev=np.sqrt(2.0 / num_hidden_layer_1_units)))
    b2 = tf.Variable(tf.zeros([num_hidden_layer_2_units]))
    w3 = tf.Variable(tf.truncated_normal([num_hidden_layer_2_units, num_labels], stddev=np.sqrt(2.0 / num_hidden_layer_2_units)))
    b3 = tf.Variable(tf.zeros([num_labels]))
    global_step = tf.Variable(0) # count the number of steps taken

    # Training computation.
    hidden_layer_1 = tf.nn.dropout(tf.nn.relu(tf.matmul(tf_train_dataset, w1) + b1), keep_prob=0.5)
    hidden_layer_2 = tf.nn.dropout(tf.nn.relu(tf.matmul(hidden_layer_1, w2) + b2), keep_prob=0.5)
    layer_3 = tf.matmul(hidden_layer_2, w3) + b3
    
    l2_penalty = 1e-3
    unregularized_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=layer_3))
    l2_loss = l2_penalty*(tf.nn.l2_loss(w1) + tf.nn.l2_loss(w2) + tf.nn.l2_loss(w3))
    loss = unregularized_loss + l2_loss

    # Optimizer.
    learning_rate = tf.train.exponential_decay(0.5, global_step, 10000, 0.7, staircase=True)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(layer_3)
    valid_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf.nn.relu(tf.matmul(tf_valid_dataset, w1) + \
                                                                               b1), w2) + b2), w3) + b3)
    test_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf.nn.relu(tf.matmul(tf_test_dataset, w1) + \
                                                                              b1), w2) + b2), w3) + b3)

In [15]:
num_steps = 5001

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(num_steps):
    # Pick an offset within the training data, which has been randomized.
    # Note: we could use better randomization across epochs.
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    # Generate a minibatch.
    batch_data = train_dataset[offset:(offset + batch_size), :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    # Prepare a dictionary telling the session where to feed the minibatch.
    # The key of the dictionary is the placeholder node of the graph to be fed,
    # and the value is the numpy array to feed to it.
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 500 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
      print("Validation accuracy: %.1f%%" % accuracy(
        valid_prediction.eval(), valid_labels))
  print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 3.816103
Minibatch accuracy: 9.4%
Validation accuracy: 26.9%
Minibatch loss at step 500: 1.254481
Minibatch accuracy: 86.7%
Validation accuracy: 84.5%
Minibatch loss at step 1000: 1.073093
Minibatch accuracy: 83.6%
Validation accuracy: 85.2%
Minibatch loss at step 1500: 0.758763
Minibatch accuracy: 88.3%
Validation accuracy: 86.4%
Minibatch loss at step 2000: 0.609600
Minibatch accuracy: 91.4%
Validation accuracy: 86.2%
Minibatch loss at step 2500: 0.660078
Minibatch accuracy: 86.7%
Validation accuracy: 86.5%
Minibatch loss at step 3000: 0.685304
Minibatch accuracy: 85.9%
Validation accuracy: 86.5%
Minibatch loss at step 3500: 0.644487
Minibatch accuracy: 81.2%
Validation accuracy: 87.1%
Minibatch loss at step 4000: 0.543210
Minibatch accuracy: 89.1%
Validation accuracy: 87.2%
Minibatch loss at step 4500: 0.539137
Minibatch accuracy: 88.3%
Validation accuracy: 86.8%
Minibatch loss at step 5000: 0.641714
Minibatch accuracy: 85.2%
Validation accuracy