Deep Learning
=============

Assignment 4
------------

Previously in `2_fullyconnected.ipynb` and `3_regularization.ipynb`, we trained fully connected networks to classify [notMNIST](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html) characters.

The goal of this assignment is to make the neural network convolutional.

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

In [5]:
pickle_file = 'notMNIST.pickle'

# Read the whole pickle first and close the file before touching its contents.
# NOTE: unpickling can execute arbitrary code, so only load a file you trust.
with open(pickle_file, 'rb') as f:
  save = pickle.load(f)

# Pull the three (dataset, labels) splits out of the loaded dictionary.
train_dataset = save['train_dataset']
train_labels = save['train_labels']
valid_dataset = save['valid_dataset']
valid_labels = save['valid_labels']
test_dataset = save['test_dataset']
test_labels = save['test_labels']
del save  # hint to help gc free up memory

# Report the array shapes of every split.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


Reformat into a TensorFlow-friendly shape:
- convolutions need the image data formatted as a cube (width by height by #channels)
- labels as float 1-hot encodings.

In [6]:
image_size = 28
num_labels = 10
num_channels = 1 # grayscale

import numpy as np

def reformat(dataset, labels):
  """Prepare a split for the convolutional graph.

  Args:
    dataset: array that can be reshaped to
      (-1, image_size, image_size, num_channels).
    labels: 1-D array of integer class ids.

  Returns:
    A (images, one_hot) pair, both float32: the reshaped images and a
    (len(labels), num_labels) matrix holding 1.0 in the column equal to the
    class id of each row.
  """
  cube_shape = (-1, image_size, image_size, num_channels)
  images = dataset.reshape(cube_shape).astype(np.float32)
  # Broadcasting a column of ids against the row 0..num_labels-1 yields the
  # one-hot mask; an id outside that range gives an all-zero row.
  class_ids = np.arange(num_labels)
  one_hot = (labels[:, None] == class_ids).astype(np.float32)
  return images, one_hot
# Convert every split to the 4-D float image layout with one-hot labels.
train_dataset_c, train_labels_c = reformat(train_dataset, train_labels)
valid_dataset_c, valid_labels_c = reformat(valid_dataset, valid_labels)
test_dataset_c, test_labels_c = reformat(test_dataset, test_labels)

# Print the resulting shapes, one line per split.
for split_name, split_data, split_labels in (
    ('Training set', train_dataset_c, train_labels_c),
    ('Validation set', valid_dataset_c, valid_labels_c),
    ('Test set', test_dataset_c, test_labels_c)):
  print(split_name, split_data.shape, split_labels.shape)

Training set (200000, 28, 28, 1) (200000, 10)
Validation set (10000, 28, 28, 1) (10000, 10)
Test set (10000, 28, 28, 1) (10000, 10)


In [7]:
def accuracy(predictions, labels):
  """Return the percentage of rows whose arg-max matches between the inputs.

  Args:
    predictions: 2-D array of per-class scores, one row per example.
    labels: 2-D array with the same number of rows; the arg-max of each row
      is taken as the true class.

  Returns:
    100 * (number of matching rows) / (number of rows in predictions).
  """
  predicted_classes = np.argmax(predictions, 1)
  true_classes = np.argmax(labels, 1)
  num_correct = np.sum(predicted_classes == true_classes)
  return 100.0 * num_correct / predictions.shape[0]

Let's build a small network with three convolutional layers, followed by two fully connected layers. Convolutional networks are more expensive computationally, so we'll limit the network's depth and number of fully connected nodes.

In [283]:
batch_size = 200
# NOTE: num_hidden is not read while building this graph; the width of the
# fully connected layer is set by L4 below.
num_hidden = 200
# neural network structure for this sample:
#
# · · · · · · · · · ·    (input data, 1-deep)                 X [batch, 28, 28, 1]
# @ @ @ @ @ @ @ @ @ @ -- conv. layer 6x6x1=>6 stride 1        W1 [6, 6, 1, 6]        B1 [6]
# ∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶                                         Y1 [batch, 28, 28, 6]
#   @ @ @ @ @ @ @ @   -- conv. layer 5x5x6=>12 stride 2       W2 [5, 5, 6, 12]        B2 [12]
#   ∶∶∶∶∶∶∶∶∶∶∶∶∶∶∶                                           Y2 [batch, 14, 14, 12]
#     @ @ @ @ @ @     -- conv. layer 4x4x12=>24 stride 2      W3 [4, 4, 12, 24]       B3 [24]
#     ∶∶∶∶∶∶∶∶∶∶∶                                             Y3 [batch, 7, 7, 24] => reshaped to YY [batch, 7*7*24]
#      \x/x\x\x/ ✞    -- fully connected layer (relu+dropout) W4 [7*7*24, 200]       B4 [200]
#       · · · ·                                               Y4 [batch, 200]
#       \x/x\x/       -- fully connected layer (softmax)      W5 [200, 10]           B5 [10]
#        · · ·                                                Y [batch, 10]

graph = tf.Graph()

with graph.as_default():

  # Input data.
  tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset_c)
  tf_test_dataset = tf.constant(test_dataset_c)

  # Variables.
  # Learning-rate placeholder. NOTE: the optimizer in this cell uses an
  # exponential_decay schedule instead, so this placeholder is never fed.
  lr = tf.placeholder(tf.float32)
  # pkeep: keep probability handed to tf.nn.dropout by the model
  pkeep = tf.placeholder(tf.float32)
  # three convolutional layers with their channel counts, and a
  # fully connected layer (the last layer has num_labels softmax neurons)
  L1 = 6  # first convolutional layer output depth
  L2 = 12  # second convolutional layer output depth
  L3 = 24  # third convolutional layer
  L4 = 200  # fully connected layer

  # sqrt(2 / n) initialisation for the first layer. The previous expression
  # `2/28*28` parsed as (2/28)*28, i.e. a stddev of sqrt(2) on Python 3 and 0
  # under Python 2 integer division; the divisor is now parenthesised and the
  # division forced to float.
  # NOTE(review): n is kept as the image_size*image_size pixel count the
  # original expression appears to intend; the usual He fan-in for this layer
  # would be 6*6*num_channels -- confirm which one is wanted.
  w1_stddev = np.sqrt(2.0 / (image_size * image_size))
  W1 = tf.Variable(tf.truncated_normal([6, 6, num_channels, L1], stddev=w1_stddev))  # 6x6 patch, num_channels input channels, L1 output channels
  B1 = tf.Variable(tf.constant(0.1, tf.float32, [L1]))
  W2 = tf.Variable(tf.truncated_normal([5, 5, L1, L2], stddev=0.1))
  B2 = tf.Variable(tf.constant(0.1, tf.float32, [L2]))
  W3 = tf.Variable(tf.truncated_normal([4, 4, L2, L3], stddev=0.1))
  B3 = tf.Variable(tf.constant(0.1, tf.float32, [L3]))

  # Fully connected layers: flattened 7x7xL3 feature map -> L4 -> num_labels.
  W4 = tf.Variable(tf.truncated_normal([7 * 7 * L3, L4], stddev=0.1))
  B4 = tf.Variable(tf.constant(0.1, tf.float32, [L4]))
  W5 = tf.Variable(tf.truncated_normal([L4, num_labels], stddev=0.1))
  B5 = tf.Variable(tf.constant(0.1, tf.float32, [num_labels]))

  # Model.
  def model(data, dropout=True):
    """Build the forward pass of the strided network and return class logits.

    The same variables (W1..W5, B1..B5) are used on every call, so the
    training, validation and test graphs share their weights.

    Args:
      data: batch of images fed to the first convolution.
      dropout: when true, dropout with keep probability `pkeep` is applied to
        the fully connected activations; when false the activations are used
        unchanged and no dropout op is created.

    Returns:
      The un-normalised logits `matmul(Y4, W5) + B5`.
    """
    def conv_relu(inputs, weights, biases, stride):
        # SAME-padded convolution followed by bias add and ReLU.
        conv = tf.nn.conv2d(inputs, weights, strides=[1, stride, stride, 1], padding='SAME')
        return tf.nn.relu(conv + biases)

    Y1 = conv_relu(data, W1, B1, 1)  # output is 28x28
    Y2 = conv_relu(Y1, W2, B2, 2)  # output is 14x14
    Y3 = conv_relu(Y2, W3, B3, 2)  # output is 7x7

    # reshape the output from the third convolution for the fully connected layer
    YY = tf.reshape(Y3, shape=[-1, 7 * 7 * L3])

    Y4 = tf.nn.relu(tf.matmul(YY, W4) + B4)
    if dropout:
        # Only the training path drops activations; evaluation must not.
        Y4 = tf.nn.dropout(Y4, pkeep)
    return tf.matmul(Y4, W5) + B5

 
  # Training computation.
  # Logits for the training batch (dropout enabled by the default argument),
  # reduced to the mean softmax cross-entropy over the batch.
  logits = model(tf_train_dataset)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
    
  # Step counter handed to minimize() as global_step; not trainable.
  batch = tf.Variable(0, trainable=False)
  train_size = train_labels.shape[0]
  # Staircase exponential decay: start at 0.1, multiply by 0.01 every
  # `train_size` values of `batch`.
  # NOTE(review): `batch` is the global_step of minimize(), while the decay
  # period is the number of training examples, so a run shorter than
  # `train_size` steps keeps the rate at 0.1 throughout -- confirm whether
  # `batch * batch_size` was intended as the step argument.
  learning_rate = tf.train.exponential_decay(0.1, 
                                               batch, 
                                               train_size,
                                               0.01,
                                               staircase = True)

  # Plain gradient descent on the loss, driven by the schedule above.
  optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=batch)
  
  # Predictions for the training, validation, and test data.
  # Validation and test build the model with dropout switched off.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset, False))
  test_prediction = tf.nn.softmax(model(tf_test_dataset, False))

In [73]:
num_steps = 3001

with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print('Initialized')
  # Start offsets wrap around before a batch would run past the training set.
  offset_limit = train_labels_c.shape[0] - batch_size
  fetches = [optimizer, loss, train_prediction]
  for step in range(num_steps):
    # Pick the next contiguous minibatch of images and one-hot labels.
    offset = (step * batch_size) % offset_limit
    batch_end = offset + batch_size
    batch_data = train_dataset_c[offset:batch_end]
    batch_labels = train_labels_c[offset:batch_end]
    # One optimizer step with a dropout keep probability of 0.5.
    feed_dict = {
      tf_train_dataset: batch_data,
      tf_train_labels: batch_labels,
      pkeep: 0.5,
    }
    _, batch_loss, predictions = session.run(fetches, feed_dict=feed_dict)
    if step % 50 != 0:
      continue
    # Progress report every 50 steps.
    print('Minibatch loss at step %d: %f' % (step, batch_loss))
    print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
    print('Validation accuracy: %.1f%%' % accuracy(valid_prediction.eval(), valid_labels_c))
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels_c))

# original = 89.1%
# with biases set to 0.1 or num of entry points / 10 = 90.2%
#  learning_rate = tf.train.exponential_decay(lr start = 0.099 and step decay 0.0001 = 90.7%
# batch_size = 300 91.9%
# bs = 200, ps = 6, depth = 24, num_hidden = 64, dropout = 0.9  - accuracy = 92.3% 
# bs = 200, ps = 6, depth = 32, num_hidden = 100, dropout = 0.9  - accuracy = 92.9% 
# bs = 200, ps = 6, depth = 32, num_hidden = 200, dropout = 0.5  - accuracy = 92.9% 
# bs = 200, ps = 6, depth = 48, num_hidden = 100, dropout = 0.9  - accuracy = 93.1% 
# bs = 200, ps = 6, depth = 64, num_hidden = 200, dropout = 0.9  - accuracy = 93.4% 
# bs = 300, ps = 6, depth = 64, num_hidden = 200, dropout = 0.9  - accuracy = 93% 
# bs = 200, ps = 5, depth = 64, num_hidden = 200, dropout = 0.9  - accuracy = 93.3% 
# bs = 200, ps = 5, depth = 32, num_hidden = 200, dropout = 0.9  - accuracy = 93.7% 
# bs = 200, ps = 6, depth = 44, num_hidden = 200, dropout = 0.9  - accuracy = 93% 
# bs = 200, ps = 6, depth = 44, num_hidden = 32, dropout = 0.9  - accuracy = 91.8% 
# bs = 200, ps = 5, depth = 32, num_hidden = 600, dropout = 0.9  - accuracy = 93.7%
# bs = 200, ps = 5, depth = 32, num_hidden = 800, dropout = 0.9  - accuracy = 93.6%
# bs = 200, ps = 5, depth = 32, num_hidden = 200, dropout = 0.75  - accuracy = 92.8%
# bs = 200, ps = 5, depth = 32, num_hidden = 200, dropout = 0.5  - accuracy = 92.6%
# new architecture
# L1, L2, L3, L4 = 6, 12, 24, 200, bs = 200, lr decay = 0.099, 0.0001, steps = 3001 = 94.6%
# L1, L2, L3, L4 = 6, 12, 24, 200, bs = 200, lr decay = 0.1, 0.01, steps = 3001 = 94.9%
# L1, L2, L3, L4 = 6, 12, 24, 200, bs = 200, lr decay = 0.2, 0.01, steps = 1001 = 93.6%
# L1, L2, L3, L4 = 6, 12, 24, 200, bs = 200, lr decay = 0.2, 0.01, steps = 3001 = 93.7%
# L1, L2, L3, L4 = 6, 12, 24, 200, bs = 200, lr decay = 0.09, 0.01, steps = 3001 = 94.4%

Initialized
Minibatch loss at step 0: 3.652546
Minibatch accuracy: 7.0%
Validation accuracy: 9.1%


KeyboardInterrupt: 

---
Problem 1
---------

The convolutional model above uses convolutions with stride 2 to reduce the dimensionality. Replace the strides by a max pooling operation (`nn.max_pool()`) of stride 2 and kernel size 2.

---

In [100]:
batch_size = 128
# NOTE: num_hidden is not read while building this graph; the width of the
# fully connected layer is set by L4 below.
num_hidden = 200
image_size = 28
num_channels = 1
# neural network structure for this sample:
#
# · · · · · · · · · ·    (input data, 1-deep)                 X [batch, 28, 28, 1]
# @ @ @ @ @ @ @ @ @ @ -- conv. layer 6x6x1=>6 stride 1        W1 [6, 6, 1, 6]        B1 [6]
#                        + max pool 2x2, stride 2             Y1 [batch, 14, 14, 6]
#   @ @ @ @ @ @ @ @   -- conv. layer 5x5x6=>16 stride 1       W2 [5, 5, 6, 16]       B2 [16]
#                        + max pool 2x2, stride 2             Y2 [batch, 7, 7, 16]
#     @ @ @ @ @ @     -- conv. layer 4x4x16=>120 stride 1     W3 [4, 4, 16, 120]     B3 [120]
#                        + max pool 2x2, stride 1             Y3 [batch, 7, 7, 120] => reshaped to YY [batch, 7*7*120]
#      \x/x\x\x/ ✞    -- fully connected layer (relu+dropout) W4 [7*7*120, 84]       B4 [84]
#       · · · ·                                               Y4 [batch, 84]
#       \x/x\x/       -- fully connected layer (softmax)      W5 [84, 10]            B5 [10]
#        · · ·                                                Y [batch, 10]

graph = tf.Graph()

with graph.as_default():

  # Input data.
  tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset_c)
  tf_test_dataset = tf.constant(test_dataset_c)

  # Variables.
  # Learning-rate placeholder. NOTE: the optimizer in this cell is built with
  # a fixed rate, so this placeholder is never fed.
  lr = tf.placeholder(tf.float32)
  # pkeep: keep probability handed to tf.nn.dropout by the model
  pkeep = tf.placeholder(tf.float32)
  # three convolutional layers with their channel counts, and a
  # fully connected layer (the last layer has num_labels softmax neurons)
  L1 = 6  # first convolutional layer output depth
  L2 = 16  # second convolutional layer output depth
  L3 = 120  # third convolutional layer
  L4 = 84  # fully connected layer

  W1 = tf.Variable(tf.truncated_normal([6, 6, num_channels, L1], stddev=0.1))  # 6x6 patch, num_channels input channels, L1 output channels
  B1 = tf.Variable(tf.constant(0.1, tf.float32, [L1]))
  W2 = tf.Variable(tf.truncated_normal([5, 5, L1, L2], stddev=0.1))
  B2 = tf.Variable(tf.constant(0.1, tf.float32, [L2]))
  W3 = tf.Variable(tf.truncated_normal([4, 4, L2, L3], stddev=0.1))
  B3 = tf.Variable(tf.constant(0.1, tf.float32, [L3]))

  # Fully connected layers: flattened 7x7xL3 feature map -> L4 -> num_labels.
  W4 = tf.Variable(tf.truncated_normal([7 * 7 * L3, L4], stddev=0.1))
  B4 = tf.Variable(tf.constant(0.1, tf.float32, [L4]))
  W5 = tf.Variable(tf.truncated_normal([L4, num_labels], stddev=0.1))
  B5 = tf.Variable(tf.constant(0.1, tf.float32, [num_labels]))



  # Model.
  def model(data, dropout=True):
    """Build the forward pass of the max-pooling network and return logits.

    Every convolution runs at stride 1 with SAME padding; spatial reduction is
    done by the 2x2 max-pool that follows each convolution. The same variables
    (W1..W5, B1..B5) are used on every call, so the training, validation and
    test graphs share their weights.

    Args:
      data: batch of images fed to the first convolution.
      dropout: when true, dropout with keep probability `pkeep` is applied to
        the fully connected activations; when false they are used unchanged.

    Returns:
      The un-normalised logits `matmul(Y4, W5) + B5`.
    """
    def conv_relu(inputs, weights, biases):
        # Stride-1, SAME-padded convolution followed by bias add and ReLU.
        conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='SAME')
        return tf.nn.relu(conv + biases)

    def max_pool_2x2(inputs, pool_stride):
        # 2x2 max-pool with SAME padding and the given spatial stride.
        return tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1],
                              strides=[1, pool_stride, pool_stride, 1], padding='SAME')

    # conv output is 28x28; after max_pool with stride 2 output is 14x14
    Y1 = max_pool_2x2(conv_relu(data, W1, B1), 2)
    # after max_pool with stride 2 output is 7x7
    Y2 = max_pool_2x2(conv_relu(Y1, W2, B2), 2)
    # The last pool keeps stride 1 so the map stays 7x7, which is the size the
    # reshape below and W4 are built for.
    Y3 = max_pool_2x2(conv_relu(Y2, W3, B3), 1)

    # reshape the output from the third convolution for the fully connected layer
    YY = tf.reshape(Y3, shape=[-1, 7 * 7 * L3])

    Y4 = tf.nn.relu(tf.matmul(YY, W4) + B4)
    if dropout:
        # Only the training path drops activations; evaluation must not.
        Y4 = tf.nn.dropout(Y4, pkeep)
    return tf.matmul(Y4, W5) + B5

 
  # Training computation.
  # Logits for the training batch (dropout enabled by the default argument),
  # reduced to the mean softmax cross-entropy over the batch.
  logits = model(tf_train_dataset)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
    
# Disabled alternative, kept for reference: gradient descent with a staircase
# exponential-decay learning rate. The Adagrad optimizer below replaces it.
#   batch = tf.Variable(0, trainable=False)
#   train_size = train_labels.shape[0]
#   learning_rate = tf.train.exponential_decay(0.01, 
#                                                batch, 
#                                                train_size,
#                                                0.001,
#                                                staircase = True)

#   optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=batch)
  # Active optimizer: Adagrad with a fixed learning rate of 0.07.
  optimizer = tf.train.AdagradOptimizer(0.07).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  # Validation and test build the model with dropout switched off.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset, False))
  test_prediction = tf.nn.softmax(model(tf_test_dataset, False))

In [101]:
%%time
num_steps = 100001

with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print('Initialized')
  # Start offsets wrap around before a batch would run past the training set.
  offset_limit = train_labels_c.shape[0] - batch_size
  fetches = [optimizer, loss, train_prediction]
  for step in range(num_steps):
    # Pick the next contiguous minibatch of images and one-hot labels.
    offset = (step * batch_size) % offset_limit
    batch_end = offset + batch_size
    batch_data = train_dataset_c[offset:batch_end]
    batch_labels = train_labels_c[offset:batch_end]
    # One optimizer step with a dropout keep probability of 0.5.
    feed_dict = {
      tf_train_dataset: batch_data,
      tf_train_labels: batch_labels,
      pkeep: 0.5,
    }
    _, batch_loss, predictions = session.run(fetches, feed_dict=feed_dict)
    if step % 50 != 0:
      continue
    # Progress report every 50 steps.
    print('Minibatch loss at step %d: %f' % (step, batch_loss))
    print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
    print('Validation accuracy: %.1f%%' % accuracy(valid_prediction.eval(), valid_labels_c))
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels_c))

# bs = 16, Layers: 6,16,120,84, Adagrad(0.05), num_steps = 3001 - 93%
# bs = 16, Layers: 8,16,120,84, Adagrad(0.05), num_steps = 3001 - 90%
# bs = 16, Layers: 4,16,120,84, Adagrad(0.05), num_steps = 3001 - 93%
# bs = 32, Layers: 4,16,120,84, Adagrad(0.05), num_steps = 3001 - 93.8%
# bs = 64, Layers: 4,16,120,84, Adagrad(0.05), num_steps = 3001 - 94.2%
# bs = 128, Layers: 4,16,120,84, Adagrad(0.05), num_steps = 3001 - 94.3%
# bs = 128, Layers: 4,16,120,84, Adagrad(0.04), num_steps = 3001 - 95.3%
# bs = 128, Layers: 4,16,120,84, Adagrad(0.03), num_steps = 6001 - 95.2%
# bs = 128, Layers: 4,16,120,84, Adagrad(0.06), num_steps = 6001 - 95.5%
# bs = 128, Layers: 4,16,120,84, Adagrad(0.07), num_steps = 6001 - 95.7%
# bs = 128, Layers: 4,16,120,100, Adagrad(0.07), num_steps = 6001 - 95.3%
# bs = 128, Layers: 4,16,120,60, Adagrad(0.07), num_steps = 6001 - 95.5%
# bs = 128, Layers: 4,16,120,42, Adagrad(0.07), num_steps = 6001 - 95.5%
# bs = 256, Layers: 4,16,120,84, Adagrad(0.07), num_steps = 6001 - 95.5%
# bs = 64, Layers: 4,16,120,84, Adagrad(0.07), num_steps = 6001 - 95.%
# bs = 128, Layers: 4,16,120,84, Adagrad(0.08), num_steps = 6001 - 95.5%
# bs = 256, Layers: 4,16,120,84, Adagrad(0.08), num_steps = 6001 - 95.8%
# bs = 128, Layers: 4,16,120,84, Adagrad(0.05), num_steps = 30001 - 96.4%
# bs = 128, Layers: 4,16,120,84, Adagrad(0.05), num_steps = 60001 - 96.8%
# bs = 128, Layers: 6,16,120,84, Adam(0.01), num_steps = 60001 - 92.2%
# bs = 128, Layers: 6,16,120,84, Adam(0.001), num_steps = 60001 - 95%
# bs = 128, Layers: 6,16,120,84, Adam(0.0001), num_steps = 60001 - 91.9%
# bs = 128, Layers: 6,16,120,84, Adam(0.01 - 0.0001), num_steps = 60001 - 92.1%
# bs = 128, Layers: 6,16,120,84, Adagrad(0.05), num_steps = 60001 - 95.8%
# bs = 128, Layers: 6,16,120,84, Adagrad(0.05), num_steps = 100001 - 97% - 2.36 hour
# bs = 128, Layers: 4,16,120,84, Adagrad(0.07), num_steps = 100001 - 96.9% - 2.36 hour
# bs = 128, Layers: 6,16,120,84, Adagrad(0.07), num_steps = 100001 - 96.9% - 2.36 hour

Initialized
Minibatch loss at step 0: 4.140816
Minibatch accuracy: 9.4%
Validation accuracy: 13.5%
Minibatch loss at step 50: 1.417216
Minibatch accuracy: 51.6%
Validation accuracy: 66.8%
Minibatch loss at step 100: 0.899910
Minibatch accuracy: 68.0%
Validation accuracy: 79.6%
Minibatch loss at step 150: 0.609660
Minibatch accuracy: 81.2%
Validation accuracy: 82.2%
Minibatch loss at step 200: 0.721183
Minibatch accuracy: 76.6%
Validation accuracy: 83.1%
Minibatch loss at step 250: 0.561992
Minibatch accuracy: 84.4%
Validation accuracy: 84.4%
Minibatch loss at step 300: 0.604293
Minibatch accuracy: 81.2%
Validation accuracy: 85.2%
Minibatch loss at step 350: 0.664483
Minibatch accuracy: 75.8%
Validation accuracy: 85.7%
Minibatch loss at step 400: 0.771832
Minibatch accuracy: 77.3%
Validation accuracy: 85.6%
Minibatch loss at step 450: 0.542106
Minibatch accuracy: 82.8%
Validation accuracy: 86.3%
Minibatch loss at step 500: 0.605320
Minibatch accuracy: 82.8%
Validation accuracy: 86.5%
Mi

Validation accuracy: 90.6%
Minibatch loss at step 4550: 0.329879
Minibatch accuracy: 89.1%
Validation accuracy: 90.7%
Minibatch loss at step 4600: 0.295143
Minibatch accuracy: 92.2%
Validation accuracy: 90.6%
Minibatch loss at step 4650: 0.333241
Minibatch accuracy: 90.6%
Validation accuracy: 90.7%
Minibatch loss at step 4700: 0.501882
Minibatch accuracy: 85.2%
Validation accuracy: 90.6%
Minibatch loss at step 4750: 0.222544
Minibatch accuracy: 93.8%
Validation accuracy: 90.8%
Minibatch loss at step 4800: 0.351486
Minibatch accuracy: 89.1%
Validation accuracy: 90.7%
Minibatch loss at step 4850: 0.354538
Minibatch accuracy: 88.3%
Validation accuracy: 90.7%
Minibatch loss at step 4900: 0.328308
Minibatch accuracy: 89.8%
Validation accuracy: 90.8%
Minibatch loss at step 4950: 0.364648
Minibatch accuracy: 90.6%
Validation accuracy: 90.8%
Minibatch loss at step 5000: 0.384628
Minibatch accuracy: 86.7%
Validation accuracy: 90.7%
Minibatch loss at step 5050: 0.256014
Minibatch accuracy: 93.8%

Minibatch loss at step 9050: 0.287912
Minibatch accuracy: 91.4%
Validation accuracy: 91.4%
Minibatch loss at step 9100: 0.551311
Minibatch accuracy: 85.2%
Validation accuracy: 91.5%
Minibatch loss at step 9150: 0.297324
Minibatch accuracy: 91.4%
Validation accuracy: 91.5%
Minibatch loss at step 9200: 0.485146
Minibatch accuracy: 85.2%
Validation accuracy: 91.5%
Minibatch loss at step 9250: 0.223340
Minibatch accuracy: 93.8%
Validation accuracy: 91.2%
Minibatch loss at step 9300: 0.432949
Minibatch accuracy: 87.5%
Validation accuracy: 91.4%
Minibatch loss at step 9350: 0.258200
Minibatch accuracy: 91.4%
Validation accuracy: 91.5%
Minibatch loss at step 9400: 0.375331
Minibatch accuracy: 86.7%
Validation accuracy: 91.5%
Minibatch loss at step 9450: 0.366652
Minibatch accuracy: 89.1%
Validation accuracy: 91.5%
Minibatch loss at step 9500: 0.419371
Minibatch accuracy: 88.3%
Validation accuracy: 91.6%
Minibatch loss at step 9550: 0.323879
Minibatch accuracy: 92.2%
Validation accuracy: 91.5%

Validation accuracy: 92.0%
Minibatch loss at step 13550: 0.262309
Minibatch accuracy: 91.4%
Validation accuracy: 92.0%
Minibatch loss at step 13600: 0.163464
Minibatch accuracy: 93.8%
Validation accuracy: 91.9%
Minibatch loss at step 13650: 0.199357
Minibatch accuracy: 92.2%
Validation accuracy: 91.8%
Minibatch loss at step 13700: 0.276445
Minibatch accuracy: 92.2%
Validation accuracy: 91.9%
Minibatch loss at step 13750: 0.184146
Minibatch accuracy: 93.0%
Validation accuracy: 91.8%
Minibatch loss at step 13800: 0.349666
Minibatch accuracy: 89.1%
Validation accuracy: 91.8%
Minibatch loss at step 13850: 0.299544
Minibatch accuracy: 89.8%
Validation accuracy: 91.9%
Minibatch loss at step 13900: 0.343092
Minibatch accuracy: 89.8%
Validation accuracy: 91.8%
Minibatch loss at step 13950: 0.211842
Minibatch accuracy: 93.8%
Validation accuracy: 91.9%
Minibatch loss at step 14000: 0.196275
Minibatch accuracy: 94.5%
Validation accuracy: 92.0%
Minibatch loss at step 14050: 0.262328
Minibatch accu

Minibatch loss at step 18000: 0.232852
Minibatch accuracy: 92.2%
Validation accuracy: 92.2%
Minibatch loss at step 18050: 0.257735
Minibatch accuracy: 92.2%
Validation accuracy: 92.3%
Minibatch loss at step 18100: 0.190028
Minibatch accuracy: 93.8%
Validation accuracy: 92.3%
Minibatch loss at step 18150: 0.209957
Minibatch accuracy: 93.8%
Validation accuracy: 92.2%
Minibatch loss at step 18200: 0.397541
Minibatch accuracy: 87.5%
Validation accuracy: 92.3%
Minibatch loss at step 18250: 0.165783
Minibatch accuracy: 94.5%
Validation accuracy: 92.3%
Minibatch loss at step 18300: 0.341134
Minibatch accuracy: 89.8%
Validation accuracy: 92.2%
Minibatch loss at step 18350: 0.212272
Minibatch accuracy: 93.0%
Validation accuracy: 92.2%
Minibatch loss at step 18400: 0.303430
Minibatch accuracy: 93.8%
Validation accuracy: 92.2%
Minibatch loss at step 18450: 0.273800
Minibatch accuracy: 92.2%
Validation accuracy: 92.2%
Minibatch loss at step 18500: 0.234247
Minibatch accuracy: 94.5%
Validation accu

Validation accuracy: 92.3%
Minibatch loss at step 22500: 0.196950
Minibatch accuracy: 93.0%
Validation accuracy: 92.5%
Minibatch loss at step 22550: 0.175766
Minibatch accuracy: 93.0%
Validation accuracy: 92.2%
Minibatch loss at step 22600: 0.208331
Minibatch accuracy: 93.0%
Validation accuracy: 92.3%
Minibatch loss at step 22650: 0.191018
Minibatch accuracy: 93.8%
Validation accuracy: 92.4%
Minibatch loss at step 22700: 0.326838
Minibatch accuracy: 89.8%
Validation accuracy: 92.6%
Minibatch loss at step 22750: 0.286814
Minibatch accuracy: 93.0%
Validation accuracy: 92.2%
Minibatch loss at step 22800: 0.208007
Minibatch accuracy: 93.8%
Validation accuracy: 92.5%
Minibatch loss at step 22850: 0.133358
Minibatch accuracy: 96.9%
Validation accuracy: 92.5%
Minibatch loss at step 22900: 0.312159
Minibatch accuracy: 89.8%
Validation accuracy: 92.4%
Minibatch loss at step 22950: 0.294632
Minibatch accuracy: 93.0%
Validation accuracy: 92.3%
Minibatch loss at step 23000: 0.303841
Minibatch accu

Minibatch loss at step 26950: 0.153930
Minibatch accuracy: 94.5%
Validation accuracy: 92.5%
Minibatch loss at step 27000: 0.283686
Minibatch accuracy: 89.1%
Validation accuracy: 92.5%
Minibatch loss at step 27050: 0.194960
Minibatch accuracy: 92.2%
Validation accuracy: 92.5%
Minibatch loss at step 27100: 0.217808
Minibatch accuracy: 94.5%
Validation accuracy: 92.5%
Minibatch loss at step 27150: 0.180563
Minibatch accuracy: 95.3%
Validation accuracy: 92.4%
Minibatch loss at step 27200: 0.139480
Minibatch accuracy: 96.9%
Validation accuracy: 92.5%
Minibatch loss at step 27250: 0.159860
Minibatch accuracy: 93.8%
Validation accuracy: 92.4%
Minibatch loss at step 27300: 0.246691
Minibatch accuracy: 92.2%
Validation accuracy: 92.3%
Minibatch loss at step 27350: 0.203478
Minibatch accuracy: 93.0%
Validation accuracy: 92.4%
Minibatch loss at step 27400: 0.078169
Minibatch accuracy: 98.4%
Validation accuracy: 92.3%
Minibatch loss at step 27450: 0.234958
Minibatch accuracy: 92.2%
Validation accu

Validation accuracy: 92.6%
Minibatch loss at step 31450: 0.173392
Minibatch accuracy: 93.0%
Validation accuracy: 92.7%
Minibatch loss at step 31500: 0.200130
Minibatch accuracy: 93.0%
Validation accuracy: 92.5%
Minibatch loss at step 31550: 0.110108
Minibatch accuracy: 97.7%
Validation accuracy: 92.6%
Minibatch loss at step 31600: 0.269600
Minibatch accuracy: 90.6%
Validation accuracy: 92.7%
Minibatch loss at step 31650: 0.115092
Minibatch accuracy: 95.3%
Validation accuracy: 92.6%
Minibatch loss at step 31700: 0.164203
Minibatch accuracy: 94.5%
Validation accuracy: 92.5%
Minibatch loss at step 31750: 0.301933
Minibatch accuracy: 89.1%
Validation accuracy: 92.6%
Minibatch loss at step 31800: 0.200041
Minibatch accuracy: 93.0%
Validation accuracy: 92.7%
Minibatch loss at step 31850: 0.276941
Minibatch accuracy: 91.4%
Validation accuracy: 92.4%
Minibatch loss at step 31900: 0.146809
Minibatch accuracy: 95.3%
Validation accuracy: 92.5%
Minibatch loss at step 31950: 0.192263
Minibatch accu

Minibatch loss at step 35900: 0.138062
Minibatch accuracy: 95.3%
Validation accuracy: 92.6%
Minibatch loss at step 35950: 0.292838
Minibatch accuracy: 89.8%
Validation accuracy: 92.5%
Minibatch loss at step 36000: 0.120911
Minibatch accuracy: 95.3%
Validation accuracy: 92.7%
Minibatch loss at step 36050: 0.217764
Minibatch accuracy: 93.0%
Validation accuracy: 92.7%
Minibatch loss at step 36100: 0.219365
Minibatch accuracy: 93.8%
Validation accuracy: 92.6%
Minibatch loss at step 36150: 0.194533
Minibatch accuracy: 91.4%
Validation accuracy: 92.7%
Minibatch loss at step 36200: 0.236799
Minibatch accuracy: 91.4%
Validation accuracy: 92.7%
Minibatch loss at step 36250: 0.314890
Minibatch accuracy: 89.8%
Validation accuracy: 92.7%
Minibatch loss at step 36300: 0.281711
Minibatch accuracy: 87.5%
Validation accuracy: 92.6%
Minibatch loss at step 36350: 0.190296
Minibatch accuracy: 93.8%
Validation accuracy: 92.5%
Minibatch loss at step 36400: 0.290595
Minibatch accuracy: 89.1%
Validation accu

Validation accuracy: 92.6%
Minibatch loss at step 40400: 0.205588
Minibatch accuracy: 93.0%
Validation accuracy: 92.6%
Minibatch loss at step 40450: 0.088024
Minibatch accuracy: 97.7%
Validation accuracy: 92.6%
Minibatch loss at step 40500: 0.111400
Minibatch accuracy: 96.1%
Validation accuracy: 92.7%
Minibatch loss at step 40550: 0.296859
Minibatch accuracy: 94.5%
Validation accuracy: 92.8%
Minibatch loss at step 40600: 0.155190
Minibatch accuracy: 94.5%
Validation accuracy: 92.7%
Minibatch loss at step 40650: 0.178656
Minibatch accuracy: 93.0%
Validation accuracy: 92.7%
Minibatch loss at step 40700: 0.312788
Minibatch accuracy: 89.8%
Validation accuracy: 92.8%
Minibatch loss at step 40750: 0.141718
Minibatch accuracy: 95.3%
Validation accuracy: 92.6%
Minibatch loss at step 40800: 0.183855
Minibatch accuracy: 93.0%
Validation accuracy: 92.6%
Minibatch loss at step 40850: 0.111801
Minibatch accuracy: 94.5%
Validation accuracy: 92.8%
Minibatch loss at step 40900: 0.203010
Minibatch accu

Minibatch loss at step 44850: 0.165202
Minibatch accuracy: 93.8%
Validation accuracy: 92.6%
Minibatch loss at step 44900: 0.165247
Minibatch accuracy: 93.8%
Validation accuracy: 92.8%
Minibatch loss at step 44950: 0.134708
Minibatch accuracy: 94.5%
Validation accuracy: 92.8%
Minibatch loss at step 45000: 0.170675
Minibatch accuracy: 93.0%
Validation accuracy: 92.7%
Minibatch loss at step 45050: 0.126419
Minibatch accuracy: 95.3%
Validation accuracy: 92.8%
Minibatch loss at step 45100: 0.192821
Minibatch accuracy: 93.0%
Validation accuracy: 92.8%
Minibatch loss at step 45150: 0.280499
Minibatch accuracy: 91.4%
Validation accuracy: 92.9%
Minibatch loss at step 45200: 0.123194
Minibatch accuracy: 93.8%
Validation accuracy: 92.8%
Minibatch loss at step 45250: 0.156808
Minibatch accuracy: 94.5%
Validation accuracy: 92.8%
Minibatch loss at step 45300: 0.095323
Minibatch accuracy: 96.9%
Validation accuracy: 92.8%
Minibatch loss at step 45350: 0.279946
Minibatch accuracy: 89.1%
Validation accu

Validation accuracy: 92.8%
Minibatch loss at step 49350: 0.151352
Minibatch accuracy: 95.3%
Validation accuracy: 92.8%
Minibatch loss at step 49400: 0.225111
Minibatch accuracy: 90.6%
Validation accuracy: 92.6%
Minibatch loss at step 49450: 0.197773
Minibatch accuracy: 93.8%
Validation accuracy: 92.7%
Minibatch loss at step 49500: 0.223031
Minibatch accuracy: 93.0%
Validation accuracy: 92.7%
Minibatch loss at step 49550: 0.153415
Minibatch accuracy: 94.5%
Validation accuracy: 92.7%
Minibatch loss at step 49600: 0.155731
Minibatch accuracy: 93.8%
Validation accuracy: 92.7%
Minibatch loss at step 49650: 0.145436
Minibatch accuracy: 96.1%
Validation accuracy: 92.5%
Minibatch loss at step 49700: 0.328649
Minibatch accuracy: 90.6%
Validation accuracy: 92.6%
Minibatch loss at step 49750: 0.224467
Minibatch accuracy: 93.0%
Validation accuracy: 92.6%
Minibatch loss at step 49800: 0.140393
Minibatch accuracy: 93.8%
Validation accuracy: 92.7%
Minibatch loss at step 49850: 0.211296
Minibatch accu

Minibatch loss at step 53800: 0.083188
Minibatch accuracy: 96.1%
Validation accuracy: 92.5%
Minibatch loss at step 53850: 0.128103
Minibatch accuracy: 95.3%
Validation accuracy: 92.6%
Minibatch loss at step 53900: 0.230141
Minibatch accuracy: 91.4%
Validation accuracy: 92.7%
Minibatch loss at step 53950: 0.160435
Minibatch accuracy: 93.8%
Validation accuracy: 92.7%
Minibatch loss at step 54000: 0.110523
Minibatch accuracy: 94.5%
Validation accuracy: 92.7%
Minibatch loss at step 54050: 0.239990
Minibatch accuracy: 89.8%
Validation accuracy: 92.7%
Minibatch loss at step 54100: 0.227589
Minibatch accuracy: 89.8%
Validation accuracy: 92.8%
Minibatch loss at step 54150: 0.117361
Minibatch accuracy: 96.9%
Validation accuracy: 92.6%
Minibatch loss at step 54200: 0.078644
Minibatch accuracy: 97.7%
Validation accuracy: 92.7%
Minibatch loss at step 54250: 0.191073
Minibatch accuracy: 94.5%
Validation accuracy: 92.5%
Minibatch loss at step 54300: 0.123342
Minibatch accuracy: 95.3%
Validation accu

Validation accuracy: 92.9%
Minibatch loss at step 58300: 0.149688
Minibatch accuracy: 93.0%
Validation accuracy: 92.9%
Minibatch loss at step 58350: 0.132355
Minibatch accuracy: 95.3%
Validation accuracy: 92.9%
Minibatch loss at step 58400: 0.164633
Minibatch accuracy: 94.5%
Validation accuracy: 92.9%
Minibatch loss at step 58450: 0.181325
Minibatch accuracy: 93.8%
Validation accuracy: 92.8%
Minibatch loss at step 58500: 0.109534
Minibatch accuracy: 95.3%
Validation accuracy: 92.8%
Minibatch loss at step 58550: 0.202870
Minibatch accuracy: 92.2%
Validation accuracy: 92.7%
Minibatch loss at step 58600: 0.163798
Minibatch accuracy: 95.3%
Validation accuracy: 92.8%
Minibatch loss at step 58650: 0.206525
Minibatch accuracy: 91.4%
Validation accuracy: 92.9%
Minibatch loss at step 58700: 0.126759
Minibatch accuracy: 95.3%
Validation accuracy: 92.7%
Minibatch loss at step 58750: 0.151828
Minibatch accuracy: 95.3%
Validation accuracy: 92.8%
Minibatch loss at step 58800: 0.125261
Minibatch accu

Minibatch loss at step 62750: 0.132738
Minibatch accuracy: 96.1%
Validation accuracy: 92.9%
Minibatch loss at step 62800: 0.213335
Minibatch accuracy: 92.2%
Validation accuracy: 92.8%
Minibatch loss at step 62850: 0.196120
Minibatch accuracy: 93.0%
Validation accuracy: 92.8%
Minibatch loss at step 62900: 0.099557
Minibatch accuracy: 96.9%
Validation accuracy: 92.7%
Minibatch loss at step 62950: 0.127146
Minibatch accuracy: 95.3%
Validation accuracy: 92.6%
Minibatch loss at step 63000: 0.155686
Minibatch accuracy: 93.8%
Validation accuracy: 92.8%
Minibatch loss at step 63050: 0.045634
Minibatch accuracy: 98.4%
Validation accuracy: 92.7%
Minibatch loss at step 63100: 0.105908
Minibatch accuracy: 96.9%
Validation accuracy: 92.8%
Minibatch loss at step 63150: 0.078080
Minibatch accuracy: 96.9%
Validation accuracy: 92.7%
Minibatch loss at step 63200: 0.074025
Minibatch accuracy: 98.4%
Validation accuracy: 92.8%
Minibatch loss at step 63250: 0.147393
Minibatch accuracy: 95.3%
Validation accu

Validation accuracy: 92.8%
Minibatch loss at step 67250: 0.110577
Minibatch accuracy: 95.3%
Validation accuracy: 92.9%
Minibatch loss at step 67300: 0.171277
Minibatch accuracy: 93.8%
Validation accuracy: 93.0%
Minibatch loss at step 67350: 0.155361
Minibatch accuracy: 95.3%
Validation accuracy: 93.0%
Minibatch loss at step 67400: 0.112892
Minibatch accuracy: 95.3%
Validation accuracy: 93.0%
Minibatch loss at step 67450: 0.135634
Minibatch accuracy: 95.3%
Validation accuracy: 92.9%
Minibatch loss at step 67500: 0.164381
Minibatch accuracy: 93.0%
Validation accuracy: 92.9%
Minibatch loss at step 67550: 0.103869
Minibatch accuracy: 96.9%
Validation accuracy: 92.8%
Minibatch loss at step 67600: 0.116070
Minibatch accuracy: 96.1%
Validation accuracy: 92.8%
Minibatch loss at step 67650: 0.077598
Minibatch accuracy: 96.9%
Validation accuracy: 92.9%
Minibatch loss at step 67700: 0.158966
Minibatch accuracy: 94.5%
Validation accuracy: 92.8%
Minibatch loss at step 67750: 0.229288
Minibatch accu

Minibatch loss at step 71700: 0.188715
Minibatch accuracy: 91.4%
Validation accuracy: 92.8%
Minibatch loss at step 71750: 0.067279
Minibatch accuracy: 96.9%
Validation accuracy: 92.8%
Minibatch loss at step 71800: 0.176419
Minibatch accuracy: 94.5%
Validation accuracy: 92.8%
Minibatch loss at step 71850: 0.089341
Minibatch accuracy: 96.9%
Validation accuracy: 92.9%
Minibatch loss at step 71900: 0.078422
Minibatch accuracy: 96.9%
Validation accuracy: 92.8%
Minibatch loss at step 71950: 0.205587
Minibatch accuracy: 92.2%
Validation accuracy: 92.8%
Minibatch loss at step 72000: 0.157131
Minibatch accuracy: 93.8%
Validation accuracy: 92.7%
Minibatch loss at step 72050: 0.104985
Minibatch accuracy: 96.1%
Validation accuracy: 92.7%
Minibatch loss at step 72100: 0.162610
Minibatch accuracy: 93.8%
Validation accuracy: 92.7%
Minibatch loss at step 72150: 0.117346
Minibatch accuracy: 96.1%
Validation accuracy: 92.8%
Minibatch loss at step 72200: 0.116834
Minibatch accuracy: 95.3%
Validation accu

Validation accuracy: 92.8%
Minibatch loss at step 76200: 0.165654
Minibatch accuracy: 93.8%
Validation accuracy: 92.8%
Minibatch loss at step 76250: 0.095496
Minibatch accuracy: 97.7%
Validation accuracy: 92.8%
Minibatch loss at step 76300: 0.169292
Minibatch accuracy: 93.8%
Validation accuracy: 92.8%
Minibatch loss at step 76350: 0.064301
Minibatch accuracy: 98.4%
Validation accuracy: 92.8%
Minibatch loss at step 76400: 0.169356
Minibatch accuracy: 93.8%
Validation accuracy: 92.7%
Minibatch loss at step 76450: 0.143357
Minibatch accuracy: 93.8%
Validation accuracy: 92.8%
Minibatch loss at step 76500: 0.172982
Minibatch accuracy: 94.5%
Validation accuracy: 92.9%
Minibatch loss at step 76550: 0.088792
Minibatch accuracy: 96.1%
Validation accuracy: 92.8%
Minibatch loss at step 76600: 0.120180
Minibatch accuracy: 94.5%
Validation accuracy: 92.8%
Minibatch loss at step 76650: 0.122199
Minibatch accuracy: 95.3%
Validation accuracy: 92.8%
Minibatch loss at step 76700: 0.111902
Minibatch accu

Minibatch loss at step 80650: 0.115993
Minibatch accuracy: 94.5%
Validation accuracy: 92.8%
Minibatch loss at step 80700: 0.104090
Minibatch accuracy: 96.1%
Validation accuracy: 92.7%
Minibatch loss at step 80750: 0.060808
Minibatch accuracy: 98.4%
Validation accuracy: 92.8%
Minibatch loss at step 80800: 0.106710
Minibatch accuracy: 94.5%
Validation accuracy: 92.8%
Minibatch loss at step 80850: 0.068746
Minibatch accuracy: 98.4%
Validation accuracy: 92.7%
Minibatch loss at step 80900: 0.132110
Minibatch accuracy: 93.8%
Validation accuracy: 92.9%
Minibatch loss at step 80950: 0.130903
Minibatch accuracy: 95.3%
Validation accuracy: 92.8%
Minibatch loss at step 81000: 0.123129
Minibatch accuracy: 95.3%
Validation accuracy: 92.8%
Minibatch loss at step 81050: 0.193334
Minibatch accuracy: 90.6%
Validation accuracy: 92.9%
Minibatch loss at step 81100: 0.160971
Minibatch accuracy: 93.0%
Validation accuracy: 92.7%
Minibatch loss at step 81150: 0.064900
Minibatch accuracy: 96.9%
Validation accu

Validation accuracy: 92.8%
Minibatch loss at step 85150: 0.054131
Minibatch accuracy: 97.7%
Validation accuracy: 92.9%
Minibatch loss at step 85200: 0.161934
Minibatch accuracy: 92.2%
Validation accuracy: 92.8%
Minibatch loss at step 85250: 0.100951
Minibatch accuracy: 96.1%
Validation accuracy: 93.0%
Minibatch loss at step 85300: 0.123291
Minibatch accuracy: 93.8%
Validation accuracy: 93.0%
Minibatch loss at step 85350: 0.114481
Minibatch accuracy: 96.9%
Validation accuracy: 92.8%
Minibatch loss at step 85400: 0.069422
Minibatch accuracy: 96.1%
Validation accuracy: 93.0%
Minibatch loss at step 85450: 0.123513
Minibatch accuracy: 95.3%
Validation accuracy: 92.9%
Minibatch loss at step 85500: 0.106405
Minibatch accuracy: 95.3%
Validation accuracy: 92.9%
Minibatch loss at step 85550: 0.205912
Minibatch accuracy: 93.0%
Validation accuracy: 92.8%
Minibatch loss at step 85600: 0.207738
Minibatch accuracy: 92.2%
Validation accuracy: 92.7%
Minibatch loss at step 85650: 0.084834
Minibatch accu

Minibatch loss at step 89600: 0.108794
Minibatch accuracy: 95.3%
Validation accuracy: 92.9%
Minibatch loss at step 89650: 0.070282
Minibatch accuracy: 97.7%
Validation accuracy: 93.0%
Minibatch loss at step 89700: 0.105585
Minibatch accuracy: 95.3%
Validation accuracy: 92.8%
Minibatch loss at step 89750: 0.113896
Minibatch accuracy: 95.3%
Validation accuracy: 92.7%
Minibatch loss at step 89800: 0.107058
Minibatch accuracy: 95.3%
Validation accuracy: 92.9%
Minibatch loss at step 89850: 0.164451
Minibatch accuracy: 91.4%
Validation accuracy: 92.7%
Minibatch loss at step 89900: 0.104011
Minibatch accuracy: 95.3%
Validation accuracy: 92.8%
Minibatch loss at step 89950: 0.135589
Minibatch accuracy: 96.1%
Validation accuracy: 92.8%
Minibatch loss at step 90000: 0.150823
Minibatch accuracy: 93.8%
Validation accuracy: 92.8%
Minibatch loss at step 90050: 0.109147
Minibatch accuracy: 96.9%
Validation accuracy: 92.7%
Minibatch loss at step 90100: 0.103042
Minibatch accuracy: 96.1%
Validation accu

Validation accuracy: 93.1%
Minibatch loss at step 94100: 0.101958
Minibatch accuracy: 95.3%
Validation accuracy: 93.0%
Minibatch loss at step 94150: 0.046605
Minibatch accuracy: 98.4%
Validation accuracy: 93.0%
Minibatch loss at step 94200: 0.081705
Minibatch accuracy: 96.1%
Validation accuracy: 93.0%
Minibatch loss at step 94250: 0.085059
Minibatch accuracy: 96.9%
Validation accuracy: 93.0%
Minibatch loss at step 94300: 0.129862
Minibatch accuracy: 95.3%
Validation accuracy: 93.0%
Minibatch loss at step 94350: 0.142478
Minibatch accuracy: 93.8%
Validation accuracy: 92.9%
Minibatch loss at step 94400: 0.076500
Minibatch accuracy: 96.9%
Validation accuracy: 93.0%
Minibatch loss at step 94450: 0.145833
Minibatch accuracy: 94.5%
Validation accuracy: 92.9%
Minibatch loss at step 94500: 0.100596
Minibatch accuracy: 96.1%
Validation accuracy: 92.9%
Minibatch loss at step 94550: 0.079857
Minibatch accuracy: 96.1%
Validation accuracy: 92.9%
Minibatch loss at step 94600: 0.107033
Minibatch accu

Minibatch loss at step 98550: 0.258133
Minibatch accuracy: 90.6%
Validation accuracy: 92.8%
Minibatch loss at step 98600: 0.174732
Minibatch accuracy: 93.8%
Validation accuracy: 92.7%
Minibatch loss at step 98650: 0.120373
Minibatch accuracy: 94.5%
Validation accuracy: 92.8%
Minibatch loss at step 98700: 0.142688
Minibatch accuracy: 94.5%
Validation accuracy: 92.8%
Minibatch loss at step 98750: 0.167755
Minibatch accuracy: 93.8%
Validation accuracy: 92.8%
Minibatch loss at step 98800: 0.136343
Minibatch accuracy: 93.0%
Validation accuracy: 93.0%
Minibatch loss at step 98850: 0.105079
Minibatch accuracy: 96.1%
Validation accuracy: 92.9%
Minibatch loss at step 98900: 0.136148
Minibatch accuracy: 95.3%
Validation accuracy: 92.7%
Minibatch loss at step 98950: 0.076618
Minibatch accuracy: 96.9%
Validation accuracy: 92.7%
Minibatch loss at step 99000: 0.134682
Minibatch accuracy: 94.5%
Validation accuracy: 92.8%
Minibatch loss at step 99050: 0.127330
Minibatch accuracy: 97.7%
Validation accu

### a post on accuracy intervals and significance from udacity forum
```
With my physics data analysis background, I'd claim an algo has been improved only if the change in accuracy is beyond 3 standard deviations (3 sigma). If it is smaller than 2 sigma, there is at least a 5% chance that the better accuracy is just a fluke.
To be specific, for our assignment we have N=10k images in the test_dataset.
Let's pick accuracy(test_dataset)=0.94 (using fractions instead of % is better here).

acc=0.94 means there were B=(1-acc)*N=600 misclassified images.

Statistically speaking, 1 std error for B is sqrt(B)=24.5, hence 3 sigma = 73 images.
The 3sigma error for acc=0.940 is 73/N= 0.007 .

This means any other algorithm using 10k test images and yielding accuracy between 93.3% and 94.7% is statistically consistent with the base algo delivering accuracy of 94.0%.

To claim an algorithm is better, one needs to either:
* keep the 10k test images and improve accuracy beyond 94.7%, or
* use a larger set of test images to reduce the statistical error on the accuracy, or
* for the same 10k test images, confirm that exactly the same images were correctly identified and that a few more correct identifications were made.
```

---
Problem 2
---------

Try to get the best performance you can using a convolutional net. Look for example at the classic [LeNet5](http://yann.lecun.com/exdb/lenet/) architecture, adding Dropout, and/or adding learning rate decay.

---

### Code of endri.deliu from udacity forum

In [None]:
# Hyperparameters of the five-convolution network built below.
batch_size = 16
patch_size = 3
depth = 16
num_hidden = 705
num_hidden_last = 205

graph = tf.Graph()

with graph.as_default():

  # Input data.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  # NOTE(review): the reformat cell at the top of this notebook stores its
  # results as valid_dataset_c / test_dataset_c; confirm that valid_dataset and
  # test_dataset hold the 4-D float32 arrays by the time this cell runs.
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)

  # Variables: five convolution kernels that widen the channel count from
  # num_channels up to depth * 16, each paired with a zero-initialised bias.
  layerconv1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, depth], stddev=0.1))
  layerconv1_biases = tf.Variable(tf.zeros([depth]))
  layerconv2_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth, depth * 2], stddev=0.1))
  layerconv2_biases = tf.Variable(tf.zeros([depth * 2]))

  layerconv3_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth * 2, depth * 4], stddev=0.03))
  layerconv3_biases = tf.Variable(tf.zeros([depth * 4]))

  layerconv4_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth * 4, depth * 4], stddev=0.03))
  layerconv4_biases = tf.Variable(tf.zeros([depth * 4]))

  layerconv5_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth * 4, depth * 16], stddev=0.03))
  layerconv5_biases = tf.Variable(tf.zeros([depth * 16]))

  # model() applies four stride-2 'SAME' max-pools, each of which halves the
  # spatial size rounding up (28 -> 14 -> 7 -> 4 -> 2 for image_size = 28), and
  # its last convolution emits depth * 16 channels. Integer arithmetic keeps
  # the weight shape an int on Python 3 as well.
  pooled_size = image_size
  for _ in range(4):
    pooled_size = (pooled_size + 1) // 2
  layer3_weights = tf.Variable(tf.truncated_normal(
      [pooled_size * pooled_size * (depth * 16), num_hidden], stddev=0.03))
  layer3_biases = tf.Variable(tf.zeros([num_hidden]))
  layer4_weights = tf.Variable(tf.truncated_normal(
      [num_hidden, num_hidden_last], stddev=0.0532))
  layer4_biases = tf.Variable(tf.zeros([num_hidden_last]))

  layer5_weights = tf.Variable(tf.truncated_normal(
      [num_hidden_last, num_labels], stddev=0.1))
  layer5_biases = tf.Variable(tf.zeros([num_labels]))

  # Model.
  def model(data, use_dropout=False):
    """Build the forward pass for `data` and return the unnormalised logits.

    Five stride-1 'SAME' convolutions with ELU activations feed two ELU dense
    layers and a linear output layer. A 2x2 stride-2 max-pool follows
    convolutions 1, 3, 4 and 5; convolution 2 is not pooled.

    Args:
      data: input tensor accepted by tf.nn.conv2d with layerconv1_weights.
        Its static shape must be fully defined, because the flatten step
        reads it through get_shape().
      use_dropout: when True, apply tf.nn.dropout(..., 0.75) after each of the
        two dense hidden layers.

    Returns:
      Tensor of logits with num_labels columns.
    """
    conv = tf.nn.conv2d(data, layerconv1_weights, [1, 1, 1, 1], padding='SAME')
    hidden = tf.nn.elu(conv + layerconv1_biases)
    pool = tf.nn.max_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

    # The second convolution feeds the third directly, with no pooling.
    conv = tf.nn.conv2d(pool, layerconv2_weights, [1, 1, 1, 1], padding='SAME')
    hidden = tf.nn.elu(conv + layerconv2_biases)

    conv = tf.nn.conv2d(hidden, layerconv3_weights, [1, 1, 1, 1], padding='SAME')
    hidden = tf.nn.elu(conv + layerconv3_biases)
    pool = tf.nn.max_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

    conv = tf.nn.conv2d(pool, layerconv4_weights, [1, 1, 1, 1], padding='SAME')
    hidden = tf.nn.elu(conv + layerconv4_biases)
    pool = tf.nn.max_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

    conv = tf.nn.conv2d(pool, layerconv5_weights, [1, 1, 1, 1], padding='SAME')
    hidden = tf.nn.elu(conv + layerconv5_biases)
    pool = tf.nn.max_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

    # Flatten everything except the batch dimension for the dense layers.
    shape = pool.get_shape().as_list()
    print(shape)  # printed once per model() call, while the graph is built
    reshape = tf.reshape(pool, [shape[0], shape[1] * shape[2] * shape[3]])
    hidden = tf.nn.elu(tf.matmul(reshape, layer3_weights) + layer3_biases)

    if use_dropout:
      hidden = tf.nn.dropout(hidden, 0.75)

    nn_hidden_layer = tf.matmul(hidden, layer4_weights) + layer4_biases
    hidden = tf.nn.elu(nn_hidden_layer)

    if use_dropout:
      hidden = tf.nn.dropout(hidden, 0.75)

    return tf.matmul(hidden, layer5_weights) + layer5_biases

  # Training computation (dropout enabled only on the training path).
  logits = model(tf_train_dataset, True)
  # Keyword arguments: TensorFlow >= 1.0 rejects the positional form, while the
  # keyword form is accepted by older releases as well.
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(
      logits=logits, labels=tf_train_labels))

  global_step = tf.Variable(0)  # count the number of steps taken.
  # Staircase decay: multiply the rate by 0.86 every 3000 steps, from 0.1.
  learning_rate = tf.train.exponential_decay(0.1, global_step, 3000, 0.86, staircase=True)

  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))


# Imported here because no import of `time` is visible in the notebook's
# import cell; it is used for the wall-clock stamp printed with each report.
import time

num_steps = 5001
# original 95001

# Train on consecutive minibatches and report progress every 500 steps.
with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(num_steps):
    # Slide a batch_size window over the training set, wrapping around so the
    # slice never runs past the end.
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    # NOTE(review): these slices index train_dataset with four axes and
    # train_labels with two, i.e. they expect the reformatted arrays; the
    # reformat cell at the top of this notebook names those train_dataset_c /
    # train_labels_c. Confirm which arrays are bound here.
    batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, loss_value, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if step % 500 == 0:
      print("Minibatch loss at step", step, ":", loss_value)
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
      print("Validation accuracy: %.1f%%" % accuracy(
        valid_prediction.eval(), valid_labels))
      print(time.ctime())
  print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))


### Code of cobi.bento from udacity forum

In [None]:
# Hyperparameters of a LeNet-5-style network: three convolution stages
# (named c1, c3, c5) followed by one dense hidden layer.
batch_size = 16
num_channels = 1

c1_depth = 6
c1_ker_sz = 5
c3_depth = 16
c3_ker_sz = 6
c5_depth = 120
c5_ker_sz = 6

num_hidden = 84

graph = tf.Graph()

with graph.as_default():

    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    # NOTE(review): the reformat cell at the top of this notebook stores its
    # results as valid_dataset_c / test_dataset_c; confirm that valid_dataset
    # and test_dataset hold the 4-D float32 arrays by the time this cell runs.
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Variables. The first convolution's biases start at zero; every other
    # bias starts at 1.0.
    c1_weights = tf.Variable(tf.truncated_normal(
        [c1_ker_sz, c1_ker_sz, num_channels, c1_depth], stddev=0.1))
    c1_biases = tf.Variable(tf.zeros([c1_depth]))
    c3_weights = tf.Variable(tf.truncated_normal(
        [c3_ker_sz, c3_ker_sz, c1_depth, c3_depth], stddev=0.1))
    c3_biases = tf.Variable(tf.constant(1.0, shape=[c3_depth]))
    c5_weights = tf.Variable(tf.truncated_normal(
        [c5_ker_sz, c5_ker_sz, c3_depth, c5_depth], stddev=0.1))
    c5_biases = tf.Variable(tf.constant(1.0, shape=[c5_depth]))
    # Spatial size after the three stride-2 'SAME' max-pools in model(): each
    # one halves the side length, rounding up.
    c5_conv_dim = (((((image_size+1)//2) + 1) // 2) + 1 )//2
    fc_weights = tf.Variable(tf.truncated_normal(
        [c5_conv_dim * c5_conv_dim * c5_depth, num_hidden], stddev=0.1))
    fc_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
    out_weights = tf.Variable(tf.truncated_normal(
        [num_hidden, num_labels], stddev=0.1))
    out_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

    # Model.
    def model(data):
        """Build the forward pass for `data` and return the unnormalised logits.

        Three conv -> ReLU -> 2x2 max-pool stages feed a ReLU dense layer and
        a linear output layer. Intermediate static shapes are printed each
        time the function is called, i.e. while the graph is being built.

        Args:
            data: input tensor accepted by tf.nn.conv2d with c1_weights. Its
                static shape must be fully defined, because the flatten step
                reads it through get_shape().

        Returns:
            Tensor of logits with num_labels columns.
        """
        print(data.get_shape().as_list())
        conv = tf.nn.conv2d(data, c1_weights, [1, 1, 1, 1], padding='SAME')
        hidden = tf.nn.relu(conv + c1_biases)
        print(conv.get_shape().as_list())
        pooled = tf.nn.max_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
        print(pooled.get_shape().as_list())

        conv = tf.nn.conv2d(pooled, c3_weights, [1, 1, 1, 1], padding='SAME')
        hidden = tf.nn.relu(conv + c3_biases)
        pooled = tf.nn.max_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
        shape = pooled.get_shape().as_list()
        print(shape)

        conv = tf.nn.conv2d(pooled, c5_weights, [1, 1, 1, 1], padding='SAME')
        hidden = tf.nn.relu(conv + c5_biases)
        pooled = tf.nn.max_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
        shape = pooled.get_shape().as_list()
        print(shape)

        # Flatten everything except the batch dimension for the dense layers.
        reshape = tf.reshape(pooled, [shape[0], shape[1] * shape[2] * shape[3]])
        hidden = tf.nn.relu(tf.matmul(reshape, fc_weights) + fc_biases)
        return tf.matmul(hidden, out_weights) + out_biases

    # Training computation.
    logits = model(tf_train_dataset)
    # Keyword arguments: TensorFlow >= 1.0 rejects the positional form, while
    # the keyword form is accepted by older releases as well.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=tf_train_labels))

    # Optimizer.
    #optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
    optimizer = tf.train.AdagradOptimizer(0.05).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    test_prediction = tf.nn.softmax(model(tf_test_dataset))
