Deep Learning
=============

Assignment 4
------------

Previously in `2_fullyconnected.ipynb` and `3_regularization.ipynb`, we trained fully connected networks to classify [notMNIST](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html) characters.

The goal of this assignment is make the neural network convolutional.

In [6]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

In [7]:
data_root = '/Users/kmi_local/Documents/Work/projects/Udacity/tensorflow/tensorflow/examples/udacity/data' # Change me to store data elsewhere
pickle_file = data_root + '/notMNIST.pickle'

#pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
  save = pickle.load(f)
  train_dataset = save['train_dataset']
  train_labels_init = save['train_labels']
  valid_dataset = save['valid_dataset']
  valid_labels_init = save['valid_labels']
  test_dataset = save['test_dataset']
  test_labels_init = save['test_labels']
  del save  # hint to help gc free up memory
  print('Training set', train_dataset.shape, train_labels_init.shape)
  print('Validation set', valid_dataset.shape, valid_labels_init.shape)
  print('Test set', test_dataset.shape, test_labels_init.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


Reformat into a TensorFlow-friendly shape:
- convolutions need the image data formatted as a cube (width by height by #channels)
- labels as float 1-hot encodings.

In [8]:
image_size = 28
num_labels = 10
num_channels = 1 # grayscale

import numpy as np

def reformat(dataset, labels):
  dataset = dataset.reshape(
    (-1, image_size, image_size, num_channels)).astype(np.float32)
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels_init)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels_init)
test_dataset, test_labels = reformat(test_dataset, test_labels_init)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28, 1) (200000, 10)
Validation set (10000, 28, 28, 1) (10000, 10)
Test set (10000, 28, 28, 1) (10000, 10)


In [10]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

Let's build a small network with two convolutional layers, followed by one fully connected layer. Convolutional networks are more expensive computationally, so we'll limit its depth and number of fully connected nodes.

In [5]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64

graph = tf.Graph()

with graph.as_default():

  # Input data.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Variables.
  layer1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, depth], stddev=0.1))
  layer1_biases = tf.Variable(tf.zeros([depth]))
  layer2_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth, depth], stddev=0.1))
  layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
  layer3_weights = tf.Variable(tf.truncated_normal(
      [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
  layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
  layer4_weights = tf.Variable(tf.truncated_normal(
      [num_hidden, num_labels], stddev=0.1))
  layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
  
  # Model.
  def model(data):
    conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer1_biases)
    conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer2_biases)
    shape = hidden.get_shape().as_list()
    reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
    return tf.matmul(hidden, layer4_weights) + layer4_biases
  
  # Training computation.
  logits = model(tf_train_dataset)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
    
  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))

In [6]:
num_steps = 1001

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()  
  #tf.global_variables_initializer().run()
  print('Initialized')
  for step in range(num_steps):
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 50 == 0):
      print('Minibatch loss at step %d: %f' % (step, l))
      print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
      print('Validation accuracy: %.1f%%' % accuracy(
        valid_prediction.eval(), valid_labels))
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 2.823269
Minibatch accuracy: 12.5%
Validation accuracy: 10.5%
Minibatch loss at step 50: 2.061256
Minibatch accuracy: 12.5%
Validation accuracy: 42.1%
Minibatch loss at step 100: 1.154384
Minibatch accuracy: 68.8%
Validation accuracy: 66.1%
Minibatch loss at step 150: 0.380844
Minibatch accuracy: 87.5%
Validation accuracy: 76.1%
Minibatch loss at step 200: 1.027484
Minibatch accuracy: 75.0%
Validation accuracy: 76.0%
Minibatch loss at step 250: 1.167300
Minibatch accuracy: 68.8%
Validation accuracy: 77.7%
Minibatch loss at step 300: 0.358825
Minibatch accuracy: 87.5%
Validation accuracy: 79.2%
Minibatch loss at step 350: 0.258336
Minibatch accuracy: 93.8%
Validation accuracy: 77.8%
Minibatch loss at step 400: 0.329127
Minibatch accuracy: 100.0%
Validation accuracy: 80.7%
Minibatch loss at step 450: 0.672386
Minibatch accuracy: 87.5%
Validation accuracy: 79.1%
Minibatch loss at step 500: 0.644059
Minibatch accuracy: 87.5%
Validation accuracy: 80.7%


---
Problem 1
---------

The convolutional model above uses convolutions with stride 2 to reduce the dimensionality. Replace the strides by a max pooling operation (`nn.max_pool()`) of stride 2 and kernel size 2.

---

In [7]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64

graph = tf.Graph()

with graph.as_default():

  # Input data.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Variables.
  layer1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, depth], stddev=0.1))
  layer1_biases = tf.Variable(tf.zeros([depth]))
  layer2_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth, depth], stddev=0.1))
  layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
  layer3_weights = tf.Variable(tf.truncated_normal(
      [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
  layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
  layer4_weights = tf.Variable(tf.truncated_normal(
      [num_hidden, num_labels], stddev=0.1))
  layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
  
  # Model.
  def model(data):
        
    conv1 = tf.nn.conv2d(data, layer1_weights, [1, 1, 1, 1], padding='SAME')
    hidden1 = tf.nn.relu(conv1 + layer1_biases)
    pool1 = tf.nn.max_pool(hidden1, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding='SAME')
    
    conv2 = tf.nn.conv2d(pool1, layer2_weights, [1, 1, 1, 1], padding='SAME')
    hidden2 = tf.nn.relu(conv2 + layer2_biases)
    pool2 = tf.nn.max_pool(hidden2, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding='SAME')
    
    shape = pool2.get_shape().as_list()
    reshape = tf.reshape(pool2, [shape[0], shape[1] * shape[2] * shape[3]])
    hidden3 = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)

    #if train:
    #  hidden = tf.nn.dropout(hidden, 0.5)
    return tf.matmul(hidden3, layer4_weights) + layer4_biases
  
  # Training computation.
  logits = model(tf_train_dataset)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
    
  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))

In [8]:
num_steps = 1001

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()  
  #tf.global_variables_initializer().run()
  print('Initialized')
  for step in range(num_steps):
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 50 == 0):
      print('Minibatch loss at step %d: %f' % (step, l))
      print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
      print('Validation accuracy: %.1f%%' % accuracy(
        valid_prediction.eval(), valid_labels))
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 5.572154
Minibatch accuracy: 6.2%
Validation accuracy: 10.0%
Minibatch loss at step 50: 1.884740
Minibatch accuracy: 25.0%
Validation accuracy: 39.1%
Minibatch loss at step 100: 1.476767
Minibatch accuracy: 43.8%
Validation accuracy: 51.8%
Minibatch loss at step 150: 0.610631
Minibatch accuracy: 87.5%
Validation accuracy: 71.4%
Minibatch loss at step 200: 0.877909
Minibatch accuracy: 75.0%
Validation accuracy: 78.0%
Minibatch loss at step 250: 1.356253
Minibatch accuracy: 68.8%
Validation accuracy: 77.0%
Minibatch loss at step 300: 0.372730
Minibatch accuracy: 87.5%
Validation accuracy: 80.2%
Minibatch loss at step 350: 0.532799
Minibatch accuracy: 93.8%
Validation accuracy: 79.7%
Minibatch loss at step 400: 0.201533
Minibatch accuracy: 100.0%
Validation accuracy: 81.0%
Minibatch loss at step 450: 0.819259
Minibatch accuracy: 87.5%
Validation accuracy: 79.2%
Minibatch loss at step 500: 0.634108
Minibatch accuracy: 87.5%
Validation accuracy: 81.6%
M

---
Problem 2
---------

Try to get the best performance you can using a convolutional net. Look for example at the classic [LeNet5](http://yann.lecun.com/exdb/lenet/) architecture, adding Dropout, and/or adding learning rate decay.

---

Using https://github.com/sujaybabruwad/LeNet-in-Tensorflow/blob/master/LeNet-Lab.ipynb
In order to reformat the MNIST data into a shape that LeNet will accept, we pad the data with two rows of zeros on the top and bottom, and two columns of zeros on the left and right (28+2+2 = 32).

You do not need to modify this section.

In [9]:
import numpy as np

# Pad images with 0s
train_dataset_lenet = np.pad(train_dataset, ((0,0),(2,2),(2,2),(0,0)), 'constant')
valid_dataset_lenet = np.pad(valid_dataset, ((0,0),(2,2),(2,2),(0,0)), 'constant')
test_dataset_lenet = np.pad(test_dataset, ((0,0),(2,2),(2,2),(0,0)), 'constant')
    
print("Updated Image Shape: {}".format(train_dataset_lenet.shape))
print("Updated Image Shape: {}".format(valid_dataset_lenet.shape))
print("Updated Image Shape: {}".format(test_dataset_lenet.shape))



from sklearn.utils import shuffle

train_dataset_lenet, train_labels_lenet = shuffle(train_dataset_lenet, train_labels_init)
print("Label sizes")
print(train_labels_lenet.shape)
print(valid_labels_init.shape)
print(test_labels_init.shape)


Updated Image Shape: (200000, 32, 32, 1)
Updated Image Shape: (10000, 32, 32, 1)
Updated Image Shape: (10000, 32, 32, 1)
Label sizes
(200000,)
(10000,)
(10000,)



Setup TensorFlow

The EPOCH and BATCH_SIZE values affect the training speed and model accuracy.

You do not need to modify this section.


In [10]:
EPOCHS = 10
BATCH_SIZE = 128

Implement LeNet-5

Input

The LeNet architecture accepts a 32x32xC image as input, where C is the number of color channels. Since MNIST images are grayscale, C is 1 in this case.
Architecture

Layer 1: Convolutional. The output shape should be 28x28x6.

Activation. Your choice of activation function.

Pooling. The output shape should be 14x14x6.

Layer 2: Convolutional. The output shape should be 10x10x16.

Activation. Your choice of activation function.

Pooling. The output shape should be 5x5x16.

Flatten. Flatten the output shape of the final pooling layer such that it's 1D instead of 3D. The easiest way to do is by using tf.contrib.layers.flatten, which is already imported for you.

Layer 3: Fully Connected. This should have 120 outputs.

Activation. Your choice of activation function.

Layer 4: Fully Connected. This should have 84 outputs.

Activation. Your choice of activation function.

Layer 5: Fully Connected (Logits). This should have 10 outputs.
Output

Return the result of the 2nd fully connected layer.

In [11]:
from tensorflow.contrib.layers import flatten

def LeNet(x, keep_prob):    
    # Hyperparameters
    mu = 0
    sigma = 0.1
    layer_depth = {
        'layer_1' : 6,
        'layer_2' : 16,
        'layer_3' : 120,
        'layer_f1' : 84
    }

    
    # TODO: Layer 1: Convolutional. Input = 32x32x1. Output = 28x28x6.
    conv1_w = tf.Variable(tf.truncated_normal(shape = [5,5,1,6],mean = mu, stddev = sigma))
    conv1_b = tf.Variable(tf.zeros(6))
    conv1 = tf.nn.conv2d(x,conv1_w, strides = [1,1,1,1], padding = 'VALID') + conv1_b 
    # TODO: Activation.
    conv1 = tf.nn.relu(conv1)

    # TODO: Pooling. Input = 28x28x6. Output = 14x14x6.
    pool_1 = tf.nn.max_pool(conv1,ksize = [1,2,2,1], strides = [1,2,2,1], padding = 'VALID')
    
    # TODO: Layer 2: Convolutional. Output = 10x10x16.
    conv2_w = tf.Variable(tf.truncated_normal(shape = [5,5,6,16], mean = mu, stddev = sigma))
    conv2_b = tf.Variable(tf.zeros(16))
    conv2 = tf.nn.conv2d(pool_1, conv2_w, strides = [1,1,1,1], padding = 'VALID') + conv2_b
    # TODO: Activation.
    conv2 = tf.nn.relu(conv2)

    # TODO: Pooling. Input = 10x10x16. Output = 5x5x16.
    pool_2 = tf.nn.max_pool(conv2, ksize = [1,2,2,1], strides = [1,2,2,1], padding = 'VALID') 
    
    # TODO: Flatten. Input = 5x5x16. Output = 400.
    fc1 = flatten(pool_2)
    
    # TODO: Layer 3: Fully Connected. Input = 400. Output = 120.
    fc1_w = tf.Variable(tf.truncated_normal(shape = (400,120), mean = mu, stddev = sigma))
    fc1_b = tf.Variable(tf.zeros(120))
    fc1 = tf.matmul(fc1,fc1_w) + fc1_b
    
    # TODO: Activation.
    fc1 = tf.nn.relu(fc1)

    # TODO: Layer 4: Fully Connected. Input = 120. Output = 84.
    fc2_w = tf.Variable(tf.truncated_normal(shape = (120,84), mean = mu, stddev = sigma))
    fc2_b = tf.Variable(tf.zeros(84))
    fc2 = tf.matmul(fc1,fc2_w) + fc2_b
    # TODO: Activation.
    fc2 = tf.nn.relu(fc2)
    h_fc2_drop = tf.nn.dropout(fc2, keep_prob)
    
    # TODO: Layer 5: Fully Connected. Input = 84. Output = 10.
    fc3_w = tf.Variable(tf.truncated_normal(shape = (84,10), mean = mu , stddev = sigma))
    fc3_b = tf.Variable(tf.zeros(10))
    
    logits = tf.matmul(h_fc2_drop, fc3_w) + fc3_b
    return logits

In [12]:
x = tf.placeholder(tf.float32, (None, 32, 32, 1))
y = tf.placeholder(tf.int32, (None))
one_hot_y = tf.one_hot(y, 10)

In [13]:
rate = 0.001

logits = LeNet(x, 0.5)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, one_hot_y)
loss_operation = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate = rate)
training_operation = optimizer.minimize(loss_operation)

correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))
accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
saver = tf.train.Saver()

def evaluate(X_data, y_data):
    num_examples = len(X_data)
    total_accuracy = 0
    sess = tf.get_default_session()
    for offset in range(0, num_examples, BATCH_SIZE):
        batch_x, batch_y = X_data[offset:offset+BATCH_SIZE], y_data[offset:offset+BATCH_SIZE]
        accuracy = sess.run(accuracy_operation, feed_dict={x: batch_x, one_hot_y: batch_y})
        total_accuracy += (accuracy * len(batch_x))
    return total_accuracy / num_examples


Train the Model

Run the training data through the training pipeline to train the model.

Before each epoch, shuffle the training set.

After each epoch, measure the loss and accuracy of the validation set.

Save the model after training.

You do not need to modify this section.


In [17]:
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    num_examples = len(train_dataset_lenet)
    
    print("Training...")
    print()
    for i in range(EPOCHS):
        train_dataset_lenet, train_labels_lenet = shuffle(train_dataset_lenet, train_labels_lenet)
        for offset in range(0, num_examples, BATCH_SIZE):
            end = offset + BATCH_SIZE
            batch_x, batch_y = train_dataset_lenet[offset:end], train_labels_lenet[offset:end]
            sess.run(training_operation, feed_dict={x: batch_x, y: batch_y})
            
        #training_accuracy = evaluate(train_dataset_lenet, train_labels_lenet)
        validation_accuracy = evaluate(valid_dataset_lenet, valid_labels)
        print("EPOCH {} ...".format(i+1))
        #print("Training Accuracy = {:.3f}".format(training_accuracy))
        print("Validation Accuracy = {:.3f}".format(validation_accuracy))
        print()
        
    saver.save(sess, 'lenet')
    print("Model saved")

Training...

EPOCH 1 ...
Validation Accuracy = 0.882

EPOCH 2 ...
Validation Accuracy = 0.892

EPOCH 3 ...
Validation Accuracy = 0.901

EPOCH 4 ...
Validation Accuracy = 0.905

EPOCH 5 ...
Validation Accuracy = 0.907

EPOCH 6 ...
Validation Accuracy = 0.909

EPOCH 7 ...
Validation Accuracy = 0.912

EPOCH 8 ...
Validation Accuracy = 0.910

EPOCH 9 ...
Validation Accuracy = 0.912

EPOCH 10 ...
Validation Accuracy = 0.914

Model saved


In [18]:
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('.'))

    test_accuracy = evaluate(test_dataset_lenet, test_labels)
    print("Test Accuracy = {:.3f}".format(test_accuracy))


Test Accuracy = 0.965


pure lenet without dropout, etc. gives Test Accuracy = 0.965

# Problem 2: Lenet5 soluton adopted to the tensorflow deep learning class

prepare datasets so they have the same input size as in lenet5

In [11]:
import numpy as np

# Pad images with 0s
train_dataset_lenet = np.pad(train_dataset, ((0,0),(2,2),(2,2),(0,0)), 'constant')
valid_dataset_lenet = np.pad(valid_dataset, ((0,0),(2,2),(2,2),(0,0)), 'constant')
test_dataset_lenet = np.pad(test_dataset, ((0,0),(2,2),(2,2),(0,0)), 'constant')
    
print("Updated Image Shape: {}".format(train_dataset_lenet.shape))
print("Updated Image Shape: {}".format(valid_dataset_lenet.shape))
print("Updated Image Shape: {}".format(test_dataset_lenet.shape))


# Shuffle the training data
from sklearn.utils import shuffle
train_dataset_lenet, train_labels_lenet = shuffle(train_dataset_lenet, train_labels)

Updated Image Shape: (200000, 32, 32, 1)
Updated Image Shape: (10000, 32, 32, 1)
Updated Image Shape: (10000, 32, 32, 1)


In [32]:
from tensorflow.contrib.layers import flatten

batch_size = 128 #16
epochs = 10


num_channels = 1 # grayscale
image_size = 32
patch_size = 5
num_labels = 10


# Hyperparameters
mu = 0
sigma = 0.1
layer_depth = {
    'layer_1' : 6,
    'layer_2' : 16,
    'layer_3' : 120,
    'layer_f1' : 84
}

graph = tf.Graph()

with graph.as_default():

  # Input data.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset_lenet)
  tf_test_dataset = tf.constant(test_dataset_lenet)    
    
  # Variables.
  # Layer 1: Convolutional. Input = 32x32x1. Output = 28x28x6.
  layer1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, layer_depth['layer_1']], mean = mu, stddev=sigma))
  layer1_biases = tf.Variable(tf.zeros([layer_depth['layer_1']]))
  
  # Layer 2: Convolutional. Output = 10x10x16.
  layer2_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size,  layer_depth['layer_1'],  layer_depth['layer_2']], mean = mu, stddev = sigma))
  layer2_biases = tf.Variable(tf.constant(1.0, shape=[layer_depth['layer_2']]))
  
  # Layer 3: Fully Connected. Input = 400. Output = 120.
  layer3_weights = tf.Variable(tf.truncated_normal(
      shape = (patch_size*patch_size*layer_depth['layer_2'], layer_depth['layer_3']), mean = mu, stddev = sigma))
  layer3_biases = tf.Variable(tf.constant(1.0, shape=[layer_depth['layer_3']]))

  # Layer 4: Fully Connected. Input = 120. Output = 84.
  layer4_weights = tf.Variable(tf.truncated_normal(
      shape = (layer_depth['layer_3'], layer_depth['layer_f1']), mean = mu, stddev = sigma))
  layer4_biases = tf.Variable(tf.constant(1.0, shape=[layer_depth['layer_f1']]))

  # Layer 5: Fully Connected. Input = 84. Output = 10.
  layer5_weights = tf.Variable(tf.truncated_normal(shape = (layer_depth['layer_f1'],num_labels), mean = mu , stddev = sigma))
  layer5_biases = tf.Variable(tf.zeros(num_labels))

  
  # Model.
  def model(data, keep_prob):
    # Layer 1: Convolutional. Input = 32x32x1. Output = 28x28x6.    
    conv1 = tf.nn.conv2d(data, layer1_weights, strides = [1, 1, 1, 1], padding='VALID')
    # Activation
    hidden1 = tf.nn.relu(conv1 + layer1_biases)
    # Pooling. Input = 28x28x6. Output = 14x14x6.
    pool1 = tf.nn.max_pool(hidden1, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding='VALID')
    
    # Layer 2: Convolutional. Output = 10x10x16.
    conv2 = tf.nn.conv2d(pool1, layer2_weights, [1, 1, 1, 1], padding='VALID')
    # Activation
    hidden2 = tf.nn.relu(conv2 + layer2_biases)
    # Pooling. Input = 10x10x16. Output = 5x5x16.
    pool2 = tf.nn.max_pool(hidden2, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding='VALID')
    
    # Flatten. Input = 5x5x16. Output = 400.
    fc1 = flatten(pool2)
    
    # Layer 3: Fully Connected. Input = 400. Output = 120.
    fc1 = tf.matmul(fc1, layer3_weights)
    # Activation.
    fc1 = tf.nn.relu(fc1 + layer3_biases)
    
    # Layer 4: Fully Connected. Input = 120. Output = 84.
    fc2 = tf.matmul(fc1, layer4_weights)
    # Activation.
    fc2 = tf.nn.relu(fc2 + layer4_biases)

    # with dropout
    fc2_drop = tf.nn.dropout(fc2, keep_prob)
    logits = tf.matmul(fc2_drop, layer5_weights) + layer5_biases


    # Layer 5: Fully Connected. Input = 84. Output = 10.
    #without dropout
    #logits = tf.matmul(fc2, layer5_weights) + layer5_biases
    
    
    return logits
  
  # Training computation.
  logits = model(tf_train_dataset, 0.5)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))

  #global_step = tf.Variable(0)  # count the number of steps taken
  #learning_rate = tf.train.exponential_decay(0.005, global_step, 100, 0.8)
  #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)


    
  # Optimizer.
  # without learning rate decay
  optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)  # Decaying learning rate
  #global_step = tf.Variable(0, trainable=False)
  #learning_rate = tf.train.exponential_decay(0.5, global_step, int(num_steps) * 2, 0.96, staircase = True)
  #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)


    
  
  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset, 1.0))
  test_prediction = tf.nn.softmax(model(tf_test_dataset, 1.0))

In [33]:
num_steps = 30001 #10600

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()  
  #tf.global_variables_initializer().run()
  print('Initialized')
  for step in range(num_steps):
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset_lenet[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels_lenet[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 500 == 0):
      print('Minibatch loss at step %d: %f' % (step, l))
      print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
      print('Validation accuracy: %.1f%%' % accuracy(
        valid_prediction.eval(), valid_labels))
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 3.515203
Minibatch accuracy: 10.2%
Validation accuracy: 9.4%
Minibatch loss at step 500: 0.650398
Minibatch accuracy: 78.9%
Validation accuracy: 81.5%
Minibatch loss at step 1000: 0.480263
Minibatch accuracy: 88.3%
Validation accuracy: 83.6%
Minibatch loss at step 1500: 0.500360
Minibatch accuracy: 87.5%
Validation accuracy: 85.2%
Minibatch loss at step 2000: 0.462559
Minibatch accuracy: 85.9%
Validation accuracy: 85.9%
Minibatch loss at step 2500: 0.455038
Minibatch accuracy: 84.4%
Validation accuracy: 86.5%
Minibatch loss at step 3000: 0.383543
Minibatch accuracy: 87.5%
Validation accuracy: 87.0%
Minibatch loss at step 3500: 0.519633
Minibatch accuracy: 82.8%
Validation accuracy: 87.4%
Minibatch loss at step 4000: 0.433288
Minibatch accuracy: 85.9%
Validation accuracy: 87.6%
Minibatch loss at step 4500: 0.312240
Minibatch accuracy: 89.1%
Validation accuracy: 87.9%
Minibatch loss at step 5000: 0.588668
Minibatch accuracy: 82.0%
Validation accuracy

for 10600 steps reached accuracy on test dataset is 95.5%

for 30001 steps reached accuracy on test dataset is 96.1%

with dropout 95.7%