Deep Learning
=============

Assignment 4
------------

Previously in `2_fullyconnected.ipynb` and `3_regularization.ipynb`, we trained fully connected networks to classify [notMNIST](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html) characters.

The goal of this assignment is to make the neural network convolutional.

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
from time import time

In [2]:
pickle_file = '/input/notMNIST.pickle'

# Read the whole pickle first so the file handle is released before the
# individual splits are unpacked and reported.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only point pickle_file at a file you trust.
with open(pickle_file, 'rb') as f:
  save = pickle.load(f)

# The pickle holds a dict with one dataset/labels pair per split.
train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
del save  # hint to help gc free up memory

print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


Reformat into a TensorFlow-friendly shape:
- convolutions need the image data formatted as a cube (width by height by #channels)
- labels as float 1-hot encodings.

In [3]:
# Shape constants shared by reformat() and by the model cells below.
num_channels = 1 # grayscale
image_size = 28  # side length used for both spatial axes of the image cube
num_labels = 10  # number of columns in the one-hot label matrix

import numpy as np

def reformat(dataset, labels):
  """Return (dataset, labels) as float32 arrays in the layout the models use.

  dataset is reshaped to (-1, image_size, image_size, num_channels); labels,
  a 1-D array of integer class ids, becomes a one-hot matrix with num_labels
  columns.
  """
  cube_shape = (-1, image_size, image_size, num_channels)
  images = dataset.reshape(cube_shape).astype(np.float32)
  # Broadcasting an (N, 1) column of ids against the row 0..num_labels-1
  # yields an (N, num_labels) boolean mask with exactly one True per valid id.
  one_hot = np.equal(labels[:, None], np.arange(num_labels)).astype(np.float32)
  return images, one_hot
# Convert every split and rebind the original names to the converted arrays.
# Because the names are rebound, this is meant to run once per load: feeding
# already one-hot labels through reformat() again would not reproduce the
# shapes printed below.
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
# Sanity check: each dataset should now carry a trailing channel axis and each
# label array should have num_labels columns.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28, 1) (200000, 10)
Validation set (10000, 28, 28, 1) (10000, 10)
Test set (10000, 28, 28, 1) (10000, 10)


In [4]:
def accuracy(predictions, labels):
  """Percentage of rows whose highest-scoring column matches the label's.

  Both arguments are 2-D arrays with one row per example; the class of a row
  is taken to be the index of its largest entry.
  """
  predicted_classes = np.argmax(predictions, 1)
  true_classes = np.argmax(labels, 1)
  num_correct = np.sum(predicted_classes == true_classes)
  return 100.0 * num_correct / predictions.shape[0]

Let's build a small network with two convolutional layers, followed by one fully connected layer. Convolutional networks are more expensive computationally, so we'll limit its depth and number of fully connected nodes.

In [5]:
# Hyper-parameters of the baseline network.
batch_size = 16   # examples per step; fixes the placeholder batch dimension and is read by run()
patch_size = 5    # convolution kernels are patch_size x patch_size
depth = 16        # output channels of each convolution
num_hidden = 64   # width of the fully connected hidden layer

# Spatial size of the feature maps that reach the fully connected layer.
# Each of the two stride-2 'SAME' convolutions produces ceil(n / 2) outputs
# per axis, so together they produce ceil(image_size / 4).  (The previous
# expression `image_size // 4 * image_size // 4 * depth` was evaluated left to
# right and only matched this value by coincidence for image_size = 28.)
reduced_size = (image_size + 3) // 4

graph = tf.Graph()

with graph.as_default():

  # The graph-level seed is stored on the current default graph, so it must be
  # set inside this context to affect the initialisers created below.
  tf.set_random_seed(1)

  # Input data.  Training batches are fed at run time; the validation and test
  # sets are baked into the graph as constants.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)

  # Variables.
  layer1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, depth], stddev=0.1))
  layer1_biases = tf.Variable(tf.zeros([depth]))
  layer2_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth, depth], stddev=0.1))
  layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
  layer3_weights = tf.Variable(tf.truncated_normal(
      [reduced_size * reduced_size * depth, num_hidden], stddev=0.1))
  layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
  layer4_weights = tf.Variable(tf.truncated_normal(
      [num_hidden, num_labels], stddev=0.1))
  layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

  # Model.
  def model(data):
    """Return logits for `data` using the shared layer variables above.

    Two stride-2 convolutions with ReLU are followed by a fully connected
    ReLU layer and a linear output layer.  The batch dimension of `data` must
    be statically known because it is reused when flattening.
    """
    conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer1_biases)
    conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer2_biases)
    # Flatten each example's feature maps into one row for the dense layers.
    shape = hidden.get_shape().as_list()
    reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
    return tf.matmul(hidden, layer4_weights) + layer4_biases

  # Training computation.
  logits = model(tf_train_dataset)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))

  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))

In [16]:
def run(num_steps, report_every=1000):
    """Train the currently defined graph with minibatch SGD and print progress.

    The function reads the module-level names bound by the most recently
    executed graph-definition cell (graph, batch_size, tf_train_dataset,
    tf_train_labels, optimizer, loss, train_prediction, valid_prediction,
    test_prediction) as well as the reformatted datasets, so that cell must be
    run first.

    Args:
      num_steps: number of minibatch updates to perform.
      report_every: print minibatch loss/accuracy and validation accuracy on
        every step that is a multiple of this value.  Defaults to 1000, the
        interval that was previously hard-coded.

    Raises:
      ValueError: if report_every is smaller than 1.
    """
    if report_every < 1:
        raise ValueError('report_every must be a positive integer, got %r'
                         % (report_every,))
    start = time()
    with tf.Session(graph=graph) as session:
      tf.global_variables_initializer().run()
      print('Initialized')
      for step in range(num_steps):
        # Walk through the training set in consecutive windows, wrapping
        # around so that a full batch is always available.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = session.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % report_every == 0):
          print('Minibatch loss at step %d: %f' % (step, l))
          print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
          print('Validation accuracy: %.1f%%' % accuracy(
            valid_prediction.eval(), valid_labels))
      print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
    print("Elapsed %.3f seconds" % (time() - start))

In [7]:
# range(1001) covers steps 0..1000, so the last step is also a reporting step.
run(1001)

Initialized
Minibatch loss at step 0: 3.392307
Minibatch accuracy: 12.5%
Validation accuracy: 10.3%
Minibatch loss at step 100: 1.226505
Minibatch accuracy: 50.0%
Validation accuracy: 61.1%
Minibatch loss at step 200: 0.864938
Minibatch accuracy: 75.0%
Validation accuracy: 76.2%
Minibatch loss at step 300: 0.324942
Minibatch accuracy: 93.8%
Validation accuracy: 78.9%
Minibatch loss at step 400: 0.316904
Minibatch accuracy: 100.0%
Validation accuracy: 79.7%
Minibatch loss at step 500: 0.749492
Minibatch accuracy: 87.5%
Validation accuracy: 80.2%
Minibatch loss at step 600: 0.274475
Minibatch accuracy: 100.0%
Validation accuracy: 81.6%
Minibatch loss at step 700: 0.842918
Minibatch accuracy: 75.0%
Validation accuracy: 81.8%
Minibatch loss at step 800: 0.658106
Minibatch accuracy: 81.2%
Validation accuracy: 82.4%
Minibatch loss at step 900: 0.644018
Minibatch accuracy: 81.2%
Validation accuracy: 83.1%
Minibatch loss at step 1000: 0.190832
Minibatch accuracy: 100.0%
Validation accuracy: 82

---
Problem 1
---------

The convolutional model above uses convolutions with stride 2 to reduce the dimensionality. Replace the strides by a max pooling operation (`nn.max_pool()`) of stride 2 and kernel size 2.

---

In [8]:
# Hyper-parameters (unchanged from the strided baseline).
batch_size = 16   # examples per step; fixes the placeholder batch dimension and is read by run()
patch_size = 5    # convolution kernels are patch_size x patch_size
depth = 16        # output channels of each convolution
num_hidden = 64   # width of the fully connected hidden layer

# Spatial size of the feature maps that reach the fully connected layer.
# The stride-1 'SAME' convolutions keep the size and each stride-2 'SAME'
# max-pool produces ceil(n / 2) outputs per axis, so two pools produce
# ceil(image_size / 4).  (The previous expression
# `image_size // 4 * image_size // 4 * depth` was evaluated left to right and
# only matched this value by coincidence for image_size = 28.)
reduced_size = (image_size + 3) // 4

graph = tf.Graph()

with graph.as_default():

  # The graph-level seed is stored on the current default graph, so it must be
  # set inside this context to affect the initialisers created below.
  tf.set_random_seed(1)

  # Input data.  Training batches are fed at run time; the validation and test
  # sets are baked into the graph as constants.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)

  # Variables.
  layer1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, depth], stddev=0.1))
  layer1_biases = tf.Variable(tf.zeros([depth]))
  layer2_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth, depth], stddev=0.1))
  layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
  layer3_weights = tf.Variable(tf.truncated_normal(
      [reduced_size * reduced_size * depth, num_hidden], stddev=0.1))
  layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
  layer4_weights = tf.Variable(tf.truncated_normal(
      [num_hidden, num_labels], stddev=0.1))
  layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

  # Model.
  def model(data):
    """Return logits for `data` using the shared layer variables above.

    Each stride-1 convolution + ReLU is followed by a 2x2 max-pool of stride
    2, which replaces the strided convolutions of the baseline.  The batch
    dimension of `data` must be statically known because it is reused when
    flattening.
    """
    conv = tf.nn.conv2d(data, layer1_weights, [1, 1, 1, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer1_biases)
    pool = tf.nn.max_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
    conv = tf.nn.conv2d(pool, layer2_weights, [1, 1, 1, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer2_biases)
    pool = tf.nn.max_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
    # Flatten each example's feature maps into one row for the dense layers.
    shape = pool.get_shape().as_list()
    reshape = tf.reshape(pool, [shape[0], shape[1] * shape[2] * shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
    return tf.matmul(hidden, layer4_weights) + layer4_biases

  # Training computation.
  logits = model(tf_train_dataset)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))

  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))

In [9]:
# run() trains whatever graph the preceding cell bound to the module-level
# names; range(1001) covers steps 0..1000 so the last step is reported too.
run(1001)

Initialized
Minibatch loss at step 0: 3.462645
Minibatch accuracy: 6.2%
Validation accuracy: 9.9%
Minibatch loss at step 100: 1.053992
Minibatch accuracy: 56.2%
Validation accuracy: 54.6%
Minibatch loss at step 200: 0.989421
Minibatch accuracy: 68.8%
Validation accuracy: 77.4%
Minibatch loss at step 300: 0.466559
Minibatch accuracy: 87.5%
Validation accuracy: 79.2%
Minibatch loss at step 400: 0.334299
Minibatch accuracy: 93.8%
Validation accuracy: 80.2%
Minibatch loss at step 500: 0.697828
Minibatch accuracy: 87.5%
Validation accuracy: 81.2%
Minibatch loss at step 600: 0.345500
Minibatch accuracy: 87.5%
Validation accuracy: 82.2%
Minibatch loss at step 700: 0.986054
Minibatch accuracy: 68.8%
Validation accuracy: 82.5%
Minibatch loss at step 800: 0.524793
Minibatch accuracy: 81.2%
Validation accuracy: 83.8%
Minibatch loss at step 900: 0.577793
Minibatch accuracy: 87.5%
Validation accuracy: 83.8%
Minibatch loss at step 1000: 0.364546
Minibatch accuracy: 87.5%
Validation accuracy: 84.3%
T

---
Problem 2
---------

Try to get the best performance you can using a convolutional net. Look for example at the classic [LeNet5](http://yann.lecun.com/exdb/lenet/) architecture, adding Dropout, and/or adding learning rate decay.

---

In [22]:
# Hyper-parameters for the Problem 2 network: larger batches and more feature
# maps than the earlier cells.
batch_size = 128  # examples per step; fixes the placeholder batch dimension and is read by run()
patch_size = 5    # convolution kernels are patch_size x patch_size
depth = 50        # output channels of each convolution
num_hidden = 64   # width of the fully connected hidden layer

# Spatial size of the feature maps that reach the fully connected layer.
# The stride-1 'SAME' convolutions keep the size and each stride-2 'SAME'
# max-pool produces ceil(n / 2) outputs per axis, so two pools produce
# ceil(image_size / 4).  (The previous expression
# `image_size // 4 * image_size // 4 * depth` was evaluated left to right and
# only matched this value by coincidence for image_size = 28.)
reduced_size = (image_size + 3) // 4

graph = tf.Graph()

with graph.as_default():

  # The graph-level seed is stored on the current default graph, so it must be
  # set inside this context to affect the initialisers and dropout masks
  # created below.
  tf.set_random_seed(1)

  # Input data.  Training batches are fed at run time; the validation and test
  # sets are baked into the graph as constants.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)

  # Variables.
  layer1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, depth], stddev=0.1))
  layer1_biases = tf.Variable(tf.zeros([depth]))
  layer2_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth, depth], stddev=0.1))
  layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
  layer3_weights = tf.Variable(tf.truncated_normal(
      [reduced_size * reduced_size * depth, num_hidden], stddev=0.1))
  layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
  layer4_weights = tf.Variable(tf.truncated_normal(
      [num_hidden, num_labels], stddev=0.1))
  layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

  # Model.
  def model(data, keep_prob=1):
    """Return logits for `data` using the shared layer variables above.

    Same conv => ReLU => max-pool stack as the Problem 1 network, with dropout
    applied after each pooling step.  `keep_prob` is passed straight to
    tf.nn.dropout; the default of 1 is what the validation and test
    predictions use.  The batch dimension of `data` must be statically known
    because it is reused when flattening.
    """
    conv = tf.nn.conv2d(data, layer1_weights, [1, 1, 1, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer1_biases)
    pool = tf.nn.max_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
    dropout = tf.nn.dropout(pool, keep_prob=keep_prob)
    conv = tf.nn.conv2d(dropout, layer2_weights, [1, 1, 1, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer2_biases)
    pool = tf.nn.max_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
    dropout = tf.nn.dropout(pool, keep_prob=keep_prob)
    # Flatten each example's feature maps into one row for the dense layers.
    shape = dropout.get_shape().as_list()
    reshape = tf.reshape(dropout, [shape[0], shape[1] * shape[2] * shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
    return tf.matmul(hidden, layer4_weights) + layer4_biases

  # Training computation (dropout active with keep_prob 0.9).
  logits = model(tf_train_dataset, .9)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))

  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

  # Predictions for the training, validation, and test data.  Note that the
  # training prediction is derived from the same dropout-perturbed logits as
  # the loss, whereas validation/test use the default keep_prob.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))

In [23]:
# range(10001) covers steps 0..10000, so the last step is also a reporting step.
run(10001)

Initialized
Minibatch loss at step 0: 3.895007
Minibatch accuracy: 5.5%
Validation accuracy: 13.7%
Minibatch loss at step 1000: 0.826724
Minibatch accuracy: 77.3%
Validation accuracy: 82.4%
Minibatch loss at step 2000: 0.472765
Minibatch accuracy: 87.5%
Validation accuracy: 84.3%
Minibatch loss at step 3000: 0.682720
Minibatch accuracy: 80.5%
Validation accuracy: 85.2%
Minibatch loss at step 4000: 0.474535
Minibatch accuracy: 85.2%
Validation accuracy: 85.6%
Minibatch loss at step 5000: 0.463517
Minibatch accuracy: 88.3%
Validation accuracy: 86.2%
Minibatch loss at step 6000: 0.574828
Minibatch accuracy: 84.4%
Validation accuracy: 86.6%
Minibatch loss at step 7000: 0.480592
Minibatch accuracy: 85.9%
Validation accuracy: 87.2%
Minibatch loss at step 8000: 0.609354
Minibatch accuracy: 78.9%
Validation accuracy: 87.3%
Minibatch loss at step 9000: 0.481874
Minibatch accuracy: 86.7%
Validation accuracy: 87.6%
Minibatch loss at step 10000: 0.425845
Minibatch accuracy: 86.7%
Validation accura

In [12]:
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras.optimizers import SGD

def build(width, height, depth, classes):
    """Assemble the LeNet-style Sequential model used by run_keras().

    The stack is two CONV => RELU => POOL stages, one FC => RELU stage and a
    softmax classifier with `classes` outputs.  `width`, `height` and `depth`
    are forwarded, in that order, as the input_shape of the first layer.
    """
    layers = [
        # first set of CONV => RELU => POOL
        Convolution2D(20, 5, 5, border_mode="same",
            input_shape=(width, height, depth)),
        Activation("relu"),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),

        # second set of CONV => RELU => POOL
        Convolution2D(50, 5, 5, border_mode="same"),
        Activation("relu"),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),

        # set of FC => RELU layers
        Flatten(),
        Dense(500),
        Activation("relu"),

        # softmax classifier
        Dense(classes),
        Activation("softmax"),
    ]

    model = Sequential()
    for layer in layers:
        model.add(layer)
    return model

def run_keras():
    """Train the Keras model from build() with SGD and print its test accuracy.

    Reads the module-level, already reformatted train_dataset/train_labels and
    test_dataset/test_labels, together with image_size, num_channels and
    num_labels.  Nothing is returned; progress and the final accuracy are
    printed.
    """
    opt = SGD(lr=0.01)
    model = build(width=image_size, height=image_size, depth=num_channels,
        classes=num_labels)
    model.compile(loss="categorical_crossentropy", optimizer=opt,
        metrics=["accuracy"])

    print("[INFO] training...")
    # The datasets are fed exactly as reformat() left them; the former
    # np.newaxis copies were never passed to the model and have been dropped.
    model.fit(train_dataset, train_labels, batch_size=128, nb_epoch=20,
        verbose=2)

    # show the accuracy on the testing set
    print("[INFO] evaluating...")
    # Local names chosen so they do not shadow the module-level accuracy().
    (test_loss, test_accuracy) = model.evaluate(test_dataset, test_labels,
        batch_size=128, verbose=0)
    print("[INFO] accuracy: {:.2f}%".format(test_accuracy * 100))

    

Using TensorFlow backend.


In [13]:
# Train the Keras model and print its accuracy on the test set.
run_keras()

[INFO] training...
Epoch 1/20
20s - loss: 0.8015 - acc: 0.7825
Epoch 2/20
20s - loss: 0.4611 - acc: 0.8676
Epoch 3/20
20s - loss: 0.4018 - acc: 0.8843
Epoch 4/20
20s - loss: 0.3705 - acc: 0.8928
Epoch 5/20
20s - loss: 0.3491 - acc: 0.8983
Epoch 6/20
20s - loss: 0.3321 - acc: 0.9034
Epoch 7/20
20s - loss: 0.3182 - acc: 0.9070
Epoch 8/20
20s - loss: 0.3062 - acc: 0.9101
Epoch 9/20
20s - loss: 0.2956 - acc: 0.9129
Epoch 10/20
20s - loss: 0.2866 - acc: 0.9157
Epoch 11/20
20s - loss: 0.2771 - acc: 0.9179
Epoch 12/20
20s - loss: 0.2694 - acc: 0.9203
Epoch 13/20
20s - loss: 0.2616 - acc: 0.9229
Epoch 14/20
20s - loss: 0.2542 - acc: 0.9247
Epoch 15/20
20s - loss: 0.2474 - acc: 0.9267
Epoch 16/20
20s - loss: 0.2410 - acc: 0.9282
Epoch 17/20
20s - loss: 0.2345 - acc: 0.9303
Epoch 18/20
20s - loss: 0.2280 - acc: 0.9320
Epoch 19/20
20s - loss: 0.2224 - acc: 0.9334
Epoch 20/20
20s - loss: 0.2162 - acc: 0.9350
[INFO] evaluating...
[INFO] accuracy: 96.21%
