In [1]:
import collections
import numpy as np
import tensorflow as tf
import pickle

In [2]:
pickle_file = 'notMNIST.pickle'

# NOTE: unpickling can execute arbitrary code, so only load files you trust.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)

# Bind each split's images and labels to its own name, then drop the
# container so only the arrays themselves stay referenced.
train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
del save

print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


In [3]:
image_size = 28    # images are image_size x image_size pixels
num_labels = 10    # number of classes, i.e. width of the one-hot label rows
num_channels = 1   # grayscale

def reformat(dataset, labels):
    """Reshape images to 4-D and turn integer labels into one-hot rows.

    Args:
        dataset: array whose elements can be regrouped into
            ``(-1, image_size, image_size, num_channels)``.
        labels: array of integer class ids; it is flattened to one id per row.

    Returns:
        ``(dataset, labels)`` where ``dataset`` has shape
        ``(n, image_size, image_size, num_channels)`` and ``labels`` is an
        integer array of shape ``(n, num_labels)`` holding a 1 in the column
        of each example's class and 0 elsewhere.
    """
    dataset = dataset.reshape(-1, image_size, image_size, num_channels)
    # Broadcasting an (n, 1) column against the row [0 .. num_labels-1] gives
    # the one-hot matrix. The width follows num_labels instead of a literal 10
    # so the encoding stays in sync with the rest of the notebook.
    labels = (labels.reshape(-1, 1) == np.arange(num_labels)).astype(int)
    return dataset, labels

# Convert every split to 4-D images and one-hot labels.
# NOTE: the original names are rebound, so this cell is not idempotent --
# re-running it without reloading the pickle feeds already-reformatted
# labels back into reformat().
train_dataset, train_labels = reformat(dataset=train_dataset, labels=train_labels)
valid_dataset, valid_labels = reformat(dataset=valid_dataset, labels=valid_labels)
test_dataset, test_labels = reformat(dataset=test_dataset, labels=test_labels)

# Report the new shapes of each split.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28, 1) (200000, 10)
Validation set (10000, 28, 28, 1) (10000, 10)
Test set (10000, 28, 28, 1) (10000, 10)


In [4]:
def accuracy(predictions, labels):
    """Return the percentage of rows whose top-scoring class matches the label.

    Both arguments are reduced with ``argmax`` along axis 1, so each row is
    one example and each column one class (scores for ``predictions``,
    one-hot or score rows for ``labels``).
    """
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.argmax(labels, axis=1)
    num_correct = np.sum(predicted_classes == true_classes)
    return 100.0 * num_correct / predictions.shape[0]

## Prepare the graph

In [5]:
def define_input(graph, batch_size):
    """Create the network's input tensors inside ``graph``.

    The training inputs are placeholders to be fed one minibatch at a time;
    the validation and test sets are embedded in the graph as constants built
    from the notebook-level ``valid_dataset`` / ``valid_labels`` /
    ``test_dataset`` / ``test_labels`` arrays.

    Args:
        graph: the ``tf.Graph`` in which the tensors are created.
        batch_size: first dimension of the two training placeholders.

    Returns:
        A ``Dataset`` namedtuple with fields ``X_train``, ``y_train``,
        ``X_validation``, ``y_validation``, ``X_test`` and ``y_test``.
    """
    Dataset = collections.namedtuple(
        'Dataset', 'X_train y_train X_validation y_validation X_test y_test')
    with graph.as_default():
        X_train = tf.placeholder(
            shape=(batch_size, image_size, image_size, num_channels),
            dtype=tf.float32, name='X_train')
        y_train = tf.placeholder(
            shape=(batch_size, num_labels), dtype=tf.float32, name='y_train')

        X_validation = tf.constant(valid_dataset, dtype=tf.float32, name='X_validation')
        y_validation = tf.constant(valid_labels, dtype=tf.float32, name='y_validation')

        X_test = tf.constant(test_dataset, dtype=tf.float32, name='X_test')
        y_test = tf.constant(test_labels, dtype=tf.float32, name='y_test')
    # Fields are passed by keyword so each tensor lands in the slot of the
    # same name; the positional version handed y_validation to y_test.
    return Dataset(
        X_train=X_train,
        y_train=y_train,
        X_validation=X_validation,
        y_validation=y_validation,
        X_test=X_test,
        y_test=y_test,
    )

def get_weights_biases(graph, shape, name):
    """Create one layer's weight and bias variables inside ``graph``.

    Args:
        graph: the ``tf.Graph`` that receives the variables.
        shape: shape of the weight tensor; its last entry is also used as the
            length of the bias vector.
        name: suffix for the weight variable (``'weights_' + name``); the
            bias variable is named ``name`` itself.

    Returns:
        ``(weights, biases)``: weights drawn from a truncated normal with
        standard deviation 0.1, biases initialised to zero.
    """
    with graph.as_default():
        initial_weights = tf.truncated_normal(tf.TensorShape(shape), stddev=0.1)
        weights = tf.Variable(initial_weights, name='weights_' + name)
        biases = tf.Variable(tf.zeros(shape[-1]), name=name)
    return weights, biases



`tf.nn.conv2d` computes a 2-D convolution given 4-D `input` and `filter` tensors.

Given an input tensor of shape [batch, in_height, in_width, in_channels] and a filter / kernel tensor of shape [filter_height, filter_width, in_channels, out_channels], this op performs the following:

1. Flattens the filter to a 2-D matrix with shape `[filter_height * filter_width * in_channels, out_channels]`.
2. Extracts image patches from the input tensor to form a virtual tensor of shape `[batch, out_height, out_width, filter_height * filter_width * in_channels]`.
3. For each patch, right-multiplies the filter matrix and the image patch vector.

In [6]:
# Number of examples per training minibatch; also the first dimension of the
# training placeholders created by define_input().
batch_size = 150
# batch_size = 16  (smaller alternative, currently disabled)
patch_size = 5  # height and width of each convolution filter
depth = 16  # output channels of the first convolution; the second uses depth * 2
num_hidden = 64  # number of units in the fully connected hidden layer

graph = tf.Graph()
data = define_input(graph, batch_size)
with graph.as_default():

    # Convolution filter shape -> [filter_height, filter_width, in_channels, out_channels]
    # Layer 1: first convolution, num_channels -> depth feature maps.
    layer1_weights, layer1_biases = get_weights_biases(
        graph, shape=(patch_size, patch_size, num_channels, depth), name='1')

    # Layer 2: second convolution, depth -> depth * 2 feature maps.
    layer2_weights, layer2_biases = get_weights_biases(
        graph, shape=(patch_size, patch_size, depth, depth * 2), name='2')

    # Layer 3: fully connected layer on the flattened feature maps.
    # Both models in this cell halve height and width twice with SAME padding,
    # so each spatial side ends up as ceil(image_size / 4). The previous
    # expression `image_size // 4 * image_size // 4 * depth*2` was evaluated
    # left to right as ((image_size // 4) * image_size) // 4 * ..., which only
    # gives the right size when image_size is a multiple of 4.
    reduced_size = -(-image_size // 4)  # ceil(image_size / 4) in integer arithmetic
    layer3_weights, layer3_biases = get_weights_biases(
        graph, shape=(reduced_size * reduced_size * depth * 2, num_hidden), name='3')

    # Layer 4: output layer, one logit per label.
    layer4_weights, layer4_biases = get_weights_biases(
        graph, shape=(num_hidden, num_labels), name='4')
    
    def _dense_head(features):
        """Flatten 4-D conv features and apply the two fully connected layers.

        Returns the unnormalised class scores (logits).
        """
        _, height, width, channels = features.get_shape().as_list()
        # -1 lets TensorFlow work out the batch dimension, so the head does
        # not depend on the batch size being statically known.
        flat = tf.reshape(features, [-1, height * width * channels])
        hidden = tf.nn.relu(tf.matmul(flat, layer3_weights) + layer3_biases)
        return tf.matmul(hidden, layer4_weights) + layer4_biases

    def model_pool(data):
        """Variant of the network that down-samples with 2x2 max pooling.

        Each convolution uses stride 1 and is followed by a stride-2 max pool;
        the bias and ReLU are applied after pooling. Returns logits.
        The visible cells build their tensors with ``model``; this variant is
        kept as an alternative.
        """
        conv1 = tf.nn.conv2d(data, layer1_weights, [1, 1, 1, 1], padding='SAME')
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        hidden1 = tf.nn.relu(pool1 + layer1_biases)
        conv2 = tf.nn.conv2d(hidden1, layer2_weights, [1, 1, 1, 1], padding='SAME')
        pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        hidden2 = tf.nn.relu(pool2 + layer2_biases)
        return _dense_head(hidden2)

    def model(data):
        """Network that down-samples with stride-2 convolutions.

        Two SAME-padded convolutions with stride 2, each followed by bias and
        ReLU, then the shared fully connected head. Returns logits.
        """
        conv1 = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
        hidden1 = tf.nn.relu(conv1 + layer1_biases)
        conv2 = tf.nn.conv2d(hidden1, layer2_weights, [1, 2, 2, 1], padding='SAME')
        hidden2 = tf.nn.relu(conv2 + layer2_biases)
        return _dense_head(hidden2)
    
    logits = model(data.X_train)

    # L2 penalty over the weight tensors of all four layers (biases excluded).
    l2_regularizer = (tf.nn.l2_loss(layer1_weights) +
                      tf.nn.l2_loss(layer2_weights) +
                      tf.nn.l2_loss(layer3_weights) +
                      tf.nn.l2_loss(layer4_weights))

    # Mean cross-entropy over the minibatch plus the L2 penalty scaled by 0.001.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=data.y_train)
    ) + (0.001 * l2_regularizer)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.05).minimize(loss)

    # All three prediction tensors are softmax outputs. The validation and
    # test tensors used to be raw logits, unlike train_predictions; the
    # arg-max, and therefore the reported accuracy, is the same either way.
    train_predictions = tf.nn.softmax(logits)
    validation_predictions = tf.nn.softmax(model(data.X_validation))
    test_predictions = tf.nn.softmax(model(data.X_test))

    init = tf.global_variables_initializer()

## Train

In [7]:
# Number of training iterations. Each "epoch" below is a single minibatch
# step, not a full pass over the training set.
epochs = 10001

with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(epochs):
        # Draw batch_size distinct training examples at random.
        subset = np.random.choice(train_dataset.shape[0], size=batch_size, replace=False)
        feed_dict = {
            data.X_train: train_dataset[subset],
            data.y_train: train_labels[subset],
        }
        _, l, minibatch_prediction = sess.run(
            [optimizer, loss, train_predictions], feed_dict=feed_dict)

        # Only report every 100th step.
        if epoch % 100 != 0:
            continue
        minibatch_accuracy = accuracy(minibatch_prediction, train_labels[subset])
        validation_accuracy = accuracy(validation_predictions.eval(), valid_labels)
        print('Epoch: {}. Loss: {:.2f}. MiniBatch Accuracy: {:.2f}. Validation Accuracy: {:.2f}'.format(
            epoch, l, minibatch_accuracy, validation_accuracy))
    # Final evaluation on the held-out test set, still inside the session.
    print('Test Accuracy: {}'.format(accuracy(test_predictions.eval(), test_labels)))

Epoch: 0. Loss: 2.79. MiniBatch Accuracy: 9.33. Validation Accuracy: 9.55
Epoch: 100. Loss: 1.43. MiniBatch Accuracy: 73.33. Validation Accuracy: 78.03
Epoch: 200. Loss: 1.16. MiniBatch Accuracy: 80.67. Validation Accuracy: 80.83
Epoch: 300. Loss: 1.29. MiniBatch Accuracy: 74.67. Validation Accuracy: 81.90
Epoch: 400. Loss: 0.95. MiniBatch Accuracy: 83.33. Validation Accuracy: 82.64
Epoch: 500. Loss: 0.97. MiniBatch Accuracy: 85.33. Validation Accuracy: 83.09
Epoch: 600. Loss: 1.20. MiniBatch Accuracy: 76.00. Validation Accuracy: 83.30
Epoch: 700. Loss: 1.08. MiniBatch Accuracy: 81.33. Validation Accuracy: 84.32
Epoch: 800. Loss: 0.98. MiniBatch Accuracy: 84.00. Validation Accuracy: 84.75
Epoch: 900. Loss: 0.94. MiniBatch Accuracy: 84.67. Validation Accuracy: 84.91
Epoch: 1000. Loss: 0.88. MiniBatch Accuracy: 87.33. Validation Accuracy: 85.14
Epoch: 1100. Loss: 0.83. MiniBatch Accuracy: 87.33. Validation Accuracy: 85.39
Epoch: 1200. Loss: 0.81. MiniBatch Accuracy: 87.33. Validation Acc