In [2]:
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle

# --- Data description --------------------------------------------------------
# Pickle holding the pre-processed train/validation/test splits.
pickle_file = 'notMNIST.pickle'
# Images are square: image_size x image_size pixels.
image_size = 28
# Number of target classes, i.e. the width of the one-hot label vectors.
num_labels = 10

# --- Training hyper-parameters -----------------------------------------------
hidden_units = 1024  # width of the single hidden ReLU layer
batch_size = 128     # examples fed to the graph per SGD step
num_steps = 3001     # number of minibatch updates to run

def reformat(dataset, labels):
    """Flatten images and one-hot encode labels for a fully connected net.

    Args:
        dataset: array of images; it is reshaped into rows of
            ``image_size * image_size`` values.
        labels: array of integer class ids, indexed as ``labels[:, None]``
            (so at least 1-D).

    Returns:
        A ``(dataset, labels)`` tuple of float32 arrays: the flattened images
        and the one-hot label matrix with ``num_labels`` columns.
    """
    pixels_per_image = image_size * image_size
    flat_images = dataset.reshape((-1, pixels_per_image)).astype(np.float32)

    # Comparing a column of labels against the row [0 .. num_labels - 1]
    # broadcasts to a boolean matrix that is True only where the column index
    # equals the label, e.g. 1 -> [0.0, 1.0, 0.0 ...], 2 -> [0.0, 0.0, 1.0 ...].
    class_ids = np.arange(num_labels)
    one_hot = (labels[:, None] == class_ids).astype(np.float32)
    return flat_images, one_hot

def accuracy(predictions, labels):
    """Return the percentage of rows whose predicted class matches the label.

    Args:
        predictions: 2-D array of per-class scores, one row per example.
        labels: 2-D array with the same number of rows; the arg-max of each
            row is taken as the true class (e.g. one-hot labels).

    Returns:
        The share of rows where both arg-max indices agree, scaled to 0-100.
    """
    predicted_classes = np.argmax(predictions, 1)
    true_classes = np.argmax(labels, 1)
    num_correct = np.sum(predicted_classes == true_classes)
    return 100.0 * num_correct / predictions.shape[0]
          
# Load the previously pre-processed data splits.
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# pickle_file must come from a trusted source.
with open(pickle_file, 'rb') as f:
    # Keep the file open only for the read itself; everything below works on
    # the in-memory arrays and does not need the handle.
    save = pickle.load(f)

train_dataset = save['train_dataset']
train_labels = save['train_labels']
valid_dataset = save['valid_dataset']
valid_labels = save['valid_labels']
test_dataset = save['test_dataset']
test_labels = save['test_labels']
del save  # hint to help gc free up memory

# Shapes as stored in the pickle.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

# Reformat the data: flatten every image into a vector and one-hot encode
# the labels, then show the resulting shapes.
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)
Training set (200000, 784) (200000, 10)
Validation set (10000, 784) (10000, 10)
Test set (10000, 784) (10000, 10)


In [5]:
# Build the computation graph: one hidden ReLU layer followed by a softmax
# output layer, trained with plain gradient descent on minibatches.
graph = tf.Graph()
with graph.as_default():

    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch. The validation and test sets are
    # embedded in the graph as constants.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Variables.
    # Input -> hidden layer: W1 is (image_size * image_size) x hidden_units,
    # b1 has hidden_units entries.
    W1 = tf.Variable(tf.truncated_normal([image_size * image_size, hidden_units]))
    b1 = tf.Variable(tf.zeros([hidden_units]))

    # Hidden -> output layer: W2 is hidden_units x num_labels,
    # b2 has num_labels entries.
    W2 = tf.Variable(tf.truncated_normal([hidden_units, num_labels]))
    b2 = tf.Variable(tf.zeros([num_labels]))

    # Training computation.
    # Hidden activations: h1 = RELU(X W1 + b1), batch_size x hidden_units.
    # (h1 is defined only here; no placeholder is created for it, since it is
    # computed from the inputs and never fed.)
    h1 = tf.nn.relu(tf.matmul(tf_train_dataset, W1) + b1)

    # Unnormalised class scores (logits) for the softmax layer.
    scores = tf.matmul(h1, W2) + b2

    # The loss we monitor and minimise (the cost J): mean cross-entropy over
    # the minibatch. logits/labels are passed by keyword because newer
    # TensorFlow releases reject positional arguments for this function, and
    # the keyword form is also accepted by older releases.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=scores, labels=tf_train_labels))

    # Optimizer: gradient descent with learning rate 0.5 minimising the cost J.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    # Predictions for the training, validation, and test data. The validation
    # and test passes reuse the same W1/b1/W2/b2 as the training pass.
    train_prediction = tf.nn.softmax(scores)

    valid_prediction_hidden = tf.nn.relu(tf.matmul(tf_valid_dataset, W1) + b1)
    valid_prediction = tf.nn.softmax(tf.matmul(valid_prediction_hidden, W2) + b2)

    test_prediction_hidden = tf.nn.relu(tf.matmul(tf_test_dataset, W1) + b1)
    test_prediction = tf.nn.softmax(tf.matmul(test_prediction_hidden, W2) + b2)


In [6]:
# Train the prepared graph with minibatch SGD, reporting progress as we go.
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print("Initialized - SGD")
    for step in range(num_steps):
        # Slide a batch-sized window through the training data (which is
        # expected to be randomized already), wrapping around before a batch
        # would run past the end.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:offset + batch_size]
        batch_labels = train_labels[offset:offset + batch_size]

        # Map each placeholder node of the graph to the numpy array that
        # should be fed into it for this step.
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}

        # Run one optimisation step and fetch the minibatch loss and the
        # minibatch predictions along with it.
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)

        # Only report every 500th step.
        if step % 500 != 0:
            continue
        print("Minibatch loss at step %d: %f" % (step, l))
        print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
        print("Validation accuracy: %.1f%%" % accuracy(valid_prediction.eval(), valid_labels))

    # Final evaluation on the held-out test set, after all training steps.
    print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))


Initialized - SGD
Minibatch loss at step 0: 421.866180
Minibatch accuracy: 5.5%
Validation accuracy: 27.4%
Minibatch loss at step 500: 16.003557
Minibatch accuracy: 77.3%
Validation accuracy: 78.7%
Minibatch loss at step 1000: 13.119270
Minibatch accuracy: 79.7%
Validation accuracy: 81.6%
Minibatch loss at step 1500: 4.984738
Minibatch accuracy: 88.3%
Validation accuracy: 80.8%
Minibatch loss at step 2000: 3.938705
Minibatch accuracy: 82.8%
Validation accuracy: 81.2%
Minibatch loss at step 2500: 2.048643
Minibatch accuracy: 86.7%
Validation accuracy: 82.1%
Minibatch loss at step 3000: 1.599501
Minibatch accuracy: 82.8%
Validation accuracy: 82.7%
Test accuracy: 89.2%
