# Udacity Deep Learning Project - Digit Recognition - Part 2

Author: Lei Mao <br>
Last revised on: 12/23/2016 <br>

## Convolutional Neural Network (CNN) for multiMNIST Dataset

### Import libraries required

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
from six.moves import cPickle as pickle

# Config the matplotlib backend as plotting inline in IPython
%matplotlib inline

In [2]:
%autosave 0

Autosave disabled


### Load multiMNIST dataset

In [3]:
# Locate the pickled multiMNIST data set on disk
multiMNIST_folder = 'data/multiMNIST/'
pickle_file = 'multiMNIST_continuous_80000.pickle'
pickle_path = multiMNIST_folder + pickle_file

# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open(pickle_path, 'rb') as f:
    print('Loading multiMNIST data ...')
    print('This may consume a lot of memories.')
    save = pickle.load(f)

# Only the test split is needed in this notebook.
multiMNIST_test_dataset = save['test_dataset']
multiMNIST_test_labels = save['test_labels']
del save  # drop the reference to the full dict so gc can reclaim the rest
print('multiMNIST test set', multiMNIST_test_dataset.shape, multiMNIST_test_labels.shape)

Loading multiMNIST data ...
This may consume a lot of memories.
('multiMNIST test set', (8000, 28, 140), (8000, 5))


Reformat into a TensorFlow-friendly shape:
* convolutions need the image data formatted as a cube (height by width by #channels)
* labels as integer 1-hot encodings: one matrix for the sequence length plus one per digit position.

In [4]:
# Sequence-level label settings
length_limit = 5  # number of digit positions per image
num_length_class = 2 + length_limit  # lengths 0, 1, 2, 3, 4, 5 and "more than 5"
num_digit_class = 11  # digits 0-9 plus class 10, which stands for a blank position

# Image geometry
num_image_rows = 28
num_image_columns = 5 * 28
num_channels = 1  # gray scale

def onehot_encoding(array, num_class):
    '''Turn a numerical numpy array to one-hot encoded matrix.

    Args:
        array: 1-D sequence of integer class indices.
        num_class: number of classes, i.e. the width of the result.

    Returns:
        Integer array of shape (len(array), num_class) in which row i has a
        single 1 in column array[i].
    '''
    indices = np.asarray(array)
    # Builtin `int` is what the removed `np.int` alias used to point to.
    onehot_encoded = np.zeros(shape=(len(indices), num_class), dtype=int)
    # An empty list becomes a float array, which cannot be used as an index,
    # so only index when there is something to set.
    if len(indices) > 0:
        onehot_encoded[np.arange(len(indices)), indices] = 1
    return onehot_encoded

def labels_reformat(dataset, labels):
    '''Reshape images for convolution and one-hot encode the sequence labels.

    Args:
        dataset: image array; reshaped to
            [num_images, num_image_rows, num_image_columns, num_channels].
        labels: 2-D integer array with one row per image and one column per
            digit position; the value 10 marks a blank position.

    Returns:
        (dataset, labelset): the reshaped images and a list of one-hot
        matrices -- the sequence length first, then one per digit position.
    '''
    dataset = dataset.reshape((-1, num_image_rows, num_image_columns, num_channels))
    # Sequence length = number of non-blank positions in each row.
    length = np.sum(labels != 10, axis=1)
    labelset = [onehot_encoding(array=length, num_class=num_length_class)]
    for position in range(labels.shape[1]):
        labelset.append(onehot_encoding(array=labels[:, position], num_class=num_digit_class))
    return dataset, labelset

In [5]:
# Reshape the test images and one-hot encode the test labels
test_dataset, test_labelset = labels_reformat(
    dataset=multiMNIST_test_dataset,
    labels=multiMNIST_test_labels,
)

In [6]:
def probabilities_to_label(probabilities):
    """Return the most likely class index for every row of `probabilities`.

    Works for 1-hot encodings as well as probability distributions, because
    it simply takes the arg-max along axis 1.
    """
    return np.argmax(probabilities, 1)
def accuracy(prediction_digits, labels):
    """Fraction of samples whose whole predicted digit sequence is correct.

    Args:
        prediction_digits: sequence with one entry per digit position; each
            entry is a 2-D array of per-sample class scores.
        labels: true class indices, one row per sample and one column per
            digit position.

    Returns:
        Number of rows of `labels` matched exactly by the arg-max
        predictions, divided by len(labels), as a float.
    """
    # Arg-max per position gives [position, sample]; transpose to [sample, position].
    digits_predicted = [np.argmax(head, 1) for head in prediction_digits]
    labels_predicted = np.array(digits_predicted).T
    # A sample only counts when every position matches.
    num_correct = sum(
        bool(np.array_equal(labels_predicted[i], labels[i]))
        for i in range(len(labels))
    )
    return float(num_correct) / len(labels)

### Deep Convolutional Neural Network using Tensorflow

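Two convolutional layers followed by one fully connected hidden layer and six softmax output heads (one for the sequence length and one for each of the five digit positions).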

In [7]:
# Network hyper-parameters.
# NOTE(review): presumably these have to match the values used when the
# restored checkpoint was trained -- confirm against the training notebook.
batch_size = 64   # not referenced by the cells shown in this notebook
patch_size = 6    # side length of the square convolution kernels
depth = 16        # number of feature maps produced by each convolution
num_hidden = 128  # width of the fully connected hidden layer

graph = tf.Graph()

with graph.as_default():

    # tf.pack was renamed to tf.stack and later removed; use whichever
    # function this TensorFlow installation provides.
    stack_tensors = tf.stack if hasattr(tf, 'stack') else tf.pack

    # Input data: the whole test set is embedded in the graph as a constant.
    tf_test_dataset = tf.constant(test_dataset, dtype=tf.float32)

    # Variables. Their `name`s are kept unchanged so that the Saver can match
    # them against the entries stored in the checkpoint.
    cnn_1_weights = tf.Variable(
        tf.truncated_normal([patch_size, patch_size, num_channels, depth], stddev=0.1),
        name='CNN1_W')
    cnn_1_biases = tf.Variable(tf.zeros([depth]), name='CNN1_B')

    cnn_2_weights = tf.Variable(
        tf.truncated_normal([patch_size, patch_size, depth, depth], stddev=0.1),
        name='CNN2_W')
    cnn_2_biases = tf.Variable(tf.constant(1.0, shape=[depth]), name='CNN2_B')

    # The two stride-2 'SAME' convolutions shrink each spatial dimension by a
    # factor of 4 (28 -> 7 rows, 140 -> 35 columns). The parentheses make the
    # intended per-dimension division explicit.
    flattened_size = (num_image_rows // 4) * (num_image_columns // 4) * depth
    cnn_3_weights = tf.Variable(
        tf.truncated_normal([flattened_size, num_hidden], stddev=0.1), name='CNN3_W')
    cnn_3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]), name='CNN3_B')

    # Output heads: index 0 classifies the sequence length, indices
    # 1..length_limit classify the digit at each position.
    logistic_weights = [tf.Variable(
        tf.truncated_normal([num_hidden, num_length_class], stddev=0.1), name='H0_W')]
    logistic_biases = [tf.Variable(tf.zeros([num_length_class]), name='H0_B')]

    for i in range(length_limit):
        logistic_weights.append(tf.Variable(
            tf.truncated_normal([num_hidden, num_digit_class], stddev=0.1),
            name='H' + str(i + 1) + '_W'))
        logistic_biases.append(tf.Variable(
            tf.constant(1.0, shape=[num_digit_class]), name='H' + str(i + 1) + '_B'))

    # Model
    def model(data):
        """Build the forward pass and return the logits of every output head.

        `data` is a 4-D image tensor with a statically known batch size (the
        batch size is read from its static shape for the flattening step).
        Returns a list of length_limit + 1 logit tensors: the sequence-length
        head first, then one head per digit position.
        """
        conv = tf.nn.conv2d(data, cnn_1_weights, [1, 2, 2, 1], padding='SAME')
        hidden = tf.nn.relu(conv + cnn_1_biases)
        conv = tf.nn.conv2d(hidden, cnn_2_weights, [1, 2, 2, 1], padding='SAME')
        hidden = tf.nn.relu(conv + cnn_2_biases)
        # Flatten the feature maps for the fully connected layer.
        shape = hidden.get_shape().as_list()
        reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
        hidden = tf.nn.relu(tf.matmul(reshape, cnn_3_weights) + cnn_3_biases)

        return [tf.matmul(hidden, logistic_weights[head]) + logistic_biases[head]
                for head in range(length_limit + 1)]

    def predict(data):
        """Return (logits, length probabilities, stacked digit probabilities)."""
        logits = model(data)
        length_probabilities = tf.nn.softmax(logits[0])
        digit_probabilities = stack_tensors(
            [tf.nn.softmax(head_logits) for head_logits in logits[1:]])
        return logits, length_probabilities, digit_probabilities

    # Predict the length and the digits of every sequence in the test set
    test_logits, test_prediction_length, test_prediction_digits = predict(tf_test_dataset)

    # Prediction of single test data
    # Input data
    tf_test_single = tf.placeholder(
        tf.float32, shape=(1, num_image_rows, num_image_columns, num_channels))
    # Predict the length and the digits of the sequence in single test data
    (test_logits_single,
     test_prediction_length_single,
     test_prediction_digits_single) = predict(tf_test_single)

    saver = tf.train.Saver()

In [8]:
model_path = 'model/'
checkpoint_path = model_path + 'CNN_multiMNIST'

with tf.Session(graph=graph) as session:
    # Initialise every variable, then load the trained values from the checkpoint.
    session.run(tf.global_variables_initializer())
    print('Initialized')
    saver.restore(session, checkpoint_path)

    # Evaluate the digit heads on the whole test set and score exact sequence matches.
    predicted_probabilities = session.run(test_prediction_digits)
    test_accuracy = accuracy(prediction_digits=predicted_probabilities,
                             labels=multiMNIST_test_labels)
    print('Test accuracy: %.1f%%' % (test_accuracy * 100))


Initialized
Test accuracy: 86.3%
