In [5]:
import numpy as np
import tensorflow as tf

from mnist import MNIST
import matplotlib.pyplot as plt

# Data Preprocessing

## Functions

In [99]:
def randomize(dataset, labels):
    permutation = np.random.permutation(labels.shape[0])
    shuffled_dataset = dataset[permutation, :, :]
    shuffled_labels = labels[permutation]
    return shuffled_dataset, shuffled_labels

def one_hot_encode(np_array):
    return (np.arange(10) == np_array[:,None]).astype(np.float32)

def reformat_data(dataset, labels, image_width, image_height, image_depth):
    np_dataset_ = np.array([np.array(image_data).reshape(image_width, image_height, image_depth) for image_data in dataset])
    np_labels_ = one_hot_encode(np.array(labels, dtype=np.float32))
    np_dataset, np_labels = randomize(np_dataset_, np_labels_)
    return np_dataset, np_labels

def flatten_array(array):
    shape = array.shape
    return array.reshape([shape[0], shape[1] * shape[2] * shape[3]])

def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1)) / predictions.shape[0])

## MNIST Data

In [6]:
mnist_folder = './data/mnist/'
mnist_image_width = 28
mnist_image_height = 28
mnist_image_depth = 1
mnist_num_labels = 10

mndata = MNIST(mnist_folder)

# mnist_train_dataset_: list of len 60000; eachelement list of 784 len; mnist_train_labels_: list of len 60000; eachelement one int
mnist_train_dataset_, mnist_train_labels_ = mndata.load_training()

# mnist_test_dataset_: list of len 10000; eachelement list of 784 len; mnist_test_labels_: list of len 10000; eachelement one int
mnist_test_dataset_, mnist_test_labels_ = mndata.load_testing()

In [43]:
# mnist_train_dataset: (60000, 28, 28, 1); mnist_train_labels: (60000, 10)
mnist_train_dataset, mnist_train_labels = reformat_data(mnist_train_dataset_, mnist_train_labels_, mnist_image_width, mnist_image_height, mnist_image_depth)

# mnist_test_dataset: (10000, 28, 28, 1); mnist_test_labels: (10000, 10)
mnist_test_dataset, mnist_test_labels = reformat_data(mnist_test_dataset_, mnist_test_labels_, mnist_image_width, mnist_image_height, mnist_image_depth)

In [172]:
train_x_ = flatten_array(mnist_train_dataset) # (60000, 784)
train_y_ = mnist_train_labels # (60000, 10)
test_x_ = flatten_array(mnist_test_dataset) # (10000, 784)
test_y = mnist_test_labels # (10000, 10)

In [166]:
print(type(train_x_[0,0]))
print(type(test_x[0,0]))

<class 'numpy.int32'>
<class 'numpy.int32'>


# MLP Model

In [46]:
learning_rate = 0.1 # ??????????????????????
batch_size = 400 # 60000/400 = 150 batches
num_epochs = 50
n_input = mnist_image_width * mnist_image_height * mnist_image_depth # num of features: 784
n_classes = 10

## Define training inputs as placeholders, and weights and biases as variables

In [173]:
X = tf.placeholder("float", [None, n_input]) # for each batch: (400, 784)
Y = tf.placeholder("float", [None, n_classes]) # for each batch: (400, 10)
test_x = tf.constant(test_x_, tf.float32)

# Weights and biases
n_hidden_1 = 10  # number of neurons in hidden layers
weights = {
	'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])), # 1st hidden layer: (784, 10)
	'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_1])), # 2nd hidden layer: (10, 10)
	'h_out': tf.Variable(tf.random_normal([n_hidden_1, n_classes])) # output layer: (10, 10)
}
biases = {
	'b1': tf.Variable(tf.random_normal([n_hidden_1])), # bias for the 1st hidden layer: (10,)
	'b2': tf.Variable(tf.random_normal([n_hidden_1])), # bias for the 2nd hidden layer: (10,)
	'b_out': tf.Variable(tf.random_normal([n_classes])) # bias for the output layer: (10,)
}

## Define layers

In [71]:
def multilayer_perceptron(x):
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    out_layer = tf.nn.sigmoid(tf.matmul(layer_2, weights['h_out']) + biases['b_out'])
    return out_layer

## Operations

In [175]:
logits = multilayer_perceptron(X) # (400, 10)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

train_prediction = tf.nn.softmax(logits)
test_prediction = tf.nn.softmax(multilayer_perceptron(test_x))

init = tf.global_variables_initializer()

## Run the session

In [177]:
with tf.Session() as sess:
    sess.run(init)
    
    total_batch_train = int(mnist_train_dataset.shape[0] / batch_size) # 150: number of batches for train dataset
    total_batch_test = int(mnist_test_dataset.shape[0] / batch_size) # 25: number of batches for test dataset
    
    for iter_num in range(num_epochs):
        avg_train_acc = 0.
        for i in range(total_batch_train):
            train_x = train_x_[(i) * batch_size: (i + 1) * batch_size, ...] # (400, 784)
            train_y = train_y_[(i) * batch_size: (i + 1) * batch_size, :] # (400, 10)
            
            feed_dict = {X: train_x, Y: train_y}
            _, l, prediction = sess.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
            
            train_accuracy = accuracy(prediction, train_y)
            avg_train_acc += train_accuracy
            
        test_accuracy = accuracy(test_prediction.eval(), test_y)
        
        # print the train and test accuracies   
        print("Train acc: %f, Test_acc: %f" % (avg_train_acc/total_batch_train, test_accuracy))

Train acc: 11.263333, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11.350000
Train acc: 11.236667, Test_acc: 11

KeyboardInterrupt: 

In [None]:
!jupyter nbconvert --output-dir='../html-output' MLP.ipynb --to html --output MLP.html