In [1]:
%matplotlib inline

In [2]:
import os

import numpy as np
import sys

from ipwxlearn import glue
from ipwxlearn.datasets import mnist
from ipwxlearn.glue import G
from ipwxlearn.utils import dataflow, training

  "downsample module has been moved to the theano.tensor.signal.pool module.")


In [3]:
def load_data():
    cache_dir = os.path.abspath('../../data')
    train_X, train_y, test_X, test_y = mnist.read_data_sets(cache_dir=cache_dir, floatX=glue.config.floatX)

    # split train-test set.
    indices = np.arange(train_X.shape[0])
    np.random.shuffle(indices)
    valid_size = int(train_X.shape[0] * 0.1)
    train_idx, valid_idx = indices[:-valid_size], indices[-valid_size:]
    return (train_X[train_idx], train_y[train_idx]), (train_X[valid_idx], train_y[valid_idx]), \
           (test_X, test_y)

In [4]:
(train_X, train_y), (valid_X, valid_y), (test_X, test_y) = load_data()

In [5]:
# build the multilayer perceptron.
graph = G.Graph()
with graph.as_default():
    target_num = 10
    train_input_shape = (32, 784)
    test_input_shape = (None,) + train_input_shape[1:]

    train_input = G.make_placeholder('trainX', shape=train_input_shape, dtype=glue.config.floatX)
    train_label = G.make_placeholder('trainY', shape=train_input_shape[:1], dtype=np.int32)
    test_input = G.make_placeholder('testX', shape=test_input_shape, dtype=glue.config.floatX)
    test_label = G.make_placeholder('testY', shape=test_input_shape[:1], dtype=np.int32)

    # compose the network
    input = G.layers.InputLayer(train_input, shape=train_input_shape)
    dropout0 = G.layers.DropoutLayer('dropout0', input, p=0.2)
    hidden1 = G.layers.DenseLayer('hidden1', dropout0, num_units=128)
    dropout1 = G.layers.DropoutLayer('dropout1', hidden1, p=0.5)
    hidden2 = G.layers.DenseLayer('hidden2', dropout1, num_units=32)
    dropout2 = G.layers.DropoutLayer('dropout2', hidden1, p=0.5)
    softmax = G.layers.SoftmaxLayer('softmax', dropout2, num_units=target_num)

    # derivate the predictions and loss
    train_output = G.layers.get_output(softmax)
    train_loss = G.op.mean(G.objectives.sparse_categorical_crossentropy(train_output, train_label))

    test_output = G.layers.get_output(softmax, inputs={input: test_input}, deterministic=True)
    test_loss = G.op.sum(G.objectives.sparse_categorical_crossentropy(test_output, test_label))
    test_predict = G.op.argmax(test_output, axis=1)

    # Create update expressions for training.
    params = G.layers.get_all_params(softmax, trainable=True)
    updates = G.updates.adam(train_loss, params)

    train_fn = G.make_function(inputs=[train_input, train_label], outputs=train_loss, updates=updates)
    valid_fn = G.make_function(inputs=[test_input, test_label], outputs=test_loss)
    test_fn = G.make_function(inputs=[test_input], outputs=test_predict)

In [6]:
# train the MLP.
with G.Session(graph) as sess:
    monitors = training.ValidationMonitor(valid_fn, (valid_X, valid_y), params=params, log_file=sys.stdout)
    training.run_steps(train_fn, (train_X, train_y), monitor=monitors, max_steps=10 * len(train_X) // 32)

Step 200: train loss 0.665509, valid loss 0.450832 (*)
Step 400: train loss 0.385806, valid loss 0.333764 (*)
Step 600: train loss 0.755397, valid loss 0.294522 (*)
Step 800: train loss 0.337349, valid loss 0.269586 (*)
Step 1000: train loss 0.241877, valid loss 0.249626 (*)
Step 1200: train loss 0.424761, valid loss 0.228390 (*)
Step 1400: train loss 0.236672, valid loss 0.219577 (*)
Step 1600: train loss 0.203284, valid loss 0.205687 (*)
Step 1800: train loss 0.233868, valid loss 0.196585 (*)
Step 2000: train loss 0.296461, valid loss 0.189495 (*)
Step 2200: train loss 0.171812, valid loss 0.177086 (*)
Step 2400: train loss 0.221327, valid loss 0.174106 (*)
Step 2600: train loss 0.212054, valid loss 0.172257 (*)
Step 2800: train loss 0.223170, valid loss 0.160464 (*)
Step 3000: train loss 0.543407, valid loss 0.160148 (*)
Step 3200: train loss 0.138891, valid loss 0.164002
Step 3400: train loss 0.126966, valid loss 0.155478 (*)
Step 3600: train loss 0.334295, valid loss 0.149152 (*)


In [7]:
with G.Session(graph) as sess:
    # After training, we compute and print the test error.
    test_predicts = []
    for test_batch_X in dataflow.iterate_testing_batches(test_X, batch_size=256):
        test_predicts.append(test_fn(test_batch_X))
    test_predicts = np.concatenate(test_predicts, axis=0).astype(np.int32)
    print('Test error: %.2f %%' % (float(np.mean(test_predicts != test_y)) * 100.0))

Test error: 2.54 %
