In [1]:
# coding: utf-8
from __future__ import print_function, division, absolute_import
import sys
import time
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
!rm -fr logdir

In [3]:
!mkdir -p logdir/train logdir/test

In [4]:
from tensorflow.examples.tutorials.mnist.input_data \
  import read_data_sets

In [5]:
mnist = read_data_sets('./mnist', one_hot=False)

Extracting ./mnist/train-images-idx3-ubyte.gz
Extracting ./mnist/train-labels-idx1-ubyte.gz
Extracting ./mnist/t10k-images-idx3-ubyte.gz
Extracting ./mnist/t10k-labels-idx1-ubyte.gz


In [6]:
INPUT_UNITS = 28
NUM_HIDDEN_UNITS = 31
BATCH_SIZE = 128
MAX_SEQ_LEN = 28

In [7]:
train_loop_count = mnist.train.num_examples // BATCH_SIZE
test_loop_count  = mnist.test.num_examples // BATCH_SIZE

In [8]:
from minimalrnn import MinimalRNNCell

In [9]:
class MnistRnn:
    """
    """
    def __init__(self, inputs, labels):
        """
        """
        
        dropout_rate = 0.5
        
        training   = tf.Variable(False,name='training',dtype=tf.bool)
        keep_prob  = tf.cond(training,lambda: 1.0 - dropout_rate,lambda: 1.0)

        def my_phi_initializer(inputs, num_outputs, **kwargs):
            print(('my_phi_initializer','inputs.shape',inputs.get_shape().as_list()))
            def my_phi(inputs):
                input_units = inputs.get_shape().as_list()[-1]
                layer   = tf.reshape(inputs, [-1,input_units,1])
                layer   = tf.layers.conv1d(layer, num_outputs // 4, 3, 1, 
                                           activation=tf.nn.elu)
                layer   = tf.layers.average_pooling1d(layer, 2, 2)
                layer   = tf.layers.conv1d(layer, num_outputs // 2, 3, 1,
                                           activation=tf.nn.elu)
                layer   = tf.layers.average_pooling1d(layer, 2, 2)
                layer   = tf.layers.conv1d(layer, num_outputs, 3, 1, 
                                           activation=tf.nn.elu)
                layer   = tf.layers.average_pooling1d(layer, 2, 2)
                layer   = tf.reduce_max(layer, axis=1, keep_dims=True)
                
                layer   = tf.layers.conv1d(layer, num_outputs, 1, 1, 
                                           activation=tf.nn.elu)
                layer   = tf.nn.dropout(layer, keep_prob=keep_prob)
                layer   = tf.layers.conv1d(layer, num_outputs, 1, 1,
                                           activation=None)
                
                layer   = tf.squeeze(layer)
                print(('my_phi','output.shape',layer.get_shape().as_list()))
                return layer
            return my_phi

        cell = MinimalRNNCell(NUM_HIDDEN_UNITS, phi_initializer=my_phi_initializer)
        
        sequence_length = [MAX_SEQ_LEN] * BATCH_SIZE
        
        last, states = tf.nn.dynamic_rnn(
            cell,
            inputs,
            sequence_length=sequence_length,
            dtype=tf.float32)
        rnn_output = last[:,MAX_SEQ_LEN-1,:]
        outputs    = tf.layers.dense(rnn_output, 10)
        
        loss       = tf.losses.sparse_softmax_cross_entropy(
            labels,
            outputs)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimize   = tf.train.AdamOptimizer(learning_rate=0.001). \
                minimize(loss)

        preds      = tf.argmax(outputs, axis=1)
        errors     = tf.count_nonzero(labels - preds)
        accuracy   = 1.0 - tf.cast(errors,tf.float32) / \
                        tf.cast(tf.size(preds), tf.float32)
        
        with tf.control_dependencies([
            tf.assign(training, True)
        ]):
            self.train_mode = tf.constant(1)
            
        with tf.control_dependencies([
            tf.assign(training, False)
        ]):
            self.test_mode  = tf.constant(1)

        self.inputs   = inputs
        self.labels   = labels
        self.outputs  = outputs
        self.loss     = loss
        self.optimize = optimize
        self.accuracy = accuracy

In [10]:
tf.reset_default_graph()

inputs_ = tf.placeholder(tf.float32,
                         [BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS],
                         name='inputs')
labels_ = tf.placeholder(tf.int64,
                         [BATCH_SIZE],
                         name='labels')

model   = MnistRnn(inputs_, labels_)

('my_phi_initializer', 'inputs.shape', [128, 28])
('my_phi', 'output.shape', [128, 31])


In [11]:
config = tf.ConfigProto(gpu_options={'allow_growth':True})
sess   = tf.InteractiveSession(config=config)

tf.global_variables_initializer().run()

In [12]:
def train(max_epochs, train_writer=None, test_writer=None):
    """
    """
    
    step = 0
    
    for ep in range(max_epochs):
        
        sess.run(model.train_mode)
        
        for i in range(train_loop_count):
            offs = i * BATCH_SIZE
            batch_input = mnist.train.images[offs:offs+BATCH_SIZE,:]. \
                            reshape([BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS])
            batch_label = mnist.train.labels[offs:offs+BATCH_SIZE]
            _, loss, accuracy = sess.run(
                [model.optimize, model.loss, model.accuracy],
                 feed_dict = {
                     model.inputs: batch_input,
                     model.labels: batch_label })
            step += 1
            if train_writer:
                summary = tf.Summary(
                    value = [
                        tf.Summary.Value(tag='accuracy',simple_value=accuracy),
                        tf.Summary.Value(tag='loss',simple_value=loss),
                    ])
                train_writer.add_summary(summary, global_step=step)
            if step % 250 == 0:
                print('[trn] ep {:d}, step {:d}, loss {:.5f}, accu {:.5f}'.format(
                    ep + 1, step, loss, accuracy))
                
        test_accuracies = []
        
        sess.run(model.test_mode)

        for i in range(test_loop_count):
            offs = i * BATCH_SIZE
            batch_input = mnist.train.images[offs:offs+BATCH_SIZE,:]. \
                            reshape([BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS])
            batch_label = mnist.train.labels[offs:offs+BATCH_SIZE]
            accuracy, = sess.run([model.accuracy],
                                 feed_dict = {
                                     model.inputs: batch_input,
                                     model.labels: batch_label})
            test_accuracies.append(accuracy)
            if test_writer:
                summary = tf.Summary(
                    value = [
                        tf.Summary.Value(tag='accuracy',simple_value=accuracy),
                    ])
                test_writer.add_summary(summary, global_step=step)
            if i % 250 == 0:
                print('[tst] ep {:d}, step {:d}, accu {:.5f}'.format(
                    ep + 1, step, np.mean(test_accuracies)))


In [None]:
train_writer = tf.summary.FileWriter('logdir/train', 
                                     graph=tf.get_default_graph())
test_writer  = tf.summary.FileWriter('logdir/test',
                                     graph=tf.get_default_graph())

In [None]:
# tf.get_default_graph().finalize()
train(10, train_writer, test_writer)

[trn] ep 1, step 250, loss 1.13382, accu 0.62500
[tst] ep 1, step 429, accu 0.76562
[trn] ep 2, step 500, loss 0.91315, accu 0.67188
[trn] ep 2, step 750, loss 0.59268, accu 0.80469
[tst] ep 2, step 858, accu 0.82812
[trn] ep 3, step 1000, loss 0.51758, accu 0.82812
[trn] ep 3, step 1250, loss 0.37322, accu 0.89062
[tst] ep 3, step 1287, accu 0.85156
[trn] ep 4, step 1500, loss 0.43805, accu 0.85156
[tst] ep 4, step 1716, accu 0.88281
[trn] ep 5, step 1750, loss 0.28497, accu 0.92188
[trn] ep 5, step 2000, loss 0.54268, accu 0.82031
[tst] ep 5, step 2145, accu 0.89844
[trn] ep 6, step 2250, loss 0.32463, accu 0.89844
[trn] ep 6, step 2500, loss 0.39843, accu 0.84375
[tst] ep 6, step 2574, accu 0.89844
[trn] ep 7, step 2750, loss 0.25621, accu 0.91406
[trn] ep 7, step 3000, loss 0.26351, accu 0.92188
[tst] ep 7, step 3003, accu 0.89062
[trn] ep 8, step 3250, loss 0.26339, accu 0.92188
[tst] ep 8, step 3432, accu 0.90625
[trn] ep 9, step 3500, loss 0.24087, accu 0.92969
[trn] ep 9, step 

In [None]:
!tensorboard --logdir logdir