In [1]:
# coding: utf-8
from __future__ import print_function, division, absolute_import
import sys
import time
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
!rm -fr logdir

In [3]:
!mkdir -p logdir

In [4]:
from tensorflow.examples.tutorials.mnist.input_data \
  import read_data_sets

In [5]:
mnist = read_data_sets('./mnist', one_hot=False)

Extracting ./mnist/train-images-idx3-ubyte.gz
Extracting ./mnist/train-labels-idx1-ubyte.gz
Extracting ./mnist/t10k-images-idx3-ubyte.gz
Extracting ./mnist/t10k-labels-idx1-ubyte.gz


In [6]:
INPUT_UNITS = 28
NUM_HIDDEN_UNITS = 31
BATCH_SIZE = 128
MAX_SEQ_LEN = 28

In [7]:
train_loop_count = mnist.train.num_examples // BATCH_SIZE
test_loop_count  = mnist.test.num_examples // BATCH_SIZE

In [8]:
from minimalrnn import MinimalRNNCell

In [9]:
class MnistRnn:
    """
    """
    def __init__(self, inputs, labels, use_custom_phi=False):
        """
        """

        dropout_rate = 0.0

        training    = tf.Variable(False,name='training',dtype=tf.bool)
        global_step = tf.Variable(1,name='global_step',dtype=tf.int64)
        keep_prob   = tf.cond(training,lambda: 1.0 - dropout_rate,lambda: 1.0)

        step_update = tf.placeholder(tf.int64,None,name='step_update')

        def my_phi_initializer(inputs, num_outputs, **kwargs):
            
            print(('my_phi_initializer','inputs[0].shape',inputs[0].get_shape().as_list()))
            
            def my_phi(inputs):
                
                input_units = inputs.get_shape().as_list()[-1]
                layer   = tf.reshape(inputs, [-1,input_units,1])

                layer   = tf.layers.dropout(layer, rate=1.0-keep_prob, training=training)
                layer   = tf.layers.conv1d(layer, num_outputs // 4, 3, 1) 
#                 layer   = tf.layers.batch_normalization(layer, training=training)
                layer   = tf.nn.relu(layer)

                layer   = tf.layers.dropout(layer, rate=1.0-keep_prob, training=training)
                layer   = tf.layers.conv1d(layer, num_outputs // 2, 3, 1)
#                 layer   = tf.layers.batch_normalization(layer, training=training)
                layer   = tf.nn.relu(layer)

                layer   = tf.layers.max_pooling1d(layer, 2, 2)

                layer   = tf.layers.dropout(layer, rate=1.0-keep_prob, training=training)
                layer   = tf.layers.conv1d(layer, num_outputs, 3, 1)
#                 layer   = tf.layers.batch_normalization(layer, training=training)
                layer   = tf.nn.relu(layer)

                layer   = tf.layers.max_pooling1d(layer, 2, 2)

                layer   = tf.reduce_max(layer, axis=1, keep_dims=True)

                layer   = tf.layers.dropout(layer, rate=1.0-keep_prob, training=training)
                layer   = tf.layers.conv1d(layer, num_outputs, 1, 1)
#                 layer   = tf.layers.batch_normalization(layer, training=training)
                layer   = tf.nn.relu(layer)

                layer   = tf.layers.dropout(layer, rate=1.0-keep_prob, training=training)
                layer   = tf.layers.conv1d(layer, num_outputs, 1, 1)
#                 layer   = tf.layers.batch_normalization(layer, training=training)

                layer   = tf.squeeze(layer)
                
                print(('my_phi','output.shape',layer.get_shape().as_list()))
                
                return layer
            
            
            return my_phi


        cell = MinimalRNNCell(NUM_HIDDEN_UNITS,
                              phi_initializer=my_phi_initializer if use_custom_phi else None)
        
        sequence_length = [MAX_SEQ_LEN] * BATCH_SIZE
        
        last, states = tf.nn.dynamic_rnn(
            cell,
            inputs,
            sequence_length=sequence_length,
            dtype=tf.float32)
        rnn_output = last[:,MAX_SEQ_LEN-1,:]
        outputs    = tf.layers.dense(rnn_output, 10)
        
        loss       = tf.losses.sparse_softmax_cross_entropy(
            labels,
            outputs)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimize   = tf.train.AdamOptimizer(learning_rate=0.001). \
                minimize(loss)

        preds      = tf.argmax(outputs, axis=1)
        errors     = tf.count_nonzero(labels - preds)
        accuracy   = 1.0 - tf.cast(errors,tf.float32) / \
                        tf.cast(tf.size(preds), tf.float32)
        
        with tf.control_dependencies([
            tf.assign(training, True),
            tf.assign(global_step, step_update)
        ]):
            self.train_mode = tf.constant(1)
            
        with tf.control_dependencies([
            tf.assign(training, False)
        ]):
            self.test_mode  = tf.constant(1)

        self.step_update = step_update
        self.inputs   = inputs
        self.labels   = labels
        self.outputs  = outputs
        self.loss     = loss
        self.optimize = optimize
        self.accuracy = accuracy

In [10]:
def train(sess, model, max_epochs, train_writer=None, test_writer=None):
    """
    """
    
    step = sess.run(tf.train.get_global_step())

    for ep in range(max_epochs):

        for i in range(train_loop_count):

            sess.run(model.train_mode, {model.step_update: step})
        
            offs = i * BATCH_SIZE
            batch_input = mnist.train.images[offs:offs+BATCH_SIZE,:]. \
                            reshape([BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS])
            batch_label = mnist.train.labels[offs:offs+BATCH_SIZE]
            t_start     = time.time()
            _, loss, accuracy = sess.run(
                [model.optimize, model.loss, model.accuracy],
                 feed_dict = {
                     model.inputs: batch_input,
                     model.labels: batch_label })
            t_elapsed     = time.time() - t_start
            step += 1
            
            if train_writer:
                summary = tf.Summary(
                    value = [
                        tf.Summary.Value(tag='accuracy',simple_value=accuracy),
                        tf.Summary.Value(tag='loss',simple_value=loss),
                        tf.Summary.Value(tag='elapsed',simple_value=t_elapsed),
                    ])
                train_writer.add_summary(summary, global_step=step)
                train_writer.flush()
                
            if step % 251 == 0:
                print('[train] ep {:d}, step {:d}, accu {:.5f}, loss {:.5f} [elapsed {:.5f}]'.format(
                    ep + 1, step, accuracy, loss, t_elapsed))
                
        test_accuracies = []
        
        sess.run(model.test_mode)

        for i in range(test_loop_count):
            offs = i * BATCH_SIZE
            batch_input = mnist.test.images[offs:offs+BATCH_SIZE,:]. \
                            reshape([BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS])
            batch_label = mnist.test.labels[offs:offs+BATCH_SIZE]
            t_start   = time.time()
            accuracy, = sess.run([model.accuracy],
                                 feed_dict = {
                                     model.inputs: batch_input,
                                     model.labels: batch_label})
            t_elapsed = time.time() - t_start
            test_accuracies.append(accuracy)
            
            if test_writer:
                summary = tf.Summary(
                    value = [
                        tf.Summary.Value(tag='accuracy',simple_value=accuracy),
                        tf.Summary.Value(tag='elapsed',simple_value=t_elapsed),
                    ])
                test_writer.add_summary(summary, global_step=step)
                test_writer.flush()
                
            if i % 503 == 0:
                print(' [test] ep {:d}, step {:d}, accu {:.5f} [elapsed {:.5f}]'.format(
                    ep + 1, step, np.mean(test_accuracies), t_elapsed))


In [11]:
tf.reset_default_graph()

inputs_ = tf.placeholder(tf.float32,
                         [BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS],
                         name='inputs')
labels_ = tf.placeholder(tf.int64,
                         [BATCH_SIZE],
                         name='labels')

model   = MnistRnn(inputs_, labels_)

In [12]:
config = tf.ConfigProto(gpu_options={'allow_growth':True})
sess   = tf.InteractiveSession(config=config)

In [13]:
train_writer = tf.summary.FileWriter('logdir/train-rnn', 
                                     graph=tf.get_default_graph())
test_writer  = tf.summary.FileWriter('logdir/test-rnn',
                                     graph=tf.get_default_graph())

In [14]:
sess.run(tf.global_variables_initializer())

In [15]:
train(sess, model, 10, train_writer, test_writer)

[train] ep 1, step 251, accu 0.55469, loss 1.54025 [elapsed 0.00962]
 [test] ep 1, step 430, accu 0.69531 [elapsed 0.00963]
[train] ep 2, step 502, accu 0.68750, loss 1.01582 [elapsed 0.00993]
[train] ep 2, step 753, accu 0.75781, loss 0.77917 [elapsed 0.00969]
 [test] ep 2, step 859, accu 0.88281 [elapsed 0.00459]
[train] ep 3, step 1004, accu 0.84375, loss 0.44357 [elapsed 0.00981]
[train] ep 3, step 1255, accu 0.91406, loss 0.26452 [elapsed 0.01014]
 [test] ep 3, step 1288, accu 0.91406 [elapsed 0.00486]
[train] ep 4, step 1506, accu 0.89844, loss 0.30739 [elapsed 0.01000]
 [test] ep 4, step 1717, accu 0.93750 [elapsed 0.00456]
[train] ep 5, step 1757, accu 0.89062, loss 0.34249 [elapsed 0.00982]
[train] ep 5, step 2008, accu 0.91406, loss 0.26912 [elapsed 0.01257]
 [test] ep 5, step 2146, accu 0.96094 [elapsed 0.00448]
[train] ep 6, step 2259, accu 0.93750, loss 0.23653 [elapsed 0.00956]
[train] ep 6, step 2510, accu 0.96094, loss 0.15191 [elapsed 0.00967]
 [test] ep 6, step 2575, 

In [16]:
tf.reset_default_graph()

inputs_ = tf.placeholder(tf.float32,
                         [BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS],
                         name='inputs')
labels_ = tf.placeholder(tf.int64,
                         [BATCH_SIZE],
                         name='labels')

model   = MnistRnn(inputs_, labels_, use_custom_phi = True)

('my_phi_initializer', 'inputs[0].shape', [28])
('my_phi', 'output.shape', [128, 31])


In [17]:
config = tf.ConfigProto(gpu_options={'allow_growth':True})
sess   = tf.InteractiveSession(config=config)

In [18]:
train_writer = tf.summary.FileWriter('logdir/train-c-rnn', 
                                     graph=tf.get_default_graph())
test_writer  = tf.summary.FileWriter('logdir/test-c-rnn',
                                     graph=tf.get_default_graph())

In [None]:
sess.run(tf.global_variables_initializer())

In [None]:
train(sess, model, 20, train_writer, test_writer)

[train] ep 1, step 251, accu 0.44531, loss 1.34853 [elapsed 0.07080]
 [test] ep 1, step 430, accu 0.59375 [elapsed 0.02875]
[train] ep 2, step 502, accu 0.66406, loss 0.99989 [elapsed 0.07047]
[train] ep 2, step 753, accu 0.72656, loss 0.94284 [elapsed 0.06955]
 [test] ep 2, step 859, accu 0.74219 [elapsed 0.01569]
[train] ep 3, step 1004, accu 0.78906, loss 0.61635 [elapsed 0.11842]
[train] ep 3, step 1255, accu 0.83594, loss 0.46208 [elapsed 0.09284]
 [test] ep 3, step 1288, accu 0.84375 [elapsed 0.02022]
[train] ep 4, step 1506, accu 0.85938, loss 0.39811 [elapsed 0.07065]
 [test] ep 4, step 1717, accu 0.89062 [elapsed 0.01450]
[train] ep 5, step 1757, accu 0.89062, loss 0.35891 [elapsed 0.07141]
[train] ep 5, step 2008, accu 0.88281, loss 0.38823 [elapsed 0.11234]
 [test] ep 5, step 2146, accu 0.91406 [elapsed 0.01358]
[train] ep 6, step 2259, accu 0.88281, loss 0.33796 [elapsed 0.07184]
[train] ep 6, step 2510, accu 0.90625, loss 0.23762 [elapsed 0.07216]
 [test] ep 6, step 2575, 

In [None]:
!tensorboard --logdir logdir