In [1]:
# coding: utf-8
from __future__ import print_function, division, absolute_import
import sys
import time
import numpy as np
import tensorflow as tf

In [2]:
!rm -fr logdir
!mkdir -p logdir

In [3]:
from tensorflow.examples.tutorials.mnist.input_data \
  import read_data_sets

mnist = read_data_sets('./mnist', one_hot=False)

Extracting ./mnist/train-images-idx3-ubyte.gz
Extracting ./mnist/train-labels-idx1-ubyte.gz
Extracting ./mnist/t10k-images-idx3-ubyte.gz
Extracting ./mnist/t10k-labels-idx1-ubyte.gz


In [4]:
INPUT_UNITS = 28
NUM_HIDDEN_UNITS = 31
BATCH_SIZE = 128
MAX_SEQ_LEN = 28

In [5]:
train_loop_count = mnist.train.num_examples // BATCH_SIZE
test_loop_count  = mnist.test.num_examples // BATCH_SIZE

In [6]:
from minimalrnn import MinimalRNNCell

class MnistRnn:
    """
    """
    def __init__(self, inputs, labels, use_custom_phi=False):
        """
        """

        global_step = tf.Variable(1,name='global_step',dtype=tf.int64)
        training    = tf.Variable(False,name='training',dtype=tf.bool)
        dropout_rate = tf.Variable(0.0,name='dropout_rate',dtype=tf.float32)

        keep_prob   = tf.cond(training,lambda: 1.0 - dropout_rate,lambda: 1.0)

        step_update = tf.placeholder(tf.int64,None,name='step_update')
        dropout_update = tf.placeholder(tf.float32,None,name='dropout_update')

        def my_phi_initializer(inputs, num_outputs, **kwargs):
            
            print(('my_phi_initializer','inputs.shape',inputs.get_shape().as_list()))
            
            def my_phi(inputs):
                
                input_units = inputs.get_shape().as_list()[-1]
                layer   = tf.reshape(inputs, [-1,input_units,1])

                layer   = tf.layers.dropout(layer, rate=1.0-keep_prob, training=training)
                layer   = tf.layers.conv1d(layer, num_outputs // 4, 3, 1) 
#                 layer   = tf.layers.batch_normalization(layer, training=training)
                layer   = tf.nn.relu(layer)

                layer   = tf.layers.dropout(layer, rate=1.0-keep_prob, training=training)
                layer   = tf.layers.conv1d(layer, num_outputs // 2, 3, 1)
#                 layer   = tf.layers.batch_normalization(layer, training=training)
                layer   = tf.nn.relu(layer)

                layer   = tf.layers.max_pooling1d(layer, 2, 2)

                layer   = tf.layers.dropout(layer, rate=1.0-keep_prob, training=training)
                layer   = tf.layers.conv1d(layer, num_outputs, 3, 1)
#                 layer   = tf.layers.batch_normalization(layer, training=training)
                layer   = tf.nn.relu(layer)

                layer   = tf.layers.max_pooling1d(layer, 2, 2)

                layer   = tf.reduce_max(layer, axis=1, keep_dims=True)

                layer   = tf.layers.dropout(layer, rate=1.0-keep_prob, training=training)
                layer   = tf.layers.conv1d(layer, num_outputs, 1, 1)
#                 layer   = tf.layers.batch_normalization(layer, training=training)
                layer   = tf.nn.relu(layer)

                layer   = tf.layers.dropout(layer, rate=1.0-keep_prob, training=training)
                layer   = tf.layers.conv1d(layer, num_outputs, 1, 1)
#                 layer   = tf.layers.batch_normalization(layer, training=training)

                layer   = tf.squeeze(layer)
                
                print(('my_phi','output.shape',layer.get_shape().as_list()))
                
                return layer
            
            
            return my_phi


        cell = MinimalRNNCell(NUM_HIDDEN_UNITS,
                              phi_initializer=my_phi_initializer if use_custom_phi else None)

        cell = tf.nn.rnn_cell.DropoutWrapper(
            cell,
            state_keep_prob=keep_prob,
            variational_recurrent=True,
            dtype=tf.float32
        )
        
        sequence_length = [MAX_SEQ_LEN] * BATCH_SIZE
        
        last, states = tf.nn.dynamic_rnn(
            cell,
            inputs,
            sequence_length=sequence_length,
            dtype=tf.float32)
        rnn_output = last[:,MAX_SEQ_LEN-1,:]
        outputs    = tf.layers.dense(rnn_output, 10)
        
        loss       = tf.losses.sparse_softmax_cross_entropy(
            labels,
            outputs)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimize   = tf.train.AdamOptimizer(learning_rate=0.001). \
                minimize(loss)

        preds      = tf.argmax(outputs, axis=1)
        errors     = tf.count_nonzero(labels - preds)
        accuracy   = 1.0 - tf.cast(errors,tf.float32) / \
                        tf.cast(tf.size(preds), tf.float32)
        
        with tf.control_dependencies([
            tf.assign(training, True),
            tf.assign(global_step, step_update),
            tf.assign(dropout_rate, dropout_update)
        ]):
            self.train_mode = tf.constant(1)
            
        with tf.control_dependencies([
            tf.assign(training, False)
        ]):
            self.test_mode  = tf.constant(1)

        self.step_update = step_update
        self.dropout_update = dropout_update
        self.inputs   = inputs
        self.labels   = labels
        self.outputs  = outputs
        self.loss     = loss
        self.optimize = optimize
        self.accuracy = accuracy

In [7]:
def train(sess, model, max_epochs, train_writer=None, test_writer=None, dropout_rate=0.0):
    """
    """
    
    step = sess.run(tf.train.get_global_step())

    for ep in range(max_epochs):

        for i in range(train_loop_count):

            sess.run(model.train_mode, {
                model.step_update: step,
                model.dropout_update: dropout_rate})

            offs = i * BATCH_SIZE
            batch_input = mnist.train.images[offs:offs+BATCH_SIZE,:]. \
                            reshape([BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS])
            batch_label = mnist.train.labels[offs:offs+BATCH_SIZE]
            t_start     = time.time()
            _, loss, accuracy = sess.run(
                [model.optimize, model.loss, model.accuracy],
                 feed_dict = {
                     model.inputs: batch_input,
                     model.labels: batch_label })
            t_elapsed     = time.time() - t_start
            step += 1
            
            if train_writer:
                summary = tf.Summary(
                    value = [
                        tf.Summary.Value(tag='accuracy',simple_value=accuracy),
                        tf.Summary.Value(tag='loss',simple_value=loss),
                        tf.Summary.Value(tag='elapsed',simple_value=t_elapsed),
                    ])
                train_writer.add_summary(summary, global_step=step)
                train_writer.flush()
                
            if step % 251 == 0:
                print('[train] ep {:d}, step {:d}, accu {:.5f}, loss {:.5f} [elapsed {:.5f}]'.format(
                    ep + 1, step, accuracy, loss, t_elapsed))
                
        test_accuracies = []
        
        sess.run(model.test_mode)

        for i in range(test_loop_count):
            offs = i * BATCH_SIZE
            batch_input = mnist.test.images[offs:offs+BATCH_SIZE,:]. \
                            reshape([BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS])
            batch_label = mnist.test.labels[offs:offs+BATCH_SIZE]
            t_start   = time.time()
            accuracy, = sess.run([model.accuracy],
                                 feed_dict = {
                                     model.inputs: batch_input,
                                     model.labels: batch_label})
            t_elapsed = time.time() - t_start
            test_accuracies.append(accuracy)
            
            if test_writer:
                summary = tf.Summary(
                    value = [
                        tf.Summary.Value(tag='accuracy',simple_value=accuracy),
                        tf.Summary.Value(tag='elapsed',simple_value=t_elapsed),
                    ])
                test_writer.add_summary(summary, global_step=step)
                test_writer.flush()
                
            if i % 503 == 0:
                print(' [test] ep {:d}, step {:d}, accu {:.5f} [elapsed {:.5f}]'.format(
                    ep + 1, step, np.mean(test_accuracies), t_elapsed))


In [8]:
tf.reset_default_graph()
inputs_ = tf.placeholder(tf.float32,
                         [BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS],
                         name='inputs')
labels_ = tf.placeholder(tf.int64,
                         [BATCH_SIZE],
                         name='labels')
model   = MnistRnn(inputs_, labels_)

In [9]:
config = tf.ConfigProto(gpu_options={'allow_growth':True})
sess   = tf.Session(config=config)
sess.run(tf.global_variables_initializer())
train_writer = tf.summary.FileWriter('logdir/train1')
test_writer  = tf.summary.FileWriter('logdir/test1')

In [10]:
train(sess, model, 10, train_writer, test_writer)

[train] ep 1, step 251, accu 0.52344, loss 1.54951 [elapsed 0.01618]
 [test] ep 1, step 430, accu 0.67188 [elapsed 0.01246]
[train] ep 2, step 502, accu 0.67188, loss 1.08780 [elapsed 0.01331]
[train] ep 2, step 753, accu 0.79688, loss 0.68375 [elapsed 0.01404]
 [test] ep 2, step 859, accu 0.86719 [elapsed 0.00660]
[train] ep 3, step 1004, accu 0.82031, loss 0.55651 [elapsed 0.01374]
[train] ep 3, step 1255, accu 0.90625, loss 0.35159 [elapsed 0.01389]
 [test] ep 3, step 1288, accu 0.90625 [elapsed 0.00619]
[train] ep 4, step 1506, accu 0.88281, loss 0.37363 [elapsed 0.01508]
 [test] ep 4, step 1717, accu 0.94531 [elapsed 0.00596]
[train] ep 5, step 1757, accu 0.92969, loss 0.28628 [elapsed 0.01389]
[train] ep 5, step 2008, accu 0.89062, loss 0.32012 [elapsed 0.01368]
 [test] ep 5, step 2146, accu 0.96094 [elapsed 0.00665]
[train] ep 6, step 2259, accu 0.92188, loss 0.24059 [elapsed 0.01342]
[train] ep 6, step 2510, accu 0.95312, loss 0.15077 [elapsed 0.02121]
 [test] ep 6, step 2575, 

In [11]:
tf.reset_default_graph()
inputs2_ = tf.placeholder(tf.float32,
                         [BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS],
                         name='inputs')
labels2_ = tf.placeholder(tf.int64,
                         [BATCH_SIZE],
                         name='labels')
model2   = MnistRnn(inputs2_, labels2_, use_custom_phi = True)

('my_phi_initializer', 'inputs.shape', [128, 28])
('my_phi', 'output.shape', [128, 31])


In [12]:
config = tf.ConfigProto(gpu_options={'allow_growth':True})
sess   = tf.Session(config=config)
sess.run(tf.global_variables_initializer())
train_writer2 = tf.summary.FileWriter('logdir/train2')
test_writer2  = tf.summary.FileWriter('logdir/test2')

In [13]:
train(sess, model2, 20, train_writer2, test_writer2)

[train] ep 1, step 251, accu 0.45312, loss 1.39273 [elapsed 0.08365]
 [test] ep 1, step 430, accu 0.58594 [elapsed 0.02963]
[train] ep 2, step 502, accu 0.61719, loss 1.05632 [elapsed 0.07524]
[train] ep 2, step 753, accu 0.68750, loss 0.98259 [elapsed 0.07238]
 [test] ep 2, step 859, accu 0.71094 [elapsed 0.01355]
[train] ep 3, step 1004, accu 0.74219, loss 0.74703 [elapsed 0.07163]
[train] ep 3, step 1255, accu 0.86719, loss 0.55851 [elapsed 0.07279]
 [test] ep 3, step 1288, accu 0.79688 [elapsed 0.01675]
[train] ep 4, step 1506, accu 0.72656, loss 0.66578 [elapsed 0.07258]
 [test] ep 4, step 1717, accu 0.84375 [elapsed 0.01511]
[train] ep 5, step 1757, accu 0.83594, loss 0.50263 [elapsed 0.07486]
[train] ep 5, step 2008, accu 0.85938, loss 0.44661 [elapsed 0.08205]
 [test] ep 5, step 2146, accu 0.89844 [elapsed 0.01887]
[train] ep 6, step 2259, accu 0.89062, loss 0.36692 [elapsed 0.08542]
[train] ep 6, step 2510, accu 0.90625, loss 0.30799 [elapsed 0.07954]
 [test] ep 6, step 2575, 

In [14]:
!tensorboard --logdir logdir

[31mE1213 12:01:45.767539 MainThread program.py:255] TensorBoard attempted to bind to port 6006, but it was already in use
[0mTensorBoard attempted to bind to port 6006, but it was already in use
