In [1]:
# coding: utf-8
from __future__ import print_function, division, absolute_import
import sys
import time
import numpy as np
import tensorflow as tf

In [2]:
# Start from an empty TensorBoard log directory: remove any existing `logdir`
# tree, then recreate it.
!rm -fr logdir
!mkdir -p logdir

In [3]:
from tensorflow.examples.tutorials.mnist.input_data \
  import read_data_sets

# Load the MNIST splits from ./mnist. one_hot=False is passed so that labels
# are requested as class indices rather than one-hot vectors (they are later
# fed to an int64 placeholder and a sparse cross-entropy loss).
mnist = read_data_sets('./mnist', one_hot=False)

Extracting ./mnist/train-images-idx3-ubyte.gz
Extracting ./mnist/train-labels-idx1-ubyte.gz
Extracting ./mnist/t10k-images-idx3-ubyte.gz
Extracting ./mnist/t10k-labels-idx1-ubyte.gz


In [4]:
# Every image batch is reshaped to [BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS]
# before it is fed to the RNN (see the reshape calls in `train`).
INPUT_UNITS = 28   # features per time step
BATCH_SIZE = 128   # examples per batch; also fixed in the placeholder shapes
MAX_SEQ_LEN = 28   # time steps per example

In [5]:
# Number of full batches per pass over each split. Floor division means the
# trailing examples that do not fill a complete batch are never visited.
train_loop_count = mnist.train.num_examples // BATCH_SIZE
test_loop_count  = mnist.test.num_examples // BATCH_SIZE

In [6]:
from minimalrnn import MinimalRNNCell
from tensorflow.python.layers.layers import conv1d,max_pooling1d,dropout,batch_normalization

class MnistRnn:
    """MNIST classifier graph: a ``MinimalRNNCell`` unrolled with
    ``tf.nn.dynamic_rnn`` and followed by a 10-unit dense layer.

    Constructing an instance adds every op to the current default graph and
    exposes the ones a training loop needs as attributes:

    * ``inputs`` / ``labels`` -- the tensors passed to the constructor.
    * ``outputs``  -- logits produced by the dense layer.
    * ``loss``     -- sparse softmax cross-entropy of ``outputs`` vs ``labels``.
    * ``optimize`` -- Adam (learning rate 0.001) minimisation op for ``loss``.
    * ``accuracy`` -- 1 - (number of wrong predictions / number of predictions).
    * ``train_mode`` -- running it sets the ``training`` flag to True and
      stores the values fed through ``step_update``, ``rnn_dropout_update``
      and ``cnn_dropout_update`` into the corresponding variables.
    * ``test_mode`` -- running it sets the ``training`` flag to False.
    """
    def __init__(self, args, inputs, labels):
        """Build the graph.

        Args:
            args: hyper-parameter record; the attributes read here are
                ``num_hidden_units``, ``use_custom_phi``, ``use_batch_norm``
                and ``use_variational_dropout``.
            inputs: float tensor fed to ``tf.nn.dynamic_rnn``. The sequence
                lengths are fixed to ``[MAX_SEQ_LEN] * BATCH_SIZE`` and the
                output at time index ``MAX_SEQ_LEN - 1`` is used, so the
                caller is expected to provide ``BATCH_SIZE`` sequences of
                ``MAX_SEQ_LEN`` steps.
            labels: integer class-index tensor, one entry per sequence.
        """

        # Mode flag, step counter and dropout rates live in variables so they
        # can be switched by running `train_mode` / `test_mode`. They are
        # bookkeeping state, not model parameters, hence trainable=False:
        # otherwise the optimizer may compute gradients for the dropout rates.
        training    = tf.Variable(False,name='training',dtype=tf.bool,trainable=False)
        global_step = tf.Variable(1,name='global_step',dtype=tf.int64,trainable=False)
        rnn_dropout_rate = tf.Variable(0.0,name='rnn_dropout_rate',dtype=tf.float32,trainable=False)
        cnn_dropout_rate = tf.Variable(0.0,name='cnn_dropout_rate',dtype=tf.float32,trainable=False)

        # Values assigned into the variables above when `train_mode` is run.
        step_update = tf.placeholder(tf.int64,None,name='step_update')
        rnn_dropout_update = tf.placeholder(tf.float32,None,name='rnn_dropout_update')
        cnn_dropout_update = tf.placeholder(tf.float32,None,name='cnn_dropout_update')

        def my_phi_initializer(inputs, num_outputs, **kwargs):
            """Return the custom phi function handed to ``MinimalRNNCell``.

            ``num_outputs`` sets the channel counts of the convolution stack;
            extra keyword arguments are accepted and ignored.
            """

            print(('my_phi_initializer','inputs.shape',inputs.get_shape().as_list()))

            def my_phi(inputs):
                """Map one time step of input to ``num_outputs`` features with
                a stack of 1-D convolutions over the feature axis."""

                # Treat the feature axis as a length-`input_units` signal with
                # a single channel.
                input_units = inputs.get_shape().as_list()[-1]
                layer   = tf.reshape(inputs, [-1,input_units,1])

                layer   = conv1d(layer, num_outputs//3, 5, 1, activation=tf.nn.relu)
                layer   = max_pooling1d(layer, 3, 2)
                layer   = conv1d(layer, num_outputs, 3, 1, activation=tf.nn.relu)
                layer   = conv1d(layer, num_outputs, 3, 1, activation=tf.nn.relu)
                layer   = conv1d(layer, num_outputs//2, 3, 1, activation=tf.nn.relu)
                layer   = max_pooling1d(layer, 3, 2)

                layer   = conv1d(layer, num_outputs, 1, 1, activation=tf.nn.relu)
                # tf.layers.dropout is the identity unless `training` is
                # passed (its default is False), so the flag must be wired in
                # for the configured rate to have any effect.
                layer   = dropout(layer, rate=cnn_dropout_rate, training=training)
                if args.use_batch_norm:
                    layer   = tf.layers.batch_normalization(layer, training=training)
                layer   = conv1d(layer, num_outputs, 1, 1, activation=tf.nn.relu)

                # Average over the remaining positions. Reducing the axis
                # directly (instead of keep_dims + an axis-less squeeze) never
                # drops a batch or channel axis that happens to have size 1.
                layer   = tf.reduce_mean(layer, axis=1)

                print(('my_phi','layer.shape',layer.get_shape().as_list()))

                return layer


            return my_phi


        cell = MinimalRNNCell(args.num_hidden_units,
                              phi_initializer=my_phi_initializer if args.use_custom_phi else None)

        # Recurrent-state dropout is active only while the training flag is set.
        rnn_keep_prob   = tf.cond(training,lambda: 1.0 - rnn_dropout_rate,lambda: 1.0)
        cell = tf.nn.rnn_cell.DropoutWrapper(
            cell,
            state_keep_prob=rnn_keep_prob,
            variational_recurrent=args.use_variational_dropout,
            dtype=tf.float32
        )

        # Every sequence is declared to have the full length.
        sequence_length = [MAX_SEQ_LEN] * BATCH_SIZE

        last, states = tf.nn.dynamic_rnn(
            cell,
            inputs,
            sequence_length=sequence_length,
            dtype=tf.float32)
        # Classify from the RNN output at the final time step.
        rnn_output = last[:,MAX_SEQ_LEN-1,:]
        outputs    = tf.layers.dense(rnn_output, 10)

        loss       = tf.losses.sparse_softmax_cross_entropy(
            labels,
            outputs)

        # Run whatever is registered in UPDATE_OPS (e.g. by batch
        # normalization) together with each optimisation step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimize   = tf.train.AdamOptimizer(learning_rate=0.001). \
                minimize(loss)

        preds      = tf.argmax(outputs, axis=1)
        errors     = tf.count_nonzero(labels - preds)
        accuracy   = 1.0 - tf.cast(errors,tf.float32) / \
                        tf.cast(tf.size(preds), tf.float32)

        # `train_mode` / `test_mode` are dummy constants whose only purpose is
        # to carry the assignments below as control dependencies.
        with tf.control_dependencies([
            tf.assign(training, True),
            tf.assign(global_step, step_update),
            tf.assign(rnn_dropout_rate, rnn_dropout_update),
            tf.assign(cnn_dropout_rate, cnn_dropout_update),
        ]):
            self.train_mode = tf.constant(1)

        with tf.control_dependencies([
            tf.assign(training, False)
        ]):
            self.test_mode  = tf.constant(1)

        self.step_update = step_update
        self.rnn_dropout_update = rnn_dropout_update
        self.cnn_dropout_update = cnn_dropout_update
        self.inputs   = inputs
        self.labels   = labels
        self.outputs  = outputs
        self.loss     = loss
        self.optimize = optimize
        self.accuracy = accuracy

In [7]:
def train(args, sess, model, max_epochs, train_writer=None, test_writer=None):
    """Train ``model`` for ``max_epochs`` passes, evaluating after each pass.

    Each pass walks the training split in ``train_loop_count`` consecutive
    batches, then the test split in ``test_loop_count`` consecutive batches.
    Data, loop counts and batch geometry come from the module-level ``mnist``,
    ``train_loop_count``, ``test_loop_count``, ``BATCH_SIZE``, ``MAX_SEQ_LEN``
    and ``INPUT_UNITS``.

    Args:
        args: hyper-parameter record; ``rnn_dropout_rate`` and
            ``cnn_dropout_rate`` are fed to the model before every step.
        sess: session in which the model's variables are initialised.
        model: object exposing ``train_mode``, ``test_mode``, ``step_update``,
            ``rnn_dropout_update``, ``cnn_dropout_update``, ``inputs``,
            ``labels``, ``optimize``, ``loss`` and ``accuracy``.
        max_epochs: number of train-then-evaluate passes.
        train_writer: optional summary writer; receives accuracy, loss and
            step time after every training step and is flushed each time.
        test_writer: optional summary writer; receives mean test accuracy and
            mean batch time once per pass.
    """

    def take_batch(split, batch_index):
        # Slice one full batch out of `split` and shape the images the way
        # the input tensor is fed.
        lo = batch_index * BATCH_SIZE
        hi = lo + BATCH_SIZE
        images = split.images[lo:hi,:].reshape(
            [BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS])
        return images, split.labels[lo:hi]

    def as_summary(tagged_values):
        # Pack (tag, value) pairs into a single Summary, preserving order.
        return tf.Summary(value=[
            tf.Summary.Value(tag=tag, simple_value=value)
            for tag, value in tagged_values])

    # Continue counting from whatever the graph's global step currently holds.
    step = sess.run(tf.train.get_global_step())

    for epoch_number in range(1, max_epochs + 1):

        # ---- training pass -------------------------------------------------
        for batch_index in range(train_loop_count):

            # Switch the graph to training mode and push the current step and
            # dropout rates into their variables.
            sess.run(model.train_mode, {
                model.step_update: step,
                model.rnn_dropout_update: args.rnn_dropout_rate,
                model.cnn_dropout_update: args.cnn_dropout_rate})

            images, targets = take_batch(mnist.train, batch_index)

            # Only the optimisation call itself is timed.
            started = time.time()
            _, loss, accuracy = sess.run(
                [model.optimize, model.loss, model.accuracy],
                feed_dict={model.inputs: images, model.labels: targets})
            duration = time.time() - started
            step += 1

            if train_writer:
                train_writer.add_summary(
                    as_summary([('accuracy', accuracy),
                                ('loss', loss),
                                ('elapsed', duration)]),
                    global_step=step)
                train_writer.flush()

            if step % 251 == 0:
                print('[train] ep {:d}, step {:d}, accu {:.5f}, loss {:.5f} [elapsed {:.5f}]'.format(
                    epoch_number, step, accuracy, loss, duration))

        # ---- evaluation pass -----------------------------------------------
        sess.run(model.test_mode)

        batch_accuracies = []
        batch_durations  = []

        for batch_index in range(test_loop_count):
            images, targets = take_batch(mnist.test, batch_index)

            started = time.time()
            batch_accuracy = sess.run(
                model.accuracy,
                feed_dict={model.inputs: images, model.labels: targets})
            batch_durations.append(time.time() - started)
            batch_accuracies.append(batch_accuracy)

        mean_accuracy = np.mean(batch_accuracies)
        mean_elapsed  = np.mean(batch_durations)

        if test_writer:
            test_writer.add_summary(
                as_summary([('accuracy', mean_accuracy),
                            ('elapsed', mean_elapsed)]),
                global_step=step)
            test_writer.flush()

        print(' [test] ep {:d}, step {:d}, accu {:.5f} [elapsed {:.5f}]'.format(
            epoch_number, step, mean_accuracy, mean_elapsed))


In [8]:
from collections import namedtuple
# One experiment configuration. `verbose` is not passed to namedtuple: False
# was already its default, and the parameter no longer exists on Python 3.7+.
HyperParams = namedtuple('HyperParams',[
    'num_hidden_units',          # size passed to MinimalRNNCell
    'rnn_dropout_rate',          # fed to the model's rnn_dropout_update
    'cnn_dropout_rate',          # fed to the model's cnn_dropout_update
    'use_custom_phi',            # use the convolutional phi instead of the cell default
    'use_batch_norm',            # add batch normalization inside the custom phi
    'use_variational_dropout'])  # passed as DropoutWrapper's variational_recurrent

In [9]:
# Configurations to sweep. Only the hidden size and the custom-phi switch
# vary; dropout rates stay at 0.0, batch norm stays off and the variational
# dropout flag stays on. The pair order below is the order the runs execute in.
hp_configs = [
    HyperParams(num_hidden_units=hidden_units,
                rnn_dropout_rate=0.0,
                cnn_dropout_rate=0.0,
                use_custom_phi=custom_phi,
                use_batch_norm=False,
                use_variational_dropout=True)
    for hidden_units, custom_phi in [
        (31, False),
        (15, False),
        (31, True),
        (15, True),
        (61, False),
        (61, True),
        (91, False),
        (91, True),
    ]
]

In [10]:
from urllib import quote

In [11]:
def hp_signature(hp):
    """Return a '/'-joined label built from the field values of ``hp``.

    Each value is converted with ``str`` and passed through ``quote``; the
    values appear in the namedtuple's field order and field names are not
    included.

    Args:
        hp: a namedtuple instance (anything providing ``_asdict()``).

    Returns:
        str: for example ``'31/0.0/0.0/False/False/True'``.
    """
    # Use the argument itself; the signature must not depend on whatever a
    # global named `args` happens to hold when the function is called.
    return '/'.join(quote(str(value)) for value in hp._asdict().values())

In [12]:
# Train one model per configuration, each in its own graph and session, and
# write its summaries to a pair of log directories named after the signature.
for args in hp_configs:

    signature = hp_signature(args)
    print('start training:',signature)

    # Drop the previous configuration's graph before building the next one.
    tf.reset_default_graph()
    inputs_      = tf.placeholder(tf.float32,
                             [BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS],
                             name='inputs')
    labels_      = tf.placeholder(tf.int64,
                             [BATCH_SIZE],
                             name='labels')
    model        = MnistRnn(args, inputs_, labels_)

    # The session is closed when the block exits, so sessions do not pile up
    # across configurations.
    with tf.Session(config=tf.ConfigProto(gpu_options={'allow_growth':True})) as sess:
        sess.run(tf.global_variables_initializer())

        train_writer = tf.summary.FileWriter('logdir/train_{:s}'.format(signature))
        test_writer  = tf.summary.FileWriter('logdir/test__{:s}'.format(signature))

        try:
            train(args, sess, model, 10, train_writer, test_writer)
        finally:
            # Close the event files even if training is interrupted.
            train_writer.close()
            test_writer.close()

start training: 31/0.0/0.0/False/False/True
[train] ep 1, step 251, accu 0.54688, loss 1.59292 [elapsed 0.01381]
 [test] ep 1, step 430, accu 0.70192 [elapsed 0.00647]
[train] ep 2, step 502, accu 0.69531, loss 1.01491 [elapsed 0.01304]
[train] ep 2, step 753, accu 0.75000, loss 0.70770 [elapsed 0.01305]
 [test] ep 2, step 859, accu 0.82833 [elapsed 0.00652]
[train] ep 3, step 1004, accu 0.88281, loss 0.49037 [elapsed 0.01315]
[train] ep 3, step 1255, accu 0.92188, loss 0.35902 [elapsed 0.01330]
 [test] ep 3, step 1288, accu 0.86729 [elapsed 0.00645]
[train] ep 4, step 1506, accu 0.89062, loss 0.32810 [elapsed 0.01335]
 [test] ep 4, step 1717, accu 0.89323 [elapsed 0.00678]
[train] ep 5, step 1757, accu 0.87500, loss 0.38172 [elapsed 0.01427]
[train] ep 5, step 2008, accu 0.91406, loss 0.34555 [elapsed 0.01328]
 [test] ep 5, step 2146, accu 0.90825 [elapsed 0.00634]
[train] ep 6, step 2259, accu 0.89062, loss 0.34980 [elapsed 0.01295]
[train] ep 6, step 2510, accu 0.95312, loss 0.16060

[train] ep 6, step 2259, accu 0.94531, loss 0.13843 [elapsed 0.02203]
[train] ep 6, step 2510, accu 0.98438, loss 0.06984 [elapsed 0.02087]
 [test] ep 6, step 2575, accu 0.96394 [elapsed 0.00943]
[train] ep 7, step 2761, accu 0.96094, loss 0.10963 [elapsed 0.02228]
 [test] ep 7, step 3004, accu 0.96785 [elapsed 0.00940]
[train] ep 8, step 3012, accu 0.97656, loss 0.09236 [elapsed 0.02183]
[train] ep 8, step 3263, accu 0.96875, loss 0.08546 [elapsed 0.02207]
 [test] ep 8, step 3433, accu 0.97095 [elapsed 0.00968]
[train] ep 9, step 3514, accu 0.99219, loss 0.05686 [elapsed 0.02246]
[train] ep 9, step 3765, accu 0.95312, loss 0.09064 [elapsed 0.02164]
 [test] ep 9, step 3862, accu 0.97165 [elapsed 0.00944]
[train] ep 10, step 4016, accu 0.95312, loss 0.11401 [elapsed 0.02171]
[train] ep 10, step 4267, accu 0.98438, loss 0.08837 [elapsed 0.02202]
 [test] ep 10, step 4291, accu 0.97336 [elapsed 0.00943]
start training: 61/0.0/0.0/True/False/True
('my_phi_initializer', 'inputs.shape', [128,

In [13]:
# Launch TensorBoard on the summaries written under `logdir`.
# NOTE(review): the recorded output shows the default port 6006 was already
# in use; another instance was presumably still running -- confirm.
!tensorboard --logdir logdir

[31mE1219 15:21:35.826254 MainThread program.py:255] TensorBoard attempted to bind to port 6006, but it was already in use
[0mTensorBoard attempted to bind to port 6006, but it was already in use
