In [1]:
# coding: utf-8
from __future__ import print_function, division, absolute_import
import sys
import time
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
!rm -fr logdir

In [3]:
!mkdir -p logdir/train logdir/test

In [4]:
from tensorflow.examples.tutorials.mnist.input_data \
  import read_data_sets

In [5]:
mnist = read_data_sets('./mnist', one_hot=False)

Extracting ./mnist/train-images-idx3-ubyte.gz
Extracting ./mnist/train-labels-idx1-ubyte.gz
Extracting ./mnist/t10k-images-idx3-ubyte.gz
Extracting ./mnist/t10k-labels-idx1-ubyte.gz


In [6]:
INPUT_UNITS = 28
NUM_HIDDEN_UNITS = 31
BATCH_SIZE = 128
MAX_SEQ_LEN = 28

In [7]:
train_loop_count = mnist.train.num_examples // BATCH_SIZE
test_loop_count  = mnist.test.num_examples // BATCH_SIZE

In [8]:
from minimalrnn import MinimalRNNCell

In [9]:
class MnistRnn:
    """
    """
    def __init__(self, inputs, labels, use_custom_phi=False):
        """
        """
        
        dropout_rate = 0.5
        
        training   = tf.Variable(False,name='training',dtype=tf.bool)
        keep_prob  = tf.cond(training,lambda: 1.0 - dropout_rate,lambda: 1.0)


        def my_phi_initializer(inputs, num_outputs, **kwargs):
            print(('my_phi_initializer','inputs.shape',inputs.get_shape().as_list()))
            def my_phi(inputs):
                input_units = inputs.get_shape().as_list()[-1]
                layer   = tf.reshape(inputs, [-1,input_units,1])
                layer   = tf.layers.conv1d(layer, num_outputs // 4, 3, 1, 
                                           activation=tf.nn.elu)
                layer   = tf.layers.average_pooling1d(layer, 2, 2)
                layer   = tf.layers.conv1d(layer, num_outputs // 2, 3, 1,
                                           activation=tf.nn.elu)
                layer   = tf.layers.average_pooling1d(layer, 2, 2)
                layer   = tf.layers.conv1d(layer, num_outputs, 3, 1, 
                                           activation=tf.nn.elu)
                layer   = tf.layers.average_pooling1d(layer, 2, 2)
                layer   = tf.reduce_max(layer, axis=1, keep_dims=True)

                layer   = tf.layers.conv1d(layer, num_outputs, 1, 1, 
                                           activation=tf.nn.elu)
                layer   = tf.nn.dropout(layer, keep_prob=keep_prob)
                layer   = tf.layers.conv1d(layer, num_outputs, 1, 1,
                                           activation=None)

                layer   = tf.squeeze(layer)
                print(('my_phi','output.shape',layer.get_shape().as_list()))
                return layer
            return my_phi


        cell = MinimalRNNCell(NUM_HIDDEN_UNITS,
                              phi_initializer=my_phi_initializer if use_custom_phi else None)
        
        sequence_length = [MAX_SEQ_LEN] * BATCH_SIZE
        
        last, states = tf.nn.dynamic_rnn(
            cell,
            inputs,
            sequence_length=sequence_length,
            dtype=tf.float32)
        rnn_output = last[:,MAX_SEQ_LEN-1,:]
        outputs    = tf.layers.dense(rnn_output, 10)
        
        loss       = tf.losses.sparse_softmax_cross_entropy(
            labels,
            outputs)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimize   = tf.train.AdamOptimizer(learning_rate=0.001). \
                minimize(loss)

        preds      = tf.argmax(outputs, axis=1)
        errors     = tf.count_nonzero(labels - preds)
        accuracy   = 1.0 - tf.cast(errors,tf.float32) / \
                        tf.cast(tf.size(preds), tf.float32)
        
        with tf.control_dependencies([
            tf.assign(training, True)
        ]):
            self.train_mode = tf.constant(1)
            
        with tf.control_dependencies([
            tf.assign(training, False)
        ]):
            self.test_mode  = tf.constant(1)

        self.inputs   = inputs
        self.labels   = labels
        self.outputs  = outputs
        self.loss     = loss
        self.optimize = optimize
        self.accuracy = accuracy

In [10]:
def train(sess, model, max_epochs, train_writer=None, test_writer=None):
    """
    """
    
    step = 0
    
    for ep in range(max_epochs):
        
        sess.run(model.train_mode)
        
        for i in range(train_loop_count):
            offs = i * BATCH_SIZE
            batch_input = mnist.train.images[offs:offs+BATCH_SIZE,:]. \
                            reshape([BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS])
            batch_label = mnist.train.labels[offs:offs+BATCH_SIZE]
            _, loss, accuracy = sess.run(
                [model.optimize, model.loss, model.accuracy],
                 feed_dict = {
                     model.inputs: batch_input,
                     model.labels: batch_label })
            step += 1
            if train_writer:
                summary = tf.Summary(
                    value = [
                        tf.Summary.Value(tag='accuracy',simple_value=accuracy),
                        tf.Summary.Value(tag='loss',simple_value=loss),
                    ])
                train_writer.add_summary(summary, global_step=step)
                train_writer.flush()
            if step % 250 == 0:
                print('[train] ep {:d}, step {:d}, loss {:.5f}, accu {:.5f}'.format(
                    ep + 1, step, loss, accuracy))
                
        test_accuracies = []
        
        sess.run(model.test_mode)

        for i in range(test_loop_count):
            offs = i * BATCH_SIZE
            batch_input = mnist.test.images[offs:offs+BATCH_SIZE,:]. \
                            reshape([BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS])
            batch_label = mnist.test.labels[offs:offs+BATCH_SIZE]
            accuracy, = sess.run([model.accuracy],
                                 feed_dict = {
                                     model.inputs: batch_input,
                                     model.labels: batch_label})
            test_accuracies.append(accuracy)
            if test_writer:
                summary = tf.Summary(
                    value = [
                        tf.Summary.Value(tag='accuracy',simple_value=accuracy),
                    ])
                test_writer.add_summary(summary, global_step=step)
                test_writer.flush()
            if i % 250 == 0:
                print(' [test] ep {:d}, step {:d}, accu {:.5f}'.format(
                    ep + 1, step, np.mean(test_accuracies)))


In [11]:
tf.reset_default_graph()

inputs_ = tf.placeholder(tf.float32,
                         [BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS],
                         name='inputs')
labels_ = tf.placeholder(tf.int64,
                         [BATCH_SIZE],
                         name='labels')

model   = MnistRnn(inputs_, labels_)

In [12]:
config = tf.ConfigProto(gpu_options={'allow_growth':True})
sess   = tf.InteractiveSession(config=config)

In [13]:
train_writer = tf.summary.FileWriter('logdir/train-rnn', 
                                     graph=tf.get_default_graph())
test_writer  = tf.summary.FileWriter('logdir/test-rnn',
                                     graph=tf.get_default_graph())

In [14]:
sess.run(tf.global_variables_initializer())
train(sess, model, 10, train_writer, test_writer)

[train] ep 1, step 250, loss 1.57092, accu 0.52344
 [test] ep 1, step 429, accu 0.71094
[train] ep 2, step 500, loss 1.03613, accu 0.70312
[train] ep 2, step 750, loss 0.71140, accu 0.76562
 [test] ep 2, step 858, accu 0.85938
[train] ep 3, step 1000, loss 0.51308, accu 0.85156
[train] ep 3, step 1250, loss 0.28956, accu 0.93750
 [test] ep 3, step 1287, accu 0.87500
[train] ep 4, step 1500, loss 0.33424, accu 0.91406
 [test] ep 4, step 1716, accu 0.89844
[train] ep 5, step 1750, loss 0.28598, accu 0.89844
[train] ep 5, step 2000, loss 0.42561, accu 0.87500
 [test] ep 5, step 2145, accu 0.92188
[train] ep 6, step 2250, loss 0.30456, accu 0.93750
[train] ep 6, step 2500, loss 0.48928, accu 0.82031
 [test] ep 6, step 2574, accu 0.94531
[train] ep 7, step 2750, loss 0.26681, accu 0.93750
[train] ep 7, step 3000, loss 0.23323, accu 0.95312
 [test] ep 7, step 3003, accu 0.96094
[train] ep 8, step 3250, loss 0.20294, accu 0.92188
 [test] ep 8, step 3432, accu 0.96094
[train] ep 9, step 3500, 

In [15]:
tf.reset_default_graph()

inputs_ = tf.placeholder(tf.float32,
                         [BATCH_SIZE, MAX_SEQ_LEN, INPUT_UNITS],
                         name='inputs')
labels_ = tf.placeholder(tf.int64,
                         [BATCH_SIZE],
                         name='labels')

model   = MnistRnn(inputs_, labels_, use_custom_phi = True)

('my_phi_initializer', 'inputs.shape', [128, 28])
('my_phi', 'output.shape', [128, 31])


In [16]:
config = tf.ConfigProto(gpu_options={'allow_growth':True})
sess   = tf.InteractiveSession(config=config)

In [17]:
train_writer = tf.summary.FileWriter('logdir/train-c-rnn', 
                                     graph=tf.get_default_graph())
test_writer  = tf.summary.FileWriter('logdir/test-c-rnn',
                                     graph=tf.get_default_graph())

In [18]:
sess.run(tf.global_variables_initializer())
train(sess, model, 10, train_writer, test_writer)

[train] ep 1, step 250, loss 1.30821, accu 0.52344
 [test] ep 1, step 429, accu 0.67969
[train] ep 2, step 500, loss 0.93973, accu 0.62500
[train] ep 2, step 750, loss 0.65945, accu 0.75781
 [test] ep 2, step 858, accu 0.78906
[train] ep 3, step 1000, loss 0.62646, accu 0.79688
[train] ep 3, step 1250, loss 0.41628, accu 0.87500
 [test] ep 3, step 1287, accu 0.87500
[train] ep 4, step 1500, loss 0.48059, accu 0.84375
 [test] ep 4, step 1716, accu 0.91406
[train] ep 5, step 1750, loss 0.35298, accu 0.87500
[train] ep 5, step 2000, loss 0.60718, accu 0.82812
 [test] ep 5, step 2145, accu 0.92969
[train] ep 6, step 2250, loss 0.34710, accu 0.90625
[train] ep 6, step 2500, loss 0.48610, accu 0.82812
 [test] ep 6, step 2574, accu 0.92969
[train] ep 7, step 2750, loss 0.29251, accu 0.89844
[train] ep 7, step 3000, loss 0.28745, accu 0.90625
 [test] ep 7, step 3003, accu 0.93750
[train] ep 8, step 3250, loss 0.32901, accu 0.85938
 [test] ep 8, step 3432, accu 0.93750
[train] ep 9, step 3500, 

In [19]:
!tensorboard --logdir logdir

[31mE1212 00:49:47.822920 MainThread program.py:255] TensorBoard attempted to bind to port 6006, but it was already in use
[0mTensorBoard attempted to bind to port 6006, but it was already in use
