In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.contrib.rnn import RNNCell
from tensorflow.contrib import rnn
from scipy import ndimage
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

tf.set_random_seed(123)  # reproducibility

  from ._conv import register_converters as _register_converters


In [2]:
# Hyperparameters and model dimensions, defined before first use so the data
# preparation below can share `num_classes` with the model.
learning_rate = 0.001
training_epochs = 50
batch_size = 100
num_hidden = 100
timesteps = 28
num_classes = 10

mnist = tf.contrib.learn.datasets.load_dataset("mnist")
train_data = mnist.train.images  # Returns np.array
# One-hot encode the integer labels by selecting rows of an identity matrix.
train_labels = np.eye(num_classes)[np.asarray(mnist.train.labels, dtype=np.int32)]
test_data = mnist.test.images  # Returns np.array
test_labels = np.eye(num_classes)[np.asarray(mnist.test.labels, dtype=np.int32)]
# Hold out 10% of the training set for validation; the fixed random_state
# keeps the split identical between runs.
train_data, val_data, train_labels, val_labels = train_test_split(
    train_data, train_labels, test_size=0.1, random_state=123)

# Single-argument parenthesised print behaves the same on Python 2 and 3.
print(train_data.shape)

class SimpleRNNCell(RNNCell):
    """Vanilla recurrent cell whose output is also its next state.

    Each step computes activation(concat(inputs, state) * Matrix + Bias),
    where "Matrix" and "Bias" are obtained through `tf.get_variable`.
    """

    def __init__(self, num_units, activation=tf.nn.tanh):
        """Remember the cell width and its non-linearity.

        Args:
          num_units: int, reported as both the state size and the output size.
          activation: callable applied to the affine map; tanh by default.
        """
        self._activation = activation
        self._num_units = num_units
        # Placeholder value; `_linear` overwrites it with the weight variable.
        self.matrix = []

    @property
    def state_size(self):
        """Size of the recurrent state (equal to `num_units`)."""
        return self._num_units

    @property
    def output_size(self):
        """Size of the per-step output (equal to `num_units`)."""
        return self._num_units

    def _linear(self, args, output_size, bias, bias_start=0.0, scope=None):
        """Affine map of the column-wise concatenation of `args`.

        Args:
          args: a 2D Tensor or a list/tuple of 2D, batch x n, Tensors.
          output_size: int, number of columns of the weight matrix.
          bias: boolean, whether a bias term is added.
          bias_start: constant used to initialise the bias; 0 by default.
          scope: variable scope for the created variables; "Linear" if falsy.
        Returns:
          A 2D Tensor: concat(args, axis=1) * Matrix, plus Bias when `bias`
          is true. The weight variable is also stored on `self.matrix`.
        Raises:
          ValueError: if `args` is None or an empty sequence, if any argument
            is not 2D, or if any argument has an unknown second dimension.
        """
        is_sequence = isinstance(args, (list, tuple))
        if args is None or (is_sequence and not args):
            raise ValueError("`args` must be specified")
        tensors = args if is_sequence else [args]

        # Sum the widths (dimension 1) of all inputs, validating each in turn.
        shapes = [t.get_shape().as_list() for t in tensors]
        input_width = 0
        for dims in shapes:
            if len(dims) != 2:
                raise ValueError(
                    "Linear is expecting 2D arguments: %s" % str(shapes))
            if not dims[1]:
                raise ValueError(
                    "Linear expects shape[1] of arguments: %s" % str(shapes))
            input_width += dims[1]

        with tf.variable_scope(scope or "Linear"):
            self.matrix = tf.get_variable(
                "Matrix", [input_width, output_size],
                initializer=tf.contrib.layers.xavier_initializer())
            # A single input needs no concatenation.
            if len(tensors) == 1:
                product = tf.matmul(tensors[0], self.matrix)
            else:
                product = tf.matmul(tf.concat(tensors, 1), self.matrix)
            if not bias:
                return product
            offset = tf.get_variable(
                "Bias", [output_size],
                initializer=tf.constant_initializer(bias_start))
        return product + offset

    def __call__(self, inputs, state, scope=None):
        """Run one step: new_state = activation(W * input + U * state + B).

        Variables live under `scope`, or under the class name when `scope`
        is falsy. Returns the pair (output, new_state), which are the same
        tensor.
        """
        cell_scope = scope or type(self).__name__
        with tf.variable_scope(cell_scope):
            pre_activation = self._linear([inputs, state], self._num_units, True)
            new_state = self._activation(pre_activation)
        return new_state, new_state


class RNN(object):
    """Row-by-row image classifier built on `SimpleRNNCell` in its own graph.

    Each flattened image is reshaped to (timesteps, n_input) and fed one row
    per time step to the cell; the last output is projected to `num_classes`
    logits. Relies on the module-level `num_hidden`, `num_classes`,
    `timesteps` and `learning_rate`.

    Args:
      seed: graph-level random seed applied to this model's private graph.
    """

    def __init__(self, seed=123):
        n_input = 28  # pixels per image row, i.e. features per time step
        self.graph = tf.Graph()
        with self.graph.as_default():
            # tf.set_random_seed only affects the graph that is default when
            # it is called, so a seed set at notebook level never reaches this
            # private graph. Seed it here to make initialisation repeatable.
            tf.set_random_seed(seed)

            # input place holders
            self.X = tf.placeholder(tf.float32, [None, timesteps * n_input])
            self.Y = tf.placeholder(tf.float32, [None, num_classes])
            # `mode` and `keep_prob` are not consumed by any op in this graph;
            # they are kept so existing feed_dicts that reference them work.
            self.mode = tf.placeholder(tf.bool)
            self.keep_prob = tf.placeholder(tf.float32)
            self.input_layer = tf.reshape(self.X, [-1, timesteps, n_input])

            # Output projection from the last hidden state to class logits.
            self.w_out = tf.Variable(tf.random_normal([num_hidden, num_classes]))
            self.b_out = tf.Variable(tf.random_normal([num_classes]))

            # Prepare data shape to match `rnn` function requirements
            # Current data input shape: (batch_size, timesteps, n_input)
            # Required shape: 'timesteps' tensors list of shape (batch_size, n_input)

            # Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_input)
            self.x = tf.unstack(self.input_layer, timesteps, 1)
            rnn_cell = SimpleRNNCell(num_hidden)
            outputs, states = rnn.static_rnn(rnn_cell, self.x, dtype=tf.float32)
            # Classify from the output of the final time step only.
            self.logits = tf.matmul(outputs[-1], self.w_out) + self.b_out
            # Expose the cell's weight variable for inspection.
            self.matrix = rnn_cell.matrix

            self.pred = tf.nn.softmax(self.logits)
            # Test model and check accuracy
            self.correct_prediction = tf.equal(tf.argmax(self.pred, 1), tf.argmax(self.Y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))

            tf.summary.scalar('accuracy', self.accuracy)
            # define cost/loss & optimizer
            self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.Y))
            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            tf.summary.scalar('mean_loss', self.cost)
            self.merged = tf.summary.merge_all()

            # Nothing in this graph appears to register UPDATE_OPS (there is
            # no batch normalisation), so this collection is expected to be
            # empty. The dependency is kept so that any such ops added later
            # run together with the training step.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost, global_step=self.global_step)

nn = RNN()
# Best validation accuracy seen so far.
best_validation_accuracy = 0.0

# Epoch of the last improvement to validation accuracy.
last_improvement = 0

# Stop optimization if no improvement found in this many epochs.
patience = 10

# Write summaries once every this many training steps.
summary_every = 50

# Start session. Automatic summaries and periodic checkpoints are disabled
# (summary_op=None, save_model_secs=0); both are handled manually below.
sv = tf.train.Supervisor(graph=nn.graph,
                         logdir='logs_rnn/',
                         summary_op=None,
                         save_model_secs=0)

with sv.managed_session(config=tf.ConfigProto(device_count={'GPU':1})) as sess:
    # Number of full batches per epoch; a trailing partial batch is dropped.
    total_batch = len(train_data) // batch_size
    for epoch in range(training_epochs):
        if sv.should_stop():
            break
        avg_cost = 0
        for i in range(total_batch):
            batch_start = i * batch_size
            batch_end = batch_start + batch_size
            batch_xs = train_data[batch_start:batch_end]
            batch_ys = train_labels[batch_start:batch_end]
            # nn.mode / nn.keep_prob are fed for interface compatibility; the
            # RNN graph defined in this notebook does not consume them.
            feed_dict = {nn.X: batch_xs, nn.Y: batch_ys, nn.mode:True, nn.keep_prob:0.8}
            c, _ = sess.run([nn.cost, nn.optimizer], feed_dict=feed_dict)
            avg_cost += c / total_batch
            # `if i % 50:` would fire on every step that is NOT a multiple of
            # 50; compare against zero to log once per `summary_every` steps.
            if i % summary_every == 0:
                sv.summary_computed(sess, sess.run(nn.merged, feed_dict))

        print('Epoch : ' + str(epoch) + ' Training Loss: ' + str(avg_cost))
        acc = sess.run(nn.accuracy, feed_dict={
                        nn.X: val_data, nn.Y: val_labels, nn.mode:False, nn.keep_prob:1.0})
        print('Validation Accuracy: ' + str(acc))
        if acc > best_validation_accuracy:
            last_improvement = epoch
            best_validation_accuracy = acc
            # Read the step counter at save time so the checkpoint suffix does
            # not depend on when summaries were last written.
            gs = sess.run(nn.global_step)
            sv.saver.save(sess, 'logs_rnn' + '/model_gs', global_step=gs)
        if epoch - last_improvement > patience:
            print("Early stopping ...")
            break


Extracting MNIST-data/train-images-idx3-ubyte.gz
Extracting MNIST-data/train-labels-idx1-ubyte.gz
Extracting MNIST-data/t10k-images-idx3-ubyte.gz
Extracting MNIST-data/t10k-labels-idx1-ubyte.gz
(49500, 784)
INFO:tensorflow:Starting standard services.
INFO:tensorflow:Starting queue runners.
INFO:tensorflow:global_step/sec: 0
Epoch : 0 Training Loss: 0.5921337350450379
Validation Accuracy: 0.88618183
Epoch : 1 Training Loss: 0.25245149212353146
Validation Accuracy: 0.9367273
Epoch : 2 Training Loss: 0.19688182293315137
Validation Accuracy: 0.9383636
Epoch : 3 Training Loss: 0.1628297660687957
Validation Accuracy: 0.95436364
Epoch : 4 Training Loss: 0.14439215468186317
Validation Accuracy: 0.9541818
Epoch : 5 Training Loss: 0.1326256215609986
Validation Accuracy: 0.954
Epoch : 6 Training Loss: 0.12180089174835652
Validation Accuracy: 0.9647273
Epoch : 7 Training Loss: 0.1150093343917921
Validation Accuracy: 0.9670909
Epoch : 8 Training Loss: 0.11105018466560526
Validation Accuracy: 0.9670

In [4]:
# Rebuild the model graph, restore the most recent checkpoint written by the
# training cell, and report accuracy on the held-out test set.
nn = RNN()
print("Graph loaded")

# Resolve the checkpoint before starting a session so a missing checkpoint
# fails with a clear message instead of an error from inside restore().
checkpoint_path = tf.train.latest_checkpoint('logs_rnn/')
if checkpoint_path is None:
    raise ValueError("No checkpoint found in 'logs_rnn/'; run the training cell first.")

with nn.graph.as_default():
    sv = tf.train.Supervisor()
    with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        ## Restore parameters
        sv.saver.restore(sess, checkpoint_path)
        print("Restored!")
        acc = sess.run(nn.accuracy, feed_dict={
              nn.X: test_data, nn.Y: test_labels, nn.mode:False, nn.keep_prob:1.0})
        # Build one string: print('Accuracy:', acc) prints a tuple on Python 2.
        print('Accuracy: ' + str(acc))

Graph loaded
INFO:tensorflow:Starting standard services.
INFO:tensorflow:Starting queue runners.
INFO:tensorflow:Restoring parameters from logs_rnn/model_gs-9405
Restored!
('Accuracy:', 0.9715)
