## Training a sequence classifier
Note: the book uses tensorflow.contrib.layers.fully_connected() rather than tf.layers.dense() (which did not exist when this chapter was written). It is now preferable to use tf.layers.dense(), because anything in the contrib module may change or be deleted without notice. The dense() function is almost identical to the fully_connected() function. The main differences relevant to this chapter are:

- Several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.
- The default `activation` is now `None` rather than `tf.nn.relu`.

In [2]:
from tensorflow_graph_in_jupyter import show_graph
import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [4]:
# Start from an empty default graph so this cell can be re-run: building the
# RNN a second time in the same graph fails because the cell's variables
# already exist under the default "rnn" variable scope.
tf.reset_default_graph()
# Fix the graph-level and NumPy seeds so weight initialisation and batch
# shuffling are repeatable across Restart & Run All.
tf.set_random_seed(42)
np.random.seed(42)

# Each sample is presented to the RNN as n_steps time steps of n_inputs values.
n_steps = 28
n_inputs = 28
n_neurons = 150   # size of the recurrent state
n_outputs = 10    # number of classes

learning_rate = 0.001

# Inputs: a batch of sequences and one integer class label per sequence.
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])

# Unroll a basic RNN cell over the time axis; `states` is the final state.
basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)

# Classify from the final state with a linear layer (tf.layers.dense applies
# no activation by default), then take softmax cross-entropy on the logits.
logits = tf.layers.dense(states, n_outputs)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                          logits=logits)
loss = tf.reduce_mean(xentropy)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

# A prediction counts as correct when the label is the top-1 logit.
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

In [6]:
# Fetch MNIST through Keras (the archive is downloaded if not already cached).
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()


def _flatten_and_scale(images):
    """Return `images` as float32 rows of 28*28 values divided by 255."""
    as_float = images.astype(np.float32)
    return as_float.reshape(-1, 28*28) / 255.0


X_train = _flatten_and_scale(X_train)
X_test = _flatten_and_scale(X_test)

# The loss op is fed through an int32 placeholder, so cast the labels to match.
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

# Hold out the first 5000 training examples as a validation set.
X_valid = X_train[:5000]
y_valid = y_train[:5000]
X_train = X_train[5000:]
y_train = y_train[5000:]

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [7]:
def shuffle_batch(X, y, batch_size):
    """Yield `(X_batch, y_batch)` pairs covering the data once in random order.

    The sample indices are shuffled with `np.random.permutation` and split
    into `len(X) // batch_size` nearly equal parts, so a batch may hold
    slightly more than `batch_size` samples when the sizes do not divide
    evenly.

    Args:
        X: array of samples, indexable by an integer index array.
        y: array of labels aligned with `X` along the first axis.
        batch_size: target number of samples per batch.

    Yields:
        Tuples `(X[idx], y[idx])` for each index chunk. Nothing is yielded
        when `X` is empty; when there are fewer than `batch_size` samples a
        single batch containing all of them is yielded.
    """
    n_samples = len(X)
    if n_samples == 0:
        return
    rnd_idx = np.random.permutation(n_samples)
    # np.array_split rejects 0 sections, which is what the integer division
    # produces when there are fewer samples than batch_size.
    n_batches = max(n_samples // batch_size, 1)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]

In [8]:
# Give the flat test images the (batch, n_steps, n_inputs) layout that the
# X placeholder expects; -1 lets NumPy infer the batch dimension.
X_test = np.reshape(X_test, (-1, n_steps, n_inputs))

In [9]:
n_epochs = 100
batch_size = 150

with tf.Session() as sess:
    sess.run(init)
    # The test feed never changes, so build it once up front.
    test_feed = {X: X_test, y: y_test}
    for epoch in range(n_epochs):
        for X_flat, y_batch in shuffle_batch(X_train, y_train, batch_size):
            # Batches arrive as flat rows; restore the sequence layout.
            X_batch = X_flat.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Report accuracy on the last training batch of the epoch and on the
        # test set.
        acc_batch = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        acc_test = sess.run(accuracy, feed_dict=test_feed)
        print(epoch, "Last batch accuracy:", acc_batch, "Test accuracy:", acc_test)

0 Last batch accuracy: 0.94 Test accuracy: 0.931
1 Last batch accuracy: 0.97333336 Test accuracy: 0.9509
2 Last batch accuracy: 0.94666666 Test accuracy: 0.9509
3 Last batch accuracy: 0.96666664 Test accuracy: 0.9554
4 Last batch accuracy: 0.98 Test accuracy: 0.9642
5 Last batch accuracy: 0.9866667 Test accuracy: 0.9685
6 Last batch accuracy: 0.9533333 Test accuracy: 0.9643
7 Last batch accuracy: 0.9866667 Test accuracy: 0.9668
8 Last batch accuracy: 0.9866667 Test accuracy: 0.9689
9 Last batch accuracy: 0.99333334 Test accuracy: 0.9705
10 Last batch accuracy: 0.9866667 Test accuracy: 0.9636
11 Last batch accuracy: 0.98 Test accuracy: 0.9702
12 Last batch accuracy: 0.99333334 Test accuracy: 0.9717
13 Last batch accuracy: 0.99333334 Test accuracy: 0.9733
14 Last batch accuracy: 0.9866667 Test accuracy: 0.9728
15 Last batch accuracy: 0.9866667 Test accuracy: 0.9767
16 Last batch accuracy: 0.9866667 Test accuracy: 0.9768
17 Last batch accuracy: 0.98 Test accuracy: 0.9752
18 Last batch acc