In [3]:
import numpy as np
import tensorflow as tf 
import matplotlib.pyplot as plt 
import warnings

%matplotlib inline
warnings.filterwarnings(action='ignore')

### A simple RNN from scratch

In [64]:
tf.reset_default_graph()

n_inputs = 3
n_neurons = 5

X0 = tf.placeholder(tf.float32, shape=[None, n_inputs])
X1 = tf.placeholder(tf.float32, shape=[None, n_inputs])

Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons], dtype=tf.float32))
Wy = tf.Variable(tf.random_normal(shape=[n_neurons, n_neurons], dtype=tf.float32))
b = tf.Variable(tf.zeros(shape=[1, n_neurons]), dtype=tf.float32)

Y0 = tf.tanh(tf.matmul(X0, Wx) + b)
Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)

init = tf.global_variables_initializer()

In [5]:
# Mini-Batch:       instance 0, instance 1,instance 2,instance 3

X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1

In [6]:
with tf.Session() as sess:
    init.run()
    Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})

In [7]:
print(Y0_val) # output at t = 0

[[-0.3712633  -0.9422762   0.90898055  0.6768324  -0.9941537 ]
 [-0.9999776  -0.9999995   0.9757459   0.9993513  -0.99999785]
 [-1.         -1.          0.9936987   0.99999905 -1.        ]
 [-1.         -0.99999934 -0.40298423  0.9994849  -0.9994797 ]]


In [8]:
print(Y1_val) # output at t = 1

[[-1.         -1.          0.5364571   0.9999998  -1.        ]
 [ 0.17842388  0.996354   -0.19095483 -0.6406073  -0.54699767]
 [-1.         -0.9999976  -0.32098052  0.99986297 -0.99996984]
 [-0.9987995  -0.8203782  -0.9952709   0.9821737  -0.9584852 ]]


#### Static Unrolling Through Time

The static_rnn() function creates an unrolled RNN network by chaining cells. The
following code creates the exact same model as the previous one:

In [12]:
tf.reset_default_graph()

X0 = tf.placeholder(tf.float32, shape=[None, n_inputs])
X1 = tf.placeholder(tf.float32, shape=[None, n_inputs])

basic_cell = tf.keras.layers.SimpleRNNCell(units=n_neurons, activation='tanh')
output_seqs, states = tf.nn.static_rnn(cell=basic_cell, inputs=[X0, X1], dtype=tf.float32)

# When you are using
# basic cells, the final state is simply equal to the last output.

Y0, Y1 = output_seqs

init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})
    final_state = states.eval(feed_dict={X0: X0_batch, X1: X1_batch})

In [13]:
Y1_val

array([[ 0.94276506, -0.99656993, -0.9995738 ,  1.        ,  0.99961364],
       [-0.4096585 , -0.716821  ,  0.38663137,  0.8855186 ,  0.16710818],
       [ 0.9494632 , -0.9994471 , -0.9846877 ,  0.99999994,  0.97973204],
       [ 0.9914004 , -0.99097985, -0.65060073,  0.999642  , -0.50370914]],
      dtype=float32)

In [14]:
final_state

array([[ 0.94276506, -0.99656993, -0.9995738 ,  1.        ,  0.99961364],
       [-0.4096585 , -0.716821  ,  0.38663137,  0.8855186 ,  0.16710818],
       [ 0.9494632 , -0.9994471 , -0.9846877 ,  0.99999994,  0.97973204],
       [ 0.9914004 , -0.99097985, -0.65060073,  0.999642  , -0.50370914]],
      dtype=float32)

#### Packing Sequences

In [51]:
tf.reset_default_graph()

n_steps = 2
n_inputs = 3
n_neurons = 5

X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])

# Then we extract a Python list of tensors along the first dimension (i.e., one
# tensor per time step)
X_seqs = tf.unstack((tf.transpose(X, perm=[1, 0, 2])))  # Swap first dim and second dim, keep the last dim as it is 
basic_cell = tf.keras.layers.SimpleRNNCell(units=n_neurons, activation='tanh')
output_seqs, states = tf.nn.static_rnn(basic_cell, inputs=X_seqs, dtype=tf.float32)

outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])

init = tf.global_variables_initializer()

In [52]:
# Now we can run the network by feeding it a single tensor that contains all the minibatch
# sequences:

In [95]:
X_batch = np.array([# t = 0 t = 1
                    [[0, 1, 2], [9, 8, 7]], # instance 0
                    [[3, 4, 5], [0, 0, 0]], # instance 1
                    [[6, 7, 8], [6, 5, 4]], # instance 2
                    [[9, 0, 1], [3, 2, 1]]]) # instance 3
X_batch

array([[[0, 1, 2],
        [9, 8, 7]],

       [[3, 4, 5],
        [0, 0, 0]],

       [[6, 7, 8],
        [6, 5, 4]],

       [[9, 0, 1],
        [3, 2, 1]]])

In [55]:
with tf.Session() as sess:
    init.run()
    outputs_val = outputs.eval(feed_dict={X: X_batch})

#### Dynamic Unrolling Through Time

In [66]:
tf.reset_default_graph()

n_steps = 2
n_inputs = 3
n_neurons = 5

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
basic_cell = tf.keras.layers.SimpleRNNCell(units=n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    outputs_val = outputs.eval(feed_dict={X: X_batch})

#### Handling Variable Length Input Sequences

<em>So far we have used only fixed-size input sequences (all exactly two steps long). What
if the input sequences have variable lengths (e.g., like sentences)? In this case you
should set the sequence_length parameter when calling the dynamic_rnn() (or
static_rnn()) function; it must be a 1D tensor indicating the length of the input
sequence for each instance

In [78]:
tf.reset_default_graph()

n_steps = 2
n_inputs = 3
n_neurons = 5

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
basic_cell = tf.keras.layers.SimpleRNNCell(units=n_neurons)

seq_length = tf.placeholder(tf.int32, shape=[None])
seq_length = tf.placeholder(tf.int32, [None])
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32, sequence_length=seq_length)

init = tf.global_variables_initializer()

                     # step 0     step 1
X_batch = np.array([[[0, 1, 2], [9, 8, 7]], # instance 0
                    [[3, 4, 5], [0, 0, 0]], # instance 1 (padded with a zero vector)
                    [[6, 7, 8], [6, 5, 4]], # instance 2
                    [[9, 0, 1], [3, 2, 1]]]) # instance 3

seq_length_batch = [2, 1, 2, 2]

with tf.Session() as sess:
    init.run()
    outputs_val, states_val = sess.run([outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch})

In [79]:
print(outputs_val)

[[[-0.6682938  -0.98525363 -0.9424816   0.6381121   0.94908243]
  [-0.99350595 -1.         -0.9715961  -0.99555904  1.        ]]

 [[-0.9256099  -0.99999744 -0.9912698  -0.2731056   0.99999714]
  [ 0.          0.          0.          0.          0.        ]]

 [[-0.98509985 -1.         -0.9987025  -0.8656395   1.        ]
  [-0.95136887 -0.9999959  -0.797005   -0.91468775  1.        ]]

 [[ 0.9542289   0.69114906  0.9999984  -0.99987644  0.9999656 ]
  [ 0.80366445 -0.88931924  0.8793504  -0.97636855  0.9998091 ]]]


In [80]:
print(states_val)  # The states tensor contains the final state of each cell (excluding the zero vectors)

[[-0.99350595 -1.         -0.9715961  -0.99555904  1.        ]
 [-0.9256099  -0.99999744 -0.9912698  -0.2731056   0.99999714]
 [-0.95136887 -0.9999959  -0.797005   -0.91468775  1.        ]
 [ 0.80366445 -0.88931924  0.8793504  -0.97636855  0.9998091 ]]


### Training a sequence classifer 

<em>
- We will treat each image as a sequence of
 28 rows of 28 pixels each (since each MNIST image is 28 × 28 pixels).<br>
- We are creating a RNN with 150 neurons and we will unroll it 28 times steps since each training instance will be 28 inputs long
- Each input will also contain 28 features 

<em>
fully connected layer is connected to the states tensor,
which contains only the final state of the RNN (i.e., the 28th output).

In [128]:
tf.reset_default_graph()

n_steps = 28 
n_inputs = 28
n_neurons = 150
n_outputs = 10

learning_rate = 0.001 

X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, shape=[None]) # placeholder for the target classes

basic_cell = tf.keras.layers.SimpleRNNCell(units=n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)
logits = tf.layers.dense(inputs=states, units=n_outputs)

xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy)

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

correct = tf.nn.in_top_k(logits, targets=y, k=1)
accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

init = tf.global_variables_initializer()

In [129]:
# Loading and prepairing the data

from scipy.io import loadmat

mnist_org = loadmat('mnist-original.mat')
data = mnist_org['data'].T
targets = mnist_org['label'].T
X_train, X_test, y_train, y_test = data[:60000], data[60000:], targets[:60000], targets[60000:]

shuffle_index = np.random.permutation(60000)
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]
y_train, y_test = y_train.ravel(), y_test.ravel()

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

# Reshape the test data first 
X_test = X_test.reshape((-1, n_steps, n_inputs))

In [130]:
# Next batch function to quickly extract mini-batches 

def next_batch(size, X, y):
    index = np.random.randint(X.shape[0], size=size)
    X_batch = X[index]
    y_batch = y[index]
    return X_batch, y_batch

In [131]:
n_epochs = 10
batch_size = 150 
n_batches = len(X_train) // batch_size

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(n_batches):
            X_batch, y_batch = next_batch(batch_size, X_train, y_train)
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

0 Train accuracy: 0.9266667 Test accuracy: 0.9269
1 Train accuracy: 0.98 Test accuracy: 0.94
2 Train accuracy: 0.94 Test accuracy: 0.9379
3 Train accuracy: 0.98 Test accuracy: 0.9597
4 Train accuracy: 0.9866667 Test accuracy: 0.9583
5 Train accuracy: 0.9866667 Test accuracy: 0.9632
6 Train accuracy: 0.97333336 Test accuracy: 0.9615
7 Train accuracy: 0.9533333 Test accuracy: 0.9615
8 Train accuracy: 0.98 Test accuracy: 0.9662
9 Train accuracy: 0.9866667 Test accuracy: 0.9677


## Training to Predict Time Series

In [6]:
t_min, t_max = 0, 30
resolution = 0.1
n_steps = 20

def time_series(t):
    return t * np.sin(t) / 3 + 2 * np.sin(t*5)

def next_batch(batch_size, n_steps):
    t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)
    Ts = t0 + np.arange(0., n_steps + 1) * resolution
    ys = time_series(Ts)
    return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)

In [7]:
X_batch, y_batch = next_batch(50, n_steps)
np.c_[X_batch[44], y_batch[44]] ## Input and output sequence 

array([[ 4.75580159,  5.19908438],
       [ 5.19908438,  6.05928172],
       [ 6.05928172,  7.23891075],
       [ 7.23891075,  8.54501591],
       [ 8.54501591,  9.73531271],
       [ 9.73531271, 10.57658639],
       [10.57658639, 10.90105802],
       [10.90105802, 10.64692655],
       [10.64692655,  9.87316753],
       [ 9.87316753,  8.74497027],
       [ 8.74497027,  7.49338151],
       [ 7.49338151,  6.35903719],
       [ 6.35903719,  5.53375692],
       [ 5.53375692,  5.11429758],
       [ 5.11429758,  5.07958273],
       [ 5.07958273,  5.29697516],
       [ 5.29697516,  5.55604697],
       [ 5.55604697,  5.62156717],
       [ 5.62156717,  5.29272014],
       [ 5.29272014,  4.45404068]])

In [8]:
y_batch.shape

(50, 20, 1)

### Using an OuputProjectionWrapper

Let's create the RNN. It will contain 100 recurrent neurons and we will unroll it over 20 time steps since each training instance will be 20 inputs long. Each input will contain only one feature (the value at that time). The targets are also sequences of 20 inputs, each containing a single value:

In [12]:
tf.reset_default_graph()

n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1

X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, shape=[None, n_steps, n_outputs])

- At each time step we now have an output vector of size 100. But what we actually
want is a single output value at each time step. The simplest solution is to wrap the
cell in an OutputProjectionWrapper

- The OutputProjectionWrapper adds a fully connected layer of linear neurons (i.e., without
any activation function) on top of each output (but it does not affect the cell state).

- All these fully connected layers share the same (trainable) weights and bias terms

In [13]:
cell = tf.contrib.rnn.OutputProjectionWrapper(
    tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu),
    output_size=n_outputs)

outputs, states = tf.nn.dynamic_rnn(cell, inputs=X, dtype=tf.float32)

In [14]:
learning_rate = 0.001

loss = tf.reduce_mean(tf.square(outputs - y))

unexplained_error = tf.reduce_sum(tf.square(y - outputs))
total_error = tf.reduce_sum(tf.square(y - tf.reduce_mean(y, axis=0)))
R2 = 1. - tf.div(unexplained_error, total_error)

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [15]:
n_iterations = 1500
batch_size = 50 

with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        X_batch, y_batch = next_batch(batch_size, n_steps)
        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if iteration % 100 == 0:
            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})
            r2 = R2.eval(feed_dict={X: X_batch, y: y_batch})
            print('Iteration', iteration, '\tMSE:', mse, '\tR2', r2)
    saver.save(sess, "./my_time_series_model") 

Iteration 0 	MSE: 18.77428 	R2 -0.12749219
Iteration 100 	MSE: 0.7097012 	R2 0.9601144
Iteration 200 	MSE: 0.31224054 	R2 0.982063
Iteration 300 	MSE: 0.1098454 	R2 0.99461997
Iteration 400 	MSE: 0.06952478 	R2 0.9959823
Iteration 500 	MSE: 0.06293467 	R2 0.9963066
Iteration 600 	MSE: 0.06294356 	R2 0.9968455
Iteration 700 	MSE: 0.05105515 	R2 0.9975703
Iteration 800 	MSE: 0.048389606 	R2 0.99766624
Iteration 900 	MSE: 0.065213755 	R2 0.9966278
Iteration 1000 	MSE: 0.053504776 	R2 0.9972218
Iteration 1100 	MSE: 0.04769876 	R2 0.9970911
Iteration 1200 	MSE: 0.044577383 	R2 0.9972022
Iteration 1300 	MSE: 0.050732225 	R2 0.9971172
Iteration 1400 	MSE: 0.038553927 	R2 0.99788034


In [16]:
with tf.Session() as sess:                          # not shown in the book
    saver.restore(sess, "./my_time_series_model")   # not shown

    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
    y_pred = sess.run(outputs, feed_dict={X: X_new})

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ./my_time_series_model


NameError: name 't_instance' is not defined