# Recurrent Neural Networks Basics #

In this tutorial, you will explore the basic concepts behind Recurrent Neural Networks and reproduce two of Jürgen Schmidhuber's classic experiments.

In [None]:
import os
import numpy as np
import tensorflow as tf

# Don't use the GPU: an empty CUDA_VISIBLE_DEVICES leaves no CUDA device
# visible to this process. Change or remove this assignment to use a GPU.
os.environ["CUDA_VISIBLE_DEVICES"]=""

## Experiment 1: Noise-Free and Noisy Sequences ##

In [None]:
def gen_sequence(time_steps=10, prob=0.5):
    """Generate one one-hot encoded sequence for Experiment 1.

    The result is a ``(time_steps + 1, time_steps + 1)`` array whose rows are
    the steps of the sequence and whose columns are the symbols.  Rows
    ``1 .. time_steps - 1`` encode the fixed symbols ``a(1) .. a(p-1)``
    (row ``k`` is hot in column ``k - 1``).  The first and the last row carry
    the same marker symbol: column ``time_steps - 1`` (sequence A, "y") when
    a uniform draw from [0, 1) is below ``prob``, otherwise column
    ``time_steps`` (sequence B, "x").

    Args:
        time_steps: index of the last row; the sequence has one more row.
        prob: threshold for the uniform draw that selects sequence A.

    Returns:
        A float ``numpy.ndarray`` holding the one-hot rows.
    """
    size = time_steps + 1
    seq = np.zeros((size, size))

    # a(1), ..., a(p-1): ones directly below the main diagonal
    for row in range(1, time_steps):
        seq[row][row - 1] = 1

    # seq A - (y, a(1), ..., a(p-1), y)  /  seq B - (x, a(1), ..., a(p-1), x)
    draw = np.random.uniform(0, 1)
    marker = time_steps - 1 if draw < prob else time_steps

    # the marker symbol opens and closes the sequence
    for row in (0, size - 1):
        seq[row][marker] = 1

    return seq

In [None]:
def simple_RNN(sequence, hidden_units=20):
    """Unroll a single-layer tanh RNN over ``sequence`` and return its logits.

    Creates, through ``tf.get_variable`` in the current variable scope, the
    Xavier-initialised weights ``W`` (input -> hidden), ``U`` (hidden ->
    hidden) and ``V`` (hidden -> output) plus the zero-initialised biases
    ``b1`` and ``b2``.

    Args:
        sequence: 2-D tensor with a static shape; rows are time steps and
            columns are symbols.
        hidden_units: size of the hidden state.

    Returns:
        A Python list with one logits tensor per row of ``sequence``.  Each
        entry is computed from a one-row slice, so it has a single row and
        as many columns as ``sequence``.
    """
    n_steps = sequence.shape[0].value
    n_symbols = sequence.shape[1].value
    xavier = tf.contrib.layers.xavier_initializer

    # trainable parameters
    W = tf.get_variable("W", shape=[n_symbols, hidden_units], initializer=xavier())
    U = tf.get_variable("U", shape=[hidden_units, hidden_units], initializer=xavier())
    V = tf.get_variable("V", shape=[hidden_units, n_symbols], initializer=xavier())

    b1 = tf.get_variable("b1", shape=[hidden_units], initializer=tf.constant_initializer(0.))
    b2 = tf.get_variable("b2", shape=[n_symbols], initializer=tf.constant_initializer(0.))

    # the recurrence starts from an all-zero hidden state
    state = tf.zeros([1, hidden_units], dtype=tf.float32)
    outputs = []

    for t in range(n_steps):
        # slice (instead of index) so the input keeps its leading row axis
        x_t = sequence[t:t + 1]
        state = tf.tanh(tf.matmul(x_t, W) + tf.matmul(state, U) + b1)
        outputs.append(tf.matmul(state, V) + b2)

    return outputs

In [None]:
def LSTM(sequence, hidden_units=20):
    """Unroll a single-layer LSTM over ``sequence`` and return its logits.

    Creates, through ``tf.get_variable`` in the current variable scope:
    ``W`` (four stacked hidden -> hidden matrices), ``U`` (four stacked
    input -> hidden matrices), ``V`` (hidden -> output), the gate biases
    ``bi``/``bf``/``bo``/``bc`` and the output bias ``bl``.  Index 0..3 of
    ``W``/``U`` belongs to the input gate, forget gate, output gate and the
    tanh candidate, in that order.

    Args:
        sequence: 2-D tensor with a static shape; rows are time steps and
            columns are symbols.
        hidden_units: size of the output state and of the cell state.

    Returns:
        A Python list with one logits tensor per row of ``sequence``.
    """
    n_steps = sequence.shape[0].value
    n_symbols = sequence.shape[1].value
    xavier = tf.contrib.layers.xavier_initializer

    # parameters
    W = tf.get_variable("W", shape=[4, hidden_units, hidden_units], initializer=xavier())
    U = tf.get_variable("U", shape=[4, n_symbols, hidden_units], initializer=xavier())
    V = tf.get_variable("V", shape=[hidden_units, n_symbols], initializer=xavier())

    bi = tf.get_variable("bi", shape=[hidden_units], initializer=tf.constant_initializer(0.))
    bf = tf.get_variable("bf", shape=[hidden_units], initializer=tf.constant_initializer(0.))
    bo = tf.get_variable("bo", shape=[hidden_units], initializer=tf.constant_initializer(0.))
    bc = tf.get_variable("bc", shape=[hidden_units], initializer=tf.constant_initializer(0.))
    bl = tf.get_variable("bl", shape=[n_symbols], initializer=tf.constant_initializer(0.))

    def pre_activation(x_t, h_prev, slot, bias):
        # affine part shared by all gates: x_t * U[slot] + h_prev * W[slot] + bias
        return tf.matmul(x_t, U[slot]) + tf.matmul(h_prev, W[slot]) + bias

    # output state and cell state travel together as one stacked tensor,
    # both starting at zero
    packed_state = tf.zeros([2, 1, hidden_units], dtype=tf.float32)
    outputs = []

    for t in range(n_steps):
        x_t = sequence[t:t + 1]
        h_prev, c_prev = tf.unstack(packed_state)

        # gates
        i_gate = tf.sigmoid(pre_activation(x_t, h_prev, 0, bi))
        f_gate = tf.sigmoid(pre_activation(x_t, h_prev, 1, bf))
        o_gate = tf.sigmoid(pre_activation(x_t, h_prev, 2, bo))
        candidate = tf.tanh(pre_activation(x_t, h_prev, 3, bc))

        # new internal cell state: keep part of the old one, admit part of the candidate
        c_new = c_prev * f_gate + candidate * i_gate

        # new output state
        h_new = tf.tanh(c_new) * o_gate

        outputs.append(tf.matmul(h_new, V) + bl)
        packed_state = tf.stack([h_new, c_new])

    return outputs

In [None]:
def multi_layer_RNN(sequence, net_builder, hidden_units=10, layers=3):
    """Stack ``layers`` networks built by ``net_builder`` on top of each other.

    Layer ``k`` (1-based) is built inside the variable scope ``"layerk"``.
    The list returned by every layer except the last is stacked and its
    middle axis is dropped by taking index 0, and the resulting tensor is
    fed to the next layer as its input sequence.

    Args:
        sequence: input passed to the first layer.
        net_builder: callable ``net_builder(sequence, hidden_units=...)``
            returning a list of per-step tensors (e.g. ``simple_RNN``).
        hidden_units: forwarded to every ``net_builder`` call.
        layers: number of layers to build.

    Returns:
        The list returned by the last layer; ``sequence`` itself if
        ``layers`` is 0.
    """
    current = sequence

    for number in range(1, layers + 1):
        with tf.variable_scope("layer{}".format(number)):
            current = net_builder(current, hidden_units=hidden_units)

            # only intermediate layers are converted back into a sequence tensor
            if number < layers:
                current = tf.stack(current)[:, 0, :]

    return current

In [None]:
def RNN_loss(sequence, logits_list):
    """Average next-symbol cross-entropy over the sequence.

    The logits produced at step ``t`` are scored against row ``t + 1`` of
    ``sequence``, so the logits of the final step are not used.

    Args:
        sequence: 2-D tensor with a static shape holding the target rows.
        logits_list: per-step logits, indexable by step.

    Returns:
        The sum of the per-step mean cross-entropies divided by the number
        of scored steps (rows of ``sequence`` minus one).
    """
    n_predictions = sequence.shape[0].value - 1
    total = 0

    for t in range(n_predictions):
        # slice keeps the leading row axis of the target
        target = sequence[t + 1:t + 2]
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=target, logits=logits_list[t])
        total = total + tf.reduce_mean(cross_entropy)

    return total / n_predictions
    
def RNN_abs_error(sequence, logits_list):
    """Largest absolute prediction error over all scored steps.

    For every step ``t`` the softmax of ``logits_list[t]`` is compared with
    row ``t + 1`` of ``sequence``; the logits of the final step are not used.

    Args:
        sequence: 2-D tensor with a static shape holding the target rows.
        logits_list: per-step logits, indexable by step.

    Returns:
        A scalar tensor: the maximum over steps of the maximum absolute
        difference between prediction and target.
    """
    n_predictions = sequence.shape[0].value - 1

    # worst-case deviation of each individual step
    step_errors = [
        tf.reduce_max(tf.abs(tf.nn.softmax(logits_list[t]) - sequence[t + 1]))
        for t in range(n_predictions)
    ]

    return tf.reduce_max(step_errors)
            
def RNN_train(loss, learning_rate=1, grad_max_value=5):
    """Create a gradient-descent training op with optional gradient clipping.

    Args:
        loss: scalar tensor to minimise.
        learning_rate: step size handed to ``GradientDescentOptimizer``.
        grad_max_value: every gradient element is clipped to
            ``[-grad_max_value, grad_max_value]``; ``None`` disables clipping.

    Returns:
        The op returned by ``apply_gradients``.
    """
    # create training op
    opt = tf.train.GradientDescentOptimizer(learning_rate)
    grads = opt.compute_gradients(loss)

    if grad_max_value is not None:
        # compute_gradients reports a None gradient for variables the loss
        # does not depend on; clip_by_value cannot handle None, so such
        # pairs are handed to apply_gradients untouched.
        clipped_grads = [
            (grad if grad is None
             else tf.clip_by_value(grad, -grad_max_value, grad_max_value), var)
            for grad, var in grads
        ]
    else:
        clipped_grads = grads

    train_op = opt.apply_gradients(clipped_grads)

    return train_op

In [None]:
def run_experiment_1(sequence_pl, abs_error, train_op, max_training_steps=100000,
                     stop_threshold=0.25, threshold_steps=1000):
    """Train on freshly generated sequences until the error stays low.

    Opens a new ``tf.Session``, initialises all global variables and then
    runs one training step per generated sequence.  Training counts as
    successful once ``threshold_steps`` steps have passed since the last
    step whose absolute error exceeded ``stop_threshold`` (counted from
    step 0 if no step has exceeded it yet).

    Args:
        sequence_pl: placeholder the generated sequence is fed into; its
            static first dimension determines the generated sequence size.
        abs_error: tensor evaluated alongside every training step.
        train_op: op that performs one training step.
        max_training_steps: upper bound on the number of training steps.
        stop_threshold: error level that counts as "too high".
        threshold_steps: number of steps the error has to stay at or below
            ``stop_threshold``.

    Returns:
        The index of the training step at which the success criterion was
        met, or ``None`` if it was not met within ``max_training_steps``.
    """
    with tf.Session() as session:

        session.run(tf.global_variables_initializer())

        last_bad_step = 0

        # training and evaluation
        for step in range(max_training_steps):
            sample = gen_sequence(time_steps=sequence_pl.shape[0].value - 1)
            error_value, _ = session.run([abs_error, train_op],
                                         feed_dict={sequence_pl: sample})

            if error_value > stop_threshold:
                last_bad_step = step

            # long enough without exceeding the threshold: report success
            if step - last_bad_step >= threshold_steps:
                return step

        # step budget exhausted without meeting the criterion
        return None

In [None]:
time_step_trials = [4, 5, 6, 7, 8, 9, 10, 20, 50, 100]

num_trials = 10

RNN_results = {}
LSTM_results = {}

# Each architecture is paired with the dict that collects its outcomes as
# {time_steps: {trial_idx: result}}; within a trial the RNN runs before the LSTM.
experiments = [(simple_RNN, RNN_results), (LSTM, LSTM_results)]

for time_steps in time_step_trials:

    for trial_idx in range(num_trials):

        for net_builder, results in experiments:
            # every run starts from an empty graph and a new placeholder
            tf.reset_default_graph()
            sequence_pl = tf.placeholder(shape=[time_steps, time_steps], dtype=tf.float32)

            logits_list = net_builder(sequence_pl)
            loss = RNN_loss(sequence_pl, logits_list)
            abs_error = RNN_abs_error(sequence_pl, logits_list)
            train_op = RNN_train(loss)

            res = run_experiment_1(sequence_pl, abs_error, train_op)

            results.setdefault(time_steps, {})[trial_idx] = res

    # cumulative report after all trials of the current sequence length
    print("RNN results")
    print(RNN_results)
    print("LSTM results")
    print(LSTM_results)

results:

RNN results
{4: {0: 1014, 1: 1006, 2: 1018, 3: 1005, 4: 1023, 5: 1023, 6: 1010, 7: 1012, 8: 1015, 9: 1009}}
LSTM results
{4: {0: 1091, 1: 1103, 2: 1120, 3: 1104, 4: 1102, 5: 1083, 6: 1109, 7: 1109, 8: 1091, 9: 1107}}
RNN results
{4: {0: 1014, 1: 1006, 2: 1018, 3: 1005, 4: 1023, 5: 1023, 6: 1010, 7: 1012, 8: 1015, 9: 1009}, 5: {0: 1021, 1: 1045, 2: 1024, 3: 1061, 4: 1016, 5: 1035, 6: 1029, 7: 1056, 8: 1024, 9: 1030}}
LSTM results
{4: {0: 1091, 1: 1103, 2: 1120, 3: 1104, 4: 1102, 5: 1083, 6: 1109, 7: 1109, 8: 1091, 9: 1107}, 5: {0: 1193, 1: 1240, 2: 1204, 3: 1165, 4: 1177, 5: 1296, 6: 1192, 7: 1238, 8: 1224, 9: 1294}}
RNN results
{4: {0: 1014, 1: 1006, 2: 1018, 3: 1005, 4: 1023, 5: 1023, 6: 1010, 7: 1012, 8: 1015, 9: 1009}, 5: {0: 1021, 1: 1045, 2: 1024, 3: 1061, 4: 1016, 5: 1035, 6: 1029, 7: 1056, 8: 1024, 9: 1030}, 6: {0: 1051, 1: 1054, 2: 1125, 3: 1048, 4: 1029, 5: 1050, 6: 1122, 7: 1177, 8: 1066, 9: 1065}}
LSTM results
{4: {0: 1091, 1: 1103, 2: 1120, 3: 1104, 4: 1102, 5: 1083, 6: 1109, 7: 1109, 8: 1091, 9: 1107}, 5: {0: 1193, 1: 1240, 2: 1204, 3: 1165, 4: 1177, 5: 1296, 6: 1192, 7: 1238, 8: 1224, 9: 1294}, 6: {0: 1727, 1: 1438, 2: 1547, 3: 1584, 4: 1430, 5: 1447, 6: 1511, 7: 1437, 8: 1371, 9: 1430}}
RNN results
{4: {0: 1014, 1: 1006, 2: 1018, 3: 1005, 4: 1023, 5: 1023, 6: 1010, 7: 1012, 8: 1015, 9: 1009}, 5: {0: 1021, 1: 1045, 2: 1024, 3: 1061, 4: 1016, 5: 1035, 6: 1029, 7: 1056, 8: 1024, 9: 1030}, 6: {0: 1051, 1: 1054, 2: 1125, 3: 1048, 4: 1029, 5: 1050, 6: 1122, 7: 1177, 8: 1066, 9: 1065}, 7: {0: 1088, 1: 1064, 2: 1279, 3: 1147, 4: 1531, 5: 1399, 6: 1311, 7: 1169, 8: 1207, 9: 1098}}
LSTM results
{4: {0: 1091, 1: 1103, 2: 1120, 3: 1104, 4: 1102, 5: 1083, 6: 1109, 7: 1109, 8: 1091, 9: 1107}, 5: {0: 1193, 1: 1240, 2: 1204, 3: 1165, 4: 1177, 5: 1296, 6: 1192, 7: 1238, 8: 1224, 9: 1294}, 6: {0: 1727, 1: 1438, 2: 1547, 3: 1584, 4: 1430, 5: 1447, 6: 1511, 7: 1437, 8: 1371, 9: 1430}, 7: {0: 2181, 1: 2869, 2: 2245, 3: 1725, 4: 3301, 5: 2454, 6: 1883, 7: 3979, 8: 3061, 9: 2885}}
RNN results
{8: {0: 2834, 1: 1525, 2: 1589, 3: 1231, 4: 1804, 5: 3591, 6: 1208, 7: 1446, 8: 1200, 9: 1206}, 4: {0: 1014, 1: 1006, 2: 1018, 3: 1005, 4: 1023, 5: 1023, 6: 1010, 7: 1012, 8: 1015, 9: 1009}, 5: {0: 1021, 1: 1045, 2: 1024, 3: 1061, 4: 1016, 5: 1035, 6: 1029, 7: 1056, 8: 1024, 9: 1030}, 6: {0: 1051, 1: 1054, 2: 1125, 3: 1048, 4: 1029, 5: 1050, 6: 1122, 7: 1177, 8: 1066, 9: 1065}, 7: {0: 1088, 1: 1064, 2: 1279, 3: 1147, 4: 1531, 5: 1399, 6: 1311, 7: 1169, 8: 1207, 9: 1098}}
LSTM results
{8: {0: 2850, 1: 10448, 2: 4351, 3: 4833, 4: 5563, 5: 3949, 6: 4135, 7: 3915, 8: 12816, 9: 4049}, 4: {0: 1091, 1: 1103, 2: 1120, 3: 1104, 4: 1102, 5: 1083, 6: 1109, 7: 1109, 8: 1091, 9: 1107}, 5: {0: 1193, 1: 1240, 2: 1204, 3: 1165, 4: 1177, 5: 1296, 6: 1192, 7: 1238, 8: 1224, 9: 1294}, 6: {0: 1727, 1: 1438, 2: 1547, 3: 1584, 4: 1430, 5: 1447, 6: 1511, 7: 1437, 8: 1371, 9: 1430}, 7: {0: 2181, 1: 2869, 2: 2245, 3: 1725, 4: 3301, 5: 2454, 6: 1883, 7: 3979, 8: 3061, 9: 2885}}
RNN results
{4: {0: 1014, 1: 1006, 2: 1018, 3: 1005, 4: 1023, 5: 1023, 6: 1010, 7: 1012, 8: 1015, 9: 1009}, 5: {0: 1021, 1: 1045, 2: 1024, 3: 1061, 4: 1016, 5: 1035, 6: 1029, 7: 1056, 8: 1024, 9: 1030}, 6: {0: 1051, 1: 1054, 2: 1125, 3: 1048, 4: 1029, 5: 1050, 6: 1122, 7: 1177, 8: 1066, 9: 1065}, 7: {0: 1088, 1: 1064, 2: 1279, 3: 1147, 4: 1531, 5: 1399, 6: 1311, 7: 1169, 8: 1207, 9: 1098}, 8: {0: 2834, 1: 1525, 2: 1589, 3: 1231, 4: 1804, 5: 3591, 6: 1208, 7: 1446, 8: 1200, 9: 1206}, 9: {0: 1128, 1: 7752, 2: 1383, 3: None, 4: 7224, 5: None, 6: 6790, 7: 16860, 8: 9663, 9: 3423}}
LSTM results
{4: {0: 1091, 1: 1103, 2: 1120, 3: 1104, 4: 1102, 5: 1083, 6: 1109, 7: 1109, 8: 1091, 9: 1107}, 5: {0: 1193, 1: 1240, 2: 1204, 3: 1165, 4: 1177, 5: 1296, 6: 1192, 7: 1238, 8: 1224, 9: 1294}, 6: {0: 1727, 1: 1438, 2: 1547, 3: 1584, 4: 1430, 5: 1447, 6: 1511, 7: 1437, 8: 1371, 9: 1430}, 7: {0: 2181, 1: 2869, 2: 2245, 3: 1725, 4: 3301, 5: 2454, 6: 1883, 7: 3979, 8: 3061, 9: 2885}, 8: {0: 2850, 1: 10448, 2: 4351, 3: 4833, 4: 5563, 5: 3949, 6: 4135, 7: 3915, 8: 12816, 9: 4049}, 9: {0: 7307, 1: 9525, 2: 12096, 3: 11257, 4: 44083, 5: 26061, 6: 14560, 7: 6564, 8: 13284, 9: 9148}}
RNN results
{4: {0: 1014, 1: 1006, 2: 1018, 3: 1005, 4: 1023, 5: 1023, 6: 1010, 7: 1012, 8: 1015, 9: 1009}, 5: {0: 1021, 1: 1045, 2: 1024, 3: 1061, 4: 1016, 5: 1035, 6: 1029, 7: 1056, 8: 1024, 9: 1030}, 6: {0: 1051, 1: 1054, 2: 1125, 3: 1048, 4: 1029, 5: 1050, 6: 1122, 7: 1177, 8: 1066, 9: 1065}, 7: {0: 1088, 1: 1064, 2: 1279, 3: 1147, 4: 1531, 5: 1399, 6: 1311, 7: 1169, 8: 1207, 9: 1098}, 8: {0: 2834, 1: 1525, 2: 1589, 3: 1231, 4: 1804, 5: 3591, 6: 1208, 7: 1446, 8: 1200, 9: 1206}, 9: {0: 1128, 1: 7752, 2: 1383, 3: None, 4: 7224, 5: None, 6: 6790, 7: 16860, 8: 9663, 9: 3423}, 10: {0: 1685, 1: 1100, 2: 1957, 3: None, 4: None, 5: 4377, 6: None, 7: 19026, 8: 6897, 9: 2613}}
LSTM results
{4: {0: 1091, 1: 1103, 2: 1120, 3: 1104, 4: 1102, 5: 1083, 6: 1109, 7: 1109, 8: 1091, 9: 1107}, 5: {0: 1193, 1: 1240, 2: 1204, 3: 1165, 4: 1177, 5: 1296, 6: 1192, 7: 1238, 8: 1224, 9: 1294}, 6: {0: 1727, 1: 1438, 2: 1547, 3: 1584, 4: 1430, 5: 1447, 6: 1511, 7: 1437, 8: 1371, 9: 1430}, 7: {0: 2181, 1: 2869, 2: 2245, 3: 1725, 4: 3301, 5: 2454, 6: 1883, 7: 3979, 8: 3061, 9: 2885}, 8: {0: 2850, 1: 10448, 2: 4351, 3: 4833, 4: 5563, 5: 3949, 6: 4135, 7: 3915, 8: 12816, 9: 4049}, 9: {0: 7307, 1: 9525, 2: 12096, 3: 11257, 4: 44083, 5: 26061, 6: 14560, 7: 6564, 8: 13284, 9: 9148}, 10: {0: 20526, 1: 6059, 2: 3756, 3: 12046, 4: 14351, 5: 55894, 6: 13177, 7: 7303, 8: 27809, 9: 11988}}
RNN results
{4: {0: 1014, 1: 1006, 2: 1018, 3: 1005, 4: 1023, 5: 1023, 6: 1010, 7: 1012, 8: 1015, 9: 1009}, 5: {0: 1021, 1: 1045, 2: 1024, 3: 1061, 4: 1016, 5: 1035, 6: 1029, 7: 1056, 8: 1024, 9: 1030}, 6: {0: 1051, 1: 1054, 2: 1125, 3: 1048, 4: 1029, 5: 1050, 6: 1122, 7: 1177, 8: 1066, 9: 1065}, 7: {0: 1088, 1: 1064, 2: 1279, 3: 1147, 4: 1531, 5: 1399, 6: 1311, 7: 1169, 8: 1207, 9: 1098}, 8: {0: 2834, 1: 1525, 2: 1589, 3: 1231, 4: 1804, 5: 3591, 6: 1208, 7: 1446, 8: 1200, 9: 1206}, 9: {0: 1128, 1: 7752, 2: 1383, 3: None, 4: 7224, 5: None, 6: 6790, 7: 16860, 8: 9663, 9: 3423}, 10: {0: 1685, 1: 1100, 2: 1957, 3: None, 4: None, 5: 4377, 6: None, 7: 19026, 8: 6897, 9: 2613}, 20: {0: None, 1: None, 2: None, 3: None, 4: None, 5: None, 6: None, 7: None, 8: None, 9: None}}
LSTM results
{4: {0: 1091, 1: 1103, 2: 1120, 3: 1104, 4: 1102, 5: 1083, 6: 1109, 7: 1109, 8: 1091, 9: 1107}, 5: {0: 1193, 1: 1240, 2: 1204, 3: 1165, 4: 1177, 5: 1296, 6: 1192, 7: 1238, 8: 1224, 9: 1294}, 6: {0: 1727, 1: 1438, 2: 1547, 3: 1584, 4: 1430, 5: 1447, 6: 1511, 7: 1437, 8: 1371, 9: 1430}, 7: {0: 2181, 1: 2869, 2: 2245, 3: 1725, 4: 3301, 5: 2454, 6: 1883, 7: 3979, 8: 3061, 9: 2885}, 8: {0: 2850, 1: 10448, 2: 4351, 3: 4833, 4: 5563, 5: 3949, 6: 4135, 7: 3915, 8: 12816, 9: 4049}, 9: {0: 7307, 1: 9525, 2: 12096, 3: 11257, 4: 44083, 5: 26061, 6: 14560, 7: 6564, 8: 13284, 9: 9148}, 10: {0: 20526, 1: 6059, 2: 3756, 3: 12046, 4: 14351, 5: 55894, 6: 13177, 7: 7303, 8: 27809, 9: 11988}, 20: {0: None, 1: None, 2: None, 3: None, 4: None, 5: None, 6: None, 7: None, 8: None, 9: None}}

better LSTM implementation