# Lecture 4 notebook (Recurrent Neural Networks Example 1: Bit-sequence)
## Introduction to TensorFlow and Deep Learning

## IADS Summer School 2023

### Dr Michael Fairbank, University of Essex, UK

- Email: m.fairbank@essex.ac.uk
- This is a Jupyter Notebook to accompany Lecture 4 of the course



## Build a time-sequence test problem

- We will simply generate a random sequence of bits
- The task for the RNN is to output each input bit again after a fixed delay: the output at time step t should equal the input that was shown at time step t - delayLength.
- The longer the delay length, the harder the problem.
- E.g. if the input sequence is 1,0,1,1 and the delay length is 2, then the RNN must perform the following:
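    - time step 0: input = 1, output = None (nothing to recall yet)
    - time step 1: input = 0, output = None (nothing to recall yet)
    - time step 2: input = 1, output = 1 (the input from time step 0)
    - time step 3: input = 1, output = 0 (the input from time step 1)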


In [None]:
# RNN Demo for tensorflow v2.x
# Bit-sequence memorization problem
# Shows how we can build a RNN using keras to memorise a sequence of bits
# Michael Fairbank July 2019

import tensorflow as tf
import tensorflow.keras as keras
import numpy as np


# --- Problem configuration ---
trainingSetSize = 200  # number of sequences generated for training
testSetSize = 100  # number of sequences generated for testing/validation
delayLength = 5  # how many time steps back the network must recall; larger values make the problem harder
seqLength = delayLength + 50  # total number of time steps in every sequence
useXor = False  # Set this to True to make the problem significantly harder.

def calculateInputOutputSequences(seqLength, batchSize, delayLength, useXor, seed):
    """Generate random bit sequences and the matching delayed-recall targets.

    Args:
        seqLength: Number of time steps in every input sequence.
        batchSize: Number of sequences to generate.
        delayLength: Number of time steps between a bit being shown and the
            bit having to be reproduced. Must satisfy
            ``0 <= delayLength <= seqLength``.
        useXor: If true, each target is the delayed bit XOR-ed with the bit
            shown ``delayLength`` steps later, instead of the delayed bit alone.
        seed: Seed for NumPy's global random number generator, so the same
            seed always yields the same data.

    Returns:
        A two-element list ``[inputs, targets]`` of ``float32`` arrays.
        ``inputs`` has shape ``(batchSize, seqLength, 1)`` and ``targets`` has
        shape ``(batchSize, seqLength - delayLength, 1)``. ``targets[:, i]``
        is built from ``inputs[:, i]``, i.e. it is the value expected
        ``delayLength`` steps later, at time step ``i + delayLength``.

    Raises:
        ValueError: If ``delayLength`` is negative or exceeds ``seqLength``.
    """
    # Without this check an out-of-range delay makes the slices below wrap or
    # clamp, silently producing targets of the wrong length or alignment.
    if not 0 <= delayLength <= seqLength:
        raise ValueError(
            "delayLength must satisfy 0 <= delayLength <= seqLength "
            "(got delayLength=%r, seqLength=%r)" % (delayLength, seqLength))

    np.random.seed(seed)  # reseeds the global NumPy RNG so the data is reproducible
    input_bits = np.random.randint(2, size=(batchSize, seqLength, 1))  # random bits (0 or 1)

    # The last "delayLength" bits can never be recalled within the sequence,
    # so they are dropped from the targets.
    num_targets = seqLength - delayLength
    targets = input_bits[:, :num_targets, :]

    if useXor:
        # Extra twist of difficulty: XOR the delayed bit with the bit that is
        # being shown at the moment the answer is due.
        bits_at_answer_time = input_bits[:, delayLength:, :]
        targets = np.bitwise_xor(targets, bits_at_answer_time)

    return [input_bits.astype(np.float32), targets.astype(np.float32)]

# Generate the training and test data sets; they use different seeds (1 and 0).
train_input_sequence, train_output_targets = calculateInputOutputSequences(
    seqLength, trainingSetSize, delayLength, useXor=useXor, seed=1)
test_input_sequence, test_output_targets = calculateInputOutputSequences(
    seqLength, testSetSize, delayLength, useXor=useXor, seed=0)

# Preview the first few training patterns, flattened to one sequence per row.
num_preview = 4
preview_shape = (num_preview, -1)
print("train_input_sequence", train_input_sequence[0:num_preview].reshape(preview_shape))
print("train_output_targets", train_output_targets[0:num_preview].reshape(preview_shape))

In [None]:
# Network sizes. The hidden layer is deliberately kept small for challenge;
# delayLength+3 nodes should be enough to solve this problem.
numHiddenNodes = delayLength + 3
num_output_nodes = 1

# Recurrent layer that returns its hidden state at every time step.
layer_recurrent = tf.keras.layers.SimpleRNN(numHiddenNodes, return_sequences=True)
# Alternative recurrent layer to experiment with:
#layer_recurrent=tf.keras.layers.LSTM(numHiddenNodes,return_sequences=True)

# Sigmoid read-out applied to the recurrent layer's output sequence.
layer_output_layer = tf.keras.layers.Dense(num_output_nodes, activation=tf.nn.sigmoid)

# Discard the outputs of the first "delayLength" time steps, so the remaining
# outputs line up with the target sequence.
layer_clip = tf.keras.layers.Lambda(lambda x: x[:, delayLength:, :])

model_layers = [layer_recurrent, layer_output_layer, layer_clip]
full_recurrent_keras_model = tf.keras.Sequential(model_layers)

# We have to run the network once before the trainable_variables are created.
full_recurrent_keras_model(train_input_sequence[0:1])


In [None]:
optimizer = keras.optimizers.Adam()

# Train on mean-squared error and report the binary accuracy of the outputs.
full_recurrent_keras_model.compile(
    optimizer=optimizer,
    loss='mse',
    metrics=[tf.keras.metrics.BinaryAccuracy()],
)

# Try to make training stop when the validation accuracy stops increasing
# (patience of 100 epochs).
# See https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/Callback for some more information.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_binary_accuracy',
    mode='max',
    patience=100,
)

history = full_recurrent_keras_model.fit(
    x=train_input_sequence,
    y=train_output_targets,
    batch_size=10,
    epochs=500,
    validation_data=(test_input_sequence, test_output_targets),
    callbacks=[callback],
)

In [None]:
import matplotlib.pyplot as plt

# Plot the training and validation accuracy curves recorded during fitting.
for metric_name in ('binary_accuracy', 'val_binary_accuracy'):
    plt.plot(history.history[metric_name], label=metric_name)

plt.xlabel('Epoch')
plt.ylabel('Accuracy')

# The title records the problem settings that produced this curve.
plot_title = "Accuracy for delay length " + str(delayLength)
if useXor:
    plot_title = plot_title + " with XOR"
plt.suptitle(plot_title, fontsize=12, y=0.93)

plt.grid()
plt.legend(loc='lower right')
plt.show()

In [None]:
# Show the network's output for the first test pattern next to its label,
# joined along the last axis so each row reads [prediction, target].
first_test_input = test_input_sequence[0:1]
predicted_outputs = full_recurrent_keras_model(first_test_input).numpy()
expected_outputs = test_output_targets[0:1]
print(np.concatenate([predicted_outputs, expected_outputs], axis=2))
