# RNN Recurrent Neural Network
### Using Tensorflow / Keras

## Imports

In [1]:
from __future__ import print_function
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
import numpy as np
from six.moves import range

## Utility CharacterTable class

Given a set of characters: (char, characters that can appear in the output    
- Encode them to a one-hot integer representation  
- Decode the one-hot or integer representation to their character output  
- Decode a vector of probabilities to their character output  


In [2]:
class CharacterTable(object):
   
    def __init__(self, chars):
        self.chars = sorted(set(chars))
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))

    def encode(self, str, num_rows):
        oh = np.zeros((num_rows, len(self.chars)))
        for i, s in enumerate(str):
            oh[i, self.char_indices[s]] = 1
        return oh

    def decode(self, x, calc_argmax=True):
        if calc_argmax:
            x = x.argmax(axis=-1)
        return ''.join(self.indices_char[x] for x in x)

## Parameters

In [3]:
class colors:
    ok = '\033[92m'
    fail = '\033[91m'
    close = '\033[0m'

# Parameters for the model and dataset.
training_size = 50000
digits = 3
reverse = True

# Maximum length of input is 'int + int' (e.g., '345+678'). Maximum length of int is digits.
maxlen = digits + 1 + digits

# All the numbers, plus sign and space for padding.
chars = '0123456789+ '
ctable = CharacterTable(chars)

## Generate the data

In [4]:
questions = []
expected = []
seen = set()
print('Generating data...')
while len(questions) < training_size:
    f = lambda: int(''.join(np.random.choice(list('0123456789'))
                    for i in range(np.random.randint(1, digits+1))))
    a, b = f(), f()
    
    # Skip any addition questions we've already seen. Also skip any such that x+Y == Y+x (hence the sorting).
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    
    # Pad the data with spaces such that it is always maxlen.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (maxlen - len(q))
    ans = str(a + b)
    
    # Answers can be of maximum size digits + 1.
    ans += ' ' * (digits + 1 - len(ans))
    if reverse:
        # Reverse the query, e.g., '12+345  ' becomes '  543+21'. (Note the space used for padding.)
        query = query[::-1]
    questions.append(query)
    expected.append(ans)

print('Total addition questions:', len(questions))

print('Vectorization...')
x = np.zeros((len(questions), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(questions), digits + 1, len(chars)), dtype=np.bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, maxlen)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, digits + 1)

# Shuffle (x, y) in unison as the later parts of x will almost all be larger digits.
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# Explicitly set apart 10% for validation data that we never train over.
split_at = len(x) - len(x) // 10
(x_train, x_val) = x[:split_at], x[split_at:]
(y_train, y_val) = y[:split_at], y[split_at:]

print('Training Data:')
print(x_train.shape)
print(y_train.shape)

print('Validation Data:')
print(x_val.shape)
print(y_val.shape)

Generating data...
Total addition questions: 50000
Vectorization...
Training Data:
(45000, 7, 12)
(45000, 4, 12)
Validation Data:
(5000, 7, 12)
(5000, 4, 12)


## Build model

In [5]:
lstm = layers.LSTM
nHid = 128
batch_size = 128
num_layers = 1

model = Sequential()
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
# Note: In a situation where your input sequences have a variable length, use input_shape=(None, num_feature).
model.add(lstm(nHid, input_shape=(maxlen, len(chars))))
# As the decoder RNN's input, repeatedly provide with the last output of RNN for each time step.  
# Repeat 'DIGITS + 1' times as that's the maximum length of output, e.g., when DIGITS=3, max output is 999+999=1998.
model.add(layers.RepeatVector(digits+1))
# The decoder RNN could be multiple layers stacked or a single layer.
for _ in range(num_layers):
    # By setting return_sequences to True, return not only the last output but all the outputs so far in the form of (num_samples, timesteps, output_dim).  
    # This is necessary as TimeDistributed in the below expects the first dimension to be the timesteps.
    model.add(lstm(nHid, return_sequences=True))

# Apply a dense layer to the every temporal slice of an input. For each of step of the output sequence, decide which character should be chosen.
model.add(layers.TimeDistributed(layers.Dense(len(chars), activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 128)               72192     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 4, 128)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed (TimeDistri (None, 4, 12)             1548      
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


## Testing

Train the model each generation and show predictions against the validation dataset.

In [None]:
max_it = 200
for iteration in range(max_it):
    print('\n-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train, batch_size=batch_size, epochs=1, validation_data=(x_val, y_val))
    
    # Select 10 samples from the validation set at random so we can visualize errors.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = model.predict_classes(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if reverse else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Iteration 0
Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
Q 827+81  T 908  [91m☒[0m 100 
Q 219+706 T 925  [91m☒[0m 100 
Q 978+235 T 1213 [91m☒[0m 1000
Q 52+222  T 274  [91m☒[0m 700 
Q 86+329  T 415  [91m☒[0m 100 
Q 629+549 T 1178 [91m☒[0m 100 
Q 724+112 T 836  [91m☒[0m 100 
Q 553+38  T 591  [91m☒[0m 100 
Q 78+454  T 532  [91m☒[0m 100 
Q 2+403   T 405  [91m☒[0m 33  

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Iteration 1
Q 58+979  T 1037 [91m☒[0m 906 
Q 3+555   T 558  [91m☒[0m 56  
Q 27+62   T 89   [91m☒[0m 23  
Q 895+237 T

## Credits & Links