# Numeric Adder using RNN

- Adapted from the Keras example `addition_rnn.py`: https://github.com/keras-team/keras/blob/master/examples/addition_rnn.py

In [1]:
import keras
from keras.layers import LSTM, RepeatVector, TimeDistributed, Dense, Activation
from keras.models import Sequential

import numpy as np

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
class CharacterTable(object):
    """Two-way lookup between characters and one-hot rows.

    The alphabet is the sorted set of the characters passed to the
    constructor; a character's position in that sorted list is its index.
    """

    def __init__(self, chars):
        """Build the lookup tables.
        # Arguments
            chars: Characters that can appear in the input.
        """
        self.chars = sorted(set(chars))
        self.char_indices = {ch: idx for idx, ch in enumerate(self.chars)}
        self.indices_char = dict(enumerate(self.chars))

    def encode(self, C, num_rows):
        """Return the one-hot matrix for string C.
        # Arguments
            C: String to encode; every character must be in the table.
            num_rows: Number of rows in the returned matrix. Rows beyond
                len(C) stay all-zero, so every sample has the same height.
        # Returns
            Float array of shape (num_rows, len(self.chars)).
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            one_hot[row, self.char_indices[ch]] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn a one-hot / score matrix (or index sequence) back into text.
        # Arguments
            x: Array whose last axis is reduced with argmax, or, when
                calc_argmax is False, an iterable of character indices.
            calc_argmax: Whether to take the argmax over the last axis first.
        # Returns
            The decoded string.
        """
        char_ids = x.argmax(axis=-1) if calc_argmax else x
        return ''.join(self.indices_char[idx] for idx in char_ids)

In [3]:
# Global Parameters
TRAINING_SIZE = 50000    # number of unique addition questions to generate
DIGITS = 3               # each operand is built from 1..DIGITS digit characters
REVERSE = True           # feed each question to the network reversed
MAXLEN = 2 * DIGITS + 1  # longest possible question: DIGITS + '+' + DIGITS

# Seed NumPy's global RNG so that data generation, the shuffle and the
# random spot-check samples are the same on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

In [4]:
# All the numbers, plus sign and space for padding.
chars = '0123456789+ '
ctable = CharacterTable(chars)
# Width of every one-hot row. Derived from the table (which de-duplicates
# `chars`) rather than hard-coded, so it cannot drift from the alphabet.
n_chars = len(ctable.chars)

In [6]:
# Prepare Dataset
DIGIT_CHARS = list('0123456789')


def random_operand():
    """Return an int parsed from 1..DIGITS randomly chosen digit characters.

    Leading zeros are possible, so the value may have fewer than the drawn
    number of digits.
    """
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(DIGIT_CHARS) for _ in range(n_digits)))


# The generation loop only terminates if enough distinct unordered operand
# pairs exist, so fail fast instead of spinning forever.
n_values = 10 ** DIGITS
if TRAINING_SIZE > n_values * (n_values + 1) // 2:
    raise ValueError('TRAINING_SIZE exceeds the number of unique questions '
                     'available for DIGITS={}'.format(DIGITS))

questions = []
expected = []
seen = set()
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    a, b = random_operand(), random_operand()
    # Skip any addition questions we've already seen.
    # Also skip any such that a+b == b+a (hence the sorting).
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Pad the data with spaces such that it is always MAXLEN.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a + b)
    # Answers can be of maximum size DIGITS + 1.
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        # Reverse the padded query, e.g. '12+345 ' becomes ' 543+21'
        # (the padding space moves to the front).
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

print('Vectorization...')
# One-hot tensors: x holds the questions (MAXLEN rows each), y the answers
# (DIGITS + 1 rows each). Use the builtin `bool` as dtype: the `np.bool`
# alias is deprecated and no longer exists in recent NumPy releases.
x = np.zeros((len(questions), MAXLEN, n_chars), dtype=bool)
y = np.zeros((len(questions), DIGITS + 1, n_chars), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

# Reorder x and y with one shared random permutation so that every question
# stays aligned with its answer; the later parts of x would otherwise almost
# all be larger digits.
indices = np.random.permutation(len(y))
x, y = x[indices], y[indices]

# Split test train
# `ratio` must be assigned before it is used to compute the split point;
# otherwise a fresh kernel raises NameError.
ratio = 0.2  # fraction of the samples held out as test data
train_length = int(len(x) * (1 - ratio))
x_train, x_test = x[:train_length], x[train_length:]
y_train, y_test = y[:train_length], y[train_length:]

print('Training Data:')
print(x_train.shape, y_train.shape)

print('Test Data:')
print(x_test.shape, y_test.shape)

Generating data...
Total addition questions: 50000
Vectorization...
Training Data:
(40000, 7, 12) (40000, 4, 12)
Test Data:
(10000, 7, 12) (10000, 4, 12)


In [8]:
# Build Model
HIDDEN_SIZE = 128
REC_LAYERS = 1

# Encoder: read the MAXLEN-step question and keep only the final LSTM output,
# then repeat that vector once per answer position.
layer_stack = [
    LSTM(HIDDEN_SIZE, input_shape=(MAXLEN, n_chars)),
    RepeatVector(DIGITS + 1),
]
# Decoder: REC_LAYERS recurrent layers that emit an output at every step.
layer_stack += [LSTM(HIDDEN_SIZE, return_sequences=True)
                for _ in range(REC_LAYERS)]
# Per-step projection onto the character set, turned into probabilities.
layer_stack += [TimeDistributed(Dense(n_chars)), Activation('softmax')]

# Sequential Model
model = Sequential(layer_stack)

# Compiler
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Summary
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 12)             1548      
_________________________________________________________________
activation_1 (Activation)    (None, 4, 12)             0         
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


## Training and Validation

In [9]:
# Train, Validate
batch_size = 128
nepochs = 1
iterations = 200
validation_size = 10   # number of random test samples printed per report
report_every = 10      # print a spot check every `report_every` iterations

# range(1, iterations + 1) so that exactly `iterations` training rounds run
# and the final round (a multiple of report_every) is also reported.
for iteration in range(1, iterations + 1):
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=nepochs,
              validation_data=(x_test, y_test))

    # Random Validation Process
    if iteration % report_every == 0:
        # A blank line followed by a single 50-character rule.
        print('\n' + '-' * 50)
        print('Training Iteration', iteration)
        for _ in range(validation_size):
            # Pick one test sample; slicing keeps the leading batch axis.
            # A separate name is used so the shuffle `indices` array from the
            # data-preparation cell is not overwritten.
            sample = np.random.randint(0, len(x_test))
            rowx = x_test[sample:sample + 1]
            rowy = y_test[sample:sample + 1]

            # `predict` returns per-character probabilities; `decode` takes
            # the argmax itself, which avoids `predict_classes` (not
            # available in current Keras releases).
            probs = model.predict(rowx, verbose=0)
            q = ctable.decode(rowx[0])
            correct = ctable.decode(rowy[0])
            guess = ctable.decode(probs[0])
            print('Q', q[::-1] if REVERSE else q, end=' ')
            print('T', correct, end=' ')
            if correct == guess:
                print('equal', end=' ')
            else:
                print('not', end=' ')
            print(guess)

Train on 40000 samples, validate on 10000 samples
Epoch 1/1
 8960/40000 [=====>........................] - ETA: 24s - loss: 2.1275 - acc: 0.2609

KeyboardInterrupt: 