# Addition Prediction Problem
This problem is taken from https://arxiv.org/abs/1410.4615

Below is a picture of an example of what we want to achieve.

![Many-to-many model](./seq2seq.PNG)

In [33]:
from IPython.display import Image
from IPython.core.display import HTML 
from random import seed, randint
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import LSTM, TimeDistributed, Dense, RepeatVector

In [34]:
def random_sum_pairs(n_examples, n_numbers, largest):
    """Build random addition problems together with their answers.

    Every problem consists of ``n_numbers`` integers, each drawn with
    ``randint`` from the inclusive range ``1..largest``.

    Args:
        n_examples: number of problems to generate.
        n_numbers: number of terms in each problem.
        largest: upper bound (inclusive) for a single term.

    Returns:
        A tuple ``(X, y)``: ``X`` is a list of ``n_examples`` lists of terms,
        ``y`` is the list holding the sum of each problem.
    """
    problems = []
    totals = []
    for _ in range(n_examples):
        terms = []
        for _ in range(n_numbers):
            terms.append(randint(1, largest))
        problems.append(terms)
        totals.append(sum(terms))
    return problems, totals

In [35]:
# Test of above function
# Seed the stdlib RNG first so the pairs displayed below are reproducible.
seed(1)
n_examples = 2
n_numbers = 2
largest = 10
X, y = random_sum_pairs(n_examples, n_numbers, largest)
# Bare last expression: the notebook displays the (X, y) tuple.
X, y

([[3, 10], [2, 5]], [13, 7])

In [36]:
def to_string(X, y, n_numbers, largest):
    """Render addition problems and their sums as fixed-width strings.

    The terms of each problem in ``X`` are joined with '+'. Each sum in ``y``
    is converted with ``str``. Every string is left-padded with spaces to a
    common width derived from ``n_numbers`` and ``largest``, so all inputs
    share one length and all outputs share one length.

    Args:
        X: list of problems, each a list of integers.
        y: list of integer sums.
        n_numbers: number of terms per problem.
        largest: largest value a single term can take.

    Returns:
        A tuple ``(X_str, y_str)`` of lists of padded strings.
    """
    # Widest possible input: every term at full width plus the '+' separators.
    digits_per_term = np.ceil(np.log10(largest + 1))
    in_width = int(n_numbers * digits_per_term + n_numbers - 1)
    # Width reserved for the sum.
    out_width = int(np.ceil(np.log10(n_numbers * (largest + 1))))

    X_str = ['+'.join(map(str, terms)).rjust(in_width) for terms in X]
    y_str = [str(total).rjust(out_width) for total in y]
    return X_str, y_str

In [37]:
# Test to_string
# NOTE: X and y are rebound from lists of integers to lists of strings, so this
# cell is not idempotent -- re-run the random_sum_pairs test cell before
# running it a second time.
X, y = to_string(X, y, n_numbers, largest)
X, y

([' 3+10', '  2+5'], ['13', ' 7'])

In [38]:
# integer encode strings
def integer_encode(X, y, vocab):
    """Replace every character of the input and output strings by its
    position in ``vocab``.

    Args:
        X: list of input strings.
        y: list of output strings.
        vocab: sequence of symbols; a symbol's index is its integer code.

    Returns:
        A tuple ``(X_enc, y_enc)`` of lists of integer lists, one inner list
        per string.

    Raises:
        KeyError: if a string contains a character that is not in ``vocab``.
    """
    index_of = {symbol: position for position, symbol in enumerate(vocab)}

    def encode(strings):
        # One list of integer codes per string, in the original order.
        return [[index_of[ch] for ch in text] for text in strings]

    return encode(X), encode(y)

In [39]:
# Test integer_encode
# The position of each symbol in this list is its integer code: the digits map
# to themselves, '+' to 10 and ' ' (the padding character) to 11.
vocab = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', ' ']
# NOTE: rebinds X, y once more (lists of strings -> lists of integer codes).
X, y = integer_encode(X, y, vocab)
print(X)
print(y)

[[11, 3, 10, 1, 0], [11, 11, 2, 10, 5]]
[[1, 3], [11, 7]]


In [40]:
def one_hot_encode(X, y, max_int):
    """One-hot encode our integer-encoded input and output sequences.

    Args:
        X: list of integer sequences (inputs).
        y: list of integer sequences (outputs).
        max_int: length of each one-hot vector, i.e. the number of symbols.

    Returns:
        A tuple ``(X_hot, y_hot)``. Each is a list of sequences, and each
        sequence is a list of integer NumPy vectors of length ``max_int``
        with a single entry set to 1.
    """
    def one_hot(index):
        # A fresh vector per symbol, so results never share memory.
        vector = np.zeros(max_int, dtype=int)
        vector[index] = 1
        return vector

    def encode(sequences):
        return [[one_hot(index) for index in sequence] for sequence in sequences]

    return encode(X), encode(y)

In [41]:
# Test  one-hot encoding
# Each integer code becomes a vector of len(vocab) entries with a single 1.
# NOTE: rebinds X, y again (integer codes -> lists of NumPy vectors).
X, y = one_hot_encode(X, y, len(vocab))
# The '-'*80 string is only a visual separator between X and y in the output.
X, '-'*80, y

([[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
   array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]),
   array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]),
   array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
   array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])],
  [array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
   array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
   array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
   array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]),
   array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0])]],
 '--------------------------------------------------------------------------------',
 [[array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
   array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])],
  [array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
   array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])]])

In [42]:
def generate_data(n_examples, n_numbers, largest, vocab):
    '''Generate one-hot encoded addition problems for the model, using the
    helper functions above.

    Args:
        n_examples: number of problems to generate.
        n_numbers: number of terms in each problem.
        largest: largest value a single term can take.
        vocab: list of symbols used for the integer / one-hot encoding.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. Because every string is padded to
        a fixed width and every character becomes a vector of ``len(vocab)``
        entries, both arrays are 3-D:
        (n_examples, sequence length, len(vocab)).
    '''
    # generate pairs
    X, y = random_sum_pairs(n_examples, n_numbers, largest)
    # convert to strings
    X, y = to_string(X, y, n_numbers, largest)
    # integer encode
    X, y = integer_encode(X, y, vocab)
    # one hot encode
    X, y = one_hot_encode(X, y, len(vocab))
    # return as NumPy arrays; only `np` is imported in this notebook, so the
    # bare name `array` would raise NameError on a fresh kernel
    X, y = np.array(X), np.array(y)
    return X, y

In [61]:
# Inverse transform of one-hot encoding
def invert(seq, vocab):
    """Decode a sequence of one-hot (or probability) vectors into a string.

    For every vector in ``seq`` the position of its largest entry is looked
    up in ``vocab`` and the resulting symbols are concatenated.

    Args:
        seq: iterable of vectors, one per character.
        vocab: sequence of symbols; index ``i`` decodes to ``vocab[i]``.

    Returns:
        The decoded string (empty if ``seq`` is empty).
    """
    # Build the lookup from the `vocab` argument; the previous code read an
    # undefined global named `alphabet` and ignored this parameter.
    int_to_char = dict(enumerate(vocab))
    return ''.join(int_to_char[np.argmax(pattern)] for pattern in seq)

In [58]:
# number of math terms
n_terms = 3
# largest value for any single input term
largest = 10
# scope of possible symbols for each input or output time step
vocab = [str(x) for x in range(10)] + ['+', ' ']
# size of the vocabulary (12: the digits 0-9, '+' and ' ');
# computed from `vocab` because no `alphabet` variable is defined in this notebook
n_chars = len(vocab)
# length of encoded input sequence (8 for '10+10+10')
n_in_seq_length = int(n_terms * np.ceil(np.log10(largest+1)) + n_terms - 1)
# length of encoded output sequence (2 for '30')
n_out_seq_length = int(np.ceil(np.log10(n_terms * (largest+1))))

In [59]:
# define LSTM
# Encoder: the first LSTM reads the whole one-hot encoded input sequence
# (n_in_seq_length time steps of n_chars features) and emits one 75-unit vector.
model = Sequential()
model.add(LSTM(75, input_shape=(n_in_seq_length, n_chars)))
# Repeat that vector once per output character so the decoder receives a sequence.
model.add(RepeatVector(n_out_seq_length))
# Decoder: return_sequences=True keeps one 50-unit output per output time step.
model.add(LSTM(50, return_sequences=True))
# The same softmax Dense layer is applied at every output step, giving a
# distribution over the n_chars symbols for each output character.
model.add(TimeDistributed(Dense(n_chars, activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Print the layer-by-layer output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 75)                26400     
_________________________________________________________________
repeat_vector_3 (RepeatVecto (None, 2, 75)             0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 2, 50)             25200     
_________________________________________________________________
time_distributed_3 (TimeDist (None, 2, 12)             612       
Total params: 52,212
Trainable params: 52,212
Non-trainable params: 0
_________________________________________________________________


In [60]:
# fit model
# Train for a single epoch on 75,000 generated problems.
# NOTE: X and y are rebound to the training arrays here.
X, y = generate_data(75000, n_terms, largest, vocab)
model.fit(X, y, epochs=1, batch_size=32, verbose=2)

Epoch 1/1
 - 59s - loss: 0.6653 - acc: 0.8106


<keras.callbacks.History at 0x7f526cbd23c8>

In [64]:
# evaluate LSTM
# Score the model on 100 newly generated problems (X, y are rebound again).
X, y = generate_data(100, n_terms, largest, vocab)
loss, acc = model.evaluate(X, y, verbose=0)
# acc is multiplied by 100 to print it as a percentage.
# NOTE(review): accuracy is presumably counted per output character rather
# than per complete sum -- confirm before quoting it as "sums correct".
print('Loss: %f, Accuracy: %f' % (loss, acc*100))

Loss: 0.111792, Accuracy: 99.500000


In [65]:
# predict
# Show 10 individual predictions next to the expected answers.
for _ in range(10):
    # generate an input-output pair
    X, y = generate_data(1, n_terms, largest, vocab)
    # make prediction
    yhat = model.predict(X, verbose=0)
    # decode input, expected and predicted
    # ([0] selects the single example in each batch of size 1)
    in_seq = invert(X[0], vocab)
    out_seq = invert(y[0], vocab)
    predicted = invert(yhat[0], vocab)
    # Printed order: input, model prediction, then the expected answer.
    print('%s = %s (expect %s)' % (in_seq, predicted, out_seq))

  5+4+10 = 19 (expect 19)
  1+6+10 = 17 (expect 17)
  10+2+6 = 18 (expect 18)
   4+2+3 =  9 (expect  9)
   6+2+8 = 16 (expect 16)
 10+2+10 = 22 (expect 22)
   9+5+2 = 16 (expect 16)
   2+2+1 =  6 (expect  5)
   5+5+9 = 19 (expect 19)
   6+4+3 = 13 (expect 13)
