# Idea: predict summation of two numbers using RNN (LSTM)
1. Step 1: create a dataset (3*5=15)
2. Step 2: convert to the symbols (['3', '*', '5'...)
3. Step 3: encode the sympols ([3, 12, 5, ..])
4. Step 4: convert into one-hot encoded format ([0, 0, 0, 1, ...])
5. Step 5: create LSTM model and fit it.
6. Step 6. Make a prediction.

Overall result:
Input: 5*3
Output: 15

In [34]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, TimeDistributed, Dense, RepeatVector

# Generate data

In [35]:
n_samples = 5
n_numbers = 2
max_num = 9
def generate_data(n_samples, n_numbers, max_num):
    X = []
    y = []
    for i in range(n_samples):
        numbers = [random.randint(0, max_num) for _ in range(n_numbers)]
        X.append(numbers)
        y.append(numbers[0] * numbers[1])
    return X, y
X, y = generate_data(n_samples, n_numbers, max_num)
X

[[4, 4], [2, 6], [0, 5], [7, 8], [1, 5]]

In [36]:
y

[16, 12, 0, 56, 5]

# Convert data to string

In [37]:
def data_to_str(X, y, max_y_length):
    X_str = X.copy()
    for index, numbers in enumerate(X):
        X_str[index] = '*'.join([str(number) for number in numbers])
    y_str = \
        [' ' * (max_y_length - len(str(number))) + str(number) for number in y]
    return X_str, y_str
X, y = data_to_str(X, y, max_y_length=2)
X

['4*4', '2*6', '0*5', '7*8', '1*5']

In [38]:
y

['16', '12', ' 0', '56', ' 5']

# Create symbol vocabulary

In [39]:
symbols = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '*', ' ']
vocabulary = dict()
for number, symbol in enumerate(symbols):
    vocabulary[symbol] = number
vocab_length = len(vocabulary)
vocabulary

{'0': 0,
 '1': 1,
 '2': 2,
 '3': 3,
 '4': 4,
 '5': 5,
 '6': 6,
 '7': 7,
 '8': 8,
 '9': 9,
 '*': 10,
 ' ': 11}

# Encode data

In [40]:
def encode_data(X, y, vocabulary):
    for num, seq in enumerate(X):
        X[num] = [vocabulary[symbol] for symbol in seq]
    for num, seq in enumerate(y):
        y[num] = [vocabulary[symbol] for symbol in seq]
    return X, y
encode_data(X, y, vocabulary)

([[4, 10, 4], [2, 10, 6], [0, 10, 5], [7, 10, 8], [1, 10, 5]],
 [[1, 6], [1, 2], [11, 0], [5, 6], [11, 5]])

# One-hot encode data

In [41]:
def one_hot_encode(dataset):
    dataset_one_hot = []
    for seq in dataset:
        seq_one_hot = []
        for value in seq:
            encoded_seq = [0 for i in range(vocab_length)]
            encoded_seq[value] = 1
            seq_one_hot.append(encoded_seq)
        dataset_one_hot.append(seq_one_hot)

    return np.array(dataset_one_hot)
X = one_hot_encode(X)
y = one_hot_encode(y)
X[:1]

array([[[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]]])

# Function to decode data from one-hot encoded format to original state (like 5 + 3)

In [42]:
def decode(dataset, vocabulary):
    dataset_dec = []
    vocabulary_inv = {value: key for key, value in vocabulary.items()}
    for sample_seq in dataset:
        sample = []
        for symbol_seq in sample_seq:
            # result = np.where(symbol_seq == 1)
            result = np.argmax(symbol_seq)
            symbol = vocabulary_inv[result]
            sample.append(symbol)
        sample = ''.join(sample)
        dataset_dec.append(sample)
    return dataset_dec
decode(X, vocabulary)

['4*4', '2*6', '0*5', '7*8', '1*5']

In [43]:
decode(y, vocabulary)

['16', '12', ' 0', '56', ' 5']

# Function to create data end-to-end

In [44]:
def create_data(n_samples, n_numbers, max_num):
    X, y = generate_data(n_samples, n_numbers, max_num)
    X, y = data_to_str(X, y, max_y_length=2)
    X, y = encode_data(X, y, vocabulary)
    X = one_hot_encode(X)
    y = one_hot_encode(y)

    return X, y

n_samples = 1000
n_numbers = 2
max_num = 9
n_chars = len(vocabulary)

X, y = create_data(n_samples, n_numbers, max_num)
X.shape, y.shape

((1000, 3, 12), (1000, 2, 12))

# Create NN model

In [45]:
model = Sequential()
model.add(LSTM(100, input_shape=(n_numbers + 1, n_chars)))
model.add(RepeatVector(2))
model.add(LSTM(50, return_sequences=True))
model.add(TimeDistributed(Dense(n_chars, activation='softmax')))
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()
print(model.output.shape)
print(model.input.shape)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 100)               45200     
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 2, 100)            0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 2, 50)             30200     
_________________________________________________________________
time_distributed_2 (TimeDist (None, 2, 12)             612       
Total params: 76,012
Trainable params: 76,012
Non-trainable params: 0
_________________________________________________________________
(None, 2, 12)
(None, 3, 12)


# Fit model

In [46]:
model.fit(X, y, batch_size=10, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x776418e0>

# Create test data and make a prediction

In [47]:
n_test_samples = 10
X_test, y_test = create_data(n_samples=n_test_samples, n_numbers=n_numbers,
                             max_num=max_num)
y_pred = model.predict(X_test, batch_size=10, verbose=0)

In [48]:
X_test_dec = decode(X_test, vocabulary)
y_test_dec = decode(y_test, vocabulary)
y_pred_dec = decode(y_pred, vocabulary)

for sample_n in range(n_test_samples):
    print(f'Sample: {X_test_dec[sample_n]}={y_test_dec[sample_n]} Predicted: '
          f'{y_test_dec[sample_n]}')

Sample: 4*9=36 Predicted: 36
Sample: 4*6=24 Predicted: 24
Sample: 6*6=36 Predicted: 36
Sample: 2*3= 6 Predicted:  6
Sample: 0*3= 0 Predicted:  0
Sample: 5*9=45 Predicted: 45
Sample: 8*7=56 Predicted: 56
Sample: 7*3=21 Predicted: 21
Sample: 0*0= 0 Predicted:  0
Sample: 5*0= 0 Predicted:  0
