# Making Your Model Learn Addition!

Given the string "54+7", the model should return a prediction: "61".

## 1. Import Libraries

In [1]:
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Dense, Dropout, SimpleRNN, RepeatVector
from tensorflow.keras.callbacks import EarlyStopping, LambdaCallback

from termcolor import colored

## 2. Generate Data

In [2]:
# Vocabulary of every character that appears in an example or a label:
# the ten decimal digits plus the '+' operator.
all_chars = '0123456789+'

In [3]:
# One one-hot feature per character in the vocabulary.
num_features = len(all_chars)
print("number of features:", num_features)

# Lookup tables between a character and its one-hot position (and back).
char_to_index = {char: position for position, char in enumerate(all_chars)}
index_to_char = dict(enumerate(all_chars))

number of features: 11


In [4]:
def generate_data():
    """Draw one random addition problem.

    Returns:
        A tuple ``(example, label)`` where ``example`` is the string
        ``"<a>+<b>"`` for two integers drawn from ``[0, 100)`` and ``label``
        is their sum rendered as a string.
    """
    # The two operands are drawn one after the other from the global NumPy RNG.
    lhs = np.random.randint(0, 100)
    rhs = np.random.randint(0, 100)
    return f"{lhs}+{rhs}", str(lhs + rhs)

generate_data()

('42+95', '137')

## 3. Create the Model

In [6]:
# Width of the hidden state in each recurrent layer.
hidden_units = 128
# Number of time steps the decoder emits; vectorize_example pads to this length.
max_time_steps = 5

# Encoder -> repeat -> decoder: the first RNN reads the one-hot input sequence
# into a single vector, RepeatVector copies that vector once per output step,
# and the second RNN plus a per-step softmax predicts one character per step.
model = Sequential()
model.add(SimpleRNN(hidden_units, input_shape=(None, num_features)))
model.add(RepeatVector(max_time_steps))
model.add(SimpleRNN(hidden_units, return_sequences=True))
model.add(TimeDistributed(Dense(num_features, activation='softmax')))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn (SimpleRNN)       (None, 128)               17920     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 5, 128)            0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 5, 128)            32896     
_________________________________________________________________
time_distributed (TimeDistri (None, 5, 11)             1419      
Total params: 52,235
Trainable params: 52,235
Non-trainable params: 0
_________________________________________________________________


## 4. Vectorize and De-Vectorize Data

In [7]:
def vectorize_example(example, label):
    """One-hot encode an example string and its label string.

    Both strings are right-aligned in a ``(max_time_steps, num_features)``
    array; unused leading rows are filled with the one-hot code for ``'0'``.

    Args:
        example: Input text such as ``"54+61"``; every character must be a
            key of ``char_to_index``.
        label: Target text such as ``"115"``; same character constraint.

    Returns:
        A tuple ``(x, y)`` of float arrays, each of shape
        ``(max_time_steps, num_features)``.

    Raises:
        ValueError: If either string is longer than ``max_time_steps``.
        KeyError: If a character is missing from ``char_to_index``.
    """
    def _one_hot(text):
        # Encode one string, left-padded with '0' up to max_time_steps rows.
        pad = max_time_steps - len(text)
        if pad < 0:
            # A negative offset would wrap around through NumPy's negative
            # indexing and silently write characters into the wrong rows.
            raise ValueError(
                'text {!r} is longer than max_time_steps={}'.format(text, max_time_steps)
            )
        encoded = np.zeros((max_time_steps, num_features))
        encoded[:pad, char_to_index['0']] = 1
        for offset, char in enumerate(text):
            encoded[pad + offset, char_to_index[char]] = 1
        return encoded

    return _one_hot(example), _one_hot(label)

# Sanity check: encode one random problem and confirm both arrays come back
# with shape (max_time_steps, num_features). The x and y bound here are read
# by the devectorize_example demo cells that follow.
e, l = generate_data()
print(e, l)
x, y = vectorize_example(e, l)
print(x.shape, y.shape)

54+61 115
(5, 11) (5, 11)


In [8]:
def devectorize_example(example):
    """Decode a sequence of per-step score vectors back into a string.

    Args:
        example: 2-D array-like with one row per time step; the position of
            the largest value in each row selects the character.

    Returns:
        The decoded characters joined into a single string.
    """
    # A single argmax over the feature axis gives every step's character index.
    char_positions = np.argmax(example, axis=1)
    return ''.join(index_to_char[position] for position in char_positions)

devectorize_example(x)

'54+61'

In [9]:
devectorize_example(y)

'00115'

## 5. Create Dataset

In [12]:
def create_dataset(num_examples=2000):
    """Generate and one-hot encode a batch of random addition problems.

    Args:
        num_examples: Number of (example, label) pairs to generate.

    Returns:
        A tuple ``(x, y)`` of arrays, each of shape
        ``(num_examples, max_time_steps, num_features)``.
    """
    batch_shape = (num_examples, max_time_steps, num_features)
    inputs = np.zeros(batch_shape)
    targets = np.zeros(batch_shape)
    for row in range(num_examples):
        # Each row is an independently sampled problem and its encoded sum.
        inputs[row], targets[row] = vectorize_example(*generate_data())
    return inputs, targets
# Build the training set with the default size. This rebinds the module-level
# x and y (previously a single encoded example) to the full 3-D arrays that
# model.fit consumes below.
x,y = create_dataset()
print(x.shape, y.shape)

(2000, 5, 11) (2000, 5, 11)


In [13]:
devectorize_example(x[0])

'51+99'

In [14]:
devectorize_example(y[0])

'00150'

## 6. Training the Model

In [15]:
def _print_val_accuracy(epoch, logs):
    """Print this epoch's validation accuracy on one running line.

    With metrics=['accuracy'], older Keras releases log the value under
    'val_acc' and newer ones under 'val_accuracy', so use whichever key
    is present instead of hard-coding one.
    """
    key = 'val_accuracy' if 'val_accuracy' in logs else 'val_acc'
    print('{:.2f}'.format(logs[key]), end = ' _ ')

l_cb = LambdaCallback(on_epoch_end=_print_val_accuracy)
# Stop once validation loss has gone 10 epochs without improving.
es_cb = EarlyStopping(monitor='val_loss', patience=10)
# verbose=0 silences the built-in progress output; l_cb prints progress instead.
model.fit(x, y, epochs=500, batch_size=256, validation_split=0.2, verbose=0, callbacks=[es_cb, l_cb])

0.56 _ 0.62 _ 0.62 _ 0.62 _ 0.63 _ 0.63 _ 0.63 _ 0.64 _ 0.64 _ 0.64 _ 0.65 _ 0.66 _ 0.66 _ 0.66 _ 0.66 _ 0.67 _ 0.67 _ 0.69 _ 0.69 _ 0.70 _ 0.70 _ 0.70 _ 0.72 _ 0.71 _ 0.73 _ 0.73 _ 0.73 _ 0.73 _ 0.73 _ 0.74 _ 0.75 _ 0.74 _ 0.75 _ 0.75 _ 0.73 _ 0.77 _ 0.76 _ 0.76 _ 0.76 _ 0.77 _ 0.77 _ 0.77 _ 0.77 _ 0.78 _ 0.77 _ 0.77 _ 0.77 _ 0.78 _ 0.78 _ 0.78 _ 0.79 _ 0.79 _ 0.80 _ 0.80 _ 0.80 _ 0.80 _ 0.81 _ 0.81 _ 0.82 _ 0.82 _ 0.82 _ 0.82 _ 0.83 _ 0.82 _ 0.83 _ 0.83 _ 0.85 _ 0.85 _ 0.85 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.86 _ 0.87 _ 0.88 _ 0.88 _ 0.88 _ 0.89 _ 0.89 _ 0.89 _ 0.90 _ 0.89 _ 0.90 _ 0.89 _ 0.90 _ 0.90 _ 0.90 _ 0.91 _ 0.91 _ 0.90 _ 0.91 _ 0.91 _ 0.90 _ 0.91 _ 0.91 _ 0.91 _ 0.91 _ 0.92 _ 0.92 _ 0.92 _ 0.92 _ 0.91 _ 0.91 _ 0.92 _ 0.93 _ 0.92 _ 0.92 _ 0.93 _ 0.92 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.94 _ 0.93 _ 0.94 _ 0.94 _ 0.93 _ 0.93 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.93 _ 0.93 _ 0.94 _

<tensorflow.python.keras.callbacks.History at 0x158859ddcc0>

In [21]:
# Score the trained model on 10 freshly generated problems.
x_test, y_test = create_dataset(10)
preds = model.predict(x_test)

# The decoded strings get their own names (y_true / y_hat) so the training
# labels held in the module-level `y` are not overwritten; otherwise
# re-running the training cell after this one would fail.
for i, pred in enumerate(preds):
    y_true = devectorize_example(y_test[i])
    y_hat = devectorize_example(pred)
    # Green when the whole predicted string matches the target, red otherwise.
    col = 'green' if y_true == y_hat else 'red'
    out = 'Input: '+devectorize_example(x_test[i])+ ' Out:'+y_true+' Pred:'+y_hat
    print(colored(out, col))

[32mInput: 56+48 Out:00104 Pred:00104[0m
[32mInput: 85+50 Out:00135 Pred:00135[0m
[32mInput: 72+59 Out:00131 Pred:00131[0m
[32mInput: 72+15 Out:00087 Pred:00087[0m
[32mInput: 68+42 Out:00110 Pred:00110[0m
[32mInput: 61+46 Out:00107 Pred:00107[0m
[31mInput: 60+20 Out:00080 Pred:00070[0m
[32mInput: 54+49 Out:00103 Pred:00103[0m
[32mInput: 71+39 Out:00110 Pred:00110[0m
[32mInput: 01+92 Out:00093 Pred:00093[0m
