In [None]:
"""
Learning addition and subtraction from string data.
"""

In [1]:
import os
import random

import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, LambdaCallback
from tensorflow.keras.layers import TimeDistributed, Dense, Dropout, SimpleRNN, RepeatVector
from tensorflow.keras.models import Sequential
from termcolor import colored

tf.config.run_functions_eagerly(True)

In [2]:
# Operators the generated expressions may contain.
ops = ['+', '-']
# Vocabulary: the ten decimal digits followed by the operator symbols.
all_chars = ''.join(str(digit) for digit in range(10)) + ''.join(ops)
all_chars

'0123456789+-'

In [3]:
# One one-hot feature per vocabulary character.
num_features = len(all_chars)
# Tokenizer tables: character -> vocabulary position and back.
char_to_index = {char: index for index, char in enumerate(all_chars)}
index_to_char = dict(enumerate(all_chars))

In [4]:
LO, HI = 0, 1000


def generate_data(lo=LO, hi=HI):
    """Create one random arithmetic example.

    Both operands are drawn uniformly from ``[lo, hi]`` (``hi`` inclusive) and
    the operator is picked at random from ``ops``.

    Returns:
        A ``(expression, answer)`` pair of strings, e.g. ``('38+833', '871')``.
        An operator with no matching rule below yields the answer ``'0'``.
    """
    left = np.random.randint(lo, hi + 1)
    right = np.random.randint(lo, hi + 1)
    op = random.choice(ops)

    # Only relevant if '/' is added to `ops`: avoid dividing by zero.
    if op == '/' and right == 0:
        right = 1

    # '*' and '/' are only reachable if those symbols are added to `ops`.
    rules = {
        '+': lambda a, b: a + b,
        '-': lambda a, b: a - b,
        '*': lambda a, b: a * b,
        '/': lambda a, b: a // b,
    }
    answer = rules[op](left, right) if op in rules else 0

    expression = ''.join((str(left), op, str(right)))
    return expression, str(answer)
generate_data()

('38+833', '871')

In [5]:
# Recurrent NN for variable vectors, both input and output
hidden_units = 128
max_time_steps = 2 * 3 + 1 # max length of input

model = Sequential([
    SimpleRNN(hidden_units, input_shape=(None, num_features)),
    RepeatVector(max_time_steps), # get singular vec representation
    # decoder starts here:
    SimpleRNN(hidden_units, return_sequences=True),
    TimeDistributed(Dense(num_features, activation='softmax'))
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)
model.summary()

2022-09-03 15:39:27.372302: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, 128)               18048     
                                                                 
 repeat_vector (RepeatVector  (None, 7, 128)           0         
 )                                                               
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 7, 128)            32896     
                                                                 
 time_distributed (TimeDistr  (None, 7, 12)            1548      
 ibuted)                                                         
                                                                 
Total params: 52,492
Trainable params: 52,492
Non-trainable params: 0
_________________________________________________________________


In [6]:
def vectorize_example(example, label):
    """One-hot encode an expression and its answer as fixed-size arrays.

    Each string is right-aligned in a ``(max_time_steps, num_features)`` array
    and the unused leading rows are filled with the one-hot code for ``'0'``.

    No length check is performed: a string longer than ``max_time_steps``
    produces negative row offsets, which NumPy resolves from the end of the
    array.

    Returns:
        ``(x, y)``: the encoded ``example`` and the encoded ``label``.
    """
    def right_aligned_one_hot(text):
        encoded = np.zeros((max_time_steps, num_features))
        pad = max_time_steps - len(text)
        # Characters occupy the trailing rows...
        for row, char in enumerate(text, start=pad):
            encoded[row, char_to_index[char]] = 1
        # ...and every leading row is padded with the digit zero.
        for row in range(pad):
            encoded[row, char_to_index['0']] = 1
        return encoded

    return right_aligned_one_hot(example), right_aligned_one_hot(label)
e, l = generate_data()
print(e, l)
x, y = vectorize_example(e, l)
print(x)
print(y)

441+436 877
[[0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]


In [7]:
def devectorize_example(example):
    """Decode rows of scores back into text.

    Each row of ``example`` becomes the vocabulary character at the position
    of that row's largest value, so this works for one-hot arrays as well as
    softmax outputs. Padding zeros are kept in the returned string.
    """
    return ''.join(index_to_char[np.argmax(row)] for row in example)

devectorize_example(x)

'441+436'

In [8]:
def _report_val_accuracy(epoch, logs):
    """Print the epoch's validation accuracy on a single running line."""
    print(format(logs['val_accuracy'], '.2f'), end=' _ ')


# Compact progress log: one validation-accuracy figure per epoch.
l_cb = LambdaCallback(on_epoch_end=_report_val_accuracy)

# Stop training once validation loss has not improved for 10 epochs.
es_cb = EarlyStopping(monitor='val_loss', patience=10)


In [9]:
def create_dataset(num_examples=2000):
    """Generate and encode ``num_examples`` random arithmetic examples.

    Returns:
        ``(inputs, targets)``, each of shape
        ``(num_examples, max_time_steps, num_features)``.
    """
    shape = (num_examples, max_time_steps, num_features)
    inputs, targets = np.zeros(shape), np.zeros(shape)

    for row in range(num_examples):
        inputs[row], targets[row] = vectorize_example(*generate_data())

    return inputs, targets

x_train, y_train = create_dataset(20000)
print(x_train.shape, y_train.shape)

(20000, 7, 12) (20000, 7, 12)


In [10]:
# Train with a generous epoch budget; early stopping (es_cb) ends the run and
# l_cb prints the per-epoch validation accuracy in place of Keras' own log.
model.fit(
    x_train,
    y_train,
    epochs=500,
    batch_size=256,
    validation_split=0.2,  # hold out the last 20% of the data for validation
    verbose=0,
    callbacks=[es_cb, l_cb],
)



0.59 _ 0.61 _ 0.62 _ 0.63 _ 0.64 _ 0.67 _ 0.67 _ 0.68 _ 0.68 _ 0.69 _ 0.70 _ 0.68 _ 0.71 _ 0.71 _ 0.73 _ 0.73 _ 0.72 _ 0.74 _ 0.74 _ 0.75 _ 0.75 _ 0.76 _ 0.77 _ 0.77 _ 0.78 _ 0.78 _ 0.79 _ 0.80 _ 0.80 _ 0.81 _ 0.81 _ 0.83 _ 0.83 _ 0.84 _ 0.83 _ 0.84 _ 0.86 _ 0.86 _ 0.86 _ 0.85 _ 0.86 _ 0.86 _ 0.88 _ 0.89 _ 0.89 _ 0.88 _ 0.88 _ 0.88 _ 0.90 _ 0.90 _ 0.90 _ 0.91 _ 0.91 _ 0.90 _ 0.90 _ 0.91 _ 0.91 _ 0.91 _ 0.91 _ 0.92 _ 0.91 _ 0.92 _ 0.92 _ 0.93 _ 0.92 _ 0.93 _ 0.93 _ 0.92 _ 0.92 _ 0.92 _ 0.93 _ 0.93 _ 0.93 _ 0.94 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.92 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.93 _ 0.93 _ 0.94 _ 0.95 _ 0.94 _ 0.92 _ 0.95 _ 0.94 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.94 _ 0.94 _ 0.95 _ 0.94 _ 0.95 _ 0.94 _ 0.94 _ 0.95 _ 0.95 _ 0.94 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.94 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.94 _ 0.94 _ 0.93 _ 0.95 _ 0.95 _ 0.96 _ 0.95 _ 0.96 _ 0.95 _ 0.95 _ 0.94 _ 0.95 _ 0.94 _ 0.94 _ 0.95 _ 0.95 _ 

<keras.callbacks.History at 0x1488a2520>

In [11]:
# Score the model on freshly generated examples.
x_test, y_test = create_dataset(1000)

# results is [loss, accuracy]; the accuracy is averaged over every output
# position (padding included), not over whole answers.
results = model.evaluate(x_test, y_test, batch_size=128)
print(f"Test loss: {results[0]}", f"Test acc: {results[1]}", sep="\n")

Test loss: 0.14828947186470032
Test acc: 0.9481428861618042


In [12]:
def del_leading_zeros(s):
    """Strip the zero padding from the front of a decoded answer.

    A non-empty string made up only of zeros represents the number zero, so it
    is returned as ``'0'`` rather than being stripped down to nothing. An
    empty string is returned unchanged.
    """
    stripped = s.lstrip('0')
    if s and not stripped:
        return '0'
    return stripped

In [13]:
# Draw a fresh evaluation set and list every answer the model gets wrong.
x_test, y_test = create_dataset(1000)
preds = model.predict(x_test)

print('EXPECTED | GOT')
for target, pred in zip(y_test, preds):
    # Distinct names here so the notebook-level `y` array is not overwritten.
    expected = devectorize_example(target)
    predicted = devectorize_example(pred)
    if expected != predicted:
        print(del_leading_zeros(expected) + '\t' + del_leading_zeros(predicted))


EXPECTED | GOT
-917	-927
821	820
	-0
-229	-239
654	653
94	194
-136	-236
612	602
331	332
662	652
461	463
538	638
852	851
213	123
-966	-955
-195	-295
904	825
525	535
802	801
780	770
463	462
1103	334
797	886
626	625
1202	1102
209	109
66	65
	-1
975	963
-147	-148
-54	-44
390	391
1153	1163
348	358
-747	-737
-665	-664
356	346
-399	-400
-110	-111
-507	-517
1766	1856
691	773
1103	1113
983	982
-249	-349
195	294
-868	-858
-285	-385
-38	-39
889	899
740	730
235	244
690	680
180	170
443	452
-649	-659
900	990
127	137
158	57
259	269
-826	-827
1426	1326
593	693
802	702
620	610
186	176
484	494
36	35
121	22
1926	1936
-48	-58
1595	1695
754	753
1401	1301
1103	1003
29	39
641	631
86	76
15	-5
1172	1162
530	541
250	240
-584	-684
887	896
-781	-881
109	110
350	351
-19	-119
-330	-430
1099	1109
1485	1385
-912	-922
883	873
538	529
834	-152
-297	-397
1339	1349
-289	-299
-32	--32
1078	1077
609	619
451	441
1864	1863
304	303
1526	1525
766	756
-659	-669
690	790
643	741
1472	1482
649	659
-69	-60
1206	1106
20	-19
592	692
-

In [28]:
def calc_example(new_model, example:str):
    """Ask ``new_model`` to evaluate one expression given as text.

    Args:
        new_model: object whose ``predict`` accepts a batch of shape
            ``(1, max_time_steps, num_features)``.
        example: expression such as ``'100-15'``.

    Returns:
        The decoded prediction, still zero-padded to ``max_time_steps``
        characters.
    """
    # Only the encoded input is needed; the empty label is a placeholder.
    encoded, _ = vectorize_example(example, '')

    # predict() expects a batch, so wrap the single example in one.
    batch = np.zeros((1, max_time_steps, num_features))
    batch[0] = encoded

    return devectorize_example(new_model.predict(batch)[0])

calc_example(model, '100-15')






'0000086'

In [36]:
# Create the output directory from Python rather than with a shell command,
# so the cell also works where `mkdir -p` is unavailable; an existing
# directory is not an error.
os.makedirs('saved_model', exist_ok=True)
tf.keras.models.save_model(
    model,
    './saved_model/ms_model',
)


INFO:tensorflow:Assets written to: saved_model/ms_model/assets


In [34]:
# Reload the model from the path used by the save cell to check that it
# round-trips through disk.
# NOTE(review): the execution counts show this cell was last run before the
# save cell; on a fresh kernel run the cells top to bottom.
new_model = tf.keras.models.load_model('./saved_model/ms_model')


In [35]:
# Same query as for the in-memory model, now answered by the reloaded copy.
calc_example(new_model, '100-15')





'0000086'

In [None]:
"""
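Test accuracies (%) for each set of operators enabled in `ops`.
These are per-character accuracies over the zero-padded output sequence,
so the share of answers that are correct in every position is lower.
+: ~97
+, -: ~95
+, -, *: ~76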
"""