In [1]:
from keras.models import Sequential
from keras.layers import LSTM, RepeatVector, Dense, Activation
from keras.layers.wrappers import TimeDistributed, Bidirectional
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
import numpy as np
from six.moves import range
import matplotlib.pyplot as plt

Using TensorFlow backend.


# Parameters Config

In [2]:
class colors:
    """ANSI escape sequences used to colour the per-sample result marks."""
    # `ok` / `fail` switch the terminal colour on; `close` resets it afterwards.
    ok, fail, close = '\033[92m', '\033[91m', '\033[0m'

In [3]:
# Number of unique addition questions to generate, and the share of them
# that goes to the training portion (the remainder is held out for testing).
TRAINING_SIZE = 100000
SPLIT_RATIO = 0.8

# Each operand has at most DIGITS digits; REVERSE flips the padded query string.
DIGITS = 3
REVERSE = False

# Fixed string widths: "<a>+<b>" is padded to MAXLEN, the sum to OUTPUTLEN.
MAXLEN = 2 * DIGITS + 1
OUTPUTLEN = DIGITS + 1

# Alphabet for the one-hot encoding: digits, the plus sign and the padding space.
chars = '0123456789+ '

# BATCH_SIZE is passed to model.fit; HIDDEN_SIZE and LAYERS are not referenced
# by the cells visible in this notebook.
HIDDEN_SIZE = 128
BATCH_SIZE = 128
LAYERS = 1

In [4]:
class CharacterTable(object):
    """Two-way mapping between characters and one-hot rows.

    The alphabet is the sorted set of distinct characters given at construction;
    a character's position in that sorted list is its class index.
    """

    def __init__(self, chars):
        """Build the lookup tables from an iterable of characters."""
        self.chars = sorted(set(chars))
        self.char_indices = {ch: pos for pos, ch in enumerate(self.chars)}
        self.indices_char = {pos: ch for pos, ch in enumerate(self.chars)}

    def encode(self, C, num_rows):
        """One-hot encode the string `C` into a (num_rows, alphabet size) array.

        Rows beyond len(C) stay all-zero.
        """
        onehot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            onehot[row, self.char_indices[ch]] = 1
        return onehot

    def decode(self, x, calc_argmax=True):
        """Turn `x` back into a string.

        With `calc_argmax` true, `x` holds per-position scores and the highest
        scoring class is taken along the last axis; otherwise `x` is already a
        sequence of class indices.
        """
        class_ids = x.argmax(axis=-1) if calc_argmax else x
        return "".join(self.indices_char[k] for k in class_ids)

In [5]:
# Single encoder/decoder over the alphabet in `chars`, used for both questions and answers.
ctable = CharacterTable(chars)

# Data Generation

In [6]:
def _random_operand():
    """Return a random non-negative integer written with 1 to DIGITS random digits.

    Each digit is drawn independently from 0-9, so leading zeros can make the
    resulting value shorter than the number of digits drawn.
    """
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789')) for _ in range(n_digits)))


# Operands range over 0 .. 10**DIGITS - 1 and (a, b) / (b, a) are treated as the
# same question, so only this many distinct questions exist. Requesting more
# would make the rejection loop below spin forever, so fail loudly instead.
_n_operands = 10 ** DIGITS
_max_unique = _n_operands * (_n_operands + 1) // 2
if TRAINING_SIZE > _max_unique:
    raise ValueError(
        'TRAINING_SIZE={} exceeds the {} distinct questions available with DIGITS={}'
        .format(TRAINING_SIZE, _max_unique, DIGITS)
    )

questions = []
expected = []
seen = set()
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    a, b = _random_operand(), _random_operand()
    # Order-insensitive key: skip "b+a" when "a+b" has already been generated.
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Right-pad the question and the answer with spaces to their fixed widths.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a + b)
    ans += ' ' * (OUTPUTLEN - len(ans))
    if REVERSE:
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 100000


In [7]:
# Peek at the first few samples; both strings are space-padded to their fixed widths.
print(questions[:5], expected[:5])

['1+56   ', '275+12 ', '3+1    ', '831+1  ', '6+4    '] ['57  ', '287 ', '4   ', '832 ', '10  ']


# Processing

In [8]:
print('Vectorization...')
# One-hot tensors indexed as (sample, character position, alphabet index).
# The builtin `bool` is used instead of the `np.bool` alias, which NumPy
# deprecated in 1.20 and removed in 1.24; the resulting dtype is the same.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), OUTPUTLEN, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, OUTPUTLEN)

Vectorization...


In [9]:
# Shuffle questions and answers with one shared permutation so pairs stay aligned.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# train_test_split: the leading SPLIT_RATIO share is kept for training,
# everything after it is the test set.
idx = int(SPLIT_RATIO * TRAINING_SIZE)
train_x, test_x = x[:idx], x[idx:]
train_y, test_y = y[:idx], y[idx:]

# Carve the last tenth of the training portion off as validation data.
split_at = len(train_x) - len(train_x) // 10
x_train, x_val = train_x[:split_at], train_x[split_at:]
y_train, y_val = train_y[:split_at], train_y[split_at:]

# Report the shapes of each partition.
for title, inputs, labels in (
    ('Training Data:', x_train, y_train),
    ('Validation Data:', x_val, y_val),
    ('Testing Data:', test_x, test_y),
):
    print(title)
    print(inputs.shape)
    print(labels.shape)

Training Data:
(72000, 7, 12)
(72000, 4, 12)
Validation Data:
(8000, 7, 12)
(8000, 4, 12)
Testing Data:
(20000, 7, 12)
(20000, 4, 12)


In [10]:
# Dump the first three one-hot encoded training questions and their labels.
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False  True False False False False False False
   False]
  [False False False False False False False False False False  True
   False]
  [False False False False False False False False False False False
    True]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False False False  True False
   False]
  [False False False False False  True False False False False False
   False]
  [False False False False  True False False False False False False
   False]]

 [[False False False False False  True False False False False False
   False]
  [False False False False False  True False False False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False  True False False False
   False]
  [False False False False False  True False False False False False
   False]
  [False False False False False False Fal

# Build Model

In [11]:
print('Build model...')


def build_model(hidden_units=20, learning_rate=0.01):
    """Build and compile the sequence-to-sequence addition model.

    The encoder reads the one-hot question of shape (MAXLEN, len(chars)) and
    produces a single vector; that vector is repeated OUTPUTLEN times and fed
    to the decoder, which emits a softmax over the alphabet for every output
    position.

    Args:
        hidden_units: Units per direction in each bidirectional LSTM. The
            default of 20 reproduces the original fixed architecture.
        learning_rate: Step size passed to the Adam optimizer.

    Returns:
        A compiled Keras `Sequential` model.
    """
    input_shape = (MAXLEN, len(chars))

    model = Sequential()

    # Encoder: only the final state is returned (no return_sequences).
    model.add(Bidirectional(LSTM(hidden_units), input_shape=input_shape))
    model.add(BatchNormalization())

    # RepeatVector copies the encoder output once per output position.
    model.add(RepeatVector(OUTPUTLEN))

    # Decoder: return_sequences=True keeps one output per position.
    model.add(Bidirectional(LSTM(hidden_units, return_sequences=True)))
    model.add(BatchNormalization())

    # Per-position projection onto the alphabet followed by softmax.
    model.add(TimeDistributed(Dense(len(chars))))
    model.add(Activation('softmax'))

    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(lr=learning_rate),
        metrics=['accuracy'],
    )

    return model


model = build_model()
# summary() prints the table itself and returns None, so wrapping it in print()
# would emit a stray "None" line after the table.
model.summary()

Build model...
Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, 40)                5280      
_________________________________________________________________
batch_normalization_1 (Batch (None, 40)                160       
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 40)             0         
_________________________________________________________________
bidirectional_2 (Bidirection (None, 4, 40)             9760      
_________________________________________________________________
batch_normalization_2 (Batch (None, 4, 40)             160       
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 12)             492       
_______________________________________________________

# Training

In [12]:
for iteration in range(50):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    # Train one epoch per outer iteration so sample predictions can be printed
    # in between. `history` is overwritten each time and therefore only holds
    # the most recent epoch.
    history = model.fit(x_train, y_train,
                        batch_size=BATCH_SIZE,
                        epochs=1,
                        validation_data=(x_val, y_val))
    # Spot-check ten randomly chosen validation samples.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array to keep the leading batch dimension.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # Sequential.predict_classes is deprecated and was removed from newer
        # Keras releases; for a softmax output it is the argmax over the class axis.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 72000 samples, validate on 8000 samples
Epoch 1/1
Q 49+418  T 467  [91m☒[0m 426 
Q 941+221 T 1162 [91m☒[0m 1234
Q 1+265   T 266  [91m☒[0m 260 
Q 138+485 T 623  [92m☑[0m 623 
Q 62+129  T 191  [91m☒[0m 290 
Q 33+787  T 820  [91m☒[0m 811 
Q 446+638 T 1084 [91m☒[0m 1010
Q 322+98  T 420  [91m☒[0m 421 
Q 413+49  T 462  [91m☒[0m 470 
Q 155+342 T 497  [91m☒[0m 589 

--------------------------------------------------
Iteration 1
Train on 72000 samples, validate on 8000 samples
Epoch 1/1
Q 315+19  T 334  [91m☒[0m 344 
Q 270+40  T 310  [92m☑[0m 310 
Q 72+316  T 388  [91m☒[0m 398 
Q 26+784  T 810  [92m☑[0m 810 
Q 681+767 T 1448 [91m☒[0m 1549
Q 194+643 T 837  [91m☒[0m 846 
Q 61+636  T 697  [91m☒[0m 707 
Q 135+267 T 402  [91m☒[0m 412 
Q 52+101  T 153  [92m☑[0m 153 
Q

Q 43+60   T 103  [92m☑[0m 103 
Q 760+951 T 1711 [92m☑[0m 1711
Q 30+699  T 729  [91m☒[0m 739 
Q 48+658  T 706  [92m☑[0m 706 
Q 464+71  T 535  [92m☑[0m 535 
Q 40+825  T 865  [92m☑[0m 865 
Q 90+737  T 827  [92m☑[0m 827 
Q 31+92   T 123  [92m☑[0m 123 
Q 239+8   T 247  [92m☑[0m 247 
Q 64+653  T 717  [92m☑[0m 717 

--------------------------------------------------
Iteration 14
Train on 72000 samples, validate on 8000 samples
Epoch 1/1
Q 695+943 T 1638 [92m☑[0m 1638
Q 703+583 T 1286 [92m☑[0m 1286
Q 1+54    T 55   [92m☑[0m 55  
Q 173+81  T 254  [92m☑[0m 254 
Q 545+786 T 1331 [92m☑[0m 1331
Q 741+401 T 1142 [92m☑[0m 1142
Q 69+168  T 237  [92m☑[0m 237 
Q 439+819 T 1258 [92m☑[0m 1258
Q 570+48  T 618  [92m☑[0m 618 
Q 284+71  T 355  [92m☑[0m 355 

--------------------------------------------------
Iteration 15
Train on 72000 samples, validate on 8000 samples
Epoch 1/1
Q 386+968 T 1354 [92m☑[0m 1354
Q 571+70  T 641  [91m☒[0m 651 
Q 18+43   T 61   [92m☑[0

Q 36+692  T 728  [92m☑[0m 728 
Q 382+968 T 1350 [92m☑[0m 1350
Q 827+128 T 955  [92m☑[0m 955 
Q 127+912 T 1039 [92m☑[0m 1039
Q 295+20  T 315  [92m☑[0m 315 
Q 428+73  T 501  [92m☑[0m 501 
Q 289+389 T 678  [92m☑[0m 678 
Q 438+37  T 475  [92m☑[0m 475 
Q 9+537   T 546  [92m☑[0m 546 
Q 307+458 T 765  [92m☑[0m 765 

--------------------------------------------------
Iteration 28
Train on 72000 samples, validate on 8000 samples
Epoch 1/1
Q 387+244 T 631  [92m☑[0m 631 
Q 808+25  T 833  [92m☑[0m 833 
Q 47+82   T 129  [92m☑[0m 129 
Q 838+11  T 849  [92m☑[0m 849 
Q 34+868  T 902  [92m☑[0m 902 
Q 335+871 T 1206 [92m☑[0m 1206
Q 379+48  T 427  [92m☑[0m 427 
Q 163+37  T 200  [92m☑[0m 200 
Q 8+867   T 875  [92m☑[0m 875 
Q 757+11  T 768  [92m☑[0m 768 

--------------------------------------------------
Iteration 29
Train on 72000 samples, validate on 8000 samples
Epoch 1/1
Q 122+399 T 521  [92m☑[0m 521 
Q 716+42  T 758  [92m☑[0m 758 
Q 748+90  T 838  [92m☑[0

Q 195+619 T 814  [92m☑[0m 814 
Q 76+943  T 1019 [92m☑[0m 1019
Q 566+56  T 622  [92m☑[0m 622 
Q 43+198  T 241  [92m☑[0m 241 
Q 45+179  T 224  [92m☑[0m 224 
Q 643+95  T 738  [92m☑[0m 738 
Q 823+925 T 1748 [92m☑[0m 1748
Q 50+598  T 648  [92m☑[0m 648 
Q 901+767 T 1668 [92m☑[0m 1668
Q 575+766 T 1341 [92m☑[0m 1341

--------------------------------------------------
Iteration 42
Train on 72000 samples, validate on 8000 samples
Epoch 1/1
Q 60+528  T 588  [92m☑[0m 588 
Q 28+320  T 348  [92m☑[0m 348 
Q 69+283  T 352  [92m☑[0m 352 
Q 703+49  T 752  [92m☑[0m 752 
Q 304+779 T 1083 [92m☑[0m 1083
Q 266+0   T 266  [92m☑[0m 266 
Q 819+290 T 1109 [92m☑[0m 1109
Q 588+319 T 907  [92m☑[0m 907 
Q 733+911 T 1644 [92m☑[0m 1644
Q 648+43  T 691  [92m☑[0m 691 

--------------------------------------------------
Iteration 43
Train on 72000 samples, validate on 8000 samples
Epoch 1/1
Q 92+445  T 537  [92m☑[0m 537 
Q 7+973   T 980  [92m☑[0m 980 
Q 426+41  T 467  [92m☑[0

# Testing

In [13]:
print("MSG : Prediction")
#####################################################
## Test and evaluate the trained model on held-out data.
## Example questions and answers, as space-padded strings:
## ex. test_x = ["555+175", "860+7  ", "340+29 "]
## ex. test_y = ["730 ", "867 ", "369 "]
#####################################################
    

MSG : Prediction


In [14]:
# Predicted character index for every output position of every test sample.
# Sequential.predict_classes is deprecated and was removed from newer Keras
# releases; for a softmax output it is the argmax over the class axis.
pred = model.predict(test_x).argmax(axis=-1)

In [15]:
# Decode each row of class indices back into its answer string.
# A plain comprehension avoids np.apply_along_axis, which sizes the output
# string dtype from the first result only and raises on an empty input.
prediction = np.array([ctable.decode(row, calc_argmax=False) for row in pred])

In [16]:
# Decode the one-hot test labels with the same CharacterTable.decode used for
# the predictions (it takes the argmax over the class axis by default),
# instead of re-implementing the index-to-character join inline.
answer = np.array([ctable.decode(row) for row in test_y])

In [17]:
# Exact-match accuracy: the fraction of test samples whose whole decoded answer is correct.
np.mean(prediction == answer)

0.98925