In [1]:
from keras.models import Sequential
from keras.layers import LSTM, RepeatVector, Dense, Activation
from keras.layers.wrappers import TimeDistributed, Bidirectional
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
import numpy as np
from six.moves import range
import matplotlib.pyplot as plt

Using TensorFlow backend.


# Parameters Config

In [2]:
class colors:
    """ANSI terminal escape codes used to tint the per-sample result marks."""
    # In order: bright green (correct), bright red (wrong), attribute reset.
    ok, fail, close = '\033[92m', '\033[91m', '\033[0m'

In [3]:
# --- Dataset ---------------------------------------------------------------
TRAINING_SIZE = 100000        # number of unique "a-b" problems to generate
SPLIT_RATIO = 0.8             # fraction that fixes the train/test boundary
DIGITS = 3                    # maximum number of digits per operand
REVERSE = False               # if True, feed the query string reversed

# --- Sequence geometry -----------------------------------------------------
MAXLEN = DIGITS + 1 + DIGITS  # "<a>-<b>": two operands plus the minus sign
OUTPUTLEN = DIGITS            # a - b with a >= b never needs more digits than a
chars = '0123456789- '        # vocabulary: digits, minus sign, padding space

# --- Model / optimisation --------------------------------------------------
# NOTE(review): HIDDEN_SIZE and LAYERS are not referenced by the cells visible
# in this notebook excerpt; confirm whether they are still needed.
HIDDEN_SIZE = 128
BATCH_SIZE = 128
LAYERS = 1

# --- Reproducibility -------------------------------------------------------
# Data generation, shuffling and the per-epoch preview all draw from NumPy's
# global RNG; seeding it makes "Restart & Run All" produce the same dataset.
SEED = 42
np.random.seed(SEED)

In [4]:
class CharacterTable(object):
    """Bidirectional mapping between characters and one-hot row vectors.

    The vocabulary is the sorted set of distinct characters passed to the
    constructor; a character's column index is its position in that order.
    """

    def __init__(self, chars):
        """Build the sorted vocabulary and both lookup dictionaries."""
        self.chars = sorted(set(chars))
        self.char_indices = {ch: pos for pos, ch in enumerate(self.chars)}
        self.indices_char = {pos: ch for pos, ch in enumerate(self.chars)}

    def encode(self, C, num_rows):
        """One-hot encode string ``C`` into a ``(num_rows, vocab_size)`` array.

        Rows beyond ``len(C)`` are left as all zeros.
        """
        vocab_size = len(self.chars)
        onehot = np.zeros((num_rows, vocab_size))
        for row, ch in enumerate(C):
            col = self.char_indices[ch]
            onehot[row, col] = 1
        return onehot

    def decode(self, x, calc_argmax=True):
        """Turn one-hot/probability rows (or class indices) back into a string.

        With ``calc_argmax=True`` the last axis of ``x`` is arg-maxed first;
        otherwise ``x`` is taken to already contain vocabulary indices.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        symbols = [self.indices_char[idx] for idx in indices]
        return "".join(symbols)

In [5]:
# Shared encoder/decoder over the notebook's character vocabulary.
ctable = CharacterTable(chars=chars)

# Data Generation

In [6]:
def _random_operand(max_digits):
    """Return a random non-negative int built from 1..max_digits random digits.

    Leading zeros collapse in ``int()``, so e.g. the digit string '007' yields 7.
    """
    n_digits = np.random.randint(1, max_digits + 1)
    return int(''.join(np.random.choice(list('0123456789')) for _ in range(n_digits)))


# Operands range over 0 .. 10**DIGITS - 1 and each unordered pair is used at
# most once, so there are only n*(n+1)/2 distinct problems. Asking for more
# would make the loop below spin forever.
_n_values = 10 ** DIGITS
_max_unique = _n_values * (_n_values + 1) // 2
if TRAINING_SIZE > _max_unique:
    raise ValueError(
        'TRAINING_SIZE={} exceeds the {} unique problems available with DIGITS={}'.format(
            TRAINING_SIZE, _max_unique, DIGITS))

questions = []   # space-padded query strings of length MAXLEN, e.g. '522-5  '
expected = []    # space-padded answers of length OUTPUTLEN, e.g. '517'
seen = set()     # (smaller, larger) operand pairs already emitted
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    a, b = _random_operand(DIGITS), _random_operand(DIGITS)
    # Keep the larger operand first so the difference is never negative.
    if a < b:
        a, b = b, a
    key = (b, a)  # a >= b here, so this equals tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    q = '{}-{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a - b)
    ans += ' ' * (OUTPUTLEN - len(ans))
    if REVERSE:
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
# The task is subtraction; the old message wrongly said "addition".
print('Total subtraction questions:', len(questions))

Generating data...
Total addition questions: 80000


In [7]:
# Peek at the first five padded queries alongside their padded answers.
head_questions, head_answers = questions[:5], expected[:5]
print(head_questions, head_answers)

['28-2   ', '522-5  ', '82-4   ', '46-0   ', '651-72 '] ['26 ', '517', '78 ', '46 ', '579']


# Processing

In [8]:
print('Vectorization...')
# One-hot tensors: (sample, time step, vocabulary index).
# The `np.bool` alias was deprecated in NumPy 1.20 and removed in 1.24;
# the builtin `bool` yields the same dtype on every NumPy version.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), OUTPUTLEN, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, OUTPUTLEN)

Vectorization...


In [9]:
# Shuffle inputs and labels with one shared permutation so pairs stay aligned.
assert len(x) == len(y), 'inputs and labels must have the same number of samples'
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# Derive the boundary from the data actually present rather than from
# TRAINING_SIZE: if the constant and the generated dataset ever disagree
# (e.g. the config cell was edited without regenerating), the split would
# otherwise silently stop honouring SPLIT_RATIO.
idx = int(SPLIT_RATIO * len(x))

# train_test_split
train_x = x[:idx]
train_y = y[:idx]
test_x = x[idx:]
test_y = y[idx:]

# Hold out the last 10% of the training portion for validation.
split_at = len(train_x) - len(train_x) // 10
(x_train, x_val) = train_x[:split_at], train_x[split_at:]
(y_train, y_val) = train_y[:split_at], train_y[split_at:]

assert len(x_train) + len(x_val) + len(test_x) == len(x)

for title, features, labels in (
    ('Training Data:', x_train, y_train),
    ('Validation Data:', x_val, y_val),
    ('Testing Data:', test_x, test_y),
):
    print(title)
    print(features.shape)
    print(labels.shape)

Training Data:
(36000, 7, 12)
(36000, 3, 12)
Validation Data:
(4000, 7, 12)
(4000, 3, 12)
Testing Data:
(40000, 7, 12)
(40000, 3, 12)


In [10]:
# Dump the first three one-hot encoded inputs followed by their labels.
print(
    "input: ", x_train[:3],
    '\n\n',
    "label: ", y_train[:3],
)

input:  [[[False False False False False False  True False False False False
   False]
  [False False False False  True False False False False False False
   False]
  [False False False  True False False False False False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False  True False False False False False False
   False]
  [False False False False False False False False  True False False
   False]
  [ True False False False False False False False False False False
   False]]

 [[False False False False False False False False False False  True
   False]
  [False False  True False False False False False False False False
   False]
  [False False False  True False False False False False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False  True False False False False False
   False]
  [False False False False  True False Fal

# Build Model

In [11]:
print('Build model...')

def build_model(lstm_units=20, learning_rate=0.01):
    """Build and compile the encoder/decoder network for the subtraction task.

    Architecture (Keras ``Sequential``):
      1. Bidirectional LSTM encoder reading a ``(MAXLEN, len(chars))`` one-hot
         sequence and emitting a single vector, then batch normalisation.
      2. ``RepeatVector(OUTPUTLEN)`` to present that vector once per output step.
      3. Bidirectional LSTM decoder returning the full sequence, then batch
         normalisation.
      4. Time-distributed dense layer with softmax over the vocabulary.

    Args:
        lstm_units: Units per LSTM direction, for both encoder and decoder.
            The default (20) reproduces the original hard-coded size.
        learning_rate: Adam step size. The default (0.01) reproduces the
            original hard-coded value.

    Returns:
        The compiled model (categorical cross-entropy loss, accuracy metric).
    """
    input_shape = (MAXLEN, len(chars))

    model = Sequential()

    # Encoder:
    model.add(Bidirectional(LSTM(lstm_units), input_shape=input_shape))
    model.add(BatchNormalization())

    # The RepeatVector-layer repeats the input n times
    model.add(RepeatVector(OUTPUTLEN))

    # Decoder:
    model.add(Bidirectional(LSTM(lstm_units, return_sequences=True)))
    model.add(BatchNormalization())

    model.add(TimeDistributed(Dense(len(chars))))
    model.add(Activation('softmax'))

    model.compile(
        loss='categorical_crossentropy',
        # `lr` is the argument name understood by the Keras version this
        # notebook was run with (see the imports from keras.layers.wrappers).
        optimizer=Adam(lr=learning_rate),
        metrics=['accuracy'],
    )

    return model

model = build_model()
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

Build model...
Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, 40)                5280      
_________________________________________________________________
batch_normalization_1 (Batch (None, 40)                160       
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 3, 40)             0         
_________________________________________________________________
bidirectional_2 (Bidirection (None, 3, 40)             9760      
_________________________________________________________________
batch_normalization_2 (Batch (None, 3, 40)             160       
_________________________________________________________________
time_distributed_1 (TimeDist (None, 3, 12)             492       
_______________________________________________________

# Training

In [12]:
N_ITERATIONS = 100   # each iteration fits for exactly one epoch
N_PREVIEW = 10       # random validation samples shown after every epoch

for iteration in range(N_ITERATIONS):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    # NOTE: `history` is rebound on every pass, so once the loop finishes it
    # only holds the metrics of the final epoch.
    history = model.fit(x_train, y_train,
                        batch_size=BATCH_SIZE,
                        epochs=1,
                        validation_data=(x_val, y_val))
    # Spot-check a few validation samples to visualise progress.
    for sample_no in range(N_PREVIEW):
        ind = np.random.randint(0, len(x_val))
        # Index with a length-1 array to keep the leading batch dimension.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # `Sequential.predict_classes` was removed from newer Keras releases;
        # for a softmax output it is exactly argmax over the last axis.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 567-526 T 41  [91m☒[0m 119
Q 992-31  T 961 [91m☒[0m 997
Q 464-19  T 445 [91m☒[0m 435
Q 166-78  T 88  [91m☒[0m 90 
Q 693-148 T 545 [91m☒[0m 557
Q 396-72  T 324 [91m☒[0m 300
Q 931-678 T 253 [91m☒[0m 333
Q 195-95  T 100 [92m☑[0m 100
Q 612-143 T 469 [91m☒[0m 577
Q 610-119 T 491 [91m☒[0m 409

--------------------------------------------------
Iteration 1
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 766-4   T 762 [91m☒[0m 758
Q 871-60  T 811 [91m☒[0m 891
Q 712-214 T 498 [91m☒[0m 478
Q 781-91  T 690 [91m☒[0m 688
Q 89-16   T 73  [91m☒[0m 71 
Q 757-128 T 629 [91m☒[0m 619
Q 425-63  T 362 [91m☒[0m 356
Q 590-396 T 194 [91m☒[0m 122
Q 876-58  T 818 [91m☒[0m 808
Q 744-613 T 131 [91m☒[0m 111

-------

Q 427-92  T 335 [92m☑[0m 335
Q 865-153 T 712 [91m☒[0m 702
Q 658-88  T 570 [91m☒[0m 560
Q 314-25  T 289 [92m☑[0m 289
Q 484-65  T 419 [92m☑[0m 419
Q 851-99  T 752 [92m☑[0m 752
Q 949-927 T 22  [92m☑[0m 22 
Q 124-72  T 52  [92m☑[0m 52 
Q 693-38  T 655 [92m☑[0m 655
Q 79-71   T 8   [92m☑[0m 8  

--------------------------------------------------
Iteration 15
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 171-54  T 117 [92m☑[0m 117
Q 51-22   T 29  [92m☑[0m 29 
Q 999-2   T 997 [92m☑[0m 997
Q 708-69  T 639 [92m☑[0m 639
Q 142-39  T 103 [92m☑[0m 103
Q 231-60  T 171 [92m☑[0m 171
Q 342-19  T 323 [92m☑[0m 323
Q 465-11  T 454 [92m☑[0m 454
Q 818-218 T 600 [92m☑[0m 600
Q 849-55  T 794 [92m☑[0m 794

--------------------------------------------------
Iteration 16
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 605-35  T 570 [92m☑[0m 570
Q 728-1   T 727 [92m☑[0m 727
Q 119-12  T 107 [92m☑[0m 107
Q 332-22  T 310 [92m☑[0m 310
Q 790-41

Q 404-97  T 307 [92m☑[0m 307
Q 95-57   T 38  [92m☑[0m 38 
Q 728-3   T 725 [92m☑[0m 725
Q 675-49  T 626 [92m☑[0m 626
Q 345-80  T 265 [92m☑[0m 265
Q 804-12  T 792 [92m☑[0m 792
Q 932-11  T 921 [92m☑[0m 921
Q 567-67  T 500 [92m☑[0m 500
Q 794-56  T 738 [92m☑[0m 738
Q 593-42  T 551 [92m☑[0m 551

--------------------------------------------------
Iteration 30
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 618-61  T 557 [92m☑[0m 557
Q 585-93  T 492 [92m☑[0m 492
Q 822-68  T 754 [92m☑[0m 754
Q 145-137 T 8   [92m☑[0m 8  
Q 956-50  T 906 [92m☑[0m 906
Q 576-44  T 532 [92m☑[0m 532
Q 326-4   T 322 [92m☑[0m 322
Q 528-3   T 525 [92m☑[0m 525
Q 763-94  T 669 [92m☑[0m 669
Q 755-389 T 366 [92m☑[0m 366

--------------------------------------------------
Iteration 31
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 43-23   T 20  [92m☑[0m 20 
Q 698-10  T 688 [92m☑[0m 688
Q 960-23  T 937 [92m☑[0m 937
Q 439-71  T 368 [92m☑[0m 368
Q 282-0 

Q 535-31  T 504 [92m☑[0m 504
Q 732-551 T 181 [92m☑[0m 181
Q 922-91  T 831 [92m☑[0m 831
Q 798-307 T 491 [92m☑[0m 491
Q 25-4    T 21  [92m☑[0m 21 
Q 584-10  T 574 [92m☑[0m 574
Q 168-6   T 162 [92m☑[0m 162
Q 899-439 T 460 [92m☑[0m 460
Q 498-53  T 445 [92m☑[0m 445
Q 917-584 T 333 [92m☑[0m 333

--------------------------------------------------
Iteration 45
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 207-63  T 144 [92m☑[0m 144
Q 595-0   T 595 [92m☑[0m 595
Q 375-53  T 322 [92m☑[0m 322
Q 599-95  T 504 [92m☑[0m 504
Q 648-15  T 633 [92m☑[0m 633
Q 746-18  T 728 [92m☑[0m 728
Q 353-45  T 308 [92m☑[0m 308
Q 793-453 T 340 [92m☑[0m 340
Q 970-63  T 907 [92m☑[0m 907
Q 606-73  T 533 [92m☑[0m 533

--------------------------------------------------
Iteration 46
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 886-1   T 885 [92m☑[0m 885
Q 563-6   T 557 [92m☑[0m 557
Q 613-421 T 192 [92m☑[0m 192
Q 951-16  T 935 [92m☑[0m 935
Q 639-16

Q 167-6   T 161 [92m☑[0m 161
Q 545-216 T 329 [92m☑[0m 329
Q 942-57  T 885 [92m☑[0m 885
Q 539-119 T 420 [92m☑[0m 420
Q 96-65   T 31  [92m☑[0m 31 
Q 561-15  T 546 [92m☑[0m 546
Q 209-123 T 86  [91m☒[0m 84 
Q 114-76  T 38  [92m☑[0m 38 
Q 495-8   T 487 [92m☑[0m 487
Q 596-283 T 313 [92m☑[0m 313

--------------------------------------------------
Iteration 60
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 492-5   T 487 [92m☑[0m 487
Q 462-75  T 387 [92m☑[0m 387
Q 265-47  T 218 [92m☑[0m 218
Q 776-481 T 295 [92m☑[0m 295
Q 808-70  T 738 [92m☑[0m 738
Q 965-857 T 108 [92m☑[0m 108
Q 753-48  T 705 [92m☑[0m 705
Q 758-4   T 754 [92m☑[0m 754
Q 924-12  T 912 [92m☑[0m 912
Q 837-8   T 829 [92m☑[0m 829

--------------------------------------------------
Iteration 61
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 937-5   T 932 [92m☑[0m 932
Q 503-272 T 231 [92m☑[0m 231
Q 334-277 T 57  [92m☑[0m 57 
Q 902-643 T 259 [92m☑[0m 259
Q 974-75

Q 868-52  T 816 [92m☑[0m 816
Q 630-485 T 145 [92m☑[0m 145
Q 809-391 T 418 [92m☑[0m 418
Q 946-60  T 886 [92m☑[0m 886
Q 597-70  T 527 [92m☑[0m 527
Q 827-251 T 576 [92m☑[0m 576
Q 218-119 T 99  [92m☑[0m 99 
Q 735-4   T 731 [92m☑[0m 731
Q 405-68  T 337 [92m☑[0m 337
Q 737-501 T 236 [92m☑[0m 236

--------------------------------------------------
Iteration 75
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 519-14  T 505 [92m☑[0m 505
Q 562-78  T 484 [92m☑[0m 484
Q 586-42  T 544 [92m☑[0m 544
Q 470-52  T 418 [92m☑[0m 418
Q 73-63   T 10  [92m☑[0m 10 
Q 547-51  T 496 [92m☑[0m 496
Q 468-278 T 190 [92m☑[0m 190
Q 667-360 T 307 [92m☑[0m 307
Q 546-101 T 445 [92m☑[0m 445
Q 809-695 T 114 [92m☑[0m 114

--------------------------------------------------
Iteration 76
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 714-99  T 615 [92m☑[0m 615
Q 145-137 T 8   [92m☑[0m 8  
Q 539-119 T 420 [92m☑[0m 420
Q 596-20  T 576 [92m☑[0m 576
Q 33-3  

Q 836-89  T 747 [92m☑[0m 747
Q 773-418 T 355 [92m☑[0m 355
Q 939-176 T 763 [92m☑[0m 763
Q 81-59   T 22  [92m☑[0m 22 
Q 40-17   T 23  [92m☑[0m 23 
Q 51-33   T 18  [92m☑[0m 18 
Q 807-436 T 371 [92m☑[0m 371
Q 681-11  T 670 [92m☑[0m 670
Q 665-35  T 630 [92m☑[0m 630
Q 902-742 T 160 [92m☑[0m 160

--------------------------------------------------
Iteration 90
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 368-211 T 157 [91m☒[0m 257
Q 244-77  T 167 [92m☑[0m 167
Q 208-83  T 125 [92m☑[0m 125
Q 808-177 T 631 [91m☒[0m 621
Q 661-582 T 79  [91m☒[0m 169
Q 359-5   T 354 [92m☑[0m 354
Q 319-55  T 264 [92m☑[0m 264
Q 858-774 T 84  [91m☒[0m 86 
Q 610-503 T 107 [92m☑[0m 107
Q 463-79  T 384 [92m☑[0m 384

--------------------------------------------------
Iteration 91
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 997-73  T 924 [92m☑[0m 924
Q 851-792 T 59  [92m☑[0m 59 
Q 386-41  T 345 [92m☑[0m 345
Q 643-175 T 468 [92m☑[0m 468
Q 543-77

# Testing

In [15]:
# Banner marking the start of the test-set evaluation.
print('MSG : Prediction')

MSG : Prediction


In [16]:
# Predicted vocabulary index for every output position of every test sample.
# `Sequential.predict_classes` was removed from newer Keras releases; for a
# softmax output it is exactly argmax over the last axis of `predict`.
pred = model.predict(test_x, verbose=0).argmax(axis=-1)

In [17]:
# Decode each row of predicted class indices into its answer string.
prediction = np.apply_along_axis(ctable.decode, axis=1, arr=pred, calc_argmax=False)

In [18]:
# Ground-truth strings: collapse the one-hot labels to indices, then decode
# each row through the shared character table.
true_indices = test_y.argmax(axis=-1)
answer = np.apply_along_axis(ctable.decode, 1, true_indices, False)

In [19]:
# Exact-match accuracy: share of test samples whose whole answer string is right.
np.mean(prediction == answer)

0.983825