# DSAI HW2: Adder & Subtractor Practice by LSTM     
## Chien, Hsin Yen
### RE6071088, Institute of Data Science  

Data generation: 80,000 addition questions and 80,000 subtraction questions  
Operand length: at most 3 digits  
LSTM:  
1 hidden layer  
Hidden layer size = 128  
Batch size = 128  
training epoch = 100  

# Import packages

In [1]:
from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop
import numpy as np
from six.moves import range
import random

Using TensorFlow backend.


# Parameters Config

In [2]:
class colors:
    """ANSI escape sequences used to colour the result marks printed per prediction."""

    # Prefix printed before the mark of a correct guess (ANSI code 92).
    ok = '\033[92m'
    # Prefix printed before the mark of a wrong guess (ANSI code 91).
    fail = '\033[91m'
    # Reset sequence printed after the mark to end the colouring.
    close = '\033[0m'

In [3]:
# --- Dataset ---------------------------------------------------------------
# Total number of generated questions (additions and subtractions together).
TRAINING_SIZE = 160000
# Maximum number of decimal digits per operand.
DIGITS = 3
# When True, question strings are reversed before being encoded.
REVERSE = False
# Longest possible question: DIGITS digits, one operator, DIGITS digits.
MAXLEN = 2 * DIGITS + 1
# Alphabet shared by questions and answers; the space is the padding symbol.
chars = '0123456789+- '

# --- Model -----------------------------------------------------------------
# Recurrent layer class used for both the encoder and the decoder.
RNN = layers.LSTM
# Number of units in every recurrent layer.
HIDDEN_SIZE = 128
# Mini-batch size passed to model.fit.
BATCH_SIZE = 128
# Number of stacked decoder layers.
LAYERS = 1

In [4]:
class CharacterTable(object):
    """Translate between strings and one-hot matrices over a fixed alphabet.

    The alphabet is the sorted set of characters given at construction; a
    character's position in that sorted list is its one-hot column.
    """

    def __init__(self, chars):
        """Build the character <-> index lookups.

        chars: iterable of characters; duplicates are dropped.
        """
        self.chars = sorted(set(chars))
        self.char_indices = {symbol: pos for pos, symbol in enumerate(self.chars)}
        self.indices_char = dict(enumerate(self.chars))

    def encode(self, C, num_rows):
        """One-hot encode the string C into a (num_rows, alphabet size) array.

        Rows beyond len(C) stay all-zero. Raises KeyError for a character that
        is not in the alphabet and IndexError when C is longer than num_rows.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, symbol in enumerate(C):
            column = self.char_indices[symbol]
            one_hot[row, column] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn an encoded sequence back into a string.

        x: a matrix of per-position scores (calc_argmax=True), or a sequence of
           character indices (calc_argmax=False).
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        symbols = [self.indices_char[index] for index in indices]
        return "".join(symbols)

In [5]:
# Shared encoder/decoder built over the configured alphabet `chars`.
ctable = CharacterTable(chars)

In [6]:
# Show the index -> character mapping (indices follow the sorted alphabet).
ctable.indices_char

{0: ' ',
 1: '+',
 2: '-',
 3: '0',
 4: '1',
 5: '2',
 6: '3',
 7: '4',
 8: '5',
 9: '6',
 10: '7',
 11: '8',
 12: '9'}

# Data Generation

### Generating data for adder

In [7]:
# Containers that the subtraction cell keeps appending to.
questions = []
expected = []
seen = set()
print('Generating data...')


def f():
    """Return an integer built from 1 to DIGITS random decimal digits.

    Leading zeros are possible in the drawn digits and collapse in int().
    """
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789')) for i in range(n_digits)))


# The first half of the dataset consists of addition questions.
while len(questions) < TRAINING_SIZE / 2:
    a, b = f(), f()
    # Operand order is ignored for de-duplication: a+b and b+a share one key.
    key = tuple(sorted((a, b)))
    if key not in seen:
        seen.add(key)
        q = '{}+{}'.format(a, b)
        # Pad the question to MAXLEN and the answer to DIGITS + 1 characters.
        query = q.ljust(MAXLEN)
        ans = str(a + b).ljust(DIGITS + 1)
        if REVERSE:
            query = query[::-1]
        questions.append(query)
        expected.append(ans)

Generating data...


### Generating data for subtractor

In [8]:
def f():
    """Return an integer built from 1 to DIGITS random decimal digits.

    Leading zeros are possible in the drawn digits and collapse in int().
    """
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789')) for i in range(n_digits)))


# Fill the rest of the dataset (up to TRAINING_SIZE) with subtraction questions.
while len(questions) < TRAINING_SIZE:
    a, b = f(), f()
    # Subtraction is not commutative, so a-b and b-a are distinct questions and
    # the key must keep the operand order. The '-' tag also keeps these keys
    # apart from the (sorted) addition keys already stored in `seen`, so a pair
    # used for addition is no longer excluded from subtraction.
    key = ('-', a, b)
    if key in seen:
        continue
    seen.add(key)
    q = '{}-{}'.format(a, b)
    # Pad the question to MAXLEN characters.
    query = q + ' ' * (MAXLEN - len(q))
    # The answer may be negative; '-' plus DIGITS digits fits in DIGITS + 1.
    ans = str(a - b)
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total questions:', len(questions))

Total questions: 160000


In [9]:
# Preview three question/answer pairs from the start of the list (additions)
# and three starting at index 150000 (inside the subtraction part).
for start in (0, 150000):
    print(questions[start:start + 3], expected[start:start + 3])

['8+0    ', '502+976', '9+570  '] ['8   ', '1478', '579 ']
['537-724', '160-752', '33-620 '] ['-187', '-592', '-587']


# Processing

In [10]:
print('Vectorization...')
# One-hot tensors: (sample, position in string, character index).
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in 1.24.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [11]:
# Shuffle questions and answers with one shared permutation so pairs stay aligned.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# train_test_split
# Rows [0, 20000) and [80000, 100000) form the training pool; all other rows
# are held out for testing.
# NOTE(review): the data is already shuffled at this point, so these fixed
# ranges do not separate additions from subtractions - confirm this is intended.
train_x = np.concatenate([x[:20000], x[80000:100000]])
train_y = np.concatenate([y[:20000], y[80000:100000]])
test_x = np.concatenate([x[20000:80000], x[100000:]])
test_y = np.concatenate([y[20000:80000], y[100000:]])

# Keep the last tenth of the training pool for validation.
split_at = len(train_x) - len(train_x) // 10
x_train, y_train = train_x[:split_at], train_y[:split_at]
x_val, y_val = train_x[split_at:], train_y[split_at:]

# Report the shape of every split.
for title, inputs, labels in (
        ('Training Data:', x_train, y_train),
        ('Validation Data:', x_val, y_val),
        ('Testing Data:', test_x, test_y)):
    print(title)
    print(inputs.shape)
    print(labels.shape)

Training Data:
(36000, 7, 13)
(36000, 4, 13)
Validation Data:
(4000, 7, 13)
(4000, 4, 13)
Testing Data:
(120000, 7, 13)
(120000, 4, 13)


In [12]:
# Sanity check: dump the first three one-hot encoded questions and answers.
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False False False False False False False False
   False  True]
  [False False False False False False False False False False False
   False  True]
  [False False False False False  True False False False False False
   False False]
  [False  True False False False False False False False False False
   False False]
  [False False False False False  True False False False False False
   False False]
  [False False False False False  True False False False False False
   False False]
  [ True False False False False False False False False False False
   False False]]

 [[False False False False False False False False False False False
   False  True]
  [False False False False False False False False  True False False
   False False]
  [False False False False False False False False False  True False
   False False]
  [False  True False False False False False False False False False
   False False]
  [False False False False False False False False Fals

# Build Model

In [13]:
print('Build model...')

# Encoder-decoder network assembled layer by layer.
model = Sequential()

# Encoder: reads the one-hot question and emits a single HIDDEN_SIZE vector.
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))

# Feed that vector to the decoder once per answer character.
model.add(layers.RepeatVector(DIGITS + 1))

# Decoder: LAYERS stacked recurrent layers that return the full sequence.
for layer_idx in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

# Per-position softmax over the alphabet.
output_layer = layers.Dense(len(chars), activation='softmax')
model.add(layers.TimeDistributed(output_layer))

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72704     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 13)             1677      
Total params: 205,965
Trainable params: 205,965
Non-trainable params: 0
_________________________________________________________________


# Training
Combine adder and subtractor  
Training epoch = 100  

In [14]:
# Train for 100 passes over the training data, printing sample predictions
# after each pass.
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    # One epoch per outer iteration so predictions can be inspected in between.
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Spot-check ten validation questions drawn at random (with replacement).
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array to keep the leading batch dimension.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # Sequential.predict_classes is deprecated and absent from newer Keras
        # releases; the argmax over the softmax output is its replacement.
        preds = np.argmax(model.predict(rowx, verbose=0), axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 459+764 T 1223 [91m☒[0m 110 
Q 760-83  T 677  [91m☒[0m 13  
Q 378-940 T -562 [91m☒[0m -13 
Q 738-601 T 137  [91m☒[0m 127 
Q 905-949 T -44  [91m☒[0m 113 
Q 143+254 T 397  [91m☒[0m 163 
Q 332-34  T 298  [91m☒[0m -3  
Q 66+829  T 895  [91m☒[0m 100 
Q 66+634  T 700  [91m☒[0m 136 
Q 32+941  T 973  [91m☒[0m 136 

--------------------------------------------------
Iteration 1
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 836+477 T 1313 [91m☒[0m 116 
Q 575-237 T 338  [91m☒[0m 21  
Q 245+311 T 556  [91m☒[0m 111 
Q 36+386  T 422  [91m☒[0m 116 
Q 856-90  T 766  [91m☒[0m 21  
Q 91-178  T -87  [91m☒[0m -21 
Q 197+733 T 930  [91m☒[0m 116 
Q 81+925  T 1006 [91m☒[0m 106 
Q 785+465 T 1250 [91m☒[0m 1111
Q 431-844 T -413 [91m☒[0m -11 

--------------------------------------------------
Iteration 2
Train on 36000 samples, valida

Q 126-371 T -245 [91m☒[0m -219
Q 78+582  T 660  [91m☒[0m 667 
Q 77+208  T 285  [91m☒[0m 237 
Q 73+89   T 162  [91m☒[0m 148 
Q 2+802   T 804  [91m☒[0m 813 
Q 672-28  T 644  [91m☒[0m 579 
Q 970-634 T 336  [91m☒[0m 298 
Q 267-28  T 239  [91m☒[0m 269 
Q 828-258 T 570  [91m☒[0m 573 
Q 607+739 T 1346 [91m☒[0m 1311

--------------------------------------------------
Iteration 15
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 226+36  T 262  [91m☒[0m 278 
Q 217+702 T 919  [91m☒[0m 922 
Q 474-88  T 386  [91m☒[0m 388 
Q 82-623  T -541 [91m☒[0m -551
Q 479+838 T 1317 [91m☒[0m 1366
Q 549-762 T -213 [91m☒[0m -211
Q 212-603 T -391 [91m☒[0m -418
Q 93-731  T -638 [91m☒[0m -652
Q 408-716 T -308 [91m☒[0m -335
Q 596+42  T 638  [91m☒[0m 613 

--------------------------------------------------
Iteration 16
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 891+50  T 941  [91m☒[0m 900 
Q 4+464   T 468  [91m☒[0m 477 
Q 42+752  T 794  [91m☒[0

Q 676-188 T 488  [91m☒[0m 593 
Q 509+71  T 580  [91m☒[0m 581 
Q 69-754  T -685 [91m☒[0m -689
Q 851+213 T 1064 [91m☒[0m 1057
Q 464-174 T 290  [91m☒[0m 273 
Q 132+219 T 351  [92m☑[0m 351 
Q 263+464 T 727  [91m☒[0m 729 
Q 52+279  T 331  [91m☒[0m 321 
Q 606-891 T -285 [91m☒[0m -271
Q 166-125 T 41   [91m☒[0m 44  

--------------------------------------------------
Iteration 29
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 918+564 T 1482 [91m☒[0m 1377
Q 869-21  T 848  [91m☒[0m 746 
Q 175-893 T -718 [91m☒[0m -710
Q 986-589 T 397  [91m☒[0m 490 
Q 618+89  T 707  [91m☒[0m 606 
Q 385-751 T -366 [91m☒[0m -370
Q 421+78  T 499  [91m☒[0m 401 
Q 757+8   T 765  [91m☒[0m 763 
Q 760-83  T 677  [91m☒[0m 681 
Q 124+90  T 214  [91m☒[0m 204 

--------------------------------------------------
Iteration 30
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 39+9    T 48   [91m☒[0m 59  
Q 436+47  T 483  [91m☒[0m 581 
Q 454+370 T 824  [91m☒[0

Q 269-885 T -616 [91m☒[0m -618
Q 875+16  T 891  [91m☒[0m 895 
Q 94+690  T 784  [91m☒[0m 775 
Q 841+53  T 894  [91m☒[0m 896 
Q 919+79  T 998  [91m☒[0m 990 
Q 26+962  T 988  [91m☒[0m 998 
Q 870+420 T 1290 [91m☒[0m 1280
Q 803-386 T 417  [91m☒[0m 416 
Q 709+259 T 968  [91m☒[0m 970 
Q 511+37  T 548  [91m☒[0m 557 

--------------------------------------------------
Iteration 43
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 29+439  T 468  [91m☒[0m 467 
Q 339-220 T 119  [91m☒[0m 116 
Q 681-176 T 505  [91m☒[0m 405 
Q 939-334 T 605  [92m☑[0m 605 
Q 462+617 T 1079 [91m☒[0m 1066
Q 58+336  T 394  [92m☑[0m 394 
Q 664+3   T 667  [92m☑[0m 667 
Q 886-964 T -78  [91m☒[0m -95 
Q 518-147 T 371  [91m☒[0m 375 
Q 33+184  T 217  [92m☑[0m 217 

--------------------------------------------------
Iteration 44
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 885-129 T 756  [91m☒[0m 750 
Q 44-664  T -620 [91m☒[0m -619
Q 121-288 T -167 [91m☒[0

Q 8+642   T 650  [92m☑[0m 650 
Q 993-207 T 786  [91m☒[0m 686 
Q 89+679  T 768  [92m☑[0m 768 
Q 675+90  T 765  [92m☑[0m 765 
Q 978-782 T 196  [91m☒[0m 185 
Q 54-337  T -283 [91m☒[0m -284
Q 347+5   T 352  [92m☑[0m 352 
Q 821-25  T 796  [92m☑[0m 796 
Q 25-782  T -757 [91m☒[0m -758
Q 275-521 T -246 [92m☑[0m -246

--------------------------------------------------
Iteration 57
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 258-568 T -310 [91m☒[0m -300
Q 891+309 T 1200 [91m☒[0m 1210
Q 147-556 T -409 [92m☑[0m -409
Q 24+762  T 786  [91m☒[0m 785 
Q 606-891 T -285 [91m☒[0m -275
Q 133-504 T -371 [92m☑[0m -371
Q 75-936  T -861 [91m☒[0m -851
Q 267+493 T 760  [91m☒[0m 750 
Q 13+65   T 78   [92m☑[0m 78  
Q 251-154 T 97   [91m☒[0m 98  

--------------------------------------------------
Iteration 58
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 94-202  T -108 [91m☒[0m -118
Q 606-891 T -285 [92m☑[0m -285
Q 45+168  T 213  [92m☑[0

Q 716-28  T 688  [92m☑[0m 688 
Q 577-250 T 327  [92m☑[0m 327 
Q 243-720 T -477 [92m☑[0m -477
Q 124-36  T 88   [92m☑[0m 88  
Q 777-564 T 213  [92m☑[0m 213 
Q 388+64  T 452  [92m☑[0m 452 
Q 858+801 T 1659 [91m☒[0m 1668
Q 313+767 T 1080 [92m☑[0m 1080
Q 33-648  T -615 [92m☑[0m -615
Q 407+713 T 1120 [92m☑[0m 1120

--------------------------------------------------
Iteration 71
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 99+916  T 1015 [92m☑[0m 1015
Q 82+748  T 830  [92m☑[0m 830 
Q 284-119 T 165  [92m☑[0m 165 
Q 52-923  T -871 [91m☒[0m -872
Q 46-792  T -746 [92m☑[0m -746
Q 911+30  T 941  [92m☑[0m 941 
Q 56+60   T 116  [92m☑[0m 116 
Q 7+502   T 509  [91m☒[0m 510 
Q 509+50  T 559  [91m☒[0m 569 
Q 263+464 T 727  [92m☑[0m 727 

--------------------------------------------------
Iteration 72
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 384-250 T 134  [92m☑[0m 134 
Q 297-486 T -189 [92m☑[0m -189
Q 755-312 T 443  [92m☑[0

Q 999-553 T 446  [92m☑[0m 446 
Q 824-168 T 656  [92m☑[0m 656 
Q 873+62  T 935  [92m☑[0m 935 
Q 775-73  T 702  [92m☑[0m 702 
Q 147-859 T -712 [92m☑[0m -712
Q 70-964  T -894 [92m☑[0m -894
Q 874+91  T 965  [92m☑[0m 965 
Q 940+479 T 1419 [92m☑[0m 1419
Q 756-92  T 664  [92m☑[0m 664 
Q 20+582  T 602  [92m☑[0m 602 

--------------------------------------------------
Iteration 85
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 88+163  T 251  [92m☑[0m 251 
Q 366+946 T 1312 [92m☑[0m 1312
Q 568-888 T -320 [91m☒[0m -310
Q 851+255 T 1106 [92m☑[0m 1106
Q 482-784 T -302 [91m☒[0m -303
Q 24-940  T -916 [92m☑[0m -916
Q 48-138  T -90  [91m☒[0m -99 
Q 607+872 T 1479 [91m☒[0m 1478
Q 186-118 T 68   [92m☑[0m 68  
Q 90-210  T -120 [91m☒[0m -111

--------------------------------------------------
Iteration 86
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 88+605  T 693  [92m☑[0m 693 
Q 386+748 T 1134 [92m☑[0m 1134
Q 12+523  T 535  [92m☑[0

Q 360-584 T -224 [92m☑[0m -224
Q 812+250 T 1062 [91m☒[0m 1742
Q 86-664  T -578 [92m☑[0m -578
Q 14+824  T 838  [92m☑[0m 838 
Q 53-971  T -918 [92m☑[0m -918
Q 83-544  T -461 [92m☑[0m -461
Q 7+99    T 106  [92m☑[0m 106 
Q 768-611 T 157  [92m☑[0m 157 
Q 260-668 T -408 [92m☑[0m -408
Q 521+84  T 605  [92m☑[0m 605 

--------------------------------------------------
Iteration 99
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 716-28  T 688  [92m☑[0m 688 
Q 68+114  T 182  [92m☑[0m 182 
Q 73-672  T -599 [91m☒[0m -609
Q 970+434 T 1404 [91m☒[0m 1304
Q 899-875 T 24   [91m☒[0m 14  
Q 626-946 T -320 [92m☑[0m -320
Q 858+573 T 1431 [91m☒[0m 1331
Q 965-753 T 212  [92m☑[0m 212 
Q 80+239  T 319  [92m☑[0m 319 
Q 14-461  T -447 [92m☑[0m -447


# Testing 1 (by test data)

In [17]:
print("MSG : Prediction")
# Sequential.predict_classes is deprecated and absent from newer Keras
# releases; the argmax over the softmax output is its replacement.
preds = np.argmax(model.predict(test_x, verbose=0), axis=-1)
# Show the first ten held-out questions with the true and predicted answers.
for i in range(10):
    q = ctable.decode(test_x[i])
    correct = ctable.decode(test_y[i])
    guess = ctable.decode(preds[i], calc_argmax=False)
    print('Q', q[::-1] if REVERSE else q, end=' ')
    print('T', correct, end=' ')
    if correct == guess:
        print(colors.ok + '☑' + colors.close, end=' ')
    else:
        print(colors.fail + '☒' + colors.close, end=' ')
    print(guess)

MSG : Prediction
Q 684+55  T 739  [92m☑[0m 739 
Q 6+674   T 680  [92m☑[0m 680 
Q 345-842 T -497 [92m☑[0m -497
Q 752-133 T 619  [91m☒[0m 629 
Q 988+227 T 1215 [92m☑[0m 1215
Q 606-94  T 512  [91m☒[0m 511 
Q 728+48  T 776  [92m☑[0m 776 
Q 913-55  T 858  [91m☒[0m 868 
Q 45+21   T 66   [92m☑[0m 66  
Q 708+33  T 741  [92m☑[0m 741 


# Testing 2 (by new question)
Q: 760+172, 529+39 , 227-530, 866+777, 10-879 , 630-342, 235-111, 688+524, 999+166

In [66]:
newQ = ['760+172', '529+39 ', '227-530', '866+777','10-879 ','630-342', '235-111','688+524', '999+166']
newA = ['932', '568 ', '-303', '1643', '-869', '288', '124', '1212', '1165']
print('Vectorization...')
# Dedicated arrays: reusing the names x and y here would overwrite the full
# dataset and break any later re-run of the shuffle/split cell.
# The builtin `bool` replaces the `np.bool` alias removed from current NumPy.
new_x = np.zeros((len(newQ), MAXLEN, len(chars)), dtype=bool)
new_y = np.zeros((len(newA), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(newQ):
    # Apply the same padding (and optional reversal) as the training questions.
    query = sentence.ljust(MAXLEN)
    if REVERSE:
        query = query[::-1]
    new_x[i] = ctable.encode(query, MAXLEN)
for i, sentence in enumerate(newA):
    # Pad explicitly instead of relying on all-zero rows decoding to a space.
    new_y[i] = ctable.encode(sentence.ljust(DIGITS + 1), DIGITS + 1)

print("MSG : Prediction")
# Predict once for the whole batch; the result does not change per question.
# Sequential.predict_classes is deprecated and absent from newer Keras
# releases; the argmax over the softmax output is its replacement.
preds = np.argmax(model.predict(new_x, verbose=0), axis=-1)
for i in range(len(newQ)):
    q = ctable.decode(new_x[i])
    correct = ctable.decode(new_y[i])
    guess = ctable.decode(preds[i], calc_argmax=False)
    print('Q', q[::-1] if REVERSE else q, end=' ')
    print('T', correct, end=' ')
    if correct == guess:
        print(colors.ok + '☑' + colors.close, end=' ')
    else:
        print(colors.fail + '☒' + colors.close, end=' ')
    print(guess)

Vectorization...
MSG : Prediction
Q 760+172 T 932  [92m☑[0m 932 
Q 529+39  T 568  [92m☑[0m 568 
Q 227-530 T -303 [92m☑[0m -303
Q 866+777 T 1643 [92m☑[0m 1643
Q 10-879  T -869 [92m☑[0m -869
Q 630-342 T 288  [92m☑[0m 288 
Q 235-111 T 124  [92m☑[0m 124 
Q 688+524 T 1212 [92m☑[0m 1212
Q 999+166 T 1165 [92m☑[0m 1165
