# DSAI-HW3

## Import package

In [25]:
from keras.models import Sequential
from keras import layers
from keras.layers import LSTM, TimeDistributed, Dense, RepeatVector, Activation, BatchNormalization,Bidirectional
from keras.models import load_model
import numpy as np
from six.moves import range

## Parameters Config

In [2]:
class colors:
    ok = '\033[92m'
    fail = '\033[91m'
    close = '\033[0m'

In [3]:
TRAINING_SIZE = 80000
DIGITS = 3
REVERSE = False
MAXLEN = DIGITS + 1 + DIGITS
chars = '0123456789-+ '
RNN = layers.LSTM
HIDDEN_SIZE = 128
BATCH_SIZE = 128
LAYERS = 1

## Data Generation

In [18]:
%%time
data = []
label = []
seen = set()

print('Generating data...')
while len(data) < TRAINING_SIZE:
    f = lambda: int(''.join(np.random.choice(list('0123456789')) for i in range(np.random.randint(1, DIGITS + 1))))
    a, b = f(), f()
    if(a<b):
        a,b = b,a
    operator = np.random.choice(list('+-'))    
    q = str(a) + operator + str(b)
    if(q not in seen):
        query = q + ' ' * (MAXLEN - len(q))
        seen.add(query)
        data.append(query)
        if(operator == "+"):
            ans = str(a+b)
        else:
            ans = str(a-b)
        ans += ' '* (DIGITS + 1 - len(ans))
        label.append(ans)
    
# print(data)
# print(label)
    

Generating data...
Wall time: 4.29 s


## Processing

In [19]:
class CharacterTable(object):
    def __init__(self, chars):
        self.chars = sorted(set(chars))
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))
    
    def encode(self, C, num_rows):
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(C):
            x[i, self.char_indices[c]] = 1
        return x
    
    def decode(self, x, calc_argmax=True):
        if calc_argmax:
            x = x.argmax(axis=-1)
        return "".join(self.indices_char[i] for i in x)

In [20]:
ctable = CharacterTable(chars)

print('Vectorization...')
x = np.zeros((len(data), MAXLEN, len(chars)))
y = np.zeros((len(label), DIGITS + 1, len(chars)))
for i, sentence in enumerate(data):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(label):
    y[i] = ctable.encode(sentence, DIGITS + 1)
    
# print(x.shape)
# print(y.shape)
# print(y)

Vectorization...


In [21]:
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# train_test_split
train_x = x[:20000]
train_y = y[:20000]
test_x = x[20000:]
test_y = y[20000:]

split_at = len(train_x) - len(train_x) // 10
print(len(train_x))

(x_train, x_val) = train_x[:split_at], train_x[split_at:]
(y_train, y_val) = train_y[:split_at], train_y[split_at:]

print('Training Data:')
print(x_train.shape)
print(y_train.shape)

print('Validation Data:')
print(x_val.shape)
print(y_val.shape)

print('Testing Data:')
print(test_x.shape)
print(test_y.shape)

20000
Training Data:
(18000, 7, 13)
(18000, 4, 13)
Validation Data:
(2000, 7, 13)
(2000, 4, 13)
Testing Data:
(60000, 7, 13)
(60000, 4, 13)


## Build Model

In [22]:
print('Build model...')
model = Sequential()

model.add(BatchNormalization(input_shape =(MAXLEN,len(chars))))
model.add(Bidirectional(RNN(HIDDEN_SIZE),merge_mode='concat'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(layers.RepeatVector(DIGITS + 1))
model.add(RNN(HIDDEN_SIZE, return_sequences=True))
model.add(layers.TimeDistributed(layers.Dense(len(chars))))
model.add(layers.Activation('softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_3 (Batch (None, 7, 13)             52        
_________________________________________________________________
bidirectional_2 (Bidirection (None, 256)               145408    
_________________________________________________________________
batch_normalization_4 (Batch (None, 256)               1024      
_________________________________________________________________
activation_3 (Activation)    (None, 256)               0         
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 4, 256)            0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 4, 128)            197120    
_________________________________________________________________
time_distributed_2 (TimeDist (None, 4, 13)             1677  

## Training

In [24]:
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = model.predict_classes(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)



--------------------------------------------------
Iteration 0
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 77-9    T 68   [91m☒[0m 76  
Q 230-55  T 175  [91m☒[0m 255 
Q 170-38  T 132  [91m☒[0m 11  
Q 966+39  T 1005 [91m☒[0m 155 
Q 38-4    T 34   [92m☑[0m 34  
Q 54+7    T 61   [91m☒[0m 55  
Q 5+3     T 8    [91m☒[0m 6   
Q 90+5    T 95   [92m☑[0m 95  
Q 23-6    T 17   [91m☒[0m 21  
Q 5-5     T 0    [91m☒[0m 5   

--------------------------------------------------
Iteration 1
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 48-9    T 39   [91m☒[0m 47  
Q 26+6    T 32   [91m☒[0m 20  
Q 18+4    T 22   [92m☑[0m 22  
Q 7+0     T 7    [92m☑[0m 7   
Q 7+3     T 10   [92m☑[0m 10  
Q 240+2   T 242  [91m☒[0m 244 
Q 537-27  T 510  [91m☒[0m 540 
Q 63-5    T 58   [91m☒[0m 62  
Q 171-3   T 168  [91m☒[0m 104 
Q 6-6     T 0    [91m☒[0m 2   

--------------------------------------------------
Iteration 2
Train on 18000 samples, valida

Q 615-20  T 595  [92m☑[0m 595 
Q 475-68  T 407  [92m☑[0m 407 
Q 9+2     T 11   [92m☑[0m 11  
Q 752+9   T 761  [92m☑[0m 761 
Q 702-31  T 671  [91m☒[0m 661 
Q 643-298 T 345  [91m☒[0m 455 
Q 245+2   T 247  [92m☑[0m 247 
Q 533+15  T 548  [91m☒[0m 558 
Q 468-1   T 467  [92m☑[0m 467 
Q 9-7     T 2    [92m☑[0m 2   

--------------------------------------------------
Iteration 15
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 34-3    T 31   [92m☑[0m 31  
Q 889+281 T 1170 [92m☑[0m 1170
Q 985-740 T 245  [91m☒[0m 255 
Q 8-8     T 0    [92m☑[0m 0   
Q 123-43  T 80   [92m☑[0m 80  
Q 557+61  T 618  [91m☒[0m 628 
Q 74+5    T 79   [92m☑[0m 79  
Q 209+55  T 264  [92m☑[0m 264 
Q 4-1     T 3    [92m☑[0m 3   
Q 86+81   T 167  [91m☒[0m 177 

--------------------------------------------------
Iteration 16
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 58-7    T 51   [92m☑[0m 51  
Q 91-1    T 90   [92m☑[0m 90  
Q 644+33  T 677  [92m☑[0

Q 9+0     T 9    [92m☑[0m 9   
Q 764+3   T 767  [92m☑[0m 767 
Q 694+651 T 1345 [91m☒[0m 1355
Q 922+694 T 1616 [91m☒[0m 1506
Q 5+0     T 5    [92m☑[0m 5   
Q 891-8   T 883  [92m☑[0m 883 
Q 771-8   T 763  [92m☑[0m 763 
Q 503+7   T 510  [92m☑[0m 510 
Q 39+32   T 71   [92m☑[0m 71  
Q 11-4    T 7    [92m☑[0m 7   

--------------------------------------------------
Iteration 29
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 30+3    T 33   [92m☑[0m 33  
Q 8-3     T 5    [92m☑[0m 5   
Q 810+555 T 1365 [91m☒[0m 1355
Q 401+6   T 407  [92m☑[0m 407 
Q 952-0   T 952  [92m☑[0m 952 
Q 504+11  T 515  [92m☑[0m 515 
Q 615-0   T 615  [92m☑[0m 615 
Q 33+0    T 33   [92m☑[0m 33  
Q 804+98  T 902  [92m☑[0m 902 
Q 90+47   T 137  [92m☑[0m 137 

--------------------------------------------------
Iteration 30
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 36-2    T 34   [92m☑[0m 34  
Q 9-9     T 0    [92m☑[0m 0   
Q 954-659 T 295  [92m☑[0

Q 547+503 T 1050 [91m☒[0m 1000
Q 86-20   T 66   [92m☑[0m 66  
Q 881+34  T 915  [92m☑[0m 915 
Q 59-27   T 32   [92m☑[0m 32  
Q 919+20  T 939  [91m☒[0m 929 
Q 385-95  T 290  [91m☒[0m 280 
Q 944+65  T 1009 [92m☑[0m 1009
Q 726+59  T 785  [92m☑[0m 785 
Q 781+171 T 952  [91m☒[0m 948 
Q 9+3     T 12   [92m☑[0m 12  

--------------------------------------------------
Iteration 43
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 88-5    T 83   [92m☑[0m 83  
Q 942-4   T 938  [92m☑[0m 938 
Q 45-1    T 44   [92m☑[0m 44  
Q 6-5     T 1    [92m☑[0m 1   
Q 945+4   T 949  [92m☑[0m 949 
Q 587-87  T 500  [92m☑[0m 500 
Q 364-4   T 360  [92m☑[0m 360 
Q 91-33   T 58   [92m☑[0m 58  
Q 764+505 T 1269 [91m☒[0m 1289
Q 58-1    T 57   [92m☑[0m 57  

--------------------------------------------------
Iteration 44
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 9+4     T 13   [92m☑[0m 13  
Q 230+6   T 236  [92m☑[0m 236 
Q 395-83  T 312  [92m☑[0

Q 37+8    T 45   [92m☑[0m 45  
Q 42+3    T 45   [92m☑[0m 45  
Q 214+0   T 214  [92m☑[0m 214 
Q 55-48   T 7    [92m☑[0m 7   
Q 968+834 T 1802 [91m☒[0m 1702
Q 802-53  T 749  [92m☑[0m 749 
Q 85+46   T 131  [92m☑[0m 131 
Q 795+9   T 804  [92m☑[0m 804 
Q 877-98  T 779  [92m☑[0m 779 
Q 5+5     T 10   [92m☑[0m 10  

--------------------------------------------------
Iteration 57
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 190-59  T 131  [92m☑[0m 131 
Q 268-28  T 240  [92m☑[0m 240 
Q 747+53  T 800  [92m☑[0m 800 
Q 215-90  T 125  [92m☑[0m 125 
Q 656+43  T 699  [91m☒[0m 799 
Q 60-14   T 46   [92m☑[0m 46  
Q 919+20  T 939  [91m☒[0m 929 
Q 611-89  T 522  [92m☑[0m 522 
Q 0-0     T 0    [92m☑[0m 0   
Q 887-34  T 853  [92m☑[0m 853 

--------------------------------------------------
Iteration 58
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 853-610 T 243  [91m☒[0m 151 
Q 683+52  T 735  [92m☑[0m 735 
Q 854+1   T 855  [92m☑[0

Q 716+362 T 1078 [91m☒[0m 1088
Q 976-36  T 940  [91m☒[0m 930 
Q 642+90  T 732  [92m☑[0m 732 
Q 99-99   T 0    [92m☑[0m 0   
Q 586-4   T 582  [92m☑[0m 582 
Q 667-539 T 128  [91m☒[0m 18  
Q 546-6   T 540  [92m☑[0m 540 
Q 55+9    T 64   [92m☑[0m 64  
Q 853-310 T 543  [91m☒[0m 513 
Q 730+86  T 816  [91m☒[0m 826 

--------------------------------------------------
Iteration 71
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 317+54  T 371  [92m☑[0m 371 
Q 543-34  T 509  [92m☑[0m 509 
Q 54-0    T 54   [92m☑[0m 54  
Q 621+32  T 653  [91m☒[0m 643 
Q 718+219 T 937  [92m☑[0m 937 
Q 13-1    T 12   [92m☑[0m 12  
Q 756-30  T 726  [92m☑[0m 726 
Q 873-0   T 873  [92m☑[0m 873 
Q 2+1     T 3    [92m☑[0m 3   
Q 298-7   T 291  [92m☑[0m 291 

--------------------------------------------------
Iteration 72
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 438-70  T 368  [92m☑[0m 368 
Q 33+9    T 42   [92m☑[0m 42  
Q 49+1    T 50   [92m☑[0

Q 9-6     T 3    [92m☑[0m 3   
Q 914-44  T 870  [92m☑[0m 870 
Q 862-42  T 820  [92m☑[0m 820 
Q 401+21  T 422  [92m☑[0m 422 
Q 885-8   T 877  [92m☑[0m 877 
Q 880-89  T 791  [92m☑[0m 791 
Q 803+98  T 901  [92m☑[0m 901 
Q 283-8   T 275  [92m☑[0m 275 
Q 269+4   T 273  [92m☑[0m 273 
Q 468-9   T 459  [92m☑[0m 459 

--------------------------------------------------
Iteration 85
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 535-21  T 514  [92m☑[0m 514 
Q 97-4    T 93   [92m☑[0m 93  
Q 822-9   T 813  [92m☑[0m 813 
Q 73+1    T 74   [92m☑[0m 74  
Q 26+8    T 34   [92m☑[0m 34  
Q 81+11   T 92   [92m☑[0m 92  
Q 871+6   T 877  [92m☑[0m 877 
Q 14-0    T 14   [92m☑[0m 14  
Q 57+8    T 65   [92m☑[0m 65  
Q 566+21  T 587  [92m☑[0m 587 

--------------------------------------------------
Iteration 86
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 20-4    T 16   [92m☑[0m 16  
Q 71+4    T 75   [92m☑[0m 75  
Q 91-11   T 80   [91m☒[0

Q 364-4   T 360  [92m☑[0m 360 
Q 525-8   T 517  [92m☑[0m 517 
Q 136+7   T 143  [92m☑[0m 143 
Q 19-3    T 16   [92m☑[0m 16  
Q 9+9     T 18   [92m☑[0m 18  
Q 603+43  T 646  [92m☑[0m 646 
Q 5+2     T 7    [92m☑[0m 7   
Q 553+93  T 646  [92m☑[0m 646 
Q 98-3    T 95   [92m☑[0m 95  
Q 906-95  T 811  [91m☒[0m 801 

--------------------------------------------------
Iteration 99
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 3+2     T 5    [92m☑[0m 5   
Q 489+26  T 515  [91m☒[0m 525 
Q 544-6   T 538  [92m☑[0m 538 
Q 94+1    T 95   [92m☑[0m 95  
Q 67-38   T 29   [92m☑[0m 29  
Q 7+1     T 8    [92m☑[0m 8   
Q 307+1   T 308  [92m☑[0m 308 
Q 123-5   T 118  [92m☑[0m 118 
Q 168-40  T 128  [92m☑[0m 128 
Q 851+29  T 880  [92m☑[0m 880 


## Save Model

In [26]:
# model.save('BiLSTM.h5') 

## Validation

In [30]:
right = 0
preds = model.predict_classes(test_x, verbose=0)
for i in range(len(preds)):
    q = ctable.decode(test_x[i])
    correct = ctable.decode(test_y[i])
    guess = ctable.decode(preds[i], calc_argmax=False)
#     print('Q', q[::-1] if REVERSE else q, end=' ')
#     print('T', correct, end=' ')
    if correct == guess:
#         print(colors.ok + '☑' + colors.close, end=' ')
        right += 1
#     else:
#         print(colors.fail + '☒' + colors.close, end=' ')
#     print(guess)
print("MSG : Accuracy is {}".format(right / len(preds)))


MSG : Accuracy is 0.8960666666666667
