# Neural Machine Translation with Attention

In [12]:
from keras.layers import Bidirectional, LSTM, Dense, Activation
from keras.layers import Concatenate, Permute, Dot, Input, RepeatVector, Lambda,Multiply
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model,Model
import keras.backend as K
import numpy as np
from faker import Faker
import random
from tqdm import tqdm
from babel.dates import format_date
from nmt_utils import *
import matplotlib.pyplot as plt

%matplotlib inline 

## 1. Dataset

In [13]:
# Number of (human-readable, machine-readable) date pairs requested from load_dataset.
m = 10000
# load_dataset is not imported by name in this notebook; presumably it is provided by
# the `from nmt_utils import *` star import. Besides the pairs it returns the two
# vocabularies used for encoding in later cells. inv_machine_vocab is presumably the
# inverse lookup of machine_vocab -- TODO confirm against nmt_utils.
dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset(m)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [00:00<00:00, 22124.09it/s]


In [17]:
# Preview the first ten (source, target) pairs.
dataset[:10]

[('9 may 1998', '1998-05-09'),
 ('10.09.70', '1970-09-10'),
 ('4/28/90', '1990-04-28'),
 ('thursday january 26 1995', '1995-01-26'),
 ('monday march 7 1983', '1983-03-07'),
 ('sunday may 22 1988', '1988-05-22'),
 ('tuesday july 8 2008', '2008-07-08'),
 ('08 sep 1999', '1999-09-08'),
 ('1 jan 1981', '1981-01-01'),
 ('monday may 22 1995', '1995-05-22')]

In [27]:
# Fixed sequence lengths handed to preprocess_data: Tx for the human-readable
# source side, Ty for the machine-readable target side.
Tx = 30
Ty = 10

# Index-encoded (X, Y) and one-hot-encoded (Xoh, Yoh) views of the dataset.
X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)

# Report the shape of every encoded array, one line each.
for _label, _array in (
    ("X.shape = ", X),
    ("Y.shape = ", Y),
    ("Xoh.shape = ", Xoh),
    ("Yoh.shape = ", Yoh),
):
    print(_label, _array.shape)

X.shape =  (10000, 30)
Y.shape =  (10000, 10)
Xoh.shape =  (10000, 30, 37)
Yoh.shape =  (10000, 10, 11)


In [39]:
# Show one training example in each of its three representations:
# raw text, vocabulary indices, and one-hot vectors.
index = 0
source_text, target_text = dataset[index]

print("Source date:", source_text)
print("Target date:", target_text)
print()
print("Source after preprocessing (indices):", X[index])
print("Target after preprocessing (indices):", Y[index])
print()
print("Source after preprocessing (one-hot):", Xoh[index])
print("Target after preprocessing (one-hot):", Yoh[index])

Source date: 9 may 1998
Target date: 1998-05-09

Source after preprocessing (indices): [12  0 24 13 34  0  4 12 12 11 36 36 36 36 36 36 36 36 36 36 36 36 36 36
 36 36 36 36 36 36]
Target after preprocessing (indices): [ 2 10 10  9  0  1  6  0  1 10]

Source after preprocessing (one-hot): [[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]]
Target after preprocessing (one-hot): [[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]


## 2. Neural machine translation with attention

In [42]:
# Shared layer instances used by one_step_attention. They are created once at
# module level so that every call to one_step_attention reuses the same weights.

# Tiles a state vector Tx times so it can be paired with each encoder step.
repeator = RepeatVector(n=Tx)
# Joins encoder outputs and the tiled state along the last (feature) axis.
concatenate = Concatenate(axis=-1)
# Maps each concatenated step to a single non-negative score.
densor = Dense(units=1, activation="relu")
# `softmax` is not imported by name in this notebook; presumably it comes from
# the nmt_utils star import -- TODO confirm which axis it normalises over.
activator = Activation(softmax, name="attention_weights")
# Contracts the attention weights with the encoder outputs along axis 1.
dotor = Dot(axes=1)

In [43]:
def one_step_attention(a, s_prev):
    """Compute one attention context from the encoder outputs and a decoder state.

    Relies on the module-level shared layers (repeator, concatenate, densor,
    activator, dotor), so every call reuses the same layer weights.

    Arguments:
    a -- encoder output sequence; assumed (batch, Tx, features) -- TODO confirm
    s_prev -- previous decoder hidden state; assumed (batch, n_s) -- TODO confirm

    Returns:
    The tensor produced by `dotor` (Dot over axis 1) applied to the attention
    weights and `a`.
    """
    # Tile the decoder state so it lines up with every step of `a`.
    tiled_state = repeator(s_prev)

    # Score each step from the joined features, then normalise the scores
    # through the "attention_weights" activation.
    scores = densor(concatenate([a, tiled_state]))
    weights = activator(scores)

    # Weighted combination of the encoder outputs.
    return dotor([weights, a])

In [44]:
# Hidden sizes: n_a per direction of the Bidirectional encoder LSTM,
# n_s for the decoder LSTM defined just below.
n_a = 64
n_s = 128

# Decoder LSTM, shared across all output steps. return_state=True makes each call
# return the output together with the hidden and cell states.
post_activation_LSTM_cell = LSTM(units=n_s, return_state=True)
# Per-step classifier with one unit per machine_vocab entry. `softmax` is
# presumably the helper from the nmt_utils star import -- TODO confirm.
output_layer = Dense(units=len(machine_vocab), activation=softmax)

In [51]:
def model(Tx, Ty, n_a, n_s, human_vocab_size, machine_vocab_size):
    """Build the attention-based encoder/decoder Keras model.

    Arguments:
    Tx -- length of the input sequence
    Ty -- number of decoder steps, i.e. number of model outputs
    n_a -- units per direction of the Bidirectional encoder LSTM
    n_s -- size of the decoder state inputs "s0" and "c0"
    human_vocab_size -- size of the last axis of the one-hot input
    machine_vocab_size -- accepted for interface compatibility; not read here

    Returns:
    A keras Model taking [X, s0, c0] and producing a list of Ty outputs.

    Note: the decoder LSTM and the output Dense layer are the module-level
    `post_activation_LSTM_cell` and `output_layer`. Their sizes are fixed where
    they are created, so `n_s` must agree with the value used there, and the
    output width comes from `len(machine_vocab)` rather than `machine_vocab_size`.
    """
    # Model inputs: the one-hot source sequence and the initial decoder states.
    source_onehot = Input(shape=(Tx, human_vocab_size))
    initial_hidden = Input(shape=(n_s,), name="s0")
    initial_cell = Input(shape=(n_s,), name="c0")

    # Encode the whole source sequence once; every decoder step attends over it.
    encoder_states = Bidirectional(LSTM(n_a, return_sequences=True))(source_onehot)

    hidden, cell = initial_hidden, initial_cell
    step_outputs = []
    for _step in range(Ty):
        context = one_step_attention(encoder_states, hidden)
        # With return_state=True the LSTM yields (output, hidden state, cell state);
        # the first value is carried forward as the hidden state, the second is dropped.
        hidden, _, cell = post_activation_LSTM_cell(context, initial_state=[hidden, cell])
        step_outputs.append(output_layer(hidden))

    return Model([source_onehot, initial_hidden, initial_cell], step_outputs)

In [52]:
# NOTE(review): this rebinds the name `model` from the builder function to the
# built Model instance, so this cell cannot be re-run without first re-running
# the cell that defines `model`.
model = model(
    Tx=Tx,
    Ty=Ty,
    n_a=n_a,
    n_s=n_s,
    human_vocab_size=len(human_vocab),
    machine_vocab_size=len(machine_vocab),
)

In [53]:
# Print the layer table (output shapes, parameter counts, connections) of the built model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 30, 37)       0                                            
__________________________________________________________________________________________________
s0 (InputLayer)                 (None, 128)          0                                            
__________________________________________________________________________________________________
bidirectional_3 (Bidirectional) (None, 30, 128)      52224       input_4[0][0]                    
__________________________________________________________________________________________________
repeat_vector_2 (RepeatVector)  (None, 30, 128)      0           s0[0][0]                         
                                                                 lstm_1[10][0]                    
          

                                                                 bidirectional_3[0][0]            
__________________________________________________________________________________________________
c0 (InputLayer)                 (None, 128)          0                                            
__________________________________________________________________________________________________
lstm_1 (LSTM)                   [(None, 128), (None, 131584      dot_2[10][0]                     
                                                                 s0[0][0]                         
                                                                 c0[0][0]                         
                                                                 dot_2[11][0]                     
                                                                 lstm_1[10][0]                    
                                                                 lstm_1[10][2]                    
          

In [54]:
# Adam optimizer with learning-rate decay.
opt = Adam(
    lr=0.005,
    beta_1=0.9,
    beta_2=0.999,
    decay=0.01,
)
# A single loss / metric specification is given for the multi-output model.
model.compile(
    optimizer=opt,
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)

In [55]:
# Zero initial decoder states, one row per training example.
s0 = np.zeros(shape=(m, n_s))
c0 = np.zeros(shape=(m, n_s))
# The model has one output per target position, so swap the first two axes of
# Yoh and split it into a list with one array per position.
outputs = list(np.swapaxes(Yoh, 0, 1))

In [56]:
# Train for one epoch; the returned History object is the cell's displayed value.
model.fit(
    x=[Xoh, s0, c0],
    y=outputs,
    batch_size=100,
    epochs=1,
)

Epoch 1/1










<keras.callbacks.History at 0x24701e4fd30>