In [26]:
import tensorflow as tf
import numpy as np
from utils import load_dataset, preprocessing, string_to_int
from tensorflow.keras.layers import RepeatVector, Bidirectional, Concatenate, Dense, Dot, Softmax, LSTM
from tensorflow.keras import Input
from tensorflow.keras import Model
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.utils import to_categorical

%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Prepare dataset

In [27]:
m = 10000
dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset(m)

In [28]:
print(machine_vocab)

{'-': 0, '0': 1, '1': 2, '2': 3, '3': 4, '4': 5, '5': 6, '6': 7, '7': 8, '8': 9, '9': 10}


In [29]:
dataset[:10]

[('february 26 1979', '1979-02-26'),
 ('sunday december 17 2000', '2000-12-17'),
 ('8/31/21', '2021-08-31'),
 ('tuesday december 16 2014', '2014-12-16'),
 ('saturday august 27 2011', '2011-08-27'),
 ('14 aug 1972', '1972-08-14'),
 ('july 3 2005', '2005-07-03'),
 ('sunday july 8 2007', '2007-07-08'),
 ('20 sep 2002', '2002-09-20'),
 ('thursday august 2 2001', '2001-08-02')]

In [30]:
print(*dataset)


('february 26 1979', '1979-02-26') ('sunday december 17 2000', '2000-12-17') ('8/31/21', '2021-08-31') ('tuesday december 16 2014', '2014-12-16') ('saturday august 27 2011', '2011-08-27') ('14 aug 1972', '1972-08-14') ('july 3 2005', '2005-07-03') ('sunday july 8 2007', '2007-07-08') ('20 sep 2002', '2002-09-20') ('thursday august 2 2001', '2001-08-02') ('october 25 2012', '2012-10-25') ('sunday march 12 1989', '1989-03-12') ('tuesday september 4 1984', '1984-09-04') ('27 jun 1977', '1977-06-27') ('11 01 96', '1996-01-11') ('sunday july 29 1973', '1973-07-29') ('sunday february 7 1982', '1982-02-07') ('tuesday july 1 1980', '1980-07-01') ('2/22/15', '2015-02-22') ('friday june 8 2007', '2007-06-08') ('march 20 2013', '2013-03-20') ('14 february 1993', '1993-02-14') ('tuesday august 30 1988', '1988-08-30') ('20 aug 1977', '1977-08-20') ('1 november 2010', '2010-11-01') ('14 jun 2016', '2016-06-14') ('wednesday july 24 1985', '1985-07-24') ('aug 7 2001', '2001-08-07') ('february 8 1981',

In [31]:
Tx = 30
Ty = 10
# Shape: (m, Tx, len(human_vocab))
# Shape: (m, Ty, len(machine_vocab))
X, Y, Xoh, Yoh = preprocessing(dataset, human_vocab, machine_vocab, Tx, Ty)

In [32]:
print(X.shape)

(10000, 30)


In [33]:
index = 0
print('Date', dataset[index])
print()
print('Date after preprocessing:', X[0])
print()
print('Date after preprocessing (one_hot):\n', Xoh[0])

Date ('february 26 1979', '1979-02-26')

Date after preprocessing: [18 17 14 28 31 13 28 34  0  5  9  0  4 12 10 12 36 36 36 36 36 36 36 36
 36 36 36 36 36 36]

Date after preprocessing (one_hot):
 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]]


## Neural Machine Translation with Attention

In [34]:

repeator = RepeatVector(Tx)

concatenator = Concatenate(axis=-1)

dense1 = Dense(10, activation='tanh', name='dense1')

dense2 = Dense(1, activation='relu', name='dense2')

activation = Softmax(axis=1, name='attention_weights')

dotor = Dot(axes=1)

In [35]:
def one_step_attention(a, s_prev): 
    """_summary_

    ## Args:
        a -- ndarray[m, Tx, 2*n_a]: hidden state output of the Bi_LSTM
        s_prev -- ndarray[m, n_s]: previous hidden state of (post-attention) LSTM
    ## Returns: 
        context -- ndarray[m, 1, n_a*2]: context vector, input of the next (post-attention) LSTM cell
    """
    s_prev_r = repeator(s_prev)
    
    concat = concatenator([a, s_prev_r])
    
    # Small nn
    d1 = dense1(concat)
    d2 = dense2(d1)
    # Apply softmax
    alphas = activation(d2)
    context = dotor([alphas, a])
    return context
    
n_a = 32
n_s = 64
np.random.seed(1)
a = np.random.randn(m, Tx, n_a*2)
s_prev = np.random.randn(m, n_s)

context = one_step_attention(a, s_prev)

In [36]:
n_a = 32 # number of units for pre-attention, bi-directional LSTM's hidden state 'a'
n_s = 64 # number of units for post-attention LSTM's hidden state 's'

post_activation_LSTM_cell = LSTM(n_s, return_state=True)
output_layer = Dense(units=len(machine_vocab), activation='softmax')

In [37]:
def modelf(Tx, Ty, n_a, n_s, human_vocab_size, machine_vocab_size): 
    """_summary_

    ## Args:
        Tx -- int: number of (pre-attention) Bi-LSTM timesteps
        Ty -- int: number of (post-attention) LSTM timesteps
        n_a -- int: number of units for (pre-attention) Bi-LSTM
        n_s -- int: number of units for (post-attention) LSTM
        human_vocab_size -- int: size of the python dictionary "human_vocab"
        machine_vocab_size -- int: size of the python dictionary "machine_vocab"
    ## Returns: 
        model -- keras.Model: keras model
    """
    X = Input(shape=(Tx, human_vocab_size))
    # Initial hidden state
    s0 = Input(shape=(n_s,), name='s0')
    # Intial cell state
    c0 = Input(shape=(n_s,), name='c0')
    # Hidden state
    s = s0
    # Cell state
    c = c0
    a = Bidirectional(LSTM(units=n_a, return_sequences=True))(X)
    outputs = []
    
    for t in range(Ty): 
        context = one_step_attention(a, s)
        
        _, s, c = post_activation_LSTM_cell(context, [s, c])
        
        out = output_layer(s)
        
        outputs.append(out)
    
    model = Model(inputs=[X, s0, c0], outputs=outputs)
    return model

In [38]:
print(len(human_vocab))
print(len(machine_vocab))

37
11


## Intialize model

In [39]:
# model = modelf(Tx, Ty, n_a, n_s, len(human_vocab), len(machine_vocab))
# the pretrained model need 37
model = modelf(Tx, Ty, n_a, n_s, 37, len(machine_vocab))

In [40]:
print(len(human_vocab), len(machine_vocab))

37 11


In [41]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 30, 37)]             0         []                            
                                                                                                  
 s0 (InputLayer)             [(None, 64)]                 0         []                            
                                                                                                  
 bidirectional_1 (Bidirecti  (None, 30, 64)               17920     ['input_2[0][0]']             
 onal)                                                                                            
                                                                                                  
 repeat_vector_1 (RepeatVec  (None, 30, 64)               0         ['s0[0][0]',            

In [42]:
opt = Adam(
    learning_rate=0.0005, 
    beta_1=0.9,
    beta_2=0.999, 
    decay=0.01
)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

## Reshape outputs (Yoh)

In [43]:
model.outputs
# The model outputs contain a list of 10 tensor
# each tensor has shape (m, 11) --> reshape Yoh

[<KerasTensor: shape=(None, 11) dtype=float32 (created by layer 'dense_1')>,
 <KerasTensor: shape=(None, 11) dtype=float32 (created by layer 'dense_1')>,
 <KerasTensor: shape=(None, 11) dtype=float32 (created by layer 'dense_1')>,
 <KerasTensor: shape=(None, 11) dtype=float32 (created by layer 'dense_1')>,
 <KerasTensor: shape=(None, 11) dtype=float32 (created by layer 'dense_1')>,
 <KerasTensor: shape=(None, 11) dtype=float32 (created by layer 'dense_1')>,
 <KerasTensor: shape=(None, 11) dtype=float32 (created by layer 'dense_1')>,
 <KerasTensor: shape=(None, 11) dtype=float32 (created by layer 'dense_1')>,
 <KerasTensor: shape=(None, 11) dtype=float32 (created by layer 'dense_1')>,
 <KerasTensor: shape=(None, 11) dtype=float32 (created by layer 'dense_1')>]

In [44]:
print('Yoh shape', Yoh.shape)

Yoh shape (10000, 10, 11)


In [45]:
Yoh_reshaped = Yoh.swapaxes(0, 1)
Yoh_reshaped_list = list(Yoh_reshaped)
print(len(Yoh_reshaped_list))

10


In [46]:
print(Xoh.shape)

(10000, 30, 37)


In [47]:
print(model.inputs)

[<KerasTensor: shape=(None, 30, 37) dtype=float32 (created by layer 'input_2')>, <KerasTensor: shape=(None, 64) dtype=float32 (created by layer 's0')>, <KerasTensor: shape=(None, 64) dtype=float32 (created by layer 'c0')>]


In [48]:
s0 = np.zeros((m, n_s))
c0 = np.zeros((m, n_s))

In [49]:
print(Xoh.shape)
print(len(Yoh_reshaped_list))

(10000, 30, 37)
10


## Lets train the model for one epochs

In [50]:
history = model.fit([Xoh, s0, c0], Yoh_reshaped_list, epochs=1, batch_size=100)



- We have trained and save weights, so it doesn't take time and computational resource to train.
- If you wish, you can train the model for longer (higher epochs)

In [51]:
model.load_weights('./models/model.h5')

## Test with our examples

In [52]:
EXAMPLES  = ['3 May 1979', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007', 'Saturday May 9 2018', 'March 3 2001', 'March 3rd 2001', '1 March 2001']
X_test = np.array([string_to_int(e, Tx, human_vocab) for e in EXAMPLES])
X_test1 = np.array(list(map(lambda x: to_categorical(x, len(human_vocab)), X_test)))
s00 = np.zeros((len(EXAMPLES), n_s))
c00 = np.zeros((len(EXAMPLES), n_s))
Y_test1 = model.predict([X_test1, s00, c00])
read_outputs =np.array(tf.argmax(Y_test1, axis=-1))
test_outputs = []
for i in range(len(EXAMPLES)): 
    e = read_outputs[:, i]
    e_idx = ''.join([inv_machine_vocab[e_i] for e_i in e])
    test_outputs.append(e_idx)



In [53]:
for i in range(len(EXAMPLES)): 
    print(f'Date: {EXAMPLES[i]} | Predictions: {test_outputs[i]}')

Date: 3 May 1979 | Predictions: 1979-05-33
Date: 5 April 09 | Predictions: 2009-04-05
Date: 21th of August 2016 | Predictions: 2016-08-20
Date: Tue 10 Jul 2007 | Predictions: 2007-07-10
Date: Saturday May 9 2018 | Predictions: 2018-05-09
Date: March 3 2001 | Predictions: 2001-03-03
Date: March 3rd 2001 | Predictions: 2001-03-03
Date: 1 March 2001 | Predictions: 2001-03-01
