In [47]:
import numpy as np
import tqdm
import tensorflow as tf
from nmt_utils import *
import pickle
import random

In [50]:
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load these files when they come from a trusted source.
with open('datasets/dataset', 'rb') as fb:
    dataset = pickle.load(fb)

with open('vocabs/human_vocab', 'rb') as fb:
    human_vocab = pickle.load(fb)

with open('vocabs/machine_vocab', 'rb') as fb:
    machine_vocab = pickle.load(fb)

with open('vocabs/inv_machine_vocab', 'rb') as fb:
    inv_machine_vocab = pickle.load(fb)


m = len(dataset)

# Shuffle with a dedicated, seeded generator so the train/test split is the
# same on every "Restart & Run All" (an unseeded shuffle gives a new split each run).
SEED = 42
random.Random(SEED).shuffle(dataset)

# ~0.3% test set.
# Slice with an explicit boundary: `dataset[:-0]` would be empty and
# `dataset[-0:]` the whole list, so the negative-index form breaks when m < 300.
n_test = m // 300
train = dataset[:m - n_test]
test = dataset[m - n_test:]

len(train), len(test)

(10323, 34)

In [51]:
# Show the character -> index mapping loaded from 'vocabs/human_vocab'
print(human_vocab)

{' ': 0, ',': 1, '.': 2, '/': 3, '0': 4, '1': 5, '2': 6, '3': 7, '4': 8, '5': 9, '6': 10, '7': 11, '8': 12, '9': 13, 'a': 14, 'b': 15, 'c': 16, 'd': 17, 'e': 18, 'f': 19, 'i': 20, 'k': 21, 'l': 22, 'm': 23, 'n': 24, 'o': 25, 'p': 26, 'q': 27, 'r': 28, 's': 29, 't': 30, 'u': 31, 'v': 32, 'x': 33, 'y': 34, 'z': 35, 'ç': 36, 'ü': 37, 'ı': 38, 'ş': 39, 'ə': 40, '<unk>': 41, '<pad>': 42}


In [52]:
# Fixed sequence lengths, so that every input and every output has the same length.
Tx = 30  # maximum length of an input (human-readable) date
Ty = 10  # outputs use the xxxx-xx-xx format, so every output is exactly 10 characters long
# NOTE(review): the whole `dataset` is encoded here, including the rows held out as `test`
# above, and these arrays are what is later passed to fit() -- so the test rows are seen
# during training. Encoding `train` instead would avoid this, but the s0/c0 arrays that are
# sized with `m` would then have to be resized to match.
X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)

# X and Y represent each example as a sequence of character indices; Xoh and Yoh hold the
# same sequences with every index expanded to a one-hot vector along the last (depth) axis.

print("X.shape:", X.shape)
print("Y.shape:", Y.shape)
print("Xoh.shape:", Xoh.shape)
print("Yoh.shape:", Yoh.shape)

X.shape: (10357, 30)
Y.shape: (10357, 10)
Xoh.shape: (10357, 30, 43)
Yoh.shape: (10357, 10, 11)


In [53]:
def softmax(x, axis=1):
    """Softmax activation function normalised along a chosen axis.
    # Arguments
        x : Tensor with at least 2 dimensions.
        axis: Integer, axis along which the softmax normalization is applied.
    # Returns
        Tensor, output of softmax transformation (same shape as `x`).
    # Raises
        ValueError: In case `dim(x) < 2`.
    """
    ndim = K.ndim(x)
    if ndim < 2:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')

    # The backend softmax normalises the last axis, so it is only a valid
    # shortcut for a 2D tensor when the requested axis IS the last axis.
    # (Previously every 2D input took this path and `axis` was ignored.)
    if ndim == 2 and axis in (1, -1):
        return K.softmax(x)

    # Generic path: subtract the per-slice maximum before exponentiating
    # for numerical stability, then normalise along `axis`.
    e = K.exp(x - K.max(x, axis=axis, keepdims=True))
    s = K.sum(e, axis=axis, keepdims=True)
    return e / s

In [54]:
# Hidden-state sizes of the two recurrent parts of the network
n_a = 32  # units of the pre-attention, bi-directional LSTM hidden state 'a'
n_s = 64  # units of the post-attention LSTM hidden state "s"

# Vocabulary sizes (number of distinct symbols on the input and output side)
len_human_vocab = len(human_vocab)
len_machine_vocab = len(machine_vocab)

In [55]:
from tensorflow.keras import layers

class neural_translation_model(layers.Layer):
    """Builder for the attention-based date translation network.

    The constructor creates every layer whose weights are shared across the
    `Ty` output time steps; `model()` wires them into a `tf.keras.Model`.

    # Arguments
        Tx: length of the input sequence.
        Ty: number of output time steps.
        n_a: units of the pre-attention, bi-directional LSTM hidden state 'a'.
        n_s: units of the post-attention LSTM hidden state "s".
        human_vocab_size: one-hot depth of each input time step.
        machine_vocab_size: number of classes predicted at each output step.
    """

    def __init__(self, Tx = 30, Ty = 10, n_a = 32, n_s = 64, human_vocab_size = len_human_vocab
                                                           , machine_vocab_size = len_machine_vocab):
        # A Layer subclass must initialise its base class before attributes
        # (in particular sub-layers) are assigned to it.
        super().__init__()

        # Model hyper-parameters
        self.Tx = Tx
        self.Ty = Ty
        self.n_a = n_a # number of units for the pre-attention, bi-directional LSTM's hidden state 'a'
        self.n_s = n_s # number of units for the post-attention LSTM's hidden state "s"
        self.human_vocab_size = human_vocab_size
        self.machine_vocab_size = machine_vocab_size

        # These layers are reused at every output time step so that their weights are shared.
        # They are created once here instead of being re-instantiated inside the loop.
        self.repeator = layers.RepeatVector(Tx)
        self.concatenator = layers.Concatenate(axis=-1)
        self.densor1 = layers.Dense(10, activation = "tanh")
        self.densor2 = layers.Dense(1, activation = "relu")
        # Custom softmax defined in this notebook; its default axis=1 normalises over the Tx input steps
        self.activator = layers.Activation(softmax, name='attention_weights')
        self.dotor = layers.Dot(axes = 1)

        self.post_activation_LSTM_cell = layers.LSTM(n_s, return_state = True)
        # Use the constructor argument rather than the global `machine_vocab`, so a
        # non-default `machine_vocab_size` is actually honoured.
        self.output_layer = layers.Dense(machine_vocab_size, activation=softmax)

    def a_step_attention(self, a, s_prev):
        """Compute the context vector for one output time step.

        # Arguments
            a: pre-attention Bi-LSTM activations, one vector per input step.
            s_prev: previous hidden state of the post-attention LSTM.
        # Returns
            Context tensor: the attention-weighted sum of `a` over the input steps.
        """
        # Repeat s_prev Tx times so it can be paired with every input-step activation
        s_prev = self.repeator(s_prev)
        # Concatenate each activation with the post-attention hidden state
        concatenation = self.concatenator([a,s_prev])

        # Small fully connected network that scores every input step
        # intermediate energies
        e = self.densor1(concatenation)
        # energies
        energies = self.densor2(e)
        # softmax over the input steps turns the energies into attention weights (alphas)
        alpha = self.activator(energies)

        # context = sum_over_t_x( alpha(t_y,t_x) * a(t_x) )
        context = self.dotor([alpha,a])

        return context

    def model(self):
        """Assemble and return the `tf.keras.Model`.

        # Returns
            Model taking `[X, s0, c0]` (one-hot input plus initial hidden and cell
            state of the post-attention LSTM) and producing a list of `Ty`
            outputs, one class distribution per output step.
        """
        X  = layers.Input(shape = (self.Tx,self.human_vocab_size))
        s0 = layers.Input(shape = (self.n_s,), name ='s0')
        c0 = layers.Input(shape = (self.n_s,), name ='c0')

        # Running hidden / cell state of the post-attention LSTM
        s = s0
        c = c0

        # Pre-attention encoder: one activation vector per input step
        a = layers.Bidirectional(layers.LSTM(self.n_a ,return_sequences= True))(X)

        outputs = []

        for t in range(self.Ty):

            context = self.a_step_attention(a, s)

            # With return_state=True the LSTM returns (output, hidden state, cell state);
            # the first value is kept as the new `s`, the last as the new `c`.
            s, _, c = self.post_activation_LSTM_cell(context,initial_state=[s, c])

            out = self.output_layer(s)

            outputs.append(out)

        keras_model = tf.keras.Model(inputs = [X,s0,c0] , outputs = outputs)

        return keras_model


In [56]:
# Build the Keras model using the constructor's default hyper-parameters
attention_model = neural_translation_model().model()

In [57]:
# Print the layer-by-layer architecture of the assembled model
attention_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 30, 43)]     0           []                               
                                                                                                  
 s0 (InputLayer)                [(None, 64)]         0           []                               
                                                                                                  
 bidirectional_1 (Bidirectional  (None, 30, 64)      19456       ['input_2[0][0]']                
 )                                                                                                
                                                                                                  
 repeat_vector_1 (RepeatVector)  (None, 30, 64)      0           ['s0[0][0]',               

                                                                  'attention_weights[4][0]',      
                                                                  'bidirectional_1[0][0]',        
                                                                  'attention_weights[5][0]',      
                                                                  'bidirectional_1[0][0]',        
                                                                  'attention_weights[6][0]',      
                                                                  'bidirectional_1[0][0]',        
                                                                  'attention_weights[7][0]',      
                                                                  'bidirectional_1[0][0]',        
                                                                  'attention_weights[8][0]',      
                                                                  'bidirectional_1[0][0]',        
          

In [58]:
# Adam optimizer (legacy API, which accepts the `decay` argument)
opt = tf.keras.optimizers.legacy.Adam(
    learning_rate=0.01,
    beta_1=0.9,
    beta_2=0.999,
    decay=0.01,
)
# The same loss and metric are applied to each of the model's outputs
attention_model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [59]:
# Zero initial hidden state (s0) and cell state (c0) for the post-attention LSTM,
# one row per training example. The row count is taken from Xoh itself so that all
# three model inputs always agree, instead of relying on the global `m`.
n_examples = Xoh.shape[0]
s0 = np.zeros((n_examples, n_s))
c0 = np.zeros((n_examples, n_s))
# The model has one output per time step, so the targets are passed as a list:
# swapping the first two axes of Yoh and splitting gives one array per output step.
outputs = list(Yoh.swapaxes(0,1))

In [60]:
# Train on the one-hot inputs with zero initial states; `outputs` is the list of per-step targets.
# NOTE(review): the recorded run below was stopped by a KeyboardInterrupt during epoch 58 of 70.
attention_model.fit([Xoh, s0, c0], outputs, epochs=70, batch_size=16)

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70


Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70


Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70


Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70


Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70

KeyboardInterrupt: 

In [61]:
EXAMPLES = ['10 iyun 2025', '21 avqust 2016', '10 iyun 2007', 'Şənbə May 9 2018', 'Mart 3 2001', '1 mart 2001','aprelin 18-də 98']
# Zero initial hidden / cell state for a batch containing a single example
s00 = np.zeros((1, n_s))
c00 = np.zeros((1, n_s))
for example in EXAMPLES:
    # Normalise the text: lower-case it and replace '-' with a space
    example = example.lower().replace('-',' ')
    # Convert the characters to vocabulary indices
    # (presumably padded/truncated to Tx by the nmt_utils helper -- confirm)
    source = string_to_int(example, Tx, human_vocab)
    # One-hot encode every index -> (Tx, vocab), then add the batch axis -> (1, Tx, vocab).
    # (The earlier pair of swapaxes(0, 1) calls cancelled each other and was dropped.)
    source = np.array([to_categorical(idx, num_classes=len(human_vocab)) for idx in source])
    source = np.expand_dims(source, axis=0)
    prediction = attention_model.predict([source, s00, c00])
    # predict returns one array per output step; argmax over the class axis gives
    # shape (Ty, 1), which is flattened so each entry is a scalar (int() on a
    # 1-element array is deprecated in NumPy).
    prediction = np.argmax(prediction, axis = -1).ravel()
    output = [inv_machine_vocab[int(i)] for i in prediction]
    print("source:", example)
    print("output:", ''.join(output),"\n")

source: 10 iyun 2025
output: 2025-06-10 

source: 21 avqust 2016
output: 2016-08-21 

source: 10 iyun 2007
output: 2007-06-10 

source: şənbə may 9 2018
output: 2018-05-09 

source: mart 3 2001
output: 2001-03-03 

source: 1 mart 2001
output: 2001-03-01 

source: aprelin 18 də 98
output: 1998-04-18 



In [62]:
# Save only the model weights; the architecture has to be rebuilt before they can be loaded back.
# NOTE(review): assumes the 'models/' directory already exists -- confirm.
attention_model.save_weights('models/58epochs_weights.h5')

In [66]:
# Keep only the first field of every held-out pair (the human-readable date string)
test_list = [pair[0] for pair in test]
test_list

['fevral 18 2019',
 '4 oktyabr 1973',
 '7 iyun 2000',
 '23.08.94',
 '17 iyun 2007',
 '28 iyun 1974 cümə',
 'avqustun 17 də 1990',
 '20 fev 1979',
 'mart 15 1998',
 '18 may 1995 cümə axşamı',
 '6 may 1991 bazar ertəsi',
 '17 yanvar 2004 şənbə',
 '13 yan 2012',
 '2 mart 2023 cümə axşamı',
 '13 noy 1999',
 '17.11.09',
 '20 oktyabr 2001 şənbə',
 '28 mart 1973 çərşənbə',
 '19 iyul 2018',
 '22 oktyabr 1994',
 '19 yanvar 2013 şənbə',
 '9 may 1998',
 'oktyabrın 19 da 1972',
 '21 okt 2005',
 '17 noyabr 2007 şənbə',
 '13 avqust 1974 çərşənbə axşamı',
 '15 iyun 1984',
 '20 oktyabr 1995 cümə',
 '29 aprel 1978 şənbə',
 '13 oktyabr 1977 cümə axşamı',
 '27 may 2022 cümə',
 '22 dekabr 2013 bazar',
 '4 iyun 1979',
 '9 iyun 1998 çərşənbə axşamı']

In [67]:
for example in test_list:
    # Normalise the text: lower-case it and replace '-' with a space
    example = example.lower().replace('-',' ')
    # Convert the characters to vocabulary indices
    # (presumably padded/truncated to Tx by the nmt_utils helper -- confirm)
    source = string_to_int(example, Tx, human_vocab)
    # One-hot encode every index -> (Tx, vocab), then add the batch axis -> (1, Tx, vocab).
    # (The earlier pair of swapaxes(0, 1) calls cancelled each other and was dropped.)
    source = np.array([to_categorical(idx, num_classes=len(human_vocab)) for idx in source])
    source = np.expand_dims(source, axis=0)
    # s00 / c00 are the single-example zero states
    prediction = attention_model.predict([source, s00, c00])
    # predict returns one array per output step; argmax over the class axis gives
    # shape (Ty, 1), which is flattened so each entry is a scalar (int() on a
    # 1-element array is deprecated in NumPy).
    prediction = np.argmax(prediction, axis = -1).ravel()
    output = [inv_machine_vocab[int(i)] for i in prediction]
    print("source:", example)
    print("output:", ''.join(output),"\n")

source: fevral 18 2019
output: 2019-02-18 

source: 4 oktyabr 1973
output: 1973-10-04 

source: 7 iyun 2000
output: 2000-06-07 

source: 23.08.94
output: 1994-08-23 

source: 17 iyun 2007
output: 2007-06-17 

source: 28 iyun 1974 cümə
output: 1974-06-28 

source: avqustun 17 də 1990
output: 1990-08-17 

source: 20 fev 1979
output: 1979-02-20 

source: mart 15 1998
output: 1998-03-15 

source: 18 may 1995 cümə axşamı
output: 1995-05-18 

source: 6 may 1991 bazar ertəsi
output: 1991-05-06 

source: 17 yanvar 2004 şənbə
output: 2004-01-17 

source: 13 yan 2012
output: 2012-01-13 

source: 2 mart 2023 cümə axşamı
output: 2023-03-02 

source: 13 noy 1999
output: 1999-11-13 

source: 17.11.09
output: 2009-11-17 

source: 20 oktyabr 2001 şənbə
output: 2001-10-20 

source: 28 mart 1973 çərşənbə
output: 1973-03-28 

source: 19 iyul 2018
output: 2018-07-19 

source: 22 oktyabr 1994
output: 1994-10-22 

source: 19 yanvar 2013 şənbə
output: 2013-01-19 

source: 9 may 1998
output: 1998-05-09 

sour