In [1]:
import numpy as np
import urllib.request
import tensorflow as tf
from keras.layers import *
from tensorflow.keras import optimizers
from keras import Model
from keras.layers import Layer
import keras.backend as K
from keras.layers import Input, Dense
from keras.models import Sequential, load_model
from keras.metrics import mean_squared_error

In [3]:
# Download the raw training file as bytes; the next cell decodes and parses it.
# The timeout keeps the cell from hanging forever if the host does not respond.
with urllib.request.urlopen(
    "https://scale-static-assets.s3-us-west-2.amazonaws.com/ml-interview/expand/train.txt",
    timeout=60,
) as url:
    data = url.read()

In [4]:
# Turn the downloaded payload into a list of text lines. The isinstance guard
# makes the cell safe to re-run: after the first run `data` is already a list
# and no longer has a .decode() method.
if isinstance(data, bytes):
    data = data.decode('utf-8').split('\n')

# Shuffle in place with a fixed seed so the record order is the same after
# every Restart & Run All.
np.random.default_rng(42).shuffle(data)

# Split every "factor=expansion" line into its two sides.
factors, expansion = [], []
for item in data:
    temp = item.strip().split('=')
    if len(temp) < 2:
        # Blank, whitespace-only or '='-less line: there is no pair to store.
        continue
    factors.append(temp[0])
    expansion.append(temp[1])

In [5]:
# Token -> integer id. Ids are assigned in listing order: padding/space (0),
# the two sequence markers, the ten digits, the thirteen letters, the three
# trigonometric names and finally the operators and brackets.
vocab = {
    token: index
    for index, token in enumerate(
        [' ', '<START>', '<END>']
        + [str(digit) for digit in range(10)]
        + list('achijknostxyz')
        + ['sin', 'cos', 'tan', '+', '-', '*', '/', '**', '(', ')']
    )
}

In [6]:
class CharacterTable(object):
    """Given a set of characters:
    + Encode them based on the vocabulary created
    + Decode the representation to their character output
    """
    def __init__(self, chars):
        # initialize the character table
        self.chars = list(chars)
        self.char_indices = chars
        self.indices_char = {v:k for k,v in self.char_indices.items()}

    def encode(self, C, num_rows):
        x = np.array([self.char_indices[c] for i, c in enumerate(C)])
        x = np.concatenate((x, np.zeros((1,num_rows-len(x)))), axis=None)
        return x

    def decode(self, x, calc_argmax=True):
        return [self.indices_char[c] for c in x]

In [7]:
# Single encoder/decoder table built from the module-level `vocab` mapping.
ctable = CharacterTable(vocab)

In [8]:
# Fixed sequence sizes used for padding.
MAX_FACTOR_LEN = 29      # length of every encoded input row
MAX_EXPANSION_LEN = 28   # longest expansion, excluding <START>/<END>

# Encoder inputs: one zero-padded id vector per factored expression.
x = np.array([ctable.encode(item, MAX_FACTOR_LEN) for item in factors])

# Decoder sequences: <START>, the expansion ids, <END>, then zero padding,
# which gives MAX_EXPANSION_LEN + 2 ids per row.
start_id = ctable.char_indices['<START>']
end_id = ctable.char_indices['<END>']
y = []
for item in expansion:
    if len(item) > MAX_EXPANSION_LEN:
        # A longer row would make `y` ragged instead of a 2-D array.
        raise ValueError(
            "expansion of length {} exceeds {}".format(len(item), MAX_EXPANSION_LEN))
    temp = np.concatenate(
        ([start_id],
         ctable.encode(item, len(item)),
         [end_id],
         np.zeros(MAX_EXPANSION_LEN - len(item))),
        axis=None)
    y.append(temp)
y = np.array(y)

------------------------------------------------------------------------------------------------------------

In [10]:
# attention model

import tensorflow as tf
import os
from tensorflow.keras.layers import Layer
from tensorflow.keras import backend as K


class AttentionLayer(Layer):
    """
    This class implements Bahdanau attention (https://arxiv.org/pdf/1409.0473.pdf).
    There are three sets of weights introduced W_a, U_a, and V_a.

    The layer is called on ``[encoder_out_seq, decoder_out_seq]`` and returns
    ``(context_vectors, attention_weights)``.
    """

    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable weights.

        input_shape: [encoder_shape, decoder_shape]; index 2 of each entry is
            used as that sequence's feature size.
        """
        assert isinstance(input_shape, (list, tuple))
        en_hidden = input_shape[0][2]
        de_hidden = input_shape[1][2]

        # W_a projects encoder features, U_a projects decoder features into
        # the encoder feature size, V_a reduces the sum to one score.
        self.W_a = self.add_weight(name='W_a',
                                   shape=tf.TensorShape((en_hidden, en_hidden)),
                                   initializer='uniform',
                                   trainable=True)
        self.U_a = self.add_weight(name='U_a',
                                   shape=tf.TensorShape((de_hidden, en_hidden)),
                                   initializer='uniform',
                                   trainable=True)
        self.V_a = self.add_weight(name='V_a',
                                   shape=tf.TensorShape((en_hidden, 1)),
                                   initializer='uniform',
                                   trainable=True)

        super(AttentionLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, inputs, verbose=False):
        """
        inputs: [encoder_output_sequence, decoder_output_sequence]
        verbose: print intermediate tensor shapes while the graph is traced.

        Returns (c_outputs, e_outputs): the context vectors and the attention
        weights, one entry per decoder time step.
        """
        # Accept a tuple as well as a list, matching the isinstance check in build().
        assert isinstance(inputs, (list, tuple))
        encoder_out_seq, decoder_out_seq = inputs
        if verbose:
            print('encoder_out_seq>', encoder_out_seq.shape)
            print('decoder_out_seq>', decoder_out_seq.shape)

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state """

            assert_msg = "States must be a list. However states {} is of type {}".format(states, type(states))
            assert isinstance(states, (list, tuple)), assert_msg

            # Sizes needed for reshaping.
            en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]

            # S.Wa where S = [s0, s1, ..., si]
            # <= batch_size*en_seq_len, latent_dim
            reshaped_enc_outputs = K.reshape(encoder_out_seq, (-1, en_hidden))
            # <= batch_size, en_seq_len, latent_dim
            W_a_dot_s = K.reshape(K.dot(reshaped_enc_outputs, self.W_a), (-1, en_seq_len, en_hidden))
            if verbose:
                print('wa.s>',W_a_dot_s.shape)

            # hj.Ua
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h>',U_a_dot_h.shape)

            # tanh(S.Wa + hj.Ua)
            # <= batch_size*en_seq_len, latent_dim
            reshaped_Ws_plus_Uh = K.tanh(K.reshape(W_a_dot_s + U_a_dot_h, (-1, en_hidden)))
            if verbose:
                print('Ws+Uh>', reshaped_Ws_plus_Uh.shape)

            # softmax(va.tanh(S.Wa + hj.Ua))
            # <= batch_size, en_seq_len
            e_i = K.reshape(K.dot(reshaped_Ws_plus_Uh, self.V_a), (-1, en_seq_len))
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function for computing ci using ei """
            # Weighted sum of the encoder outputs.
            # <= batch_size, hidden_size
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]

        def create_initial_state(inputs, hidden_size):
            """ Build an all-zero (batch_size, hidden_size) state from `inputs` """
            # We are not using initial states, but need to pass something to the K.rnn function
            fake_state = K.zeros_like(inputs)  # <= (batch_size, enc_seq_len, latent_dim)
            fake_state = K.sum(fake_state, axis=[1, 2])  # <= (batch_size)
            fake_state = K.expand_dims(fake_state)  # <= (batch_size, 1)
            fake_state = K.tile(fake_state, [1, hidden_size])  # <= (batch_size, hidden_size)
            return fake_state

        fake_state_c = create_initial_state(encoder_out_seq, encoder_out_seq.shape[-1])  # <= (batch_size, latent_dim)
        fake_state_e = create_initial_state(encoder_out_seq, encoder_out_seq.shape[1])  # <= (batch_size, enc_seq_len)

        # Energy outputs
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        _, e_outputs, _ = K.rnn(
            energy_step, decoder_out_seq, [fake_state_e],
        )

        # Context vectors
        _, c_outputs, _ = K.rnn(
            context_step, e_outputs, [fake_state_c],
        )

        return c_outputs, e_outputs

    def compute_output_shape(self, input_shape):
        """ Outputs produced by the layer """
        encoder_shape, decoder_shape = input_shape[0], input_shape[1]
        return [
            # Context vectors are sums of encoder outputs, so their last
            # dimension is the encoder feature size, not the decoder's.
            tf.TensorShape((decoder_shape[0], decoder_shape[1], encoder_shape[2])),
            # One attention weight per encoder position.
            tf.TensorShape((decoder_shape[0], decoder_shape[1], encoder_shape[1]))
        ]

In [11]:
from tensorflow.keras.layers import Dense, LSTM, Bidirectional, Embedding, Concatenate
from tensorflow.keras import Input, Model

# Encoder input: one row of 29 token ids per example.
encoder_inputs = Input(shape=(29,))

# Token embedding with 654 output dimensions; other sizes
# (e.g. 100, 256, 512, 1000) can be tried here.
enc_emb = Embedding(input_dim=len(vocab), output_dim=654)(encoder_inputs)

# Bidirectional LSTM that returns the full output sequence plus the final
# hidden/cell state of each direction.
enc_lstm1 = Bidirectional(LSTM(units=256, return_sequences=True, return_state=True))
(encoder_outputs1,
 forw_state_h, forw_state_c,
 back_state_h, back_state_c) = enc_lstm1(enc_emb)

# Join the forward and backward states so each final state has 2*256 units.
final_enc_h = Concatenate()([forw_state_h, back_state_h])
final_enc_c = Concatenate()([forw_state_c, back_state_c])

# Final encoder states, used as the decoder's initial state.
encoder_states = [final_enc_h, final_enc_c]

In [12]:
# Decoder input: token ids of variable length.
decoder_inputs = Input(shape=(None,))

# Decoder embedding, same size as the encoder embedding. The layer object is
# kept in its own variable because the inference decoder applies it again.
dec_emb_layer = Embedding(input_dim=len(vocab), output_dim=654)
dec_emb = dec_emb_layer(decoder_inputs)

# The encoder is bidirectional, so its concatenated states have 256*2 = 512
# units; the single decoder LSTM uses that size and starts from those states.
decoder_lstm = LSTM(units=512, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=encoder_states)

# Attention over the encoder outputs, queried by the decoder outputs.
attention_layer = AttentionLayer()
attention_result, attention_weights = attention_layer([encoder_outputs1, decoder_outputs])

# Concatenate the decoder LSTM output with the attention context.
decoder_concat_input = Concatenate(axis=-1, name='concat_layer')(
    [decoder_outputs, attention_result]
)

# Softmax over the vocabulary at every decoder step. `decoder_outputs` is
# rebound here from the LSTM output to the final token distribution.
decoder_dense = Dense(units=len(vocab), activation='softmax')
decoder_outputs = decoder_dense(decoder_concat_input)

# Training model: (encoder ids, decoder ids) -> next-token probabilities.
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)

In [14]:
from sklearn.model_selection import train_test_split
# 80/20 train/validation split; the fixed random_state makes the split
# reproducible for a given ordering of x and y.
X_train, X_test, y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=42)

In [15]:
# compile model
model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Define callbacks
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
# Keep only the checkpoint with the best validation accuracy. Without
# save_best_only the `monitor` argument has no effect and the file is
# rewritten after every epoch.
checkpoint = ModelCheckpoint("model_checkpoint.h5", monitor='val_accuracy', save_best_only=True)
early_stopping = EarlyStopping(monitor='val_accuracy', patience=5)
callbacks_list = [checkpoint, early_stopping]

# Training set
encoder_input_data = X_train

# Decoder input: every position except the last one, so it has the same
# length as the target.
decoder_input_data = y_train[:,:-1]

# Decoder target: the same sequence shifted one step ahead (teacher forcing).
decoder_target_data =  y_train[:,1:]

# Development set, sliced the same way
encoder_input_test = X_test
decoder_input_test = y_test[:,:-1]
decoder_target_test=  y_test[:,1:]

model.summary()
EPOCHS= 1
history = model.fit([encoder_input_data, decoder_input_data],decoder_target_data, 
                    epochs=EPOCHS, 
                    batch_size=128,
                    validation_data = ([encoder_input_test, decoder_input_test],decoder_target_test),
                    callbacks= callbacks_list)

# Save the trained model (architecture + weights) in the working directory.
model.save("model_new.h5") # can give whole path to save model

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 29)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 29, 654)      23544       ['input_1[0][0]']                
                                                                                                  
 input_2 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 bidirectional (Bidirectional)  [(None, 29, 512),    1865728     ['embedding[0][0]']              
                                 (None, 256),                                                 

In [16]:
# Reload the saved training model; the custom layer class has to be supplied
# under its own class name so it can be deserialised.
del model 
custom_obj = {'AttentionLayer': AttentionLayer}
model = load_model('model_new.h5', custom_objects=custom_obj)

# NOTE: the inference models below are assembled from the layer objects and
# tensors created in the earlier cells (encoder_inputs, dec_emb_layer,
# decoder_lstm, attention_layer, decoder_dense), not from the reloaded `model`.
encoder_model = Model(encoder_inputs, outputs = [encoder_outputs1, final_enc_h, final_enc_c])

# Decoder Inference
# State inputs; the size has to match the units of the decoder LSTM (512).
decoder_state_h = Input(shape=(512,))
decoder_state_c = Input(shape=(512,))

# Encoder output sequence, fed to the attention layer at every step.
decoder_hidden_state_input = Input(shape=(29,512)) 
# get decoder states
dec_states = [decoder_state_h, decoder_state_c]

# Re-apply the decoder embedding and LSTM, starting from the supplied states.
dec_emb2 = dec_emb_layer(decoder_inputs)
decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=dec_states)

# Attention inference
attention_result_inf, attention_weights_inf = attention_layer([decoder_hidden_state_input, decoder_outputs2])
decoder_concat_input_inf = Concatenate(axis=-1, name='concat_layer')([decoder_outputs2, attention_result_inf])

# New LSTM states are returned so the caller can feed them into the next step.
dec_states2= [state_h2, state_c2]
decoder_outputs2 = decoder_dense(decoder_concat_input_inf)

# get decoder model
decoder_model= Model(
                    [decoder_inputs] + [decoder_hidden_state_input, decoder_state_h, decoder_state_c],
                     [decoder_outputs2]+ dec_states2)

In [17]:
def get_predicted_sentence(input_seq, max_len=29):
    """Greedily decode one encoded input sequence.

    input_seq: batch of size 1 with the encoded input, as accepted by
        `encoder_model.predict`.
    max_len: maximum number of decoding steps (at least 1). Decoding stops
        earlier when '<END>' is produced.

    Returns a string in which every predicted token, including a final
    '<END>' if one was produced, is preceded by a single space.
    """
    # Encode the input; the encoder states initialise the decoder.
    enc_output, state_h, state_c = encoder_model.predict(input_seq)

    # Target sequence of length 1 holding the start token.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = vocab['<START>']

    decoded_tokens = []
    # Count decoding steps directly. Counting whitespace-split tokens of the
    # output would never reach the limit if ' ' were predicted repeatedly.
    for _ in range(max_len):
        output_tokens, state_h, state_c = decoder_model.predict(
            [target_seq, enc_output, state_h, state_c])

        # Greedy choice: most probable token at the last position.
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = ctable.indices_char[sampled_token_index]
        decoded_tokens.append(sampled_char)

        if sampled_char == '<END>':
            break

        # Feed the chosen token back in as the next decoder input.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index

    return ''.join(' ' + token for token in decoded_tokens)

In [18]:
# Twenty independent draws from 0..9 (repeats possible), used below as row
# indices into the held-out set.
temp = [
    np.random.choice(10)
    for _draw in range(20)
]

In [None]:
# Show the reference expansion next to the model's greedy prediction.
for i in temp:
    # Reference: drop the leading start id, then cut at '<END>' so the end
    # marker and the padding after it are not printed.
    true_tokens = ctable.decode(y_test[i][1:])
    if '<END>' in true_tokens:
        true_tokens = true_tokens[:true_tokens.index('<END>')]

    # Prediction: remove the trailing token only when it really is '<END>';
    # a sequence that stopped at the length limit keeps its last token.
    pred_tokens = get_predicted_sentence(X_test[i].reshape(1,29)).split()
    if pred_tokens and pred_tokens[-1] == '<END>':
        pred_tokens = pred_tokens[:-1]

    print('TRUE: ', ''.join(true_tokens), '------------ ', 'PRED: ', ''.join(pred_tokens))

In [None]:
# Persist both inference models. 'decoder.h5' must come from decoder_model;
# saving encoder_model under that name would leave the decoder unsaved.
encoder_model.save('encoder.h5')
decoder_model.save('decoder.h5')









In [None]:
# Colab only: download the two saved inference models through the browser.
from google.colab import files
for artifact in ('encoder.h5', 'decoder.h5'):
    files.download(artifact)