In [0]:
# Mount the user's Google Drive into the Colab filesystem at /content/drive/.
from google.colab import drive
drive.mount('/content/drive/')

In [0]:
!pip install -q keras

In [0]:
!pip install -q pydrive

In [0]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate the Colab user, then give PyDrive the application-default credentials.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# NOTE(review): this rebinds `drive`, which the first cell imported as the
# google.colab `drive` module; from here on `drive` is a GoogleDrive client.
drive = GoogleDrive(gauth)

In [0]:
import numpy as np
from keras.models import Model
from keras.layers import Input, LSTM, GRU, Dense, Embedding,Bidirectional, RepeatVector, Concatenate, Activation, Dot, Lambda
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import keras.backend as K

In [0]:
# When the backend reports at least one GPU, rebind the names LSTM / GRU to the
# CuDNN implementations so that cells below which reference `LSTM` / `GRU` use them.
# NOTE(review): `_get_available_gpus` is a private (underscore-prefixed) backend
# helper — presumably only available on the TensorFlow backend of older Keras; confirm.
if len(K.tensorflow_backend._get_available_gpus()) > 0:
  print("GPU Available !!!!!")
  from keras.layers import CuDNNLSTM as LSTM
  from keras.layers import CuDNNGRU as GRU

In [0]:
from sklearn.model_selection import train_test_split

def getData(language='ben',N_MAX=1000000):
    """Load a tab-separated parallel corpus and split it into train/test lists.

    The corpus is read from ``str(language) + '.txt'`` (UTF-8). A usable line
    holds a source sentence and its translation separated by a tab; any further
    tab-separated columns (e.g. an attribution column) are ignored. Lines that
    do not yield both fields after stripping trailing whitespace are skipped.

    Args:
        language: File stem of the corpus file.
        N_MAX: Maximum number of lines to read from the file. Skipped lines
            count towards this limit.

    Returns:
        A tuple ``(train_input_texts, train_target_texts,
        train_target_texts_inputs, test_input_texts, test_target_texts)``.
        Training targets carry a trailing ``' <eos>'`` and training decoder
        inputs a leading ``'<sos> '``; the two test lists are the raw sentences.
    """
    input_list = []
    translation_list = []

    # The context manager guarantees the file is closed even if parsing fails.
    with open(str(language)+'.txt', encoding='utf-8') as corpus:
        for line_number, line in enumerate(corpus, start=1):
            if line_number > N_MAX:
                break

            fields = line.rstrip().split('\t')
            if len(fields) < 2:
                # No tab, or nothing left after the tab once whitespace is stripped.
                continue

            # Only the first two columns are used; extra columns are dropped
            # instead of raising "too many values to unpack".
            input_list.append(fields[0])
            translation_list.append(fields[1])

    # Report the number of sentence pairs actually loaded.
    print('Total data got: '+str(len(input_list)))

    (train_input_list, test_input_list,
     train_translation_list, test_translation_list) = train_test_split(
        input_list, translation_list, test_size=0.2, random_state=42)

    train_input_texts = list(train_input_list)
    # Decoder target ends with <eos>; decoder input is the same sentence shifted by <sos>.
    train_target_texts = [translation + ' <eos>' for translation in train_translation_list]
    train_target_texts_inputs = ['<sos> ' + translation for translation in train_translation_list]

    test_input_texts = list(test_input_list)
    test_target_texts = list(test_translation_list)

    print("num train samples:", len(train_input_texts))
    if train_target_texts:
        print(train_target_texts[0])

    return train_input_texts,train_target_texts,train_target_texts_inputs,test_input_texts,test_target_texts

In [0]:
MAX_TOKENS=20000  # passed as `num_words` to both tokenizers
EMBEDDING_SIZE=100  # embedding width; also the column count of the GloVe embedding matrix


# Read at most 10000 lines of the 'ben' corpus and split them into train/test lists.
input_texts,target_texts,target_texts_inputs,test_input_texts,test_target_texts=getData(language='ben',N_MAX=10000)
N=len(input_texts)  # number of training sentence pairs

In [0]:
# ---- Source side: fit the tokenizer on the training inputs, index them, pad ----
# No `padding` argument is given here, so Keras' default padding side applies.
input_tokenizer = Tokenizer(num_words=MAX_TOKENS)
input_tokenizer.fit_on_texts(input_texts)
input_sequences = input_tokenizer.texts_to_sequences(input_texts)
MAX_LEN_INPUT = max(map(len, input_sequences))
encoder_inputs = pad_sequences(input_sequences, maxlen=MAX_LEN_INPUT)

# ---- Target side: filters='' keeps punctuation and the <sos>/<eos> markers intact ----
# '<sos>' is added to the fitting corpus because target_texts only contain '<eos>'.
output_tokenizer = Tokenizer(num_words=MAX_TOKENS, filters='')
output_tokenizer.fit_on_texts(['<sos>'] + target_texts)
target_sequences = output_tokenizer.texts_to_sequences(target_texts)
MAX_LEN_OUTPUT = max(map(len, target_sequences))
decoder_targets = pad_sequences(target_sequences, maxlen=MAX_LEN_OUTPUT, padding='post')

# Teacher-forcing inputs: the '<sos> ...' variants, padded to the same length.
target_input_sequences = output_tokenizer.texts_to_sequences(target_texts_inputs)
decoder_inputs = pad_sequences(target_input_sequences, maxlen=MAX_LEN_OUTPUT, padding='post')

# +1 because tokenizer word indices start at 1; index 0 is left for padding.
input_vocab_size = len(input_tokenizer.word_index) + 1
output_vocab_size = len(output_tokenizer.word_index) + 1

In [0]:
# Load the pre-trained GloVe vectors into a {word: float32 vector} dictionary.
# The context manager closes the file even if a line fails to parse.
embeddings = {}
with open('glove.6B.100d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        if not values:
            # Skip blank lines instead of failing on values[0].
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings[word] = coefs

print('Total %s word vectors.' % len(embeddings))

# Row i holds the GloVe vector of the word with tokenizer index i; words that
# have no GloVe vector (and the unused row 0) stay all-zero.
embedding_matrix = np.zeros((input_vocab_size, EMBEDDING_SIZE))
for word, i in input_tokenizer.word_index.items():
    embedding_vector = embeddings.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

In [0]:
# One-hot encode the padded decoder targets:
# target_one_hot_vector[sample, position, token_id] = 1 for every (sample, position),
# including padded positions, whose token id is 0.
target_one_hot_vector = np.zeros((len(target_texts), MAX_LEN_OUTPUT, output_vocab_size), dtype='float32')

# Broadcasting the row/column index grids against the id matrix sets every
# (sample, position, id) entry in a single assignment.
sample_axis = np.arange(decoder_targets.shape[0])[:, None]
position_axis = np.arange(decoder_targets.shape[1])[None, :]
target_one_hot_vector[sample_axis, position_axis, decoder_targets] = 1

In [0]:
# Encode the held-out source sentences with the tokenizer fitted on the training inputs
# and pad them to the training input length.
test_input_sequences=input_tokenizer.texts_to_sequences(test_input_texts)
test_encoder_inputs=pad_sequences(test_input_sequences,maxlen=MAX_LEN_INPUT)


# NOTE(review): getData() returns the test targets as raw sentences, without the
# ' <eos>' suffix it appends to the training targets, so these sequences contain
# no end-of-sentence token.
test_target_sequences=output_tokenizer.texts_to_sequences(test_target_texts)
test_decoder_targets=pad_sequences(test_target_sequences,maxlen=MAX_LEN_OUTPUT,padding='post')

In [0]:
# One-hot encode the padded test targets:
# test_target_one_hot_vector[sample, position, token_id] = 1 for every (sample, position).
test_target_one_hot_vector = np.zeros(shape=(len(test_target_texts), MAX_LEN_OUTPUT, output_vocab_size), dtype='float32')

# Index grids broadcast against the id matrix, so one assignment fills every entry.
test_sample_axis = np.arange(test_decoder_targets.shape[0])[:, None]
test_position_axis = np.arange(test_decoder_targets.shape[1])[None, :]
test_target_one_hot_vector[test_sample_axis, test_position_axis, test_decoder_targets] = 1

In [0]:
# Lower-case aliases of the data-derived sizes, used by the model-building cells.
max_input_length=MAX_LEN_INPUT
max_output_length=MAX_LEN_OUTPUT
embedding_size=EMBEDDING_SIZE


NN_dim_1=256  # units per direction of the bidirectional encoder LSTM
NN_dim_2=10  # width of the first (tanh) attention Dense layer
NN_dim_3=256  # decoder LSTM units; also the size of the s0 / c0 state inputs

output_words=output_vocab_size  # decoder embedding input_dim and softmax output size

In [0]:
def time_distributed_softmax(x):
    """Softmax taken along axis 1 (the time axis) instead of the last axis.

    x is expected as N x Time x Dimension. The per-(sample, feature) maximum over
    time is subtracted before exponentiating, so the largest exponent is 0;
    the result sums to 1 along axis 1.
    """
    shifted = x - K.max(x, axis=1, keepdims=True)
    numerators = K.exp(shifted)
    denominators = K.sum(numerators, axis=1, keepdims=True)
    return numerators / denominators

In [0]:
def stack(x):
    """Stack a list of tensors along a new leading axis, then swap the first two axes.

    Used to turn the per-timestep decoder outputs (Time x N x Dimension once
    stacked) into N x Time x Dimension.
    """
    return K.permute_dimensions(K.stack(x), pattern=(1, 0, 2))

In [0]:
# Encoder: padded token ids -> embeddings initialised from embedding_matrix ->
# bidirectional LSTM that returns one output vector per input position.
encoder_input=Input(shape=(max_input_length,),)
# `trainable` is not passed, so the layer's default applies to the pre-loaded weights.
encoder_embedding = Embedding(input_dim=embedding_matrix.shape[0], output_dim=embedding_size,
                              weights=[embedding_matrix],
                              input_length=max_input_length)


encoder_embedding_layer=encoder_embedding(encoder_input)
# NOTE: the name `layer1` is reused for the decoder LSTM in a later cell.
layer1=Bidirectional(LSTM(NN_dim_1,return_sequences=True))
encoder_output=layer1(encoder_embedding_layer)

In [0]:
# Attention layers are created once here and reused by attention_for_one_step()
# at every decoder step, so their weights are shared across steps.

# Repeats the previous decoder state once per encoder position.
repeat_decoder_state=RepeatVector(max_input_length)

# Joins encoder outputs and the repeated decoder state along the feature axis.
enc_dec_state_concat=Concatenate(axis=-1)

dense1=Dense(NN_dim_2,activation='tanh')

# One score per encoder position, normalised over axis 1 by time_distributed_softmax.
dense2 = Dense(1, activation=time_distributed_softmax)

# Contracts axis 1 of the attention weights with axis 1 of the encoder outputs.
h_alphas_dot=Dot(axes=1)

In [0]:
def attention_for_one_step(encoder_states_h,decoder_state_s_t_prev):
  """Compute the attention context vector for a single decoder time step.

  The decoder calls this once per output position. It relies on the shared,
  module-level layers repeat_decoder_state, enc_dec_state_concat, dense1,
  dense2 and h_alphas_dot.

  encoder_states_h       : encoder outputs, (max_input_length, NN_dim_1 * 2) per sample
                           (bidirectional).
  decoder_state_s_t_prev : previous decoder state, NN_dim_3 per sample.

  Returns the context vector produced by h_alphas_dot for this step.
  """
  # Tile the decoder state to (max_input_length, NN_dim_3) so it can be paired
  # with every encoder position.
  repeated_state = repeat_decoder_state(decoder_state_s_t_prev)

  # (max_input_length, NN_dim_1 * 2 + NN_dim_3)
  scorer_input = enc_dec_state_concat([encoder_states_h, repeated_state])

  # Small tanh layer, then one score per position with the over-time softmax.
  alphas = dense2(dense1(scorer_input))

  # Weighted combination of the encoder outputs.
  return h_alphas_dot([alphas, encoder_states_h])

In [0]:
# Decoder, unrolled in Python for max_output_length steps (the padded target length).
# Each step: attention context from the previous state s -> concatenated with the
# teacher-forcing embedding for position t -> one LSTM step -> softmax over words.
decoder_target_input=Input(shape=(max_output_length,),)

decoder_embedding = Embedding(input_dim=output_words, output_dim=embedding_size)


decoder_embedding_layer=decoder_embedding(decoder_target_input)


# NOTE: rebinds `layer1`, which the encoder cell used for the bidirectional LSTM.
# The same LSTM / Dense instances are called at every step, so weights are shared.
layer1=LSTM(NN_dim_3,return_state=True)
layer2=Dense(output_words,activation='softmax')

# Initial decoder hidden and cell state are fed in as model inputs.
s0=Input(shape=(NN_dim_3,),)
c0 = Input(shape=(NN_dim_3,), )

s=s0
c=c0

# Collects one softmax output tensor per time step.
outputs=[]

for t in range(max_output_length):
    decoder_state_s_t_prev=s
    
    #######################################   ATTENTION LAYER for 1 Step #########################################
    
    
    context_vector=attention_for_one_step(encoder_output,decoder_state_s_t_prev)
    
    #######################################   ATTENTION LAYER for 1 Step #########################################

    # Slice out the embedding of target position t, keeping a length-1 time axis.
    # NOTE(review): the lambda reads the loop variable `t` through its closure rather
    # than binding it; presumably fine because the layer is called right away, but
    # anything that re-invokes the lambda later would see the final `t` — confirm.
    target_input_t=Lambda(lambda x: x[:,t:t+1])
    teacher_forcing=target_input_t(decoder_embedding_layer)

    # A new Concatenate layer is created on every iteration; after the loop the name
    # `context_teacher_concat` refers to the last one, which the inference cell reuses.
    context_teacher_concat=Concatenate(axis=2)
    decoder_layer1_input=context_teacher_concat([context_vector,teacher_forcing])

    # One LSTM step; s and c are carried into the next iteration.
    decoder_layer1,s,c=layer1(decoder_layer1_input,initial_state=[s,c])
    decoder_output=layer2(decoder_layer1)

    outputs.append(decoder_output)

In [0]:
# `outputs` is a list of max_output_length tensors, i.e. max_output_length x N x Dimension.
# The stack() Lambda converts it to N x max_output_length x Dimension.
# NOTE: `outputs` is rebound from the list to the stacked tensor, so re-running
# this cell alone would feed the already-stacked tensor back into stack().
change_shape=Lambda(stack)
outputs=change_shape(outputs)

In [0]:
# NOTE(review): initial_s / initial_c are not referenced by any cell visible in this
# notebook; the model below is wired to s0 / c0 instead.
initial_s=Input(shape=(NN_dim_3,))
initial_c = Input(shape=(NN_dim_3,))

# Training model: encoder tokens, teacher-forcing decoder tokens and the initial
# decoder states in; the stacked per-step word distributions out.
train_model=Model(inputs=[encoder_input,decoder_target_input,s0,c0],outputs=outputs)

In [0]:
from keras import callbacks

def callback(model_name,tf_log_dir_name='./tf-log/',patience_lr=10,):
    """Build the list of Keras callbacks used for training.

    Args:
        model_name: Checkpoint path. Currently unused, because the
            ModelCheckpoint callback is disabled.
        tf_log_dir_name: TensorBoard log directory. Currently unused, because
            the TensorBoard callback is disabled.
        patience_lr: Patience passed to ReduceLROnPlateau.

    Returns:
        ``[ReduceLROnPlateau, EarlyStopping]`` in that order.
    """
    # Disabled: TensorBoard logging
    #   callbacks.TensorBoard(log_dir=tf_log_dir_name, histogram_freq=0)
    # Disabled: best-model checkpointing
    #   callbacks.ModelCheckpoint(filepath=model_name,monitor='val_loss',mode='auto',save_best_only=True)

    # Cut the learning rate by 10x when the training loss stops improving.
    lr_scheduler = callbacks.ReduceLROnPlateau(
        monitor='loss',
        factor=0.1,
        patience=patience_lr,
        verbose=1,
        min_delta=1e-4,
        mode='min',
    )

    # Stop training when validation accuracy has not improved for 5 epochs.
    early_stopper = callbacks.EarlyStopping(
        monitor='val_acc',
        min_delta=0,
        patience=5,
        verbose=1,
        mode='auto',
    )

    return [lr_scheduler, early_stopper]

In [0]:
# Compile and train the teacher-forced model.
train_model.compile(optimizer='rmsprop',loss='categorical_crossentropy',metrics=['accuracy'])
#train_model.summary()

# All-zero array fed as both the initial hidden state (s0) and cell state (c0),
# one row per training sample.
s_c = np.zeros((N, NN_dim_3))  # initial [s, c]

# NOTE(review): callback() does not use model_name while its ModelCheckpoint is
# commented out, so nothing is written to this hard-coded Drive path.
cb=callback(model_name='/content/drive/My Drive/Machine Learning/Udemy/Deep Learning Advanced NLP and RNNs/Attention/Neural Machine Translation/Model_NML_1.h5')


# 20% of the training samples are held out for validation via validation_split.
history=train_model.fit([encoder_inputs, decoder_inputs, s_c, s_c], target_one_hot_vector,
            epochs=50, batch_size=64, validation_split=0.2, shuffle=True, callbacks=cb)

Train on 2807 samples, validate on 702 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 00044: early stopping


In [0]:

# Inference-time encoder: maps padded input tokens to the encoder output sequence.
test_encoder_model=Model(encoder_input,encoder_output)


'''#########################  Start Input from the place, at where the graph is changing. Here we do not have the previous decoder embedding
and so, teacher forcing is going to be changed. 
For this reason we need to build a seperate encoder model because previous encoder was connected to teacher forcing. ############ ''' 

# Inference-time decoder processes one token per call, reusing the trained
# decoder_embedding, attention layers, layer1 (LSTM) and layer2 (softmax Dense).
test_decoder_input=Input(shape=(1,),)
decoder_single_input=decoder_embedding(test_decoder_input)

# Encoder outputs are supplied as an input: 2*NN_dim_1 features per position (bidirectional).
encoder2decoder_input=Input(shape=(max_input_length,2*NN_dim_1,),)

context_vector=attention_for_one_step(encoder2decoder_input,s0)
# `context_teacher_concat` is the Concatenate layer left over from the last
# iteration of the training-graph loop.
decoder_layer1_input=context_teacher_concat([context_vector,decoder_single_input])

# NOTE: `s`, `c` and `outputs` are rebound here; they no longer refer to the
# training-graph tensors of the same names.
decoder_layer1,s,c=layer1(decoder_layer1_input,initial_state=[s0,c0])
outputs=layer2(decoder_layer1)


# Returns the word distribution plus the updated states so the caller can feed them back in.
test_decoder_model=Model(inputs=[test_decoder_input,encoder2decoder_input,s0,c0],outputs=[outputs,s,c])


In [0]:
# Reverse lookup tables (token index -> word), built by inverting each tokenizer's word_index.
input_idx2word={i:w for w,i in input_tokenizer.word_index.items()}
output_idx2word={i:w for w,i in output_tokenizer.word_index.items()}


def decode_sequence(input_sequence):
  """Greedily translate one padded input sequence into a space-joined string.

  The sequence is encoded once with test_encoder_model. test_decoder_model is
  then called one token at a time, starting from '<sos>' and zero states, each
  time feeding back the arg-max word index and the returned states. Decoding
  stops at '<eos>' or after max_output_length steps. Index 0 is never emitted
  as a word, but it is still fed back as the next decoder input.
  """
  encoder_states = test_encoder_model.predict(input_sequence)

  # 1x1 array holding the token id most recently fed to the decoder.
  prev_token = np.zeros(shape=(1, 1))
  prev_token[0, 0] = output_tokenizer.word_index['<sos>']

  eos_idx = output_tokenizer.word_index['<eos>']

  state_s = np.zeros((1, NN_dim_3))
  state_c = np.zeros((1, NN_dim_3))

  words = []
  for _ in range(max_output_length):
    probs, state_s, state_c = test_decoder_model.predict(
        [prev_token, encoder_states, state_s, state_c])

    best = np.argmax(probs.flatten())
    if best == eos_idx:
      break

    if best > 0:
      words.append(output_idx2word[best])
    prev_token[0, 0] = best

  return ' '.join(words)
    
    
  

In [0]:
# Show each held-out sentence with its reference translation and the model's
# greedy translation.
for row, (source_sentence, reference_translation) in enumerate(zip(test_input_texts, test_target_texts)):
  print('Input: ' + source_sentence)
  print('Output: ' + reference_translation)
  # The [row:row+1] slice keeps the leading batch axis that decode_sequence passes to predict().
  print('Predicted Output: '+ decode_sequence(test_encoder_inputs[row:row+1]))

Input: How could they forget us?
Output: তারা আমাদের কী করে ভুলে যেতে পারে?
Predicted Output: তাঁরা আমাদের কী ভুলে যেতে পারেন?
Input: At what time does it close?
Output: কটার সময এটা বন্ধ হয়?
Predicted Output: কটার কটার সময় বন্ধ হয়?
Input: You should know better.
Output: আপনিই ভালো করে জানেন।
Predicted Output: তোমরাই ভালো করে জানো।
Input: They screamed.
Output: তাঁরা চিৎকার করলেন।
Predicted Output: ওরা চেঁচালো।
Input: My head aches.
Output: আমার মাথা ব্যাথা করছে।
Predicted Output: আমার বাবা ধরো।
Input: Grab Tom.
Output: টমকে ধরুন।
Predicted Output: টমকে ধরো।
Input: I shouldn't have logged off.
Output: আমার লগ আউট করা উচিৎ হয়নি।
Predicted Output: আমি তিনটে গেছিলাম।
Input: What is the distance from here to the station?
Output: এখান থেকে স্টেশনের দূরত্ব কতটা?
Predicted Output: ট্রেনটা কটার সময় ছাড়ে?
Input: Tom is taller than Mary.
Output: টম মেরির থেকে লম্বা।
Predicted Output: টম মেরির থেকে রয়েছে।
Input: Do you want to eat now or later?
Output: তুমি কি এখন খেতে চাও না পরে?
Predicted Outpu