In [None]:
import os, sys

from keras.models import Model
from keras.layers import Input,LSTM,GRU,Dense,Embedding,Bidirectional,RepeatVector
from keras.layers import Concatenate,Activation, Add ,Dot,Lambda, TimeDistributed
from keras.preprocessing.text import Tokenizer 
from keras.preprocessing.sequence import pad_sequences
import keras.backend as K

import numpy as np
import matplotlib.pyplot as plt 

In [None]:
# When the TensorFlow backend reports at least one GPU, rebind the names
# LSTM and GRU to their cuDNN-backed implementations for the rest of the
# notebook.
# NOTE(review): the encoder cell later constructs GRU(..., unroll=True);
# confirm the CuDNN variant accepts that argument before running on a GPU.
if K.tensorflow_backend._get_available_gpus():
    from keras.layers import CuDNNGRU as GRU
    from keras.layers import CuDNNLSTM as LSTM

In [None]:
# --- Training configuration -------------------------------------------------
BATCH_SIZE = 8          # mini-batch size passed to model.fit
EPOCHS = 5              # NOTE(review): not referenced in the visible cells; fit() hard-codes its epoch count
LATENT_DIM = 256        # number of units in the encoder and decoder GRUs

# --- Data configuration -----------------------------------------------------
NUM_EXAMPLES = 1000     # sentence pairs sampled for training
FETC_EXAMPLES = 200000  # upper bound on lines read from the corpus file
MAX_NUM_WORDS = 40000   # vocabulary cap handed to both tokenizers
EMBEDDING_DIM = 50      # width of the pre-trained word vectors (glove.6B.50d)


In [None]:
# Load data "tur.txt"
# Each usable line holds a source sentence and its translation separated by a
# tab. At most FETC_EXAMPLES lines are read; malformed lines are skipped.
temporal_encoder_data=[]
temporal_decoder_data=[]

# Explicit UTF-8 so the read does not depend on the platform default
# encoding; the context manager guarantees the handle is closed.
with open("tur.txt", encoding="utf-8") as corpus_file:
    for line_no, line in enumerate(corpus_file, start=1):

        if line_no > FETC_EXAMPLES:
            break

        if "\t" not in line:
            continue

        fields=line.rstrip().split("\t")

        # A line such as "text\t\n" collapses to a single field after
        # rstrip(); skip it instead of failing on the unpack below.
        if len(fields) < 2:
            continue

        # Only the first two columns are used; any extra tab-separated
        # columns on the line are ignored rather than breaking the unpack.
        encoder_data,decoder_data=fields[:2]

        temporal_encoder_data.append(encoder_data)
        temporal_decoder_data.append(decoder_data)

print("num samples :",len(temporal_encoder_data))

In [None]:
# Randomly pick up to NUM_EXAMPLES sentence pairs and build the three text
# lists used downstream:
#   input_texts          - source sentences (encoder input)
#   target_texts_inputs  - translations prefixed with "<sos> " (decoder input)
#   target_texts_outputs - translations suffixed with " <eos>" (decoder target)
input_texts=[]
target_texts_outputs=[]
target_texts_inputs=[]

# Shuffle over the pairs that were actually loaded. Using FETC_EXAMPLES here
# would index past the end of the lists whenever the file is shorter than the
# cap or some lines were skipped during loading.
indices=np.arange(len(temporal_encoder_data))
np.random.shuffle(indices)

# Slicing clamps automatically if fewer than NUM_EXAMPLES pairs are available.
restricted_indices=indices[:NUM_EXAMPLES]

for pair_idx in restricted_indices:

    input_texts.append(temporal_encoder_data[pair_idx])
    target_texts_inputs.append("<sos> "+temporal_decoder_data[pair_idx])
    target_texts_outputs.append(temporal_decoder_data[pair_idx]+" <eos>")

**Tokenizer Encoder Part**

In [None]:
# Build the source-side vocabulary (capped at MAX_NUM_WORDS) from the sampled
# input sentences, then convert every sentence to a list of integer word ids.
tokenizer_inputs = Tokenizer(num_words=MAX_NUM_WORDS)
tokenizer_inputs.fit_on_texts(input_texts)
input_sequences = tokenizer_inputs.texts_to_sequences(input_texts)

In [None]:
# Source-side word -> integer id mapping produced by the fitted tokenizer.
word2idx_inputs = tokenizer_inputs.word_index

In [None]:
# Length (in tokens) of the longest source sequence; used as the encoder's
# fixed input length.
max_len_input = max(map(len, input_sequences))
max_len_input

**Tokenizer Decoder Part**

In [None]:
# Target-side tokenizer. filters="" disables character filtering -
# presumably so the "<sos>" / "<eos>" markers are kept intact (confirm
# against the Tokenizer defaults).
tokenizer_outputs = Tokenizer(num_words=MAX_NUM_WORDS, filters="")

# Fit on both variants so the vocabulary contains <sos> as well as <eos>.
tokenizer_outputs.fit_on_texts(target_texts_inputs + target_texts_outputs)

target_sequence_inputs = tokenizer_outputs.texts_to_sequences(target_texts_inputs)
target_sequence_outputs = tokenizer_outputs.texts_to_sequences(target_texts_outputs)

In [None]:
# Target-side word -> integer id mapping produced by the fitted tokenizer.
word2idx_outputs = tokenizer_outputs.word_index

In [None]:
# Target vocabulary size: one more than the number of known words.
# (This value is recomputed with the MAX_NUM_WORDS cap in a later cell.)
num_words_outputs = 1 + len(word2idx_outputs)
num_words_outputs

In [None]:
# Length (in tokens) of the longest target sequence; fixes the number of
# decoder time steps.
max_len_target = max(map(len, target_sequence_outputs))
max_len_target

**Pad Sequences Part**

In [None]:
# Pad every source sequence to max_len_input using pad_sequences' default
# padding side, then show the resulting shape and one sample row.
encoder_inputs = pad_sequences(
    input_sequences,
    maxlen=max_len_input,
)
print(encoder_inputs.shape)
print(encoder_inputs[1])

In [None]:
# Pad decoder inputs and targets at the END (padding="post") so both start
# at time step 0 and stay aligned with each other.
decoder_inputs = pad_sequences(
    target_sequence_inputs,
    maxlen=max_len_target,
    padding="post",
)
decoder_targets = pad_sequences(
    target_sequence_outputs,
    maxlen=max_len_target,
    padding="post",
)

print(decoder_inputs[0])
print(decoder_targets[0])

**Word Vectors Part**

In [None]:
# Load word vectors
# Each line of the GloVe file is "<word> <v1> <v2> ... <vN>"; build a
# word -> float32 vector dictionary from it.
word2vec={}

# Explicit UTF-8: without it the read depends on the platform's default
# encoding and can fail on non-ASCII tokens in the vector file.
with open("glove.6B.50d.txt", encoding="utf-8") as f:

    for line in f:
        values=line.split()
        word=values[0]
        vec=np.asarray(values[1:],dtype="float32")
        word2vec[word]=vec

# Number of word vectors loaded.
print(len(word2vec))

In [None]:
# Create embedding matrix
# Row i holds the pre-trained vector of the source word with tokenizer id i.
# Rows for words without a pre-trained vector (and row 0, which no word uses
# as an id in this loop) stay all zeros.

num_words_inputs=min(MAX_NUM_WORDS,len(word2idx_inputs)+1)
embedding_matrix=np.zeros((num_words_inputs,EMBEDDING_DIM))

for word,i in word2idx_inputs.items():
    # Ids at or above the cap have no row in the matrix.
    if i<MAX_NUM_WORDS:
        embedding_vector=word2vec.get(word)
        if embedding_vector is not None:
            # Must be an assignment: the previous `embedding_matrix[i]:embedding_vector`
            # was parsed as a variable annotation and never wrote anything,
            # leaving the whole matrix at zero.
            embedding_matrix[i]=embedding_vector

**Transform Decoder Target Vector**

In [None]:
# Cap the target vocabulary size, then allocate the zero-filled one-hot
# target tensor of shape (num samples, max_len_target, num_words_outputs).
num_words_outputs = min(MAX_NUM_WORDS, len(word2idx_outputs) + 1)

decoder_targets_one_hot = np.zeros(
    (len(input_texts), max_len_target, num_words_outputs),
    dtype="float32",
)

decoder_targets_one_hot.shape

In [None]:
# Set a 1 at [sample, time step, word id] for every entry of decoder_targets.
# Note that padded positions (id 0) are marked as well, i.e. padding is
# encoded as class 0 rather than left as an all-zero row.
sample_idx, step_idx = np.indices(decoder_targets.shape)
decoder_targets_one_hot[sample_idx, step_idx, decoder_targets] = 1

In [None]:
# Display the shape of the padded integer target matrix for a quick sanity check.
decoder_targets.shape

### Model Part

In [None]:
# Encoder-side embedding layer, initialised from embedding_matrix.
# `trainable` is left at the layer's default.
embedding_layer = Embedding(
    input_dim=num_words_inputs,
    output_dim=EMBEDDING_DIM,
    weights=[embedding_matrix],
    input_length=max_len_input,
    name="encoder_embedding",
)

In [None]:
# Encoder: word ids -> embeddings -> GRU.
# return_sequences=True exposes the output at every source time step (needed
# by the attention mechanism); return_state=True additionally returns the
# final hidden state, which the decoder loop starts from.
encoder_inputs_placeholder = Input(
    shape=(max_len_input,),
    name="encoder_inputs",
)
x = embedding_layer(encoder_inputs_placeholder)

encoder = GRU(
    LATENT_DIM,
    return_sequences=True,
    return_state=True,
    unroll=True,
)
encoder_outputs, encoder_hidden = encoder(x)



In [None]:
# Decoder input: the full teacher-forcing sequence, embedded once up front.
decoder_inputs_placeholder = Input(
    shape=(max_len_target,),
    name="decoder_inputs",
)
decoder_embedding = Embedding(input_dim=num_words_outputs, output_dim=256)
decoder_inputs_x = decoder_embedding(decoder_inputs_placeholder)

# Layers shared by every decoding step: one GRU step plus a softmax
# projection onto the target vocabulary.
decoder = GRU(LATENT_DIM, return_state=True)
decoder_dense = Dense(num_words_outputs, activation="softmax")

# Additive-attention scoring layers: two projections and a scalar score head.
dense_w1 = Dense(units=128)
dense_w2 = Dense(units=128)
dense_v = Dense(units=1)

# Helper layers used while applying attention and assembling the GRU input.
attn_dot = Dot(axes=1)
attention_concat_layer = Concatenate(axis=-1)
expand_layer = Lambda(lambda x: K.expand_dims(x, axis=1))
# Only referenced from commented-out code in the visible cells.
attn_repeat_layer = RepeatVector(max_len_input)

In [None]:
def _softmax_over_time(scores):
    """Softmax along axis 1 (the encoder time axis) of a rank-3 tensor."""
    # Subtract the per-sample maximum before exponentiating for numerical
    # stability; this does not change the result of the softmax.
    exp_scores = K.exp(scores - K.max(scores, axis=1, keepdims=True))
    return exp_scores / K.sum(exp_scores, axis=1, keepdims=True)


def one_step_attention(hidden,encoder_outputs):
    """Compute the attention context vector for one decoder step.

    Args:
        hidden: previous decoder hidden state, shape (batch, LATENT_DIM).
        encoder_outputs: encoder outputs for every source time step,
            shape (batch, max_len_input, LATENT_DIM).

    Returns:
        Context tensor of shape (batch, 1, LATENT_DIM): the attention-weighted
        sum of ``encoder_outputs`` over the source time axis.
    """
    # Additive scoring: project the encoder outputs and the (time-expanded)
    # decoder state, add them, squash with tanh and reduce to one scalar per
    # source position -> (batch, max_len_input, 1).
    projected_outputs=dense_w1(encoder_outputs)
    projected_hidden=dense_w2(expand_layer(hidden))
    score=dense_v(Activation("tanh")(Add()([projected_outputs,projected_hidden])))

    # Normalise over the source time axis. Activation("softmax") would
    # normalise over the last axis, which has size 1 here, making every
    # attention weight exactly 1.0 and disabling the attention mechanism.
    attention_weights=Lambda(_softmax_over_time)(score)

    # Weighted sum of encoder outputs across time.
    c=attn_dot([attention_weights,encoder_outputs])

    return c


In [None]:
# Unrolled teacher-forced decoder: one attention + GRU step per target
# position, collecting the per-step vocabulary distributions in `outputs`.

# first hidden for decoder
hidden=encoder_hidden
outputs=[]

for t in range(max_len_target):
    # Context vector computed from the previous decoder state.
    c=one_step_attention(hidden,encoder_outputs)

    # Slice out the embedded decoder input for step t, keeping the time axis.
    # `t=t` binds the current index at definition time so the layer does not
    # depend on the loop variable's later value if the function is re-run.
    selector = Lambda(lambda seq, t=t: seq[:, t:t+1])
    xt=selector(decoder_inputs_x)

    x=attention_concat_layer([c,xt])

    # Carry the recurrent state across steps. Without initial_state the GRU
    # would restart from an all-zero state at every target position.
    output,state=decoder(x,initial_state=[hidden])
    pred=decoder_dense(output)

    hidden=state
    outputs.append(pred)
    


In [None]:
def stack_and_transpose(x):
    """Turn a list of per-step predictions into one batch-major tensor.

    ``x`` is a list of length T whose elements are
    (batch_size, output_vocab_size) tensors. The result has shape
    (batch_size, T, output_vocab_size).
    """
    # Stacking puts the time axis first: (T, batch_size, output_vocab_size).
    time_major = K.stack(x)
    # Swap the first two axes so the batch axis leads.
    return K.permute_dimensions(time_major, pattern=(1, 0, 2))

In [None]:
# Wrap the stacking helper in a Lambda layer and merge the list of per-step
# predictions into a single tensor.
# Note: `outputs` is rebound from a list to a tensor here, so this cell
# should not be re-run without re-running the decoder loop first.
stacker = Lambda(stack_and_transpose)
outputs = stacker(outputs)

In [None]:
# Training model: takes source ids and teacher-forcing target ids, returns
# the stacked per-step vocabulary distributions.
model = Model(
    inputs=[encoder_inputs_placeholder, decoder_inputs_placeholder],
    outputs=outputs,
)

In [None]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

# Render the model graph with Graphviz `dot` and display it inline as SVG.
SVG(
    model_to_dot(model).create(prog='dot', format='svg')
)

In [None]:
# Categorical cross-entropy against the one-hot targets, optimised with RMSprop.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train with teacher forcing; 20% of the samples are held out for validation.
# NOTE(review): the epoch count is hard-coded to 100 here while the EPOCHS
# constant in the config cell is left unused - confirm which is intended.
r = model.fit(
    x=[encoder_inputs, decoder_inputs],
    y=decoder_targets_one_hot,
    batch_size=BATCH_SIZE,
    epochs=100,
    validation_split=0.2,
)