In [1]:
import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from keras.models import Model,Sequential
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
from keras.layers import GRU,Embedding,Bidirectional,InputLayer,Input,Dense,TimeDistributed,Activation,RepeatVector,Bidirectional,Dot,Concatenate,LSTM
from keras.optimizers import Adam
from keras.preprocessing.text import Tokenizer
print(os.listdir("../input"))
from keras.preprocessing.sequence import pad_sequences
from keras.losses import sparse_categorical_crossentropy
from keras.utils.np_utils import to_categorical
# Any results you write to the current directory are saved as output.

Using TensorFlow backend.


['small_vocab_en.txt', 'small_vocab_fr.txt']


In [2]:
# Read the whole English corpus into a single string.
# The encoding is stated explicitly so decoding does not depend on the
# platform's default locale.
with open("../input/small_vocab_en.txt", encoding="utf-8") as f:
    eng=f.read()

In [3]:
# Read the whole French corpus into a single string.
# The encoding is stated explicitly so accented characters are decoded the
# same way regardless of the platform's default locale.
with open("../input/small_vocab_fr.txt", encoding="utf-8") as f:
    fr=f.read()

In [4]:
# One list entry per line of the French file.
# NOTE: this rebinds `fr` from str to list, so the cell cannot be run twice.
fr = fr.split("\n")

In [5]:
# One list entry per line of the English file.
# NOTE: this rebinds `eng` from str to list, so the cell cannot be run twice.
eng = eng.split("\n")

In [6]:
from collections import Counter

In [7]:
# Frequency of every space-separated token in the English sentences.
engdict = Counter(
    word
    for line in eng
    for word in line.split(" ")
)

In [8]:
# Frequency of every space-separated token in the French sentences.
frdict = Counter(
    word
    for line in fr
    for word in line.split(" ")
)

In [9]:
# Number of distinct raw (space-split) English tokens.
eng_vocabsize = len(engdict)

In [10]:
# Number of distinct raw (space-split) French tokens.
fr_vocabsize = len(frdict)

In [11]:
# Map words to integer indices with the Keras Tokenizer.
# Fitting builds the English vocabulary from the sentence list.
engtokenizer = Tokenizer()
engtokenizer.fit_on_texts(eng)

In [12]:
# Encode every English sentence as a list of integer word indices.
eng_token = engtokenizer.texts_to_sequences(eng)

In [13]:
# Same procedure for French: fit a separate tokenizer on the French sentences,
# then encode every sentence as a list of integer word indices.
frtokenizer = Tokenizer()
frtokenizer.fit_on_texts(fr)
fr_token = frtokenizer.texts_to_sequences(fr)

In [14]:
# English word -> index mapping (a reference to the tokenizer's own dict, not a copy).
eng_ind = engtokenizer.word_index

In [15]:
# French word -> index mapping (a reference to the tokenizer's own dict, not a copy).
fr_ind = frtokenizer.word_index

In [16]:
# Right-pad ('post') every English sequence to a common length,
# giving one 2-D integer array for the whole corpus.
eng_token_pad = pad_sequences(eng_token, padding='post')

In [17]:
# Right-pad ('post') every French sequence to a common length,
# giving one 2-D integer array for the whole corpus.
fr_token_pad = pad_sequences(fr_token, padding='post')

In [18]:
# Sanity check: (number of sentences, padded English sequence length).
eng_token_pad.shape

(137861, 15)

In [19]:
# Sanity check: (number of sentences, padded French sequence length).
fr_token_pad.shape

(137861, 21)

In [20]:
# French index -> word mapping, for turning predicted indices back into text.
# This is a reference to the tokenizer's own dict, not a copy.
ind_w = frtokenizer.index_word

In [21]:
# Give index 0 (the fill value in the padded arrays) a printable label.
# This writes into the tokenizer's index_word dict in place.
ind_w[0] = '<PAD>'

In [22]:
# Padded sequence lengths (second axis of each padded array).
max_eng_length, max_fr_length = eng_token_pad.shape[1], fr_token_pad.shape[1]

In [23]:
# French word -> index mapping; refers to the same dict object as the
# tokenizer's word_index.
word_ind = frtokenizer.word_index

In [24]:
# Register the padding index so len(word_ind) also counts class 0.
# This mutates the tokenizer's word_index dict in place.
word_ind['<pad>'] = 0

In [25]:
# Tx: source (English) sequence length; Ty: target (French) sequence length.
Tx, Ty = max_eng_length, max_fr_length

In [26]:
# Layers for the attention mechanism, created once at module level so that
# every call reuses the same layer objects (and therefore the same weights).
densor = Dense(32, activation='relu')    # hidden layer of the scoring network
densor1 = Dense(1, activation='relu')    # one scalar score per source position
repeat = RepeatVector(Tx)                # copies a vector Tx times along a new axis
activator = Activation('softmax')        # softmax activation layer
dotor = Dot(axes=1)                      # dot product over axis 1 of both inputs

In [27]:
def one_step_attention(a,st_prev):
    """Compute the attention context for one decoder time step.

    Args:
        a: encoder hidden states, one per source position (m X Tx X n_a).
        st_prev: previous decoder hidden state (m X n_s).

    Returns:
        Context tensor (m X 1 X n_a): the attention-weighted sum of the
        encoder states, shaped as a length-1 sequence for the decoder LSTM.
    """
    # Imported locally so the function carries its own dependency; Softmax is
    # a weight-free layer, so creating it on every call shares nothing.
    from keras.layers import Softmax

    st_prev1=repeat(st_prev)  # m X Tx X n_s
    concat=Concatenate(axis=-1)([a,st_prev1])  # m X Tx X (n_a + n_s)
    dens=densor(concat)
    energies=densor1(dens)  # unnormalised attention scores, m X Tx X 1
    # Normalise the scores over the source positions (axis 1) so the weights
    # for each sentence sum to 1. The softmax must be applied to the scores
    # BEFORE the weighted sum; applying it to the context vector afterwards
    # squashes the feature axis and leaves the attention weights unnormalised.
    alphas=Softmax(axis=1)(energies)  # m X Tx X 1
    context=dotor([alphas,a])  # weighted sum over Tx -> m X 1 X n_a
    return context

In [28]:
# Hidden sizes: n_a for the encoder LSTM, n_s for the decoder LSTM.
n_a = n_s = 128

# Decoder cell, shared by all output steps; return_state=True makes it return
# its hidden and cell state in addition to the output.
post_activation_lstm = LSTM(n_s, return_state=True)

# Softmax over the French vocabulary (one unit per entry of word_ind).
outputlayer = Dense(len(word_ind), activation='softmax')

In [29]:
# English word -> index mapping (the tokenizer's own dict), extended in place
# with an entry for the padding index 0.
eng_w = engtokenizer.word_index
eng_w['<pad>'] = 0

# Vocabulary size including the padding entry; used as the embedding input size.
len1 = len(eng_w)

In [30]:
# Model input: one padded English sentence of Tx word indices.
x = Input(shape=(Tx,))

# Look up a 64-dimensional embedding for every input index.
embed = Embedding(len1, 64, input_length=Tx)(x)

In [31]:
# Initial decoder hidden state (s0) and cell state (c0) are fed in as model inputs.
s0 = Input(shape=(n_s,))
c0 = Input(shape=(n_s,))

# Running decoder state, updated at every output step.
s, c = s0, c0

In [32]:
# Collects one softmax output tensor per target position.
outputs = []

# Encoder: an LSTM over the embedded source sentence that returns the hidden
# state at every position (return_sequences=True).
a = LSTM(n_a, return_sequences=True)(embed)

In [33]:
# Unroll the decoder for Ty steps.
# NOTE: `s`, `c` and `outputs` come from earlier cells, so re-running only
# this cell continues from the previous run's final state and keeps appending.
for i in range(Ty):
    # Attention context for this step, from the encoder states and the
    # current decoder hidden state.
    context = one_step_attention(a, s)

    # Advance the decoder LSTM by one step, starting from the current state.
    s, _, c = post_activation_lstm(context, initial_state=[s, c])

    # Distribution over the French vocabulary for this target position.
    out = outputlayer(s)
    outputs.append(out)

# The model takes the source sentence plus the initial decoder state and
# returns a list with one output per target position.
model = Model(inputs=[x, s0, c0], outputs=outputs)

In [34]:
# Print the layer-by-layer structure of the assembled model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 15)           0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 15, 64)       12800       input_1[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 128)          0                                            
__________________________________________________________________________________________________
lstm_2 (LSTM)                   (None, 15, 128)      98816       embedding_1[0][0]                
__________________________________________________________________________________________________
repeat_vec

In [35]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the
# targets are one-hot encoded) and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [36]:
# Number of training examples (rows of the padded English array).
m = len(eng_token_pad)

# Number of passes over the training data.
epochs = 5

In [37]:
# All-zero initial decoder hidden and cell state, one row per training example.
# NOTE: this rebinds `s0`/`c0`, which earlier held the Keras Input tensors, so
# the model-building cells cannot be re-run after this one without first
# re-creating those inputs.
s0 = np.zeros((m, n_s))
c0 = np.zeros_like(s0)


In [38]:
# Size of the French vocabulary mapping, including the '<pad>' entry.
len(word_ind)

346

In [39]:
# One-hot encode the padded French targets; a class axis of length
# len(word_ind) is appended, giving shape (m, Ty, len(word_ind)).
# NOTE: with the sizes shown in this notebook that is 137861 x 21 x 346,
# roughly 1e9 values, so this array is very memory-hungry.
y_hot = to_categorical(fr_token_pad, len(word_ind))

In [40]:
# Train the model. The targets are passed as a list with one array per output
# step: swapping the first two axes puts the time axis first, and list()
# splits along it.
model.fit(
    [eng_token_pad, s0, c0],
    list(y_hot.swapaxes(0, 1)),
    epochs=epochs,
    batch_size=64,
)

Epoch 1/5
Epoch 2/5

In [41]:
# Note: when training machine-translation models with different attention mechanisms,
# scalar-product attention gives better accuracy from the start, but it is slower than
# the attention applied in this notebook.