In [1]:
import tensorflow as tf
import numpy as np
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from keras.datasets import imdb 
from keras.layers import *
from keras.models import *
from keras import backend as K

In [2]:
# Vocabulary cap handed to the IMDB loader (num_words) and reused later as the
# input dimension of the Embedding layer.
n_unique_words = 10000

# Fetch the IMDB train/test splits, restricted to that vocabulary size.
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=n_unique_words,
)

In [4]:
# maxlen: common length every review sequence is brought to by pad_sequences.
# batch_size: mini-batch size used by the training cell further down.
maxlen, batch_size = 200, 128

# Bring both feature splits to the same fixed sequence length.
x_train, x_test = (
    pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)

# Make sure the labels are NumPy arrays before they are handed to model.fit.
y_train, y_test = np.array(y_train), np.array(y_test)

In [5]:
class Attention(Layer):
    """Attention layer that re-weights a 3D input along axis 1.

    For an input ``x`` the layer computes ``e = tanh(x . W + b)``, normalises
    ``e`` with a softmax over axis 1 to obtain the weights ``a`` and scales
    the input by them (``x * a``).

    Args:
        return_sequences: If True (default) the weighted input ``x * a`` is
            returned with the same rank as ``x``. If False the weighted input
            is summed over axis 1, removing that axis.
        **kwargs: Standard ``Layer`` keyword arguments (e.g. ``name``),
            forwarded to the base class.

    Note:
        The bias is created with shape ``(input_shape[1], 1)``, so the layer
        is tied to the axis-1 length seen at build time and cannot be used
        with an undefined (``None``) sequence length.
    """

    def __init__(self, return_sequences=True, **kwargs):
        # Initialise the base Layer first so its internal state exists before
        # any attribute is set on the instance.
        super(Attention, self).__init__(**kwargs)
        self.return_sequences = return_sequences

    def build(self, input_shape):
        """Create the projection weight ``W`` and the per-position bias ``b``.

        Raises:
            ValueError: If ``input_shape[1]`` is ``None``; the bias needs a
                concrete size for that axis.
        """
        if input_shape[1] is None:
            raise ValueError(
                "Attention requires a fixed sequence length (input_shape[1]), "
                "but it is None."
            )
        # W projects the last axis down to a single score per position.
        self.W = self.add_weight(name="att_weight",
                                 shape=(input_shape[-1], 1),
                                 initializer="normal")
        # One bias value per position along axis 1.
        self.b = self.add_weight(name="att_bias",
                                 shape=(input_shape[1], 1),
                                 initializer="zeros")
        super(Attention, self).build(input_shape)

    def call(self, x):
        """Apply the attention weights to ``x`` (see the class docstring)."""
        # Unnormalised score for every position along axis 1.
        e = K.tanh(K.dot(x, self.W) + self.b)
        # Normalise the scores across axis 1 so they sum to one.
        a = K.softmax(e, axis=1)
        output = x * a
        if self.return_sequences:
            return output
        # Collapse axis 1 into a single weighted sum.
        return K.sum(output, axis=1)

    def get_config(self):
        """Return the layer config, including ``return_sequences``.

        Needed so that a saved model containing this layer can be rebuilt
        with the same constructor arguments.
        """
        config = super(Attention, self).get_config()
        config.update({"return_sequences": self.return_sequences})
        return config

In [6]:
# Embedding -> BiLSTM -> Attention -> LSTM -> Dropout -> sigmoid classifier.
# The layers are passed in the order in which they are stacked.
model = Sequential([
    Embedding(n_unique_words, 128, input_length=maxlen),
    Bidirectional(LSTM(64, return_sequences=True)),
    Attention(return_sequences=True),  # receive 3D and output 3D
    LSTM(32),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# Print the layer table, then configure training for binary classification.
model.summary()
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 200, 128)          1280000   
_________________________________________________________________
bidirectional (Bidirectional (None, 200, 128)          98816     
_________________________________________________________________
attention (Attention)        (None, 200, 128)          328       
_________________________________________________________________
lstm_1 (LSTM)                (None, 32)                20608     
_________________________________________________________________
dropout (Dropout)            (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 1,399,785
Trainable params: 1,399,785
Non-trainable params: 0
______________________________________________

In [7]:
# Train for 12 epochs; (x_test, y_test) is evaluated after every epoch as the
# validation data.
history3d = model.fit(
    x_train,
    y_train,
    epochs=12,
    batch_size=batch_size,
    validation_data=(x_test, y_test),
)

# Per-epoch training loss on the first line, training accuracy on the second.
print(history3d.history['loss'], history3d.history['accuracy'], sep='\n')

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
[0.6344530582427979, 0.4405810534954071, 0.3037141263484955, 0.22802868485450745, 0.17116324603557587, 0.16467730700969696, 0.13747334480285645, 0.09135077148675919, 0.07895759493112564, 0.05699450150132179, 0.08110612630844116, 0.05559977516531944]
[0.6143199801445007, 0.8197600245475769, 0.887440025806427, 0.91948002576828, 0.9437599778175354, 0.9434800148010254, 0.9544399976730347, 0.9744399785995483, 0.9783999919891357, 0.9860799908638, 0.9752399921417236, 0.9840800166130066]
