In [1]:
import tensorflow as tf
import numpy as np
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from keras.datasets import imdb 
from keras.layers import *
from keras.models import *
from keras import backend as K
from sklearn.model_selection import train_test_split

In [2]:
# Vocabulary cap passed to the IMDB loader as `num_words`.
n_unique_words = 10000

# The loader returns one (inputs, labels) pair per split: train first, then test.
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=n_unique_words
)

In [3]:
# Hyper-parameters shared with the model and training cells below.
maxlen = 200       # every review is padded / truncated to this many tokens
batch_size = 128

# Make sure the labels are NumPy arrays.
y_train, y_test = np.array(y_train), np.array(y_test)

# Bring every sequence to exactly `maxlen` entries.
x_train, x_test = (
    pad_sequences(sequences, maxlen=maxlen) for sequences in (x_train, x_test)
)

# Hold out 20% of the training data for validation (fixed seed for the split).
# NOTE: this overwrites x_train / y_train, so re-running this cell without
# re-running the load cell first would split the already reduced training set.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=2022,
)

In [4]:
class Attention(Layer):
  """Learned attention weighting along axis 1 of the input.

  For an input ``x`` the layer computes scores ``e = tanh(dot(x, W) + b)``,
  normalises them with a softmax over axis 1 and multiplies ``x`` by the
  resulting weights.

  Weights (created in ``build``):
      W: shape ``(input_shape[-1], 1)``, "normal" initializer.
      b: shape ``(input_shape[1], 1)``, zero initializer. Because the bias is
         sized by axis 1, that dimension must be known at build time.

  Args:
      return_sequences: if true, ``call`` returns the re-weighted input
          (``x * a``); otherwise it returns that product summed over axis 1.
      **kwargs: standard ``Layer`` keyword arguments (``name``, ``dtype``,
          ``trainable``, ...), forwarded to the base class so the layer can be
          named and re-created from its config.
  """
  def __init__(self, return_sequences=True, **kwargs):
      # Initialise the base Layer before attaching our own attributes.
      super(Attention,self).__init__(**kwargs)
      self.return_sequences = return_sequences

  def build(self, input_shape):
      """Create the score kernel ``W`` and the per-position bias ``b``."""
      if input_shape[1] is None:
          # The bias has one entry per position on axis 1, so an unknown
          # length cannot be supported; fail with an explicit message.
          raise ValueError(
              "Attention requires a fixed size on axis 1 of its input, "
              "got input_shape={}".format(input_shape))
      self.W=self.add_weight(name="att_weight", shape=(input_shape[-1],1),
                              initializer="normal")
      self.b=self.add_weight(name="att_bias", shape=(input_shape[1],1),
                              initializer="zeros")

      super(Attention,self).build(input_shape)

  def call(self, x):
      """Apply the attention weights to ``x``.

      Returns ``x * a`` when ``return_sequences`` is true, otherwise the sum
      of ``x * a`` over axis 1.
      """
      # One score per position on axis 1 (last axis of `e` has size 1).
      e = K.tanh(K.dot(x,self.W)+self.b)
      # Normalise the scores across axis 1 so they sum to 1 along it.
      a = K.softmax(e, axis=1)
      # `a` broadcasts over the last axis of `x`.
      output = x*a
      if self.return_sequences:
          return output
      return K.sum(output, axis=1)

  def get_config(self):
      """Return the constructor arguments needed to re-create this layer."""
      config = super(Attention,self).get_config()
      config["return_sequences"] = self.return_sequences
      return config

In [5]:
# Seed TensorFlow's global RNG before any layer is created, so that weight
# initialisation starts from the same random state on every
# Restart & Run All. 2022 is the same value used as `random_state` for the
# train/validation split. (Some GPU kernels may still be non-deterministic.)
tf.random.set_seed(2022)

# Embedding -> BiLSTM -> attention re-weighting -> LSTM -> dropout -> sigmoid.
model = Sequential([
    # Token ids -> 128-d vectors; input_length fixes the sequence length,
    # which the Attention layer needs to size its bias.
    Embedding(n_unique_words, 128, input_length=maxlen),
    Bidirectional(LSTM(64, return_sequences=True)),
    # Keep the full sequence so the following LSTM can consume it.
    Attention(return_sequences=True),
    LSTM(32),
    Dropout(0.5),
    # Single sigmoid unit: binary classification output.
    Dense(1, activation='sigmoid'),
])
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 200, 128)          1280000   
_________________________________________________________________
bidirectional (Bidirectional (None, 200, 128)          98816     
_________________________________________________________________
attention (Attention)        (None, 200, 128)          328       
_________________________________________________________________
lstm_1 (LSTM)                (None, 32)                20608     
_________________________________________________________________
dropout (Dropout)            (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 1,399,785
Trainable params: 1,399,785
Non-trainable params: 0
______________________________________________

In [6]:
# Train for 12 epochs, reporting loss/accuracy on the held-out validation
# split after each epoch. The returned object is kept in `history3d`.
history3d = model.fit(
    x=x_train,
    y=y_train,
    epochs=12,
    batch_size=batch_size,
    validation_data=(x_val, y_val),
)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [8]:
# Despite its name, `cross_entropy` holds the list [loss, accuracy]: the model
# was compiled with binary cross-entropy loss plus the 'accuracy' metric.
cross_entropy = model.evaluate(x=x_test, y=y_test)
print("Test loss and accuracy: ", cross_entropy)

Test loss and accuracy:  [0.6891041994094849, 0.8475199937820435]


In [9]:
# Model outputs for the whole test set; each row is a length-1 array holding
# the sigmoid output for that review.
y_preds = model.predict(x_test)

# Show the first ten outputs next to their true labels.
print("First 10 test results:")
for row in range(10):
    predicted, actual = y_preds[row], y_test[row]
    print("Prediction: {} Actual: {}".format(predicted, actual))

First 10 test results:
Prediction: [0.04758357] Actual: 0
Prediction: [0.9995814] Actual: 1
Prediction: [0.12353067] Actual: 1
Prediction: [0.36380428] Actual: 0
Prediction: [0.9995702] Actual: 1
Prediction: [0.99945325] Actual: 1
Prediction: [0.99951017] Actual: 1
Prediction: [0.00077298] Actual: 0
Prediction: [0.99961734] Actual: 0
Prediction: [0.9996784] Actual: 1
