## Text Classification using Attention Mechanism in Keras

In [2]:
import tensorflow as tf
from keras_preprocessing import sequence
from tensorflow import keras
from tensorflow.python.keras import Input

### Step1. Load imdb dataset

In [3]:
vocab_size = 10000

In [4]:
padding_id = 0
start_id = 1
oov_id = 2
index_offset = 2

In [5]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(path="imdb.npz",
                                                      num_words=vocab_size,
                                                      skip_top=0,
                                                      maxlen=None,
                                                      seed=113,
                                                      start_char=start_id,
                                                      oov_char=oov_id,
                                                      index_from=index_offset)       

Keras has dataset which already 

In [6]:
x_train.shape

(25000,)

In [7]:
x_train

array([list([1, 13, 21, 15, 42, 529, 972, 1621, 1384, 64, 457, 4467, 65, 3940, 3, 172, 35, 255, 4, 24, 99, 42, 837, 111, 49, 669, 2, 8, 34, 479, 283, 4, 149, 3, 171, 111, 166, 2, 335, 384, 38, 3, 171, 4535, 1110, 16, 545, 37, 12, 446, 3, 191, 49, 15, 5, 146, 2024, 18, 13, 21, 3, 1919, 4612, 468, 3, 21, 70, 86, 11, 15, 42, 529, 37, 75, 14, 12, 1246, 3, 21, 16, 514, 16, 11, 15, 625, 17, 2, 4, 61, 385, 11, 7, 315, 7, 105, 4, 3, 2222, 5243, 15, 479, 65, 3784, 32, 3, 129, 11, 15, 37, 618, 4, 24, 123, 50, 35, 134, 47, 24, 1414, 32, 5, 21, 11, 214, 27, 76, 51, 4, 13, 406, 15, 81, 2, 7, 3, 106, 116, 5951, 14, 255, 3, 2, 6, 3765, 4, 722, 35, 70, 42, 529, 475, 25, 399, 316, 45, 6, 3, 2, 1028, 12, 103, 87, 3, 380, 14, 296, 97, 31, 2070, 55, 25, 140, 5, 193, 7485, 17, 3, 225, 21, 20, 133, 475, 25, 479, 4, 143, 29, 5534, 17, 50, 35, 27, 223, 91, 24, 103, 3, 225, 64, 15, 37, 1333, 87, 11, 15, 282, 4, 15, 4471, 112, 102, 31, 14, 15, 5344, 18, 177, 31]),
       list([1, 193, 1152, 193, 8254, 77, 227, 

In [8]:
word2idx = tf.keras.datasets.imdb.get_word_index()

In [9]:
len(word2idx)

88584

In [10]:
word2idx

{'ahead': 1401,
 'autopsied': 72570,
 'liberatore': 52427,
 'daines': 35785,
 'spt11': 70739,
 'faggoty': 63940,
 'plow': 58779,
 'laughless': 45087,
 'twitty': 49744,
 'reductionism': 78372,
 'expectedly': 40096,
 'ides': 59207,
 'anyhoo': 26419,
 "schygulla's": 53063,
 'apollonius': 62412,
 'postrevolutionary': 46238,
 "mendez'": 83495,
 'confessions': 12053,
 "anymore'": 52384,
 'logging': 31356,
 "sharks'": 67979,
 'persona': 3567,
 'blech': 23529,
 'actess': 85702,
 'ishtar': 17345,
 'augmenting': 68795,
 'simplify': 40874,
 "typewriter's": 57195,
 'rollo': 25648,
 'pia': 8745,
 'awsome': 23012,
 'symbolizes': 16511,
 "brave'": 85200,
 'donut': 21482,
 'stinkers': 16017,
 'depict': 6354,
 'disrobe': 26274,
 'repulse': 35238,
 "ferula's": 53495,
 '3p0': 77127,
 'pe': 49011,
 'pearson': 32271,
 'assumption': 10878,
 'stainless': 39858,
 'fostering': 34553,
 'an': 32,
 'bellows': 26351,
 "che's": 7258,
 'postponement': 75376,
 'rerecorded': 71012,
 'wondrous': 9830,
 'nickles': 59065

In [11]:
idx2word = {v + index_offset: k for k, v in word2idx.items()}

idx2word[padding_id] = "<PAD>"
idx2word[start_id] = "<START>"
idx2word[oov_id] = "<OOV>" # out of vocablary

In [12]:
len(idx2word)

88587

In [15]:
idx2word

{0: '<PAD>',
 1: '<START>',
 2: '<OOV>',
 3: 'the',
 4: 'and',
 5: 'a',
 6: 'of',
 7: 'to',
 8: 'is',
 9: 'br',
 10: 'in',
 11: 'it',
 12: 'i',
 13: 'this',
 14: 'that',
 15: 'was',
 16: 'as',
 17: 'for',
 18: 'with',
 19: 'movie',
 20: 'but',
 21: 'film',
 22: 'on',
 23: 'not',
 24: 'you',
 25: 'are',
 26: 'his',
 27: 'have',
 28: 'he',
 29: 'be',
 30: 'one',
 31: 'all',
 32: 'at',
 33: 'by',
 34: 'an',
 35: 'they',
 36: 'who',
 37: 'so',
 38: 'from',
 39: 'like',
 40: 'her',
 41: 'or',
 42: 'just',
 43: 'about',
 44: "it's",
 45: 'out',
 46: 'has',
 47: 'if',
 48: 'some',
 49: 'there',
 50: 'what',
 51: 'good',
 52: 'more',
 53: 'when',
 54: 'very',
 55: 'up',
 56: 'no',
 57: 'time',
 58: 'she',
 59: 'even',
 60: 'my',
 61: 'would',
 62: 'which',
 63: 'only',
 64: 'story',
 65: 'really',
 66: 'see',
 67: 'their',
 68: 'had',
 69: 'can',
 70: 'were',
 71: 'me',
 72: 'well',
 73: 'than',
 74: 'we',
 75: 'much',
 76: 'been',
 77: 'bad',
 78: 'get',
 79: 'will',
 80: 'do',
 81: 'also',
 

Padding for input LSTM models

In [57]:
# maxlen: determine the length of the output arrays, padding and trimming
maxlen = 200

In [59]:
x_train = sequence.pad_sequences(x_train, maxlen=maxlen, truncating="post", padding="post", value=padding_id)

In [60]:
x_test = sequence.pad_sequences(x_test, maxlen=maxlen, truncating="post", padding="post", value=padding_id)

### Step 2. Create Attention Layer  
   
Attention helps to keep information in long sequence.

In [96]:
class Attention(tf.keras.Model):
    
    def __init__(self, units):
        # initialize initial values
        super(Attention, self).__init__()
        # architecture of nn
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)
        
    def call(self, features, hidden):
        hidden_with_time_axis = tf.expand_dims(hidden, 1)
        prob_score = tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis))
        attention_weights = tf.nn.softmax(self.V(prob_score), axis=1)
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights

前提: ニューラルネットワークは線形代数と非線形活性化関数からの演算子の合成です。

### Step 3. Embed Layer  

*Function*: Embed words as a vector of numbers.
   
*Method*: Random Initialization by Embedding Layer   
(Other choices:  Word2Vec, GloVe, Fast Text, doc2vec, lda2vec)   
  

In [65]:
sequence_input = Input(shape=(maxlen, ), dtype="int32")
embedded_sequence = keras.layers.Embedding(input_dim=vocab_size, 
                                           output_dim=128, 
                                           input_length=maxlen)(sequence_input)

### Step 4. Bi-directional Recurrent neural network

In [68]:
import os

rnn_cell_size = 128

In [69]:
lstm = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=rnn_cell_size,
                                                          dropout=0.3,
                                                          return_sequences=True,
                                                          return_state=True,
                                                          recurrent_activation="relu",
                                                          recurrent_initializer="glorot_uniform"),
                                     name="bi_lstm_0")(embedded_sequence)      

In [72]:
lstm, forward_h, forward_c, backward_h, backward_c = tf.keras.layers.Bidirectional \
                                                            (tf.keras.layers.LSTM
                                                             (rnn_cell_size, 
                                                              dropout=0.2,
                                                              return_sequences=True,
                                                              return_state=True,
                                                              recurrent_activation="relu",
                                                              recurrent_initializer="glorot_uniform"))(lstm)

Concatenate the hidden states from each RNN 

In [61]:
from tensorflow.python.keras.layers import Concatenate

In [107]:
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])
 
context_vector, attention_weights = Attention()
 
output = keras.layers.Dense(1, activation='sigmoid')(context_vector)
 
model = keras.Model(inputs=sequence_input, outputs=output)

# summarize layers
print(model.summary())

TypeError: __init__() missing 1 required positional argument: 'units'

Multi-layer もあるよ！

In [106]:
model.compile(optimizer=tf.train.AdamOptimizer(),
              loss='binary_crossentropy',
              metrics=['accuracy'])
 
early_stopping_callback = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                        min_delta=0,
                                                        patience=1,
                                                        verbose=0, mode='auto')

NameError: name 'model' is not defined

In [109]:
history = model.fit(x_train,
                    y_train,
                    epochs=10,
                    batch_size=200,
                    validation_split=.3, verbose=1, callbacks=[early_stopping_callback])

NameError: name 'model' is not defined

In [None]:
result = model.evaluate(x_test, y_test)
print(result)