In [1]:
import numpy as np

from sklearn.metrics import accuracy_score
from keras.datasets import reuters
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, LSTM, Activation, GRU
from keras import optimizers


In [2]:
# Settings forwarded to reuters.load_data when the dataset is loaded:
#   num_words  -> its `num_words` argument
#   maxlen     -> its `maxlen` argument
#   test_split -> its `test_split` argument
num_words, maxlen, test_split = 30000, 50, 0.3

In [3]:
# Load the Reuters dataset as (inputs, labels) pairs for the train and test
# splits, using the parameters defined in the previous cell.
(X_train, y_train), (X_test, y_test) = reuters.load_data(
    num_words = num_words,
    maxlen = maxlen,
    test_split = test_split,
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz


In [4]:
# Pad the sequences with zeros.
# padding = 'post' => 0's are appended to the end of each sequence.
# Both splits are padded to ONE shared length (the longest sequence found in
# either split); padding them independently could give train and test a
# different number of timesteps.
seq_len = max(len(seq) for split in (X_train, X_test) for seq in split)
X_train = pad_sequences(X_train, maxlen = seq_len, padding = 'post')
X_test = pad_sequences(X_test, maxlen = seq_len, padding = 'post')

# Add a trailing feature axis: (samples, timesteps) -> (samples, timesteps, 1).
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Record where the training labels end BEFORE y_train is overwritten below,
# instead of hard-coding the split index.
n_train = len(y_train)

# One-hot encode train and test labels together so both halves end up with
# the same number of class columns.
y_data = np.concatenate((y_train, y_test))
y_data = to_categorical(y_data)

# Keep the integer test labels; they are compared against predictions with
# accuracy_score in the evaluation cells.
y_actual = y_test

y_train = y_data[:n_train]
y_test = y_data[n_train:]

In [5]:
# Sanity check: after the reshape above this is (samples, timesteps, 1).
X_train.shape

(1395, 49, 1)

In [6]:
# Sanity check: one row per label (train + test), one column per one-hot class.
y_data.shape

(1994, 46)

## 1. Deep LSTM
- LSTMs can be made deep by stacking multiple layers, just like CNNs or MLPs
- Beware that LSTMs take longer to train than CNNs

In [7]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Activation, GRU
from keras import optimizers


In [8]:
def deep_lstm(input_shape = (49, 1), num_classes = 46, learning_rate = 0.001):
    """Build and compile a classifier made of four stacked LSTM layers.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to the first LSTM layer.
        Defaults to (49, 1), the value previously hard-coded here.
    num_classes : int
        Number of units in the final Dense layer (one per class).
        Defaults to 46, the value previously hard-coded here.
    learning_rate : float
        Learning rate given to the Adam optimizer. Defaults to 0.001.

    Returns
    -------
    Sequential
        The model, compiled with categorical cross-entropy loss and an
        accuracy metric.
    """
    model = Sequential()
    # All LSTM layers except the last return the full sequence, so the next
    # LSTM layer receives one output per timestep.
    model.add(LSTM(20, input_shape = input_shape, return_sequences = True))
    model.add(LSTM(20, return_sequences = True))
    model.add(LSTM(20, return_sequences = True))
    # The last LSTM returns only its final output, which feeds the classifier.
    model.add(LSTM(20, return_sequences = False))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # `learning_rate` is the supported keyword; `lr` is deprecated.
    adam = optimizers.Adam(learning_rate = learning_rate)
    model.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])

    return model

In [9]:
# Build the stacked LSTM and train it for 200 epochs in batches of 50 samples.
model = deep_lstm()
model.fit(
    X_train,
    y_train,
    epochs = 200,
    batch_size = 50,
    verbose = 1,
)



Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.src.callbacks.History at 0x78b8adfa7a30>

In [10]:
# Model outputs for every test sample, then the index of the highest-scoring
# class per sample.
y_pred = model.predict(X_test)
y_test_ = y_pred.argmax(axis = 1)



In [11]:
# Share of test samples whose predicted class equals the true integer label.
test_accuracy = accuracy_score(y_actual, y_test_)
print(test_accuracy)

0.8480801335559266


## 2. Bidirectional RNN
- Bidirectional RNNs consider the influence of the sequence not only in the forward direction, but also in the backward direction
- They can be thought of as two separate RNNs, one per direction, whose outputs are merged

<br/>
<img src="http://d3kbpzbmcynnmx.cloudfront.net/wp-content/uploads/2015/09/bidirectional-rnn.png" style="width: 400px"/>
<br/>

In [12]:
from keras.layers import Bidirectional

In [13]:
def bidirectional_lstm(input_shape = (49, 1), num_classes = 46, learning_rate = 0.001):
    """Build and compile a classifier with a single bidirectional LSTM layer.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to the Bidirectional wrapper.
        Defaults to (49, 1), the value previously hard-coded here.
    num_classes : int
        Number of units in the final Dense layer (one per class).
        Defaults to 46, the value previously hard-coded here.
    learning_rate : float
        Learning rate given to the Adam optimizer. Defaults to 0.001.

    Returns
    -------
    Sequential
        The model, compiled with categorical cross-entropy loss and an
        accuracy metric.
    """
    model = Sequential()
    # return_sequences = False: only the final output of the recurrent layer
    # is passed on to the classifier.
    model.add(Bidirectional(LSTM(20, return_sequences = False), input_shape = input_shape))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # `learning_rate` is the supported keyword; `lr` is deprecated.
    adam = optimizers.Adam(learning_rate = learning_rate)
    model.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])

    return model

In [14]:
%%time
model = bidirectional_lstm()
model.fit(X_train, y_train, epochs = 2, batch_size = 5, verbose = 1)



Epoch 1/2
Epoch 2/2
CPU times: user 8.4 s, sys: 345 ms, total: 8.74 s
Wall time: 8.16 s


<keras.src.callbacks.History at 0x78b853959e70>

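Epoch = one full pass of all the training data through the network
Batch = the group of input samples sent through the network at once
Iteration = one batch passing through the network; the number of iterations needed to complete one epoch is the sample size divided by the batch size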

Sample size = 1395
Epochs = 2
Batch size = 5
Iterations per epoch = 1395 / 5 = 279

In [15]:
# Model outputs for every test sample, then the index of the highest-scoring
# class per sample.
y_pred = model.predict(X_test)
y_test_ = y_pred.argmax(axis = 1)



In [16]:
# Share of test samples whose predicted class equals the true integer label.
test_accuracy = accuracy_score(y_actual, y_test_)
print(test_accuracy)

0.8063439065108514


## 3. Deep Bidirectional RNN
- Bidirectional RNNs can be stacked

<img src="http://www.wildml.com/wp-content/uploads/2015/09/Screen-Shot-2015-09-16-at-2.21.51-PM-272x300.png" style="width: 300px"/>

In [17]:
def deep_bidirectional_lstm(input_shape = (49, 1), num_classes = 46, learning_rate = 0.001):
    """Build and compile a classifier made of four stacked bidirectional LSTMs.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to the first Bidirectional wrapper.
        Defaults to (49, 1), the value previously hard-coded here.
    num_classes : int
        Number of units in the final Dense layer (one per class).
        Defaults to 46, the value previously hard-coded here.
    learning_rate : float
        Learning rate given to the Adam optimizer. Defaults to 0.001.

    Returns
    -------
    Sequential
        The model, compiled with categorical cross-entropy loss and an
        accuracy metric.
    """
    model = Sequential()
    # All recurrent layers except the last return the full sequence, so the
    # next recurrent layer receives one output per timestep.
    model.add(Bidirectional(LSTM(10, return_sequences = True), input_shape = input_shape))
    model.add(Bidirectional(LSTM(10, return_sequences = True)))
    model.add(Bidirectional(LSTM(10, return_sequences = True)))
    # The last layer returns only its final output, which feeds the classifier.
    model.add(Bidirectional(LSTM(10, return_sequences = False)))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # `learning_rate` is the supported keyword; `lr` is deprecated.
    adam = optimizers.Adam(learning_rate = learning_rate)
    model.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])

    return model

In [18]:
# Build the stacked bidirectional LSTM and train it for 200 epochs in batches
# of 50 samples.
model = deep_bidirectional_lstm()
model.fit(
    X_train,
    y_train,
    epochs = 200,
    batch_size = 50,
    verbose = 1,
)



Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.src.callbacks.History at 0x78b832a62230>

In [19]:
# Model outputs for every test sample, then the index of the highest-scoring
# class per sample.
y_pred = model.predict(X_test)
y_test_ = y_pred.argmax(axis = 1)



In [20]:
# Share of test samples whose predicted class equals the true integer label.
test_accuracy = accuracy_score(y_actual, y_test_)
print(test_accuracy)

0.8063439065108514


In [None]:
# Recurrent architectures:
# 1. SimpleRNN
# 2. Stacked RNN
# 3. LSTM
# 4. Stacked LSTM
# 5. Bidirectional LSTM
# 6. Stacked Bidirectional LSTM

## 4. Bidirectional GRU


In [21]:
from keras.layers import Bidirectional

In [22]:
def bidirectional_gru(input_shape = (49, 1), num_classes = 46, learning_rate = 0.001):
    """Build and compile a classifier with a single bidirectional GRU layer.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to the Bidirectional wrapper.
        Defaults to (49, 1), the value previously hard-coded here.
    num_classes : int
        Number of units in the final Dense layer (one per class).
        Defaults to 46, the value previously hard-coded here.
    learning_rate : float
        Learning rate given to the Adam optimizer. Defaults to 0.001, which
        is Adam's documented default, so behavior is unchanged for callers
        that pass no arguments.

    Returns
    -------
    Sequential
        The model, compiled with categorical cross-entropy loss and an
        accuracy metric.
    """
    model = Sequential()
    # return_sequences = False: only the final output of the recurrent layer
    # is passed on to the classifier.
    model.add(Bidirectional(GRU(20, return_sequences = False), input_shape = input_shape))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # Learning rate is set explicitly, matching the other model builders.
    adam = optimizers.Adam(learning_rate = learning_rate)
    model.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])

    return model

In [23]:
%%time
model = bidirectional_gru()
model.fit(X_train, y_train, epochs = 2, batch_size = 5, verbose = 1)

Epoch 1/2
Epoch 2/2
CPU times: user 7.85 s, sys: 371 ms, total: 8.22 s
Wall time: 7.75 s


<keras.src.callbacks.History at 0x78b833bf92d0>

In [24]:
# Model outputs for every test sample, then the index of the highest-scoring
# class per sample.
y_pred = model.predict(X_test)
y_test_ = y_pred.argmax(axis = 1)



In [25]:
# Share of test samples whose predicted class equals the true integer label.
test_accuracy = accuracy_score(y_actual, y_test_)
print(test_accuracy)

0.7646076794657763
