# Design Choices in Recurrent Neural Networks
# **Ömer Faruk Güzel**

###  Importing packages

In [4]:
import re
import numpy as np
import pandas as pd 

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
from keras.datasets import imdb

from keras.utils.np_utils import to_categorical

import timeit
import warnings
# Installs a blanket 'ignore' filter, so warnings of every category are suppressed
# (this also hides deprecation notices emitted by the libraries imported above).
warnings.filterwarnings('ignore')
import ssl
# NOTE(review): replaces the default HTTPS context factory with the unverified one,
# so certificate checks are skipped for connections that rely on that default.
# Presumably a workaround so the dataset download succeeds on this machine;
# confirm it is still required, since it weakens TLS for the whole session.
ssl._create_default_https_context = ssl._create_unverified_context

### Preparing Dataset

In [2]:
# Settings shared by every experiment cell in this notebook.
max_features = 1000  # vocabulary size handed to imdb.load_data as num_words
maxlen = 80          # length each review is padded / cut to by pad_sequences
batch_size = 32      # mini-batch size reused by the fit/evaluate calls

# Fetch the IMDB reviews, already encoded as sequences of word indices.
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

# Bring every review to the same length so the splits become 2-D arrays.
print('Pad sequences (samples x time)')
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

Loading data...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 80)
x_test shape: (25000, 80)


## Part 1: Influence of number of nodes

Try the models with different number of nodes such as 32, 64, 128 etc.

Analyze the number of model parameters, accuracy and training time

### LSTM with 8 nodes

In [10]:
# Experiment: 8-dimensional embedding feeding a single LSTM with 8 units.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=8),
    LSTM(units=8, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),  # one sigmoid output, paired with binary_crossentropy
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Time a single epoch; the timed span also includes the validation pass on the test split.
start = timeit.default_timer()
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=1,
    validation_data=(x_test, y_test),
)
end = timeit.default_timer()

# Report loss / accuracy on the test split together with the measured wall-clock time.
score, acc = model.evaluate(x=x_test, y=y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, None, 8)           8000      
_________________________________________________________________
lstm_5 (LSTM)                (None, 8)                 544       
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 9         
Total params: 8,553
Trainable params: 8,553
Non-trainable params: 0
_________________________________________________________________
Test score: 0.4142504632472992
Test accuracy: 0.8130000233650208
Time Taken to run the model: 8.500383197999781 seconds


### LSTM with 16 nodes

In [11]:
# Experiment: 8-dimensional embedding feeding a single LSTM with 16 units.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=8),
    LSTM(units=16, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),  # one sigmoid output, paired with binary_crossentropy
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Time a single epoch; the timed span also includes the validation pass on the test split.
start = timeit.default_timer()
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=1,
    validation_data=(x_test, y_test),
)
end = timeit.default_timer()

# Report loss / accuracy on the test split together with the measured wall-clock time.
score, acc = model.evaluate(x=x_test, y=y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (None, None, 8)           8000      
_________________________________________________________________
lstm_6 (LSTM)                (None, 16)                1600      
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 17        
Total params: 9,617
Trainable params: 9,617
Non-trainable params: 0
_________________________________________________________________
Test score: 0.4084605872631073
Test accuracy: 0.8132799863815308
Time Taken to run the model: 8.538254277000078 seconds


In [13]:
# Experiment: 8-dimensional embedding feeding a single LSTM with 128 units.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=8),
    LSTM(units=128, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),  # one sigmoid output, paired with binary_crossentropy
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Time a single epoch; the timed span also includes the validation pass on the test split.
start = timeit.default_timer()
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=1,
    validation_data=(x_test, y_test),
)
end = timeit.default_timer()

# Report loss / accuracy on the test split together with the measured wall-clock time.
score, acc = model.evaluate(x=x_test, y=y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_8 (Embedding)      (None, None, 8)           8000      
_________________________________________________________________
lstm_8 (LSTM)                (None, 128)               70144     
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 129       
Total params: 78,273
Trainable params: 78,273
Non-trainable params: 0
_________________________________________________________________
Test score: 0.41530758142471313
Test accuracy: 0.8080400228500366
Time Taken to run the model: 9.315172520000033 seconds


### Write your findings about number of nodes here?

1.   Finding 1
2.   Finding 2




## Part 2: Influence of Embedding



In [14]:
# Experiment: 4-dimensional embedding with the LSTM width held at 16 units.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=4),
    LSTM(units=16, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),  # one sigmoid output, paired with binary_crossentropy
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Time a single epoch; the timed span also includes the validation pass on the test split.
start = timeit.default_timer()
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=1,
    validation_data=(x_test, y_test),
)
end = timeit.default_timer()

# Report loss / accuracy on the test split together with the measured wall-clock time.
score, acc = model.evaluate(x=x_test, y=y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_9 (Embedding)      (None, None, 4)           4000      
_________________________________________________________________
lstm_9 (LSTM)                (None, 16)                1344      
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 17        
Total params: 5,361
Trainable params: 5,361
Non-trainable params: 0
_________________________________________________________________
Test score: 0.42111504077911377
Test accuracy: 0.810479998588562
Time Taken to run the model: 8.39360831399972 seconds


In [15]:
# Experiment: 64-dimensional embedding with the LSTM width held at 16 units.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=64),
    LSTM(units=16, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),  # one sigmoid output, paired with binary_crossentropy
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Time a single epoch; the timed span also includes the validation pass on the test split.
start = timeit.default_timer()
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=1,
    validation_data=(x_test, y_test),
)
end = timeit.default_timer()

# Report loss / accuracy on the test split together with the measured wall-clock time.
score, acc = model.evaluate(x=x_test, y=y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_10 (Embedding)     (None, None, 64)          64000     
_________________________________________________________________
lstm_10 (LSTM)               (None, 16)                5184      
_________________________________________________________________
dense_10 (Dense)             (None, 1)                 17        
Total params: 69,201
Trainable params: 69,201
Non-trainable params: 0
_________________________________________________________________
Test score: 0.39583641290664673
Test accuracy: 0.8168399930000305
Time Taken to run the model: 9.576857149999796 seconds


In [24]:
# Experiment: 256-dimensional embedding with the LSTM width held at 16 units.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=256),
    LSTM(units=16, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),  # one sigmoid output, paired with binary_crossentropy
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Time a single epoch; the timed span also includes the validation pass on the test split.
start = timeit.default_timer()
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=1,
    validation_data=(x_test, y_test),
)
end = timeit.default_timer()

# Report loss / accuracy on the test split together with the measured wall-clock time.
score, acc = model.evaluate(x=x_test, y=y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")

Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_19 (Embedding)     (None, None, 256)         256000    
_________________________________________________________________
lstm_19 (LSTM)               (None, 16)                17472     
_________________________________________________________________
dense_19 (Dense)             (None, 1)                 17        
Total params: 273,489
Trainable params: 273,489
Non-trainable params: 0
_________________________________________________________________
Test score: 0.39228808879852295
Test accuracy: 0.8209599852561951
Time Taken to run the model: 12.151193093000074 seconds


In [20]:
# Experiment: 512-dimensional embedding with the LSTM width held at 16 units.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=512),
    LSTM(units=16, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),  # one sigmoid output, paired with binary_crossentropy
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Time a single epoch; the timed span also includes the validation pass on the test split.
start = timeit.default_timer()
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=1,
    validation_data=(x_test, y_test),
)
end = timeit.default_timer()

# Report loss / accuracy on the test split together with the measured wall-clock time.
score, acc = model.evaluate(x=x_test, y=y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_15 (Embedding)     (None, None, 512)         512000    
_________________________________________________________________
lstm_15 (LSTM)               (None, 16)                33856     
_________________________________________________________________
dense_15 (Dense)             (None, 1)                 17        
Total params: 545,873
Trainable params: 545,873
Non-trainable params: 0
_________________________________________________________________
Test score: 0.40349820256233215
Test accuracy: 0.8180400133132935
Time Taken to run the model: 15.398136687999795 seconds


### Write your findings about number of embedding features here?

1.   Increasing the number of embedding features leads to a longer training time and a higher number of trainable parameters, as expected.
2.   However, increasing the number of embedding features does not change the accuracy monotonically. I tried 4, 64, 256 and 512 features and found that 256 can be considered a threshold: I got the highest accuracy with 256, and when I kept increasing the number of features, the accuracy started to decrease.



## Part 3: Influence of Dropout

Try the models with different rates of dropout from 0 to 1

Analyze the number of model parameters, accuracy and training time

### Dropout with rate 0.5

In [31]:
# Experiment: 32-dim embedding, 8-unit LSTM, input dropout 0.5 and recurrent dropout 0.5.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=32),
    LSTM(units=8, dropout=0.5, recurrent_dropout=0.5),
    Dense(units=1, activation='sigmoid'),  # one sigmoid output, paired with binary_crossentropy
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Time a single epoch; the timed span also includes the validation pass on the test split.
start = timeit.default_timer()
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=1,
    validation_data=(x_test, y_test),
)
end = timeit.default_timer()

# Report loss / accuracy on the test split together with the measured wall-clock time.
score, acc = model.evaluate(x=x_test, y=y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")

Model: "sequential_26"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_26 (Embedding)     (None, None, 32)          32000     
_________________________________________________________________
lstm_31 (LSTM)               (None, 8)                 1312      
_________________________________________________________________
dense_26 (Dense)             (None, 1)                 9         
Total params: 33,321
Trainable params: 33,321
Non-trainable params: 0
_________________________________________________________________
Test score: 0.4276007413864136
Test accuracy: 0.8057600259780884
Time Taken to run the model: 237.63186271500035 seconds


In [32]:
# Experiment: 32-dim embedding, 8-unit LSTM, input dropout 0.1 and recurrent dropout 0.5.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=32),
    LSTM(units=8, dropout=0.1, recurrent_dropout=0.5),
    Dense(units=1, activation='sigmoid'),  # one sigmoid output, paired with binary_crossentropy
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Time a single epoch; the timed span also includes the validation pass on the test split.
start = timeit.default_timer()
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=1,
    validation_data=(x_test, y_test),
)
end = timeit.default_timer()

# Report loss / accuracy on the test split together with the measured wall-clock time.
score, acc = model.evaluate(x=x_test, y=y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")

Model: "sequential_27"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_27 (Embedding)     (None, None, 32)          32000     
_________________________________________________________________
lstm_32 (LSTM)               (None, 8)                 1312      
_________________________________________________________________
dense_27 (Dense)             (None, 1)                 9         
Total params: 33,321
Trainable params: 33,321
Non-trainable params: 0
_________________________________________________________________
Test score: 0.42101287841796875
Test accuracy: 0.8082399964332581
Time Taken to run the model: 238.04765940500056 seconds


In [33]:
# Experiment: 32-dim embedding, 8-unit LSTM, input dropout 0.9 and recurrent dropout 0.5.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=32),
    LSTM(units=8, dropout=0.9, recurrent_dropout=0.5),
    Dense(units=1, activation='sigmoid'),  # one sigmoid output, paired with binary_crossentropy
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Time a single epoch; the timed span also includes the validation pass on the test split.
start = timeit.default_timer()
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=1,
    validation_data=(x_test, y_test),
)
end = timeit.default_timer()

# Report loss / accuracy on the test split together with the measured wall-clock time.
score, acc = model.evaluate(x=x_test, y=y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")

Model: "sequential_28"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_28 (Embedding)     (None, None, 32)          32000     
_________________________________________________________________
lstm_33 (LSTM)               (None, 8)                 1312      
_________________________________________________________________
dense_28 (Dense)             (None, 1)                 9         
Total params: 33,321
Trainable params: 33,321
Non-trainable params: 0
_________________________________________________________________
Test score: 0.48935937881469727
Test accuracy: 0.7652400135993958
Time Taken to run the model: 237.34768932099996 seconds


### Write your findings about influence of dropout rate here?

1.   The training time remains nearly the same when the dropout rate changes. As expected, the dropout rate did not affect the number of trainable parameters.
2.   The accuracy increased slightly when I changed the dropout rate from 0.5 to 0.1 (with the recurrent dropout kept at 0.5), but it decreased clearly at 0.9.


## Part 4: Multilayered RNNs

Try the models with different number of layers from smaller to larger.

Analyze the number of model parameters, accuracy and training time

### RNN with 3 layer LSTM

In [34]:
# Experiment: three stacked 8-unit LSTM layers on top of an 8-dimensional embedding.
# Every LSTM except the last returns the full sequence so the next one can consume it.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=8),
    LSTM(units=8, dropout=0.0, recurrent_dropout=0.0, return_sequences=True),
    LSTM(units=8, dropout=0.0, recurrent_dropout=0.0, return_sequences=True),
    LSTM(units=8, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),  # one sigmoid output, paired with binary_crossentropy
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Time a single epoch; the timed span also includes the validation pass on the test split.
start = timeit.default_timer()
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=1,
    validation_data=(x_test, y_test),
)
end = timeit.default_timer()

# Report loss / accuracy on the test split together with the measured wall-clock time.
score, acc = model.evaluate(x=x_test, y=y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")

Model: "sequential_29"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_29 (Embedding)     (None, None, 8)           8000      
_________________________________________________________________
lstm_34 (LSTM)               (None, None, 8)           544       
_________________________________________________________________
lstm_35 (LSTM)               (None, None, 8)           544       
_________________________________________________________________
lstm_36 (LSTM)               (None, 8)                 544       
_________________________________________________________________
dense_29 (Dense)             (None, 1)                 9         
Total params: 9,641
Trainable params: 9,641
Non-trainable params: 0
_________________________________________________________________
Test score: 0.4197038412094116
Test accuracy: 0.8073599934577942
Time Taken to run the model: 17.71710273700046 seconds

### RNN with 4 layer LSTM

In [35]:
# Experiment: four stacked 8-unit LSTM layers on top of an 8-dimensional embedding.
# Every LSTM except the last returns the full sequence so the next one can consume it.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=8),
    LSTM(units=8, dropout=0.0, recurrent_dropout=0.0, return_sequences=True),
    LSTM(units=8, dropout=0.0, recurrent_dropout=0.0, return_sequences=True),
    LSTM(units=8, dropout=0.0, recurrent_dropout=0.0, return_sequences=True),
    LSTM(units=8, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),  # one sigmoid output, paired with binary_crossentropy
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Time a single epoch; the timed span also includes the validation pass on the test split.
start = timeit.default_timer()
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=1,
    validation_data=(x_test, y_test),
)
end = timeit.default_timer()

# Report loss / accuracy on the test split together with the measured wall-clock time.
score, acc = model.evaluate(x=x_test, y=y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
print("Time Taken to run the model:",end - start, "seconds")

Model: "sequential_30"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_30 (Embedding)     (None, None, 8)           8000      
_________________________________________________________________
lstm_37 (LSTM)               (None, None, 8)           544       
_________________________________________________________________
lstm_38 (LSTM)               (None, None, 8)           544       
_________________________________________________________________
lstm_39 (LSTM)               (None, None, 8)           544       
_________________________________________________________________
lstm_40 (LSTM)               (None, 8)                 544       
_________________________________________________________________
dense_30 (Dense)             (None, 1)                 9         
Total params: 10,185
Trainable params: 10,185
Non-trainable params: 0
_________________________________________________

### Write your findings about number of layers here?

1.   Training took about 4 seconds longer when I increased the number of LSTM layers, and the number of trainable parameters also increased.
2.   The accuracy decreased slightly when I added one more LSTM layer.
