# Sequence Classification with RNNs (LSTM and a Conv1D + LSTM hybrid)

In [1]:
import numpy as np
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.layers.convolutional import Conv1D, MaxPooling1D

In [2]:
# Vocabulary size: the dictionary passed to the loader is capped at this many words.
top_words = 5000

# Load the IMDB train/test splits, limited to `top_words` dictionary entries.
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    num_words=top_words,
)

Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz


In [3]:
# The dataset ships already pre-processed, so the only preparation left is to
# bring every review to one fixed length before it is fed to the network.
max_review_length = 500  # length (in word indices) of each review after padding

# Apply the same padding to both splits in a single unpacking expression.
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_review_length)
    for split in (X_train, X_test)
)

In [9]:
# Model building: Embedding -> Dropout -> LSTM -> Dropout -> Dense(sigmoid),
# trained as a binary classifier on the padded reviews.

# Size of the dense vector each word index is embedded into. With reviews of
# max_review_length (500) entries the embedding output is (batch, 500, 32).
embedding_vector_length = 32

model = Sequential()
model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))
model.add(Dropout(0.2))  # dropout on the embedded sequence
model.add(LSTM(100))     # 100 units; output shape is (batch, 100)
# Alternative kept from the original cell: configure dropout on the LSTM layer itself.
#model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dropout(0.2))  # dropout on the LSTM output
model.add(Dense(1, activation='sigmoid'))  # single sigmoid unit for the binary label
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the cell output.
model.summary()

# NOTE(review): the test split is used as validation data here and again for the
# final evaluation, so the two accuracy figures are computed on the same data.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=128)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________
None
Train on 25000 samples, validate on 25000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x181dfa0cc0>

# The model is producing an accuracy of 95%

In [13]:
# Score the trained model on the test split without a progress bar.
scores = model.evaluate(X_test, y_test, verbose=0)

# The model was compiled with metrics=['accuracy'], so index 1 of the returned
# scores holds the accuracy (index 0 is the loss).
test_accuracy = scores[1]
print('Accuracy: {}'.format(test_accuracy))

# Hybrid (Conv1D and LSTM) model for sequence classification:

In [11]:
# Model building (hybrid Conv1D + LSTM variant)
#
# NOTE: this cell is currently disabled -- every statement below is commented
# out, so running the cell does nothing. If uncommented, it would rebind `model`
# to a new Sequential network laid out as
#   Embedding -> Conv1D(32 filters, kernel 3, 'same', relu) -> MaxPooling1D(2)
#   -> Dropout(0.2) -> LSTM(100) -> Dropout(0.2) -> Dense(1, sigmoid)
# and train it with the same compile/fit settings as the LSTM-only model,
# replacing that model in the notebook namespace.
#embedding_vector_length = 32 # 32 dimensional vector space after embedding
#model = Sequential()
#model.add(Embedding(top_words,embedding_vector_length, input_length=max_review_length))
#model.add(Conv1D(filters=32,kernel_size=3,padding='same',activation='relu'))
#model.add(MaxPooling1D(pool_size=2))
#model.add(Dropout(0.2))
#model.add(LSTM(100))
##model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
#model.add(Dropout(0.2))
#model.add(Dense(1, activation='sigmoid'))
#model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
#print(model.summary())
#model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=10,batch_size=128)