# Sentiment Analysis LSTM and GRU

In [1]:
import tensorflow as tf

In [2]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [3]:
import numpy as np
from tensorflow.python.keras.models import Sequential, Model
from tensorflow.python.keras.layers import Embedding, Reshape, Activation, Input, Lambda, Dense, GRU, LSTM, CuDNNLSTM, CuDNNGRU, Dropout
from tensorflow.python.keras.layers.merge import Dot
from tensorflow.python.keras.utils import np_utils
from tensorflow.python.keras.utils.data_utils import get_file
from tensorflow.python.keras.preprocessing.text import Tokenizer
from tensorflow.python.keras.preprocessing.sequence import skipgrams, pad_sequences
from tensorflow.python.keras.utils.np_utils import to_categorical
from tensorflow.python.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
from tensorflow.python.keras.optimizers import Adam
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity
import gensim

In [4]:
from tensorflow.python.keras.datasets import imdb

In [5]:
from string import punctuation
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
from nltk.stem import WordNetLemmatizer # takes into consideration the morphological analysis of the words
from nltk.stem.porter import PorterStemmer # cutting off the end or the beginning of the word.

In [6]:
remove_terms = punctuation + '0123456789'

In [7]:
num_words = 20000

Dividing Datasets

In [8]:
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=num_words)

  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


In [9]:
print(len(X_train),'train_data')
print(len(X_test),'test_data')

25000 train_data
25000 test_data


In [10]:
print('text already in tokens:')
len(X_train[0]),len(X_train[1]),len(X_train[2])

text already in tokens:


(218, 189, 141)

Setting some parameters

In [11]:
max_len = 256
embedding_size = 10
batch_size = 128
n_epochs = 10

In [12]:
pad = 'pre' #'post' # with you want to pad pre or post the text

In [13]:
X_train_pad = pad_sequences(X_train, maxlen=max_len, padding=pad, truncating=pad)
X_test_pad = pad_sequences(X_test, maxlen=max_len, padding=pad, truncating=pad)

- Creating Model - 

In [14]:
model = Sequential()
model.add(Embedding(
    input_dim=num_words,output_dim=embedding_size,input_length=max_len,
    name='layer_embedding'                
    ))
model.add(Dropout(0.2))
model.add(LSTM(128,dropout=0.2,recurrent_dropout=0.2))
#model.add(CuDNNLSTM(128,return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1,activation='sigmoid',name='classification'))



In [15]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
layer_embedding (Embedding)  (None, 256, 10)           200000    
_________________________________________________________________
dropout (Dropout)            (None, 256, 10)           0         
_________________________________________________________________
lstm (LSTM)                  (None, 128)               71168     
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
classification (Dense)       (None, 1)                 129       
Total params: 271,297
Trainable params: 271,297
Non-trainable params: 0
_________________________________________________________________


In [16]:
model.compile(optimizer='adam',loss='binary_crossentropy', metrics=['accuracy'])

In [17]:
callback_early_stopping = EarlyStopping(monitor='val_loss',patience=5,verbose=1)

In [18]:
model.fit(
    X_train_pad, y_train,
    epochs=n_epochs,
    batch_size=batch_size,
    validation_split=0.05,
    callbacks=[callback_early_stopping]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 00008: early stopping


<tensorflow.python.keras.callbacks.History at 0x7f2afb401d30>

In [19]:
eval_ = model.evaluate(X_test_pad, y_test)



In [20]:
print('Loss', eval_[0])
print('Accuracy', eval_[1])

Loss 0.5117790699005127
Accuracy 0.8590400218963623


In [21]:
model.save('Sentiment-LSTM')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: Sentiment-LSTM/assets


GRU

In [26]:
model_GRU = Sequential()
model_GRU.add(Embedding(
    input_dim=num_words,output_dim=embedding_size,input_length=max_len,
    name='layer_embedding'                
    ))
model_GRU.add(Dropout(0.2))
#model_GRU.add(GRU(16,dropout=0.2,recurrent_dropout=0.2))
model_GRU.add(CuDNNGRU(units=16,return_sequences=True))
model_GRU.add(CuDNNGRU(units=8,return_sequences=True))
model_GRU.add(CuDNNGRU(units=4,return_sequences=False))
model_GRU.add(Dropout(0.2))
model_GRU.add(Dense(1,activation='sigmoid',name='classification'))

In [27]:
model_GRU.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
layer_embedding (Embedding)  (None, 256, 10)           200000    
_________________________________________________________________
dropout_4 (Dropout)          (None, 256, 10)           0         
_________________________________________________________________
cu_dnngru_3 (CuDNNGRU)       (None, 256, 16)           1344      
_________________________________________________________________
cu_dnngru_4 (CuDNNGRU)       (None, 256, 8)            624       
_________________________________________________________________
cu_dnngru_5 (CuDNNGRU)       (None, 4)                 168       
_________________________________________________________________
dropout_5 (Dropout)          (None, 4)                 0         
_________________________________________________________________
classification (Dense)       (None, 1)                

In [30]:
model_GRU.compile(optimizer='rmsprop',loss='binary_crossentropy', metrics=['accuracy'])

In [31]:
callback_early_stopping = EarlyStopping(monitor='val_loss',patience=5,verbose=1)

In [32]:
model_GRU.fit(
    X_train_pad, y_train,
    epochs=n_epochs,
    batch_size=batch_size,
    validation_split=0.05,
    callbacks=[callback_early_stopping]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 00009: early stopping


<tensorflow.python.keras.callbacks.History at 0x7f2b04137850>

In [33]:
eval_ = model_GRU.evaluate(X_test_pad, y_test)



In [34]:
print('Loss', eval_[0])
print('Accuracy', eval_[1])

Loss 0.3416987955570221
Accuracy 0.8646399974822998
