# Sentiment Classification With Recurrent Neural Networks 

* Recurrent neural networks works great with sequential data as with the addition of output O<sub>1</sub> it has also has O<sub>t</sub> with respect to time 

In [3]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# get the data first
imdb = tfds.load('imdb_reviews', as_supervised=True)

[1mDownloading and preparing dataset imdb_reviews/plain_text/1.0.0 (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...[0m


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Completed...', max=1.0, style=Progre…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Size...', max=1.0, style=ProgressSty…







HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incomplete8G40DY/imdb_reviews-train.tfrecord


HBox(children=(FloatProgress(value=0.0, max=25000.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incomplete8G40DY/imdb_reviews-test.tfrecord


HBox(children=(FloatProgress(value=0.0, max=25000.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incomplete8G40DY/imdb_reviews-unsupervised.tfrecord


HBox(children=(FloatProgress(value=0.0, max=50000.0), HTML(value='')))



[1mDataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.[0m


In [4]:
# we will only take train_data (for demonstration purpose)
# do the same for test_data in your case 
train_data, test_data = imdb['train'], imdb['test']

training_sentences = []
training_labels = []

for sentence, label in train_data:
    training_sentences.append(str(sentence.numpy()))
    training_labels.append(str(label.numpy()))

training_labels_final = np.array(training_labels).astype(np.float)
print(training_sentences[0])    # first samples
print(training_labels_final[0]) # first label 

# b"This was an absolutely terrible movie. ...."
# 0.0

b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it."
0.0


In [5]:
test_sentences = []
test_labels = []

for sentence, label in test_data:
    test_sentences.append(str(sentence.numpy()))
    test_labels.append(str(label.numpy()))

test_labels_final = np.array(test_labels).astype(np.float)
print(test_sentences[0])    # first samples
print(test_labels_final[0]) # first label 

# b"This was an absolutely terrible movie. ...."
# 0.0

b"There are films that make careers. For George Romero, it was NIGHT OF THE LIVING DEAD; for Kevin Smith, CLERKS; for Robert Rodriguez, EL MARIACHI. Add to that list Onur Tukel's absolutely amazing DING-A-LING-LESS. Flawless film-making, and as assured and as professional as any of the aforementioned movies. I haven't laughed this hard since I saw THE FULL MONTY. (And, even then, I don't think I laughed quite this hard... So to speak.) Tukel's talent is considerable: DING-A-LING-LESS is so chock full of double entendres that one would have to sit down with a copy of this script and do a line-by-line examination of it to fully appreciate the, uh, breadth and width of it. Every shot is beautifully composed (a clear sign of a sure-handed director), and the performances all around are solid (there's none of the over-the-top scenery chewing one might've expected from a film like this). DING-A-LING-LESS is a film whose time has come."
1.0


In [6]:
vocab_size = 2000 # The maximum number of words to keep, based on word frequency. 
embed_size = 30   # Dimension of the dense embedding.
max_len = 100     # Length of input sequences, when it is constant.

# https://keras.io/api/preprocessing/text/
tokenizer = Tokenizer(num_words=vocab_size, 
                      filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
                      lower=True,
                      split=" ",
                      oov_token="<OOV>")
tokenizer.fit_on_texts(training_sentences)
tokenizer.fit_on_texts(test_sentences)
print(tokenizer.word_index) 
# {'<OOV>': 1, 'the': 2, 'and': 3, 'a': 4, 'of': 5, 'to': 6, 'is': 7, ...

# tokenized and padding 
training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(training_sequences, maxlen=max_len, truncating='post')

print(training_sentences[0])
print()
print(training_padded[0])

# b"This was an absolutely terrible movie. ...."

b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it."

[  59   12   14   35  431  391   16  175   29    1    9   33 1482    1
   41  508    1  197   25   84  153   19   12  205  339   29   70  246
  214    9  493   62   70   84  117   99   24    1   12    1  660  788
   12   16    7   35  413    1  179    1  425    2   92 1170  138   73
  

In [7]:
# Input for variable-length sequences of integers
inputs = tf.keras.Input(shape=(None,), dtype="int32")
# Embed each integer 
x = tf.keras.layers.Embedding(input_dim = vocab_size, 
                              output_dim = embed_size,
                              input_length=max_len)(inputs)
# Add 2 bidirectional LSTMs
x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True))(x)
x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64))(x)
# Add a classifier
outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x)
model = tf.keras.Model(inputs, outputs)

# Compile and Run 
model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
model.fit(training_padded,
          training_labels_final,
          epochs=10,
          verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fed21c501d0>

In [8]:
#model.evaluate(training_padded,training_labels_final)

In [9]:
#model.predict(test_labels_final)

In [10]:
import tempfile
import os
MODEL_DIR = tempfile.gettempdir()
version = 1
export_path = os.path.join(MODEL_DIR, str(version))
print('export_path = {}\n'.format(export_path))

tf.keras.models.save_model(
    model,
    export_path,
    overwrite=True,
    include_optimizer=True,
    save_format=None,
    signatures=None,
    options=None
)

print('\nSaved model:')
!ls -l {export_path}

export_path = /tmp/1





INFO:tensorflow:Assets written to: /tmp/1/assets


INFO:tensorflow:Assets written to: /tmp/1/assets



Saved model:
total 4968
drwxr-xr-x 2 root root    4096 Mar  9 06:15 assets
-rw-r--r-- 1 root root 5078258 Mar  9 06:15 saved_model.pb
drwxr-xr-x 2 root root    4096 Mar  9 06:15 variables
