# Movie Sentiment Analysis

For this article, we'll use the IMDB dataset, which contains 50,000 movie reviews from the Internet Movie Database. The dataset comes pre-packaged with Keras.

In [None]:
# Load IPython's autoreload extension and use mode 2, which re-imports all
# modules before each cell is executed so edits to local .py files are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:

from keras.datasets import imdb
import numpy as np

import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import losses

In [None]:
# Only surface TensorFlow log messages at ERROR level.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Enable memory growth on every detected GPU instead of indexing devices 0 and
# 1 directly: hard-coded indices raise IndexError on machines with fewer than
# two GPUs and leave any additional GPUs unconfigured. With no GPU present the
# loop simply does nothing and the notebook continues on CPU.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

# Execute tf.function-wrapped code eagerly.
# NOTE(review): this usually slows training considerably; presumably enabled
# for debugging or ragged-input compatibility — confirm it is still required.
tf.config.run_functions_eagerly(True)

2021-12-14 09:44:12.262673: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-14 09:44:12.263254: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-14 09:44:12.268697: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-14 09:44:12.269368: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-14 09:44:12.269952: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from S

## Getting the data

In [None]:
# Vocabulary cap passed to the loader as `num_words`, and the seed it uses when
# shuffling the reviews.
vocab_size = 10000
seed = 42

# load_data returns a (train, test) pair, each being an (inputs, labels) tuple.
train_split, test_split = tf.keras.datasets.imdb.load_data(
    num_words=vocab_size,
    seed=seed,
)
x_train, y_train = train_split
x_test, y_test = test_split


### Ragged Tensors

Ragged tensors are relatively new first-class citizens in the TensorFlow library. They allow us to work with tensors whose rows have different lengths, such as reviews containing different numbers of words.

In [None]:
# Convert both splits to ragged tensors (train first, then test) so each review
# keeps its own length.
r_train_x, r_test_x = (
    tf.ragged.constant(reviews) for reviews in (x_train, x_test)
)

# Pair every ragged review with its label, one dataset element per review.
raw_train_ds = tf.data.Dataset.from_tensor_slices(
    (r_train_x, y_train)
)
raw_test_ds = tf.data.Dataset.from_tensor_slices(
    (r_test_x, y_test)
)


In [None]:
# Pipeline settings: shuffle buffer length and number of reviews per batch.
BUFFER_SIZE = 1000
BATCH_SIZE = 32

# Let tf.data choose the prefetch depth dynamically.
AUTOTUNE = tf.data.AUTOTUNE

# Training pipeline: cache the raw examples, reshuffle every epoch, then batch.
# The final partial batch is dropped so every training step sees a full batch.
train_ds_str = (
    raw_train_ds
    .cache()
    .shuffle(buffer_size=BUFFER_SIZE)
    .batch(batch_size=BATCH_SIZE, drop_remainder=True)
    .prefetch(AUTOTUNE)
)

# Evaluation pipeline: no shuffling (order does not affect the metrics) and the
# final partial batch is kept, so validation covers every test review instead
# of silently discarding the remainder.
test_ds_str = (
    raw_test_ds
    .cache()
    .batch(batch_size=BATCH_SIZE)
    .prefetch(AUTOTUNE)
)
                       
                        

In [None]:
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras import Input, Model



In [None]:
# Ragged input: each example is a variable-length sequence of token ids.
input1 = Input(shape=(None,), ragged=True)

# Embed each token id into a 64-dimensional vector, then reduce the whole
# sequence to a single 64-dimensional vector with an LSTM.
x = Embedding(input_dim=vocab_size, output_dim=64)(input1)
x = LSTM(units=64)(x)

# One sigmoid unit produces the binary sentiment score.
out = Dense(units=1, activation='sigmoid')(x)

model = Model(inputs=[input1], outputs=[out])
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding_1 (Embedding)     (None, None, 64)          640000    
                                                                 
 lstm_1 (LSTM)               (None, 64)                33024     
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 673,089
Trainable params: 673,089
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Binary cross-entropy loss with the Adam optimizer; accuracy is tracked under
# the 'acc' metric name.
model.compile(
    optimizer='adam',
    loss='bce',
    metrics=['acc'],
)

# File that receives the checkpointed weights.
filepath = 'model.h5'

# Save weights only, and only when the validation loss improves.
mc = tf.keras.callbacks.ModelCheckpoint(
    filepath,
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    save_weights_only=True,
    verbose=0,
)

# Stop training once the validation loss has not improved for 10 epochs.
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='auto',
    patience=10,
    verbose=1,
)

In [None]:
%%time
history = model.fit(train_ds_str, epochs=100, batch_size=16, 
                    validation_data= test_ds_str, callbacks=[mc, es])