### Importing the Libraries

In [1]:
#import the required libraries for the implementation
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from keras.preprocessing import sequence


### Load the data from Keras to Train, Test variables

In [2]:
#load the imdb dataset into train and test set
from keras.datasets import imdb
vocabulary_size = 5000

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words = vocabulary_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


### Print one instance of Review and Sentiment

In [13]:
#understanding how the dataset looks like
words = imdb.get_word_index()

#the words are already vectorized in the dataset, hence we reverse the process to see the word distribution
vects = {i: word for word, i in words.items()}
print('review')
print([vects.get(i, ' ') for i in X_train[10]])

#the sentiment is 1 if the review is positive and 0 if the review is negative
print('sentiment')
print(y_train[10])

review
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'the', 'clear', 'fact', 'entertaining', 'there', 'life', 'back', 'br', 'is', 'and', 'show', 'of', 'performance', 'stars', 'br', 'actors', 'film', 'him', 'many', 'should', 'movie', 'reasons', 'to', 'and', 'reading', 'and', 'are', 'in', 'of', 'scenes', 'and', 'and', 'of', 'and', 'out', 'compared', 'not', 'boss', 'yes', 'to', 'and', 'show', 'its', 'disappointed', 'fact', 'raw', 'to', 'it', 'justice', 'by', 'br', 'of', 'where', 'clear', 'fact', 'many', 'your', 'way', 'and', 'with', 'city', 'nice', 'are', 'is', 'along', 'wrong', 'not', 'as', 'it', 'way', 'she', 'but', 'this', 'anything', 'up', "haven't", 'been', 'by', 'who', 'of', 'choices', 'br', 'of', 'you', 'to', 'as', 'this', "i'd", 'it', 'and', 'who', 'of', 'shot', "you'll", 'to', '

### Pad the data sequence to make the inputs to same length 

In [4]:
#for the RNN to work all our input dependencies must have same length 
total_words = 500
X_train = sequence.pad_sequences(X_train, maxlen=total_words)
X_test = sequence.pad_sequences(X_test, maxlen=total_words)

### Build a Basic LSTM Model

In [8]:
#we will build a simple LSTM model with one embedding layer, one LSTM and one output layer
embedding_size=32
max_words = 500
model=Sequential()
model.add(Embedding(vocabulary_size, embedding_size, input_length=max_words))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 500, 32)           160000    
_________________________________________________________________
lstm (LSTM)                  (None, 100)               53200     
_________________________________________________________________
dense (Dense)                (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________
None


### Compile the model

In [9]:
#compile the model by passing the optimizer and loss function and the evaluation metric

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

### Pass the parameters and fit the model

In [12]:
#fit the data to the model and begin training
batch_size = 128
num_epochs = 5
x_val, y_val = X_train[:batch_size], y_train[:batch_size]
xtrain, ytrain = X_train[batch_size:], y_train[batch_size:]
model.fit(xtrain, ytrain, validation_data=(x_val, y_val), batch_size=batch_size, epochs=num_epochs)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1de1e66a548>

### Evaluate model on Test sets

In [14]:
#evaluate the model accuracy on unseen test data
scores = model.evaluate(X_test, y_test, verbose=0)
print('Test accuracy:', scores[1])

Test accuracy: 0.8718000054359436


### Make model predictions

In [15]:
#make model predictions on test data
print("Prediction: ",model.predict_classes(X_test[1:10]))

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
Prediction:  [[1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]]


### Actual

In [16]:
#compare the model prediction with actual data
print("Actual: ",y_test[1:10])

Actual:  [1 1 0 1 1 1 0 0 1]


### Thank you