In [0]:
#import
from keras.datasets import imdb
from keras.preprocessing import sequence
from keras import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [0]:
# Load the IMDB reviews (already encoded as integer word ids) as train/test splits.
# `num_words` caps the vocabulary at the `vocabulary_size` most frequent words.
vocabulary_size = 5000
train_split, test_split = imdb.load_data(num_words=vocabulary_size)
X_train, y_train = train_split
X_test, y_test = test_split
print('Loaded dataset with {} training samples, {} test samples'.format(len(X_train), len(X_test)))

In [0]:
# Peek at the first training sample: the encoded review and its label.
first_review, first_label = X_train[0], y_train[0]
print("Review:", first_review)
print("Label:", first_label)

In [0]:
#Decode the sentences to see the reviews as text
word_index = imdb.get_word_index()

# Invert the word -> index mapping so that ids can be turned back into words.
reverse_word_index = {index: word for word, index in word_index.items()}


def decode_review(encoded_review):
    """Turn a sequence of IMDB word ids back into a space-separated string.

    Ids produced by ``imdb.load_data`` are shifted by 3 relative to
    ``word_index`` (0, 1 and 2 are reserved for padding, start-of-sequence
    and unknown words), so 3 is subtracted before the lookup. Ids that have
    no matching word are rendered as '?'.
    """
    return ' '.join(reverse_word_index.get(i - 3, '?') for i in encoded_review)


# Show the encoded and the decoded form of two sample reviews.
for sample_index in (0, 2):
    print(X_train[sample_index])
    decoded_review = decode_review(X_train[sample_index])
    print(decoded_review)

In [0]:
# Bring every review to exactly `max_words` tokens so they can be batched.
max_words = 500
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_words)
    for split in (X_train, X_test)
)
print(X_train[0])

In [0]:
# Build the network: word embedding -> LSTM -> single sigmoid output unit.
embedding_size = 32
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocabulary_size, embedding_size, input_length=max_words),
    tf.keras.layers.LSTM(100),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.summary()

In [0]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [0]:
# Hold out the first `batch_size` training samples for validation and keep
# the remainder for fitting.
# NOTE(review): the validation set is only `batch_size` (64) reviews, so the
# validation accuracy will be noisy - confirm that this size is intended.
batch_size = 64
X_valid, X_train_partial = X_train[:batch_size], X_train[batch_size:]
y_valid, y_train_partial = y_train[:batch_size], y_train[batch_size:]

In [0]:
class myCallback(tf.keras.callbacks.Callback):
  """Keras callback that halts training once validation accuracy hits a target."""

  def on_epoch_end(self, epoch, logs=None):
    """Request an early stop when 'val_accuracy' reaches DESIRED_ACC.

    Args:
      epoch: index of the epoch that just finished (unused).
      logs: metrics dict supplied by Keras; may be None or may lack
        'val_accuracy' (e.g. when fit() runs without validation data).
    """
    DESIRED_ACC = 0.9
    # `logs=None` avoids a shared mutable default argument.
    val_accuracy = (logs or {}).get('val_accuracy')
    # Guard against a missing metric: comparing None with a float raises TypeError.
    if val_accuracy is not None and val_accuracy >= DESIRED_ACC:
      print("\nStopping training as validation accuracy is reached to %.2f!" % DESIRED_ACC )
      self.model.stop_training = True
callbacks = myCallback()

In [0]:
# Train on the reduced training set, evaluating on the held-out validation
# samples after each epoch; the callback may stop training early.
num_epochs = 5
history = model.fit(
    X_train_partial,
    y_train_partial,
    epochs=num_epochs,
    batch_size=batch_size,
    validation_data=(X_valid, y_valid),
    callbacks=[callbacks],
)

In [0]:
# Evaluate on the test split; with the compiled metrics the result is
# [loss, accuracy], so index 1 is the accuracy.
scores = model.evaluate(X_test, y_test)
test_accuracy = scores[1]
print('Test accuracy:', test_accuracy)

In [0]:
#try your model with your reviews

#sentence1: This film is the worst film I have ever seen
#sentence2: Excellent! superb film, worth watching
sentence = input()

#get the word_index from imdb dataset
word_index = imdb.get_word_index()

# Token ids follow the imdb.load_data defaults used for the training data:
# 1 marks the start of a review, 2 stands for an out-of-vocabulary word and
# real words are shifted by 3 relative to word_index.
START_TOKEN, OOV_TOKEN, INDEX_OFFSET = 1, 2, 3

#Get the tokens for the words of the entered sentence
X_tmp = [START_TOKEN]
for raw_word in sentence.split():
    # word_index keys are lowercase and carry no surrounding punctuation, so
    # normalise the input; otherwise "This" or "Excellent!" would be dropped.
    word = raw_word.lower().strip('.,!?;:"\'()[]')
    if word not in word_index:
        continue
    token = word_index[word] + INDEX_OFFSET
    # Ids outside the trained vocabulary would index past the Embedding
    # table; map them to the OOV id exactly as load_data(num_words=...) does.
    X_tmp.append(token if token < vocabulary_size else OOV_TOKEN)

#pad the sentence to make it of equal length
X_new = pad_sequences([X_tmp], maxlen=max_words)

#predict sentiment for the entered review
# Sequential.predict_classes was removed from TensorFlow; threshold the
# sigmoid output at 0.5 instead, which is what predict_classes did.
positive_probability = float(model.predict(X_new)[0][0])
y_pred = [int(positive_probability > 0.5)]

if y_pred[0]==1:
    print("Positive") #1 means positive
else:
    print("Negative") #0 means negative

In [0]:
# Persist the trained weights and the model architecture as separate files.
model.save_weights('IMDBReviewModel.h5')
model_json = model.to_json()
# The `with` block closes the file on exit, so no explicit close() is needed.
with open('IMDBReviewModel.json', "w") as json_file:
    json_file.write(model_json)