In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [2]:
imdb = keras.datasets.imdb
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [3]:
word_index = imdb.get_word_index()
# The first indices are reserved
word_index = {k: (v + 3) for k, v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2 
word_index["<UNUSED>"] = 3

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [4]:
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

In [5]:
def decode_review(text):
  return ' '.join([reverse_word_index.get(i, '?') for i in text])

In [6]:
train_data = keras.preprocessing.sequence.pad_sequences(train_data,value=word_index["<PAD>"], padding='post', maxlen=256)
test_data = keras.preprocessing.sequence.pad_sequences(test_data,value=word_index["<PAD>"], padding='post', maxlen=256)

In [7]:
model = keras.Sequential([
keras.layers.Embedding(10000, 16),
keras.layers.GlobalAveragePooling1D(),
keras.layers.Dense(16, activation=tf.nn.relu),
keras.layers.Dense(1, activation=tf.nn.sigmoid)
])


In [8]:
model.compile(optimizer='adam', loss='binary_crossentropy',metrics=['accuracy'])

In [9]:
history = model.fit(train_data, train_labels, epochs=30, batch_size=512,validation_split=0.2)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [10]:
results = model.evaluate(test_data, test_labels)
print('Test accuracy:', results[1])

Test accuracy: 0.8758000135421753


In [11]:
new_reviews = ['The movie was great!',  "Don't watch boring","The movie was excellent,outstanding,fantastic,magical,once should watch it, great movie","The worst movie ever,one should never go to watch it bad movie,worst horrible"]
new_review_sequences = [np.array([word_index.get(word, 2) for word in review.lower().split()]) for review in new_reviews]
new_review_sequences = keras.preprocessing.sequence.pad_sequences(new_review_sequences, value=word_index["<PAD>"],padding='post', maxlen=256)
print('Predictions:', model.predict(new_review_sequences).flatten())

Predictions: [0.63369805 0.43155566 0.75200784 0.15746865]
