In [2]:
import tensorflow as tf
from tensorflow import keras

In [3]:
# IMDB review dataset bundled with Keras; num_words caps the vocabulary at
# 88000 entries, which is also the input size of the Embedding layer below.
data = keras.datasets.imdb
(train_data, train_labels), (test_data, test_labels) = data.load_data(
    num_words=88000,
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [4]:
# Shift every word's index up by 3 so that indices 0-3 are free for the
# special tokens registered right after.
word_index = {word: rank + 3 for word, rank in data.get_word_index().items()}
word_index.update({"<PAD>": 0, "<START>": 1, "<UNK>": 2, "<UNUSED>": 3})

# Inverse lookup (index -> token), used to turn encoded reviews back into text.
reversed_word_index = {index: token for token, index in word_index.items()}

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [5]:
def decode_review(text):
  """Turn a sequence of word indices back into a space-separated string.

  Each index is looked up in ``reversed_word_index``; an index that is not
  present there is rendered as "?".
  """
  tokens = []
  for token_id in text:
    tokens.append(reversed_word_index.get(token_id, "?"))
  return " ".join(tokens)

In [6]:
# Bring both splits to a fixed length of 250, filling with the <PAD> index
# at the end of each review. The same settings are applied to train and test.
train_data, test_data = (
    keras.preprocessing.sequence.pad_sequences(
        split,
        value=word_index["<PAD>"],
        maxlen=250,
        padding="post",
    )
    for split in (train_data, test_data)
)

In [7]:
# Model: embedding -> average over the sequence -> small dense layer ->
# single sigmoid unit for the binary sentiment score.
model = keras.Sequential([
    keras.layers.Embedding(88000, 16),
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(16, activation="relu"),
    keras.layers.Dense(1, activation="sigmoid"),
])

In [8]:
# Binary classification setup: Adam optimizer, binary cross-entropy loss,
# accuracy reported during training and evaluation.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [9]:
# Validation data: hold out the first 10000 training examples and train on
# everything after them. Features and labels are sliced at the same point.
x_val, x_train = train_data[:10000], train_data[10000:]
y_val, y_train = train_labels[:10000], train_labels[10000:]

In [10]:
# Train for 40 epochs in batches of 512, scoring the held-out validation
# split alongside each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=512,
    epochs=40,
    validation_data=(x_val, y_val),
    verbose=1,
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x7f669d44bd10>

In [11]:
# Score the trained model on the padded test split. The return value is kept
# in `results` but is not displayed by this cell.
results = model.evaluate(test_data, test_labels)



In [12]:
# Persist the trained model to a file in the current working directory.
model.save("reviews_classifier.h5")

In [13]:
# Load the model back from the file written by the save cell; predictions
# below go through this reloaded copy rather than the in-memory `model`.
classifier = keras.models.load_model("reviews_classifier.h5")

In [14]:
from re import subn
def encode_review(text, maxlen=250, num_words=88000):
  """Convert a raw review string into a padded batch of word indices.

  The text has a fixed set of punctuation characters removed, is lower-cased,
  split on whitespace and mapped through the notebook-level ``word_index``.

  Args:
    text: Raw review text.
    maxlen: Length passed to ``pad_sequences``; the default matches the
      ``maxlen=250`` used when padding the training data.
    num_words: Vocabulary size the model was built with. Indices at or above
      this value are replaced by the unknown-word index so they stay inside
      the embedding table. The default matches ``load_data(num_words=88000)``
      and ``Embedding(88000, 16)``. Pass ``None`` to disable the limit.

  Returns:
    The array produced by ``pad_sequences`` for a one-review batch: the start
    marker 1, then one index per word, post-padded with 0.
  """
  pad_id, start_id, unk_id = 0, 1, 2

  # The hyphen is escaped on purpose: an unescaped "+-=" inside a character
  # class is the range '+'..'=', which also deleted every digit (so "10" could
  # never be looked up). ';' and '<' were removed via that range as well and
  # are listed explicitly so that only the digit handling changes.
  punctuation = r"[,.:;/|()<+\-=*!@#$%^&]"
  cleaned = subn(punctuation, "", text)[0].lower()

  review = [start_id]
  # split() without an argument skips the empty tokens that split(' ') yields
  # for repeated spaces, tabs and newlines; each used to become a bogus <UNK>.
  for word in cleaned.split():
    index = word_index.get(word, unk_id)
    if num_words is not None and index >= num_words:
      # Word exists in the full index but lies outside the model's vocabulary.
      index = unk_id
    review.append(index)

  return keras.preprocessing.sequence.pad_sequences([review], padding="post", value=pad_id, maxlen=maxlen)

In [15]:
# Encode a short sample review and show the resulting padded index array.
review = encode_review("this movie was great")
print(review)

[[ 1 14 20 16 87  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0]]


In [16]:
# Run the reloaded model on the encoded sample review.
pred = classifier.predict(review)

In [17]:
# Show the score for the single review in the batch: first row, first (only)
# output of the sigmoid layer.
print(pred[0][0])

0.72413015
