In [2]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [3]:
rating = keras.datasets.imdb

In [4]:
(train_data, train_label),(test_data, test_label) = rating.load_data(num_words=10000)#num_words=10000 means it will load only words which are frequently used

In [7]:
print(train_data[0]) # rating is in digital(value). we need to change it to user friendly language(key-normal sentence)

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]


In [8]:
# to convert digital language into user friendly language
word_index = rating.get_word_index()
word_index = {k:(v+4) for k, v in word_index.items()}   # v+4 because we are adding more keys. so rest every keys will shift by 4
word_index["<PAD>"]=0
word_index["<START>"]=1
word_index["<UNK>"]=2
word_index["<UNUSED>"]=3
word_index["<BR>"]=4
reverse_word_index = dict([(value,key) for (key, value) in word_index.items()])

In [9]:
train_data = keras.preprocessing.sequence.pad_sequences(train_data, value=word_index["<PAD>"], padding="post", maxlen=500)
test_data = keras.preprocessing.sequence.pad_sequences(test_data, value=word_index["<PAD>"], padding="post", maxlen=500)

In [10]:
def decode_review(text):
    return " ".join([reverse_word_index.get(i, "?") for i in text])

In [11]:
print(decode_review(test_data[1]))

<START> i but comics and every a suggested great in scenario film drawn the films ask <BR> life to when cut the seen a <BR> why fact looks film <BR> few trying br seats <UNK> <BR> slash arms films allen movie <BR> but occurred of and considered likely is is <BR> think potential by third <UNK> for japanese party the streets <UNK> about when almost this mostly choppy etc i decides <BR> perfect the 14 a are people two end <BR> america's choppy bare perfect to overlooked man the some it after there story actually i but actors to <BR> viewer might at slash arms about human be a like definite think movie time american in who towards the ballet this about david writing complicated ever and what year was <BR> brainless pretty still psychological choppy andy writing views out and understand same <BR> <UNK> formulaic trying united <BR> confusing a <BR> only when me br either i with to and joke favor a literary japanese <UNK> the instant he perform <UNK> very <BR> third the break basically of <BR

In [12]:
print(len(test_data[0]), len(test_data[1]))

500 500


In [13]:
# model starts here

model = keras.Sequential()
model.add(keras.layers.Embedding(10000, 16))  # filters out similar meaning words. gives word vector
model.add(keras.layers.GlobalAveragePooling1D())  # averaging the word vector
model.add(keras.layers.Dense(16, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"]) # binary_crossentropy because we have 2 possible outputs: 0 or 1

# validation so that it can deal with new data.
x_val = train_data[:10000]
x_train = train_data[10000:]
y_val = train_label[:10000]
y_train = train_label[10000:]

fitModel = model.fit(x_train, y_train, epochs=40, batch_size=512, validation_data=(x_val, y_val), verbose=1)

result = model.evaluate(test_data, test_label)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 15000 samples, validate on 10000 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [66]:
model.save("model.h5")

In [67]:
model = keras.models.load_model("model.h5")

In [69]:
test_review = test_data[2]
predict = model.predict(test_review)
print(decode_review(test_review))
print("prediction: "+ str(predict[0]))
print("actual: "+ str(test_label[2]))

all and no more week boy ten a one hong that br made <UNK> is is <BR> gay <UNK> a <BR> scared hired robin <UNK> of parents of bar over be a are other mexican <UNK> that and friend gay jimmy <UNK> particularly was <UNK> her <BR> <UNK> they unfortunately and silly and youngsters a nice times had he each br saw one a two movie man in to when friend lee neil good all genuinely kick they had band <BR> damaged suppose well  they my him pieces look <BR> likely <UNK> back happy pleasant the each in things <UNK> highly the lee <UNK> a <BR> but serial as ends scenery re a <BR> sync can line supposed introduces revenge for <UNK> the <UNK> br saying of poor <UNK> <BR> faced <UNK> a <BR> opposite is is especially minute to trying br and intestines <BR> <UNK> <UNK> over <BR> heist you comparing for see besides the his evil and left shepard of really there an had will of hank crowd an failed of <UNK> <UNK> as and silly <BR> sheila you <UNK> german br i reality local in title his much lots of way who

In [76]:
def review_encode(s):
    encoded =[1]
    
    for word in s:
        if word.lower() in word_index:
            encoded.append(word_index[word.lower()])
        else:
            encoded.append(2)
    return encoded        

In [77]:
# now use this above model to predict reviews from outside.
with open("review.txt") as f:
    for line in f.readlines():
        nline = line.replace(",","").replace(".","").replace("(","").replace(")","").replace(":","").replace("\"","").strip().split(" ")
        encode = review_encode(nline)
        encode = keras.preprocessing.sequence.pad_sequences([encode], value=word_index["<PAD>"], padding="post", maxlen=500)
        predict = model.predict(encode)
        print(line)
        print(encode)
        print(predict[0])

all and no more week boy ten a one hong that br made is is gay a scared hired robin of parents of bar over be a are other mexican that and friend gay jimmy particularly was her they unfortunately and silly and youngsters a nice times had he each br saw one a two movie man in to when friend lee neil good all genuinely kick they had band damaged suppose well Â– they my him pieces look likely back happy pleasant the each in things highly the lee a but serial as ends scenery re a sync can line supposed introduces revenge forthe br saying of faced a opposite 
[[   1   33    6   58   54 1270  431  748    7   32 2580   16   11   94
    10   10  993    7 1766 2634 2164    8  847    8 1450  121   31    7
    27   86 2663   16    6  465  993 2006  573   17   42   37  473    6
   711    6 8869    7  328  212   70   30  258   11  220   32    7  108
    21  133   12    9   55  465  849 3711   53   33 2071 1969   37   70
  1144 5940 1409   74    2   37   62   91 1329  169 1330  146  655 2212
     5 