In [1]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D

In [2]:
max_features = 10000 
maxlen = 256
epochs = 20

In [3]:
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

In [4]:
print(x_train[0])

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]


In [5]:
class_names = ['Negative', 'Positive']

In [6]:
word_index = imdb.get_word_index()
print(word_index['hello'])

4822


# decoding part

In [7]:
reverse_word_index = dict((value, key) for key, value in word_index.items())

In [8]:
def decode(review):
    text = ''
    for i in review:
        text += reverse_word_index[i]
        text += " "
    return text

In [9]:
decode(x_train[0])

"the as you with out themselves powerful lets loves their becomes reaching had journalist of lot from anyone to have after out atmosphere never more room and it so heart shows to years of every never going and help moments or of every chest visual movie except her was several of enough more with is now current film as you of mine potentially unfortunately of you than him that with out themselves her get for was camp of you movie sometimes movie that with scary but and to story wonderful that in seeing in character to of 70s musicians with heart had shadows they of here that with her serious to have does when from why what have critics they is you that isn't one will very to as itself with other and in of seen over landed for anyone of and br show's to whether from than out themselves history he name half some br of and odd was two most of mean for 1 any an boat she he should is thought frog but of script you not while history he heart to real at barrel but when from one bit then have t

In [10]:
def show_len():
    print("Length of 1st training ex :", len(x_train[0]))
    print("Length of 2nd training ex :", len(x_train[1]))
    print("Length of 1st test ex :", len(x_test[0]))
    print("Length of 1st test ex :", len(x_test[1]))
show_len()

Length of 1st training ex : 218
Length of 2nd training ex : 189
Length of 1st test ex : 68
Length of 1st test ex : 260


# Padding

In [11]:
word_index['the']

1

In [12]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [13]:
x_train = pad_sequences(x_train, value = word_index['the'], padding = 'post', maxlen = maxlen)
x_test = pad_sequences(x_test, value = word_index['the'], padding = 'post', maxlen = maxlen)

In [14]:
show_len()

Length of 1st training ex : 256
Length of 2nd training ex : 256
Length of 1st test ex : 256
Length of 1st test ex : 256


In [15]:
decode(x_train[0])

"the as you with out themselves powerful lets loves their becomes reaching had journalist of lot from anyone to have after out atmosphere never more room and it so heart shows to years of every never going and help moments or of every chest visual movie except her was several of enough more with is now current film as you of mine potentially unfortunately of you than him that with out themselves her get for was camp of you movie sometimes movie that with scary but and to story wonderful that in seeing in character to of 70s musicians with heart had shadows they of here that with her serious to have does when from why what have critics they is you that isn't one will very to as itself with other and in of seen over landed for anyone of and br show's to whether from than out themselves history he name half some br of and odd was two most of mean for 1 any an boat she he should is thought frog but of script you not while history he heart to real at barrel but when from one bit then have t

Now we create and train the model

In [16]:
#create the model
model = Sequential([Embedding(10000, 16),
                   GlobalAveragePooling1D(),
                   Dense(16, activation = 'relu'),
                   Dense(1, activation = 'sigmoid')
                   ])
#compile the model
model.compile(loss = 'binary_crossentropy',
             optimizer = 'adam',
             metrics = ["accuracy"])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 16)          160000    
                                                                 
 global_average_pooling1d (  (None, 16)                0         
 GlobalAveragePooling1D)                                         
                                                                 
 dense (Dense)               (None, 16)                272       
                                                                 
 dense_1 (Dense)             (None, 1)                 17        
                                                                 
Total params: 160289 (626.13 KB)
Trainable params: 160289 (626.13 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [17]:
from tensorflow.python.keras.callbacks import LambdaCallback

In [18]:
simple_log = LambdaCallback(on_epoch_end=lambda epoch, logs: print(epoch, end='.'))


In [19]:
epochs = 20
h = model.fit(x_train, 
              y_train, 
              validation_split=0.2, 
              epochs=epochs, 
              callbacks=[simple_log],  # Pass the callback object here
              verbose=False)
h

0.1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16.17.18.19.

<keras.src.callbacks.History at 0x23cb7703410>

# Prediction and evaluation

In [20]:
loss, acc = model.evaluate(x_test, y_test)



In [21]:
print("The loss : ", loss)
print("The accuracy of the test : ", acc*100)

The loss :  0.9290899038314819
The accuracy of the test :  84.05200242996216


In [22]:
def encode(review):
    # Tokenize the review into words
    words = review.split()
    
    # Map each word to its corresponding index in the word_index dictionary
    encoded_review = [word_index[word] for word in words if word in word_index]
    
    return encoded_review

In [60]:
review = "This was a bad movie" 
encoded_feature1 = encode(review)
print(encoded_feature1)

[13, 3, 75, 17]


In [61]:
p = model.predict(np.expand_dims(encoded_feature1, axis = 0))
print(class_names[int(p[0] > 0.5)])

Negative
