In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [31]:
# Load the IMDB movie-review dataset, restricted to the most frequent words.
max_features = 100000  # vocabulary size, handed to the loader as `num_words`

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Report the shapes of the feature and label arrays for both splits.
print(f"Training data shape: {x_train.shape}, Training labels shape: {y_train.shape}")
print(f"Testing data shape: {x_test.shape}, Testing labels shape: {y_test.shape}")

Training data shape: (25000,), Training labels shape: (25000,)
Testing data shape: (25000,), Testing labels shape: (25000,)


In [32]:
# Inspect one encoded review together with its label.
# Each review is a list of integer word indices; the label is binary
# (presumably 1 = positive sentiment, 0 = negative sentiment -- confirm against
# the Keras IMDB dataset documentation).
sample_review = x_train[0]
sample_label = y_train[0]

print(f"Sample review (as integers): {sample_review}")
print(f"Sample label : {sample_label}")

Sample review (as integers): [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 22665, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 21631, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 19193, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 10311, 8, 4, 107, 117, 5952, 15, 256, 4, 31050, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 12118, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
Sample 

In [33]:
# Bring every review to exactly `max_words` tokens so the data forms a
# rectangular (num_reviews, max_words) integer array.
# `sequence` is already imported in the first cell, so it is not re-imported here.
max_words = 500

# With the default settings shorter reviews are zero-padded at the front
# (visible in the displayed array below).
x_train = sequence.pad_sequences(x_train, maxlen=max_words)
x_test = sequence.pad_sequences(x_test, maxlen=max_words)

# Display the padded training matrix.
x_train


array([[    0,     0,     0, ...,    19,   178,    32],
       [    0,     0,     0, ...,    16,   145,    95],
       [    0,     0,     0, ...,     7,   129,   113],
       ...,
       [    0,     0,     0, ...,     4,  3586, 22459],
       [    0,     0,     0, ...,    12,     9,    23],
       [    0,     0,     0, ...,   204,   131,     9]],
      shape=(25000, 500), dtype=int32)

In [34]:
# Sanity check: show the first padded review to confirm that the zeros were
# inserted before the real tokens (pre-padding).
x_train[0]

array([    0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,

In [36]:
# Define a simple RNN sentiment classifier.
#   Embedding : maps each of the `max_features` word indices to a 128-dimensional vector.
#   SimpleRNN : 128 recurrent units that read the embedded sequence.
#   Dense     : one sigmoid unit producing the binary prediction.
#
# The `input_length` argument is deliberately not passed to Embedding: it is
# deprecated in current Keras and only triggers a warning. The input shape is
# supplied by the explicit `model.build(...)` call that follows.
#
# NOTE(review): `relu` is unbounded inside a recurrent layer; the recorded
# training log shows a first-epoch training loss of about 16072, which suggests
# exploding activations. Consider the default `tanh` activation or gradient
# clipping -- left unchanged here because it alters the trained model.
model = Sequential()
model.add(Embedding(max_features, 128))
model.add(SimpleRNN(128, activation='relu'))
model.add(Dense(1, activation="sigmoid"))

In [None]:
# Tell the model its input shape up front: the batch size is left unspecified
# (None) and every review has `max_words` tokens, matching the padded data.
model.build(input_shape=(None, max_words))

In [41]:
# Print the layer-by-layer summary of the model.
model.summary()

In [44]:
# Configure training: Adam optimizer, binary cross-entropy loss (pairs with the
# single sigmoid output unit), and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [53]:
# Early-stopping callback: watch the validation loss and stop training once it
# has failed to improve for 5 consecutive epochs, then restore the weights from
# the best epoch. `EarlyStopping` is imported in the first cell with the other imports.
earlystopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,  # must be a boolean, not the string 'True'
)

In [54]:
# Fit the classifier for at most 10 epochs in mini-batches of 32, holding out
# 20% of the training data for validation. The early-stopping callback may end
# training before the last epoch.
History = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
    callbacks=[earlystopping],
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 90ms/step - accuracy: 0.6747 - loss: 16072.2363 - val_accuracy: 0.8004 - val_loss: 0.4462
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 87ms/step - accuracy: 0.8561 - loss: 0.3689 - val_accuracy: 0.7588 - val_loss: 0.4983
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 91ms/step - accuracy: 0.9294 - loss: 0.1964 - val_accuracy: 0.8400 - val_loss: 0.4068
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 86ms/step - accuracy: 0.9654 - loss: 0.1006 - val_accuracy: 0.8380 - val_loss: 0.5138
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 89ms/step - accuracy: 0.9833 - loss: 0.0536 - val_accuracy: 0.8494 - val_loss: 0.6479
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 88ms/step - accuracy: 0.9832 - loss: 0.0558 - val_accuracy: 0.8298 - val_loss: 0.6849
Epoch 7/10


In [55]:
# Persist the trained model to disk.
# NOTE(review): the `.h5` suffix selects the legacy HDF5 format; recent Keras
# versions recommend the native `.keras` format. Confirm that nothing else
# loads this exact path before renaming it.
model.save('SimpleRNN_IMDB.h5')

