In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

Load the IMDB dataset

In [None]:
# Vocabulary cap handed to the loader as `num_words` (per the Keras docs, only
# the most frequent words up to this count are kept).
max_vocab_size = 10000

# The loader returns two (reviews, labels) pairs: training first, then test.
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    num_words=max_vocab_size,
)

# Report the array shapes of both splits.
print(
    f"Training data shape: {X_train.shape}, Labels shape: {y_train.shape}"
)
print(
    f"Test data shape: {X_test.shape}, Labels shape: {y_test.shape}"
)

In [None]:
# Grab the first training example together with its label for a quick look.
sample_review, sample_label = X_train[0], y_train[0]

# Show the raw (integer-encoded) review and its target value.
print(f"Sample review (encoded): {sample_review}")
print(f"Sample label: {sample_label}")

In [None]:
# For readability only: fetch the dataset's vocabulary mapping so encoded
# reviews can be turned back into text.
word_index = imdb.get_word_index()

# Invert the mapping (values become keys) so an integer index looks up its word.
reverse_word_index = dict(zip(word_index.values(), word_index.keys()))


In [None]:
# Look up raw index 1 in the inverted vocabulary, falling back to '?' if absent.
# NOTE(review): this is the raw word index, not an encoded-review id. With the
# Keras `imdb.load_data` defaults (start_char=1, oov_char=2, index_from=3) the
# encoded reviews reserve 0 for padding, 1 for sequence start and 2 for
# out-of-vocabulary, and real words are shifted up by 3 -- which is why the
# decode step in this notebook subtracts 3 before looking a word up. Raw index 1
# here is therefore an actual word, not a padding marker.
reverse_word_index.get(1, '?')  # raw (un-offset) vocabulary lookup

In [None]:
# Display the integer-encoded sample review as this cell's output.
sample_review
    

In [None]:
# Turn the encoded sample back into text. Each id is shifted down by 3 before
# the vocabulary lookup; ids with no matching word are rendered as '?'.
decoded_review = ' '.join(
    reverse_word_index.get(token_id - 3, '?')
    for token_id in sample_review
)
print(f"Decoded review: {decoded_review}")

In [None]:
# Fixed sequence length fed to the model (passed to pad_sequences as `maxlen`).
max_length = 500

# Bring every review in both splits to exactly `max_length` entries.
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_length)
    for split in (X_train, X_test)
)

In [None]:
# Build a simple RNN sentiment classifier:
#   token ids -> learned embeddings -> SimpleRNN -> single sigmoid score
feature_dim = 128  # Dimension of the embedding space

model = Sequential()

# Declare the input shape explicitly instead of passing `input_length` to
# Embedding. That argument is deprecated in Keras 3 (it only triggers a warning
# and is ignored), which leaves the model unbuilt so `model.summary()` cannot
# report output shapes or parameter counts. An explicit Input works on both
# Keras 2 and Keras 3.
model.add(tf.keras.Input(shape=(max_length,)))

# Embedding layer: maps each of the `max_vocab_size` token ids to a
# `feature_dim`-dimensional vector.
model.add(Embedding(max_vocab_size, feature_dim))

# Recurrent layer with 128 units.
# NOTE(review): `relu` is kept from the original code; SimpleRNN's default
# activation is `tanh`, and an unbounded activation over long sequences may be
# numerically unstable -- confirm this choice is intentional.
model.add(SimpleRNN(128, activation='relu'))

# Binary classification head: one unit with sigmoid activation.
model.add(Dense(1, activation='sigmoid'))

In [None]:
# Print the layer-by-layer summary of the model defined in this notebook.
model.summary()

In [None]:
# Configure training for a binary target: binary cross-entropy loss, the Adam
# optimizer, and accuracy tracked as the reported metric.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Early-stopping callback: watches validation loss with a patience of 5 epochs
# and asks Keras to restore the best weights seen when training stops.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

In [None]:
# Fit the model with the early-stopping callback attached.
# WARNING: this cell can take a long time to run, depending on your hardware.
# `validation_split=0.2` reserves a fifth of the training arrays for validation.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=20,
    validation_split=0.2,
    callbacks=[early_stopping],
)

In [None]:
# Export the trained model to disk in the current working directory.
# NOTE(review): the `.h5` extension selects the legacy HDF5 format; recent Keras
# versions recommend the native `.keras` format. The filename is left unchanged
# because other code may load it by this name -- confirm before switching.
model.save('simple_rnn_model.h5')