In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, SimpleRNN, LSTM, Embedding
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [5]:
np.random.seed(42)

In [7]:
print("Loading IMDB dataset...")
vocab_size = 5000  # Use top 5000 most frequent words
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# Pad sequences to same length
max_length = 200  # Limit to 200 words per review
X_train_padded = pad_sequences(X_train, maxlen=max_length, padding='post')
X_test_padded = pad_sequences(X_test, maxlen=max_length, padding='post')

Loading IMDB dataset...


# Train Simple RNN on IMDB

In [8]:
rnn_model = Sequential([
    Embedding(vocab_size, 64, input_length=max_length),  # Convert words to dense vectors
    SimpleRNN(32, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')  # Binary classification
], name="RNN_IMDB")



In [9]:
rnn_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

In [10]:
rnn_history = rnn_model.fit(
    X_train_padded, y_train,
    batch_size=32,
    epochs=3,  # Quick training
    validation_split=0.2,
    verbose=1
)

Epoch 1/3
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 46ms/step - accuracy: 0.5108 - loss: 0.7132 - val_accuracy: 0.5134 - val_loss: 0.6905
Epoch 2/3
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 47ms/step - accuracy: 0.5093 - loss: 0.6945 - val_accuracy: 0.5196 - val_loss: 0.6888
Epoch 3/3
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 47ms/step - accuracy: 0.5405 - loss: 0.6857 - val_accuracy: 0.5260 - val_loss: 0.6859


In [11]:
rnn_loss, rnn_accuracy = rnn_model.evaluate(X_test_padded, y_test, verbose=0)
print(f"RNN Test Accuracy: {rnn_accuracy:.4f} ({rnn_accuracy*100:.2f}%)")

RNN Test Accuracy: 0.5167 (51.67%)


# Train LSTM on IMDB

In [12]:
lstm_model = Sequential([
    Embedding(vocab_size, 64, input_length=max_length),
    LSTM(32, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')
], name="LSTM_IMDB")

In [13]:
lstm_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

In [14]:
lstm_history = lstm_model.fit(
    X_train_padded, y_train,
    batch_size=32,
    epochs=3,
    validation_split=0.2,
    verbose=1
)

Epoch 1/3
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 138ms/step - accuracy: 0.5600 - loss: 0.6724 - val_accuracy: 0.7588 - val_loss: 0.5117
Epoch 2/3
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 138ms/step - accuracy: 0.7657 - loss: 0.5140 - val_accuracy: 0.7966 - val_loss: 0.4763
Epoch 3/3
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 138ms/step - accuracy: 0.8043 - loss: 0.4613 - val_accuracy: 0.8006 - val_loss: 0.4725


In [15]:
lstm_loss, lstm_accuracy = lstm_model.evaluate(X_test_padded, y_test, verbose=0)
print(f"LSTM Test Accuracy: {lstm_accuracy:.4f} ({lstm_accuracy*100:.2f}%)")

LSTM Test Accuracy: 0.8070 (80.70%)
