In [28]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [11]:
max_words = 10000   # only consider the top 10k words
max_len   = 200     # cut/pad reviews to 200 tokens

In [12]:
# 1) Load data
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_words)

In [13]:
# 2) Pad sequences
X_train = pad_sequences(X_train, maxlen=max_len)
X_test  = pad_sequences(X_test,  maxlen=max_len)

In [15]:
print("Training Data Shape:", X_train.shape)
print("Testing Data Shape:", X_test.shape)

Training Data Shape: (25000, 200)
Testing Data Shape: (25000, 200)


In [19]:
# 3) Build RNN model
model = Sequential([
    Embedding(input_dim=max_words, output_dim=128, input_length=max_len),
    LSTM(64, return_sequences=False),
    Dropout(0.5),              # LSTM layer with 64 units
    Dense(1, activation='sigmoid')  # binary output
])



In [20]:
# 4) Compile
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [21]:
model.fit(
    X_train, y_train,
    epochs=5,
    batch_size=64,
    validation_data=(X_test, y_test)
)

Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 142ms/step - accuracy: 0.7306 - loss: 0.5150 - val_accuracy: 0.8511 - val_loss: 0.3486
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 146ms/step - accuracy: 0.9019 - loss: 0.2527 - val_accuracy: 0.8504 - val_loss: 0.3656
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 151ms/step - accuracy: 0.9315 - loss: 0.1836 - val_accuracy: 0.8670 - val_loss: 0.3230
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 158ms/step - accuracy: 0.9534 - loss: 0.1332 - val_accuracy: 0.8618 - val_loss: 0.4090
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 138ms/step - accuracy: 0.9653 - loss: 0.1032 - val_accuracy: 0.8497 - val_loss: 0.4206


<keras.src.callbacks.history.History at 0x15c11503560>

In [22]:
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc*100:.2f}%")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 26ms/step - accuracy: 0.8473 - loss: 0.4326
Test Accuracy: 84.97%


In [25]:
sample_review = X_test[5].reshape(1, max_len)
prediction = model.predict(sample_review)
sentiment = "Positive" if prediction[0] > 0.5 else "Negative"
print(f"Predicted Sentiment: {sentiment}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 323ms/step
Predicted Sentiment: Positive


In [26]:
word_index = imdb.get_word_index()
reverse_index = {v: k for k, v in word_index.items()}
def decode_review(seq):
    return " ".join(reverse_index.get(i - 3, "?") for i in seq if i >= 3)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3us/step


In [29]:
first_review_seq = X_test[0]
true_label       = y_test[0]
prob = model.predict(first_review_seq[np.newaxis, :])[0,0]
pred = "Positive" if prob >= 0.5 else "Negative"

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step


In [30]:
print(decode_review(first_review_seq))
print(f"True sentiment:      {'Positive' if true_label==1 else 'Negative'}")
print(f"Predicted sentiment: {pred} ({prob:.3f} probability)")


please give this one a miss br br and the rest of the cast rendered terrible performances the show is flat flat flat br br i don't know how michael madison could have allowed this one on his plate he almost seemed to know this wasn't going to work out and his performance was quite so all you madison fans give this a miss
True sentiment:      Negative
Predicted sentiment: Negative (0.293 probability)
