In [82]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np


In [83]:
# Loads IMDb dataset (pre-tokenized as integers)
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=10000)

print(f"Training samples: {len(x_train)}, Test samples: {len(x_test)}")


Training samples: 25000, Test samples: 25000


In [84]:
# Ensure all reviews are the same length
maxlen = 200  # truncate/pad to 200 words

x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=maxlen)


In [88]:
model = keras.Sequential([
    layers.Embedding(input_dim=10000, output_dim=32, input_length=maxlen),
    layers.SimpleRNN(64),
    layers.Dense(1, activation='sigmoid')
])


model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
# Explicitly build the model (for summary to show parameters)
model.build(input_shape=(None, maxlen))
model.summary()


In [89]:
history = model.fit(x_train, y_train,
                    epochs=5,
                    batch_size=64,
                    validation_split=0.2)


Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 40ms/step - accuracy: 0.5581 - loss: 0.6711 - val_accuracy: 0.8116 - val_loss: 0.4294
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 40ms/step - accuracy: 0.8364 - loss: 0.3791 - val_accuracy: 0.7162 - val_loss: 0.5524
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 41ms/step - accuracy: 0.7812 - loss: 0.4574 - val_accuracy: 0.8236 - val_loss: 0.3948
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 39ms/step - accuracy: 0.9306 - loss: 0.1873 - val_accuracy: 0.8492 - val_loss: 0.4406
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 39ms/step - accuracy: 0.9603 - loss: 0.1149 - val_accuracy: 0.8266 - val_loss: 0.4973


In [90]:
loss, acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {acc:.4f}")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.8171 - loss: 0.5103
Test Accuracy: 0.8187


In [91]:
# Load the word index mapping used by Keras IMDb dataset
word_index = keras.datasets.imdb.get_word_index()

# Reserve first indices (as per Keras's convention)
word_index = {k: (v + 3) for k, v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2
word_index["<UNUSED>"] = 3

# Create reverse mapping (integers to words) if needed
reverse_word_index = {value: key for (key, value) in word_index.items()}


In [92]:
def encode_review(text):
    tokens = text.lower().split()
    encoded = [1]  # Start token
    for word in tokens:
        index = word_index.get(word, 2)  # Use 2 for <UNK>
        encoded.append(index)
    return keras.preprocessing.sequence.pad_sequences([encoded], maxlen=200)


In [93]:
def predict_sentiment(text):
    encoded = encode_review(text)
    prediction = model.predict(encoded)[0][0]
    label = "Positive 😊" if prediction > 0.5 else "Negative 😞"
    print(f"Review: {text}")
    print(f"Prediction: {prediction:.4f} — {label}")


In [95]:
predict_sentiment("The movie was absolutely fantastic and heartwarming!")
predict_sentiment("It was boring, slow, and I fell asleep halfway through.")
predict_sentiment("This movie was a waste of time and money.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Review: The movie was absolutely fantastic and heartwarming!
Prediction: 0.5845 — Positive 😊
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Review: It was boring, slow, and I fell asleep halfway through.
Prediction: 0.0742 — Negative 😞
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Review: This movie was a waste of time and money.
Prediction: 0.1993 — Negative 😞


In [97]:
model.save('sentiment_rnn_model.keras')  # Recommended format

