In [39]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [41]:
num_words = 10000
maxlen = 200
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)
x_train = pad_sequences(x_train, maxlen=maxlen)  # Pad training sequences
x_test = pad_sequences(x_test, maxlen=maxlen)    # Pad test sequences


In [42]:
x_test

array([[   0,    0,    0, ...,   14,    6,  717],
       [1987,    2,   45, ...,  125,    4, 3077],
       [4468,  189,    4, ...,    9,   57,  975],
       ...,
       [   0,    0,    0, ...,   21,  846, 5518],
       [   0,    0,    0, ..., 2302,    7,  470],
       [   0,    0,    0, ...,   34, 2005, 2643]])

In [45]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(
        input_dim=num_words+1,   # Vocabulary size = 10,000
        output_dim=128,        # Embedding vector dimension
        input_length=maxlen    # Padded sequence length = 200
    ),
    tf.keras.layers.LSTM(64, dropout=0.2),  # 64 LSTM units
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [47]:
model.build(input_shape=(None, maxlen))

# Show summary
model.summary()

In [61]:
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Add early stopping to prevent overfitting
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy', 
    patience=2,  # Stop if no improvement for 2 epochs
    restore_best_weights=True
)

# Train the model
history = model.fit(
    x_train, y_train,
    epochs=3,
    batch_size=32,
    validation_split=0.2,  # 20% of training data for validation
    callbacks=[early_stop]
)

Epoch 1/3
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 140ms/step - accuracy: 0.9248 - loss: 0.1973 - val_accuracy: 0.8606 - val_loss: 0.3617
Epoch 2/3
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 148ms/step - accuracy: 0.9469 - loss: 0.1409 - val_accuracy: 0.8626 - val_loss: 0.3683
Epoch 3/3
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 137ms/step - accuracy: 0.9658 - loss: 0.1001 - val_accuracy: 0.8600 - val_loss: 0.4329


In [51]:
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)

print(f'Test Accuracy: {test_acc * 100:.2f}%')

Test Accuracy: 86.32%


In [53]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Load the IMDB word index (maps words to integers)
word_index = imdb.get_word_index()

# Define parameters (same as training)
num_words = 10000
maxlen = 200

# Function to encode custom reviews
def encode_review(review):
    # Clean the text
    review = review.lower().replace('<br />', ' ')  # Remove HTML tags
    review = ''.join([c for c in review if c not in '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~'])  # Remove punctuation
    words = review.split()
    
    # Convert words to integer indices
    indices = []
    for word in words:
        # Use word index (default to 2 for OOV words)
        index = word_index.get(word, 2)
        # Replace indices >= num_words with OOV (2)
        if index > num_words:
            index = 2
        indices.append(index)
    
    # Pad sequences to maxlen
    padded = pad_sequences([indices], maxlen=maxlen)
    return padded

In [55]:
# Example reviews
positive_review = "This movie was fantastic! The acting was superb and the plot kept me engaged."
negative_review = "A terrible film. Boring, poorly acted, and a waste of time."

# Encode the reviews
positive_encoded = encode_review(positive_review)
negative_encoded = encode_review(negative_review)

In [57]:
# Predict sentiment
def predict_sentiment(encoded_review):
    prediction = model.predict(encoded_review)[0][0]
    sentiment = "Positive" if prediction > 0.5 else "Negative"
    confidence = prediction if sentiment == "Positive" else 1 - prediction
    return f"{sentiment} (Confidence: {confidence:.2f})"

# Test predictions
print("Positive Review Prediction:", predict_sentiment(positive_encoded))
print("Negative Review Prediction:", predict_sentiment(negative_encoded))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 428ms/step
Positive Review Prediction: Negative (Confidence: 0.64)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
Negative Review Prediction: Positive (Confidence: 0.64)


In [59]:
# Encode and predict custom reviews
custom_reviews = [
    "This movie was a masterpiece! The director did an amazing job.",
    "I hated every second of this film. The worst acting I've ever seen.",
    "The plot was average, but the cinematography was stunning."
]

for review in custom_reviews:
    encoded = encode_review(review)
    print(f"Review: {review}")
    print("Prediction:", predict_sentiment(encoded), "\n")

Review: This movie was a masterpiece! The director did an amazing job.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
Prediction: Negative (Confidence: 0.51) 

Review: I hated every second of this film. The worst acting I've ever seen.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
Prediction: Negative (Confidence: 0.52) 

Review: The plot was average, but the cinematography was stunning.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
Prediction: Positive (Confidence: 0.69) 

