In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

# Parameters
max_features = 10000  # Vocabulary size
max_len = 500  # Maximum length of the reviews

# Load data
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)






In [2]:
# Padding sequences to ensure all reviews are of the same length
X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)

In [7]:
X_train.shape,X_test.shape

((25000, 500), (25000, 500))

In [18]:
# Build the RNN model
model = Sequential()
model.add(Embedding(input_dim=max_features, output_dim=64))  # Embedding layer
model.add(SimpleRNN(64))  # SimpleRNN layer
model.add(Dense(1, activation='sigmoid'))  # Output layer for binary classification

# Compile the model



In [19]:
model.summary()

In [20]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.2)

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_acc:.4f}')

Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 91ms/step - accuracy: 0.5873 - loss: 0.6604 - val_accuracy: 0.5532 - val_loss: 0.7847
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 106ms/step - accuracy: 0.7543 - loss: 0.5193 - val_accuracy: 0.7666 - val_loss: 0.4959
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 104ms/step - accuracy: 0.9073 - loss: 0.2453 - val_accuracy: 0.7884 - val_loss: 0.4799
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 107ms/step - accuracy: 0.9547 - loss: 0.1348 - val_accuracy: 0.7892 - val_loss: 0.5573
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 109ms/step - accuracy: 0.9876 - loss: 0.0454 - val_accuracy: 0.7906 - val_loss: 0.6364
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 21ms/step - accuracy: 0.7863 - loss: 0.6688
Test Accuracy: 0.7897


In [21]:
model.summary()

In [34]:
X_train[0].shape

(500,)

In [35]:
model.save('rnnmodel.h5')



In [36]:
# Function to preprocess and predict sentiment

word_index = imdb.get_word_index()
def predict_sentiment(review, model,word_index):
    # Tokenize the input review using the predefined IMDB word index
    words = review.lower().split()  # Simple tokenization by space
    review_sequence = []
    
    for word in words:
        if word in word_index:  # Only include words that are in the word index
            review_sequence.append(word_index.get(word,2)+3)  # Offset by 3 to match Keras' preprocessing

    # Pad the sequence to match the model's input length (max_len)
    review_padded = sequence.pad_sequences([review_sequence], maxlen=max_len)

    # Predict the sentiment
    prediction = model.predict(review_padded)
    
    # Convert the prediction to a human-readable form
    sentiment = "Positive" if prediction >= 0.5 else "Negative"
    
    print(f"Review: {review}")
    print(f"Predicted sentiment: {sentiment} ({prediction[0][0]:.4f})")

# Example usage
sample_review = "The movie was fantastic! I really loved the plot and the characters."
predict_sentiment(sample_review, model,word_index)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Review: The movie was fantastic! I really loved the plot and the characters.
Predicted sentiment: Positive (0.9846)


In [37]:
sample_review = "The movie was bad and worse i was unhappy"
predict_sentiment(sample_review, model,word_index)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Review: The movie was bad and worse i was unhappy
Predicted sentiment: Negative (0.0066)
