In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
from tensorflow.keras.datasets import imdb

# Vocabulary cap handed to the loader: only the 10,000 most frequent words
# are kept, which keeps the embedding table small.
max_words = 10_000

# The loader returns two (reviews, labels) pairs: training first, then test.
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    num_words=max_words,
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [3]:
# Step 3: Preprocess the data
# Bring every review to exactly `max_len` token ids so the reviews stack into
# a rectangular array. The padding/truncating arguments are the Keras defaults
# written out: short reviews get leading zeros, long reviews keep only their
# last `max_len` tokens.
max_len = 100
X_train_pad = pad_sequences(X_train, maxlen=max_len, padding='pre', truncating='pre')
X_test_pad = pad_sequences(X_test, maxlen=max_len, padding='pre', truncating='pre')


In [4]:
# Step 4: Build the RNN model (using LSTM)
# Layer stack, in order:
#   1. Embedding - maps each of the `max_words` token ids to a trainable
#                  128-dimensional vector.
#   2. LSTM      - reads the embedded sequence; return_sequences=False means
#                  only the output of the last timestep is passed on.
#   3. Dropout   - optional regularisation (rate 0.5) to limit overfitting.
#   4. Dense     - a single sigmoid unit for the binary sentiment label
#                  (positive or negative).
#
# `input_length` is deliberately not passed to Embedding: the argument is
# deprecated in Keras 3 (it only triggers a warning) and is not needed here,
# because the sequence length is taken from the padded input on the first
# call to the model.
model = Sequential([
    Embedding(input_dim=max_words, output_dim=128),
    LSTM(units=128, return_sequences=False),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])


In [5]:
# Step 5: Compile the model
# Binary cross-entropy pairs with the single sigmoid output unit; Adam is the
# optimizer and accuracy is the metric reported during training/evaluation.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [6]:
# Step 6: Train the model
# Five epochs over the padded training reviews in mini-batches of 64; the
# returned History object is kept in `history`.
# NOTE(review): the test split doubles as validation data here. Nothing in
# this notebook is tuned or selected on it, but carve out a separate
# validation set before adding early stopping or hyper-parameter search.
history = model.fit(
    x=X_train_pad,
    y=y_train,
    batch_size=64,
    epochs=5,
    validation_data=(X_test_pad, y_test),
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [7]:
# Step 7: Evaluate the model
# evaluate() yields the loss followed by the compiled metric (accuracy),
# measured on the padded test reviews.
test_loss, test_accuracy = model.evaluate(x=X_test_pad, y=y_test)
print(
    f'Test accuracy: {test_accuracy*100:.2f}%'
)

Test accuracy: 82.91%


In [8]:
# Step 8: Make predictions (Optional, just to demonstrate)
# Raw outputs of the final sigmoid unit for every padded test review.
predictions = model.predict(x=X_test_pad)



In [9]:
# Convert the sigmoid outputs to hard sentiment labels: 1 (positive) when the
# output exceeds 0.5, otherwise 0 (negative).
# The single-unit output layer produces one column per sample, so the result
# is flattened with ravel() to a 1-D vector. That matches the layout of the
# label vector and avoids accidental (n, n) broadcasting if the labels are
# ever compared elementwise. Re-running this cell is harmless: 0/1 values
# threshold to themselves.
predictions = (predictions > 0.5).astype(int).ravel()

In [10]:
# Cross-check with scikit-learn: fraction of test reviews whose thresholded
# prediction equals the true label, printed as a percentage.
print(
    f'Accuracy on test set: {accuracy_score(y_test, predictions)*100:.2f}%'
)

Accuracy on test set: 82.91%
