In [11]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, GRU, SimpleRNN, Dense

# Set random seed for reproducibility
tf.random.set_seed(42)


Load and prepare the imdb dataset:-

In [12]:
# Load dataset with top 10,000 frequent words
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)

# Pad sequences to fixed length (e.g. 200 words per review)
X_train = pad_sequences(X_train, maxlen=200)
X_test = pad_sequences(X_test, maxlen=200)


Choose Model Type RNN,GRU,LSTM:-

In [13]:
def build_model(cell_type="LSTM"):
    model = Sequential()
    model.add(Embedding(input_dim=10000, output_dim=128, input_length=200))

    if cell_type == "RNN":
        model.add(SimpleRNN(64))
    elif cell_type == "GRU":
        model.add(GRU(64))
    else:
        model.add(LSTM(64))

    model.add(Dense(1, activation='sigmoid'))  # binary classification
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


Now training the model:-

In [14]:
# Choose one: "RNN", "LSTM", or "GRU"
a=input("enter the model:-")
model = build_model(cell_type="LSTM")

# Train the model
model.fit(X_train, y_train, epochs=3, batch_size=64, validation_split=0.2)


Epoch 1/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 296ms/step - accuracy: 0.7047 - loss: 0.5446 - val_accuracy: 0.8568 - val_loss: 0.3429
Epoch 2/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 305ms/step - accuracy: 0.8856 - loss: 0.2864 - val_accuracy: 0.8636 - val_loss: 0.3346
Epoch 3/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 289ms/step - accuracy: 0.9138 - loss: 0.2305 - val_accuracy: 0.8318 - val_loss: 0.3908


<keras.src.callbacks.history.History at 0x7ee123a49590>

Evaluation:-

In [16]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 36ms/step - accuracy: 0.8328 - loss: 0.3939
Test Accuracy: 0.8340


 Summary
Dataset: IMDB (25,000 movie reviews)

Goal: Classify sentiment (positive/negative)

Model: Uses Embedding + RNN/GRU/LSTM → Dense

Output: Accuracy on test data

In [17]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the same word index used by IMDB dataset
word_index = imdb.get_word_index()

# Reverse word index to decode integer back to words
reverse_word_index = {value: key for (key, value) in word_index.items()}

# Function to encode a new sentence
def encode_review(text):
    # Convert text to lowercase words, keep only those in word_index
    tokens = [word_index.get(word, 2) for word in text.lower().split()]
    return pad_sequences([tokens], maxlen=200)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [18]:
# Example custom review
new_review = "The movie was amazing, touching and beautifully acted."

# Encode and pad
encoded = encode_review(new_review)

# Predict sentiment (0 = negative, 1 = positive)
prediction = model.predict(encoded)

print("Predicted Sentiment:", "Positive" if prediction[0][0] > 0.5 else "Negative")
print(f"Confidence: {prediction[0][0]:.2f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 286ms/step
Predicted Sentiment: Positive
Confidence: 0.73
