In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences





In [2]:

# Load the IMDB dataset
max_features = 10000  # Number of words to consider as features
maxlen = 200          # Limit the review length to 200 words

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step


In [3]:
# Pad sequences to ensure uniform input size
X_train = pad_sequences(X_train, maxlen=maxlen)
X_test = pad_sequences(X_test, maxlen=maxlen)




In [15]:
# Build a simple DNN model
model = Sequential([
    Embedding(input_dim=max_features, output_dim=128, input_length=maxlen),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])




In [16]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



In [17]:

# Train the model
model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 21ms/step - accuracy: 0.7131 - loss: 0.5168 - val_accuracy: 0.8612 - val_loss: 0.3191
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 21ms/step - accuracy: 0.9736 - loss: 0.0803 - val_accuracy: 0.8424 - val_loss: 0.4604
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 21ms/step - accuracy: 0.9966 - loss: 0.0110 - val_accuracy: 0.8417 - val_loss: 0.6954
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 21ms/step - accuracy: 0.9946 - loss: 0.0154 - val_accuracy: 0.8336 - val_loss: 0.7902
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 20ms/step - accuracy: 0.9968 - loss: 0.0094 - val_accuracy: 0.8352 - val_loss: 0.8518


<keras.src.callbacks.history.History at 0x1eb6cd8b790>

In [13]:
# Evaluate the model
score, accuracy = model.evaluate(X_test, y_test)
print(f"Test accuracy: {accuracy*100:.2f}%")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8249 - loss: 1.0939
Test accuracy: 82.74%


In [26]:
# Load word index
word_index = imdb.get_word_index()

# Reverse word index
word_to_id = {k: (v + 3) for k, v in word_index.items()}

id_to_word = {value: key for key, value in word_to_id.items()}

# Simple tokenizer
def encode_review(text):
    tokens = text.lower().split()
    encoded = [1]  # start token
    for word in tokens:
        encoded.append(word_to_id.get(word,2))  # unknown token
    return encoded

# --- 🔮 Predict on custom input ---
def predict_review_sentiment(review_text):
    encoded = encode_review(review_text)
    padded = pad_sequences([encoded], maxlen=maxlen)
    prediction = model.predict(padded)[0][0]
    
    sentiment = "Positive 😊" if prediction >= 0.5 else "Negative 😞"
    print(f"Review: {review_text}")
    print(encoded)
    print(f"Prediction score: {prediction:.4f}")
    print(f"Predicted Sentiment: {sentiment}")

# --- ✨ Example ---
predict_review_sentiment("The movie was amazing with great acting and plot.")
predict_review_sentiment("I loved it !")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Review: The movie was amazing with great acting and plot.
[1, 4, 20, 16, 480, 19, 87, 116, 5, 2]
Prediction score: 0.9790
Predicted Sentiment: Positive 😊
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
Review: I loved it !
[1, 13, 447, 12, 2]
Prediction score: 0.8052
Predicted Sentiment: Positive 😊
