In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.datasets import imdb
import numpy as np
from sklearn.metrics import classification_report

In [2]:
# Load IMDB dataset
max_words = 10_000  # Consider only the top 10,000 most common words
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_words)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
# Padding sequences to a fixed length
max_len = 500  # Max words per review
X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)

In [4]:
# Build the DNN Model
model = keras.Sequential([
    keras.layers.Embedding(input_dim=max_words, output_dim=128, input_length=max_len),
    keras.layers.GlobalAveragePooling1D(),  # Reduce to fixed-size feature representation
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')  # Binary classification
])



In [5]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 30ms/step - accuracy: 0.5878 - loss: 0.6502 - val_accuracy: 0.7750 - val_loss: 0.4411
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 29ms/step - accuracy: 0.8288 - loss: 0.3841 - val_accuracy: 0.8739 - val_loss: 0.3102
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 30ms/step - accuracy: 0.8727 - loss: 0.2990 - val_accuracy: 0.7413 - val_loss: 0.5567
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 27ms/step - accuracy: 0.8862 - loss: 0.2674 - val_accuracy: 0.8872 - val_loss: 0.2797
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 29ms/step - accuracy: 0.8848 - loss: 0.2660 - val_accuracy: 0.8666 - val_loss: 0.3078
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 29ms/step - accuracy: 0.9101 - loss: 0.2262 - val_accuracy: 0.8834 - val_loss: 0.2830
Epoch 7/10
[1m7

In [6]:
# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.8823 - loss: 0.2948
Test Accuracy: 0.8805


In [7]:
# Predictions
y_pred_probs = model.predict(X_test)
y_pred = (y_pred_probs > 0.5).astype("int32").flatten()

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step


In [8]:
# Classification Report
print(classification_report(y_test, y_pred, target_names=["Negative", "Positive"]))

              precision    recall  f1-score   support

    Negative       0.85      0.92      0.89     12500
    Positive       0.91      0.84      0.88     12500

    accuracy                           0.88     25000
   macro avg       0.88      0.88      0.88     25000
weighted avg       0.88      0.88      0.88     25000

