In [14]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D , Dense, TextVectorization
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import time
from tensorflow.keras.utils import pad_sequences
import tkinter as tk
from tkinter import ttk, scrolledtext

In [2]:
MAX_FEATURES = 10000
SEQUENCE_LENGTH = 256
EMBEDDING_DIM = 32  
BATCH_SIZE = 32
VOCAB_SIZE = MAX_FEATURES

In [3]:
print(f"Loading IMDB dataset (Top {MAX_FEATURES} words)...")


Loading IMDB dataset (Top 10000 words)...


In [4]:
(X_train_raw , y_train), (X_test_raw, y_test) = tf.keras.datasets.imdb.load_data(num_words = MAX_FEATURES ,oov_char=2)

In [5]:
print(f"\nPadding sequences to fixed length of {SEQUENCE_LENGTH}...")

X_train = pad_sequences(
    X_train_raw , 
    maxlen=SEQUENCE_LENGTH,
    padding = 'post',
    truncating = 'post')


Padding sequences to fixed length of 256...


In [6]:
X_test = pad_sequences(
    X_test_raw,
    maxlen=SEQUENCE_LENGTH,
    padding='post',
    truncating='post')

In [7]:
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(BATCH_SIZE)
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(BATCH_SIZE)
print(f"Final Training Samples Shape: {X_train.shape}")
print(f"Final Testing Samples Shape: {X_test.shape}")

Final Training Samples Shape: (25000, 256)
Final Testing Samples Shape: (25000, 256)


In [8]:
model=Sequential([
    Embedding(VOCAB_SIZE,EMBEDDING_DIM,name="embedding_layer",mask_zero=True),
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    Dense(1,activation='sigmoid'),
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

try:
    _ = model.predict(X_test[:1])
except Exception as e:
    # We ignore the prediction result, we only care that the model ran once.
    pass
# ---------------------------------------------

# Display the model architecture (Now the summary will show parameter counts!)
print("\n--- Model Summary ---")
model.summary()



            

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step

--- Model Summary ---


In [9]:
EPOCHS = 5
print(f"\n---Model Trainig(running for {EPOCHS} epochs)---")
start_time = time.time()




---Model Trainig(running for 5 epochs)---


In [10]:
history = model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=EPOCHS,
    verbose=1)
end_time = time.time()
print(f"\nTrainng completed in {end_time - start_time:.2f}seconds.")


Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7362 - loss: 0.5607 - val_accuracy: 0.8667 - val_loss: 0.3167
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8976 - loss: 0.2625 - val_accuracy: 0.8718 - val_loss: 0.3038
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9203 - loss: 0.2058 - val_accuracy: 0.8655 - val_loss: 0.3230
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9351 - loss: 0.1718 - val_accuracy: 0.8577 - val_loss: 0.3551
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9489 - loss: 0.1469 - val_accuracy: 0.8508 - val_loss: 0.3972

Trainng completed in 11.25seconds.


In [11]:
print("\n--- Model Evaluation ---")
loss, accuracy = model.evaluate(test_ds, verbose=0)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

y_pred_probs = model.predict(X_test, verbose=0)
y_pred = (y_pred_probs > 0.5).astype("int32")
print("\n--- Classification Report ---")
print(classification_report(y_test, y_pred))


--- Model Evaluation ---
Test Loss: 0.3972
Test Accuracy: 0.8508

--- Classification Report ---
              precision    recall  f1-score   support

           0       0.83      0.89      0.86     12500
           1       0.88      0.81      0.84     12500

    accuracy                           0.85     25000
   macro avg       0.85      0.85      0.85     25000
weighted avg       0.85      0.85      0.85     25000



In [12]:
def decode_review(text_sequence, index_mapping):
    """Converts an integer sequence back to human-readable text."""
    reverse_word_index = dict([(value, key) for (key, value) in index_mapping.items()])
    # Indices are offset by 3 for: <pad>, <start>, <unknown>
    decoded_words = [reverse_word_index.get(i - 3, '?') for i in text_sequence if i > 0]
    return ' '.join(decoded_words)

# Load the word index dictionary
word_index = tf.keras.datasets.imdb.get_word_index()

# Example Test Case 1: Positive Review
new_review_text_1 = "This movie is truly one of the greatest cinematic achievements of the decade."
new_review_sequence_1 = [word_index.get(word, 2) + 3 for word in new_review_text_1.lower().split()]

# Example Test Case 2: Negative Review
new_review_text_2 = "What a dull and pointless film. The plot was thin and the characters were unconvincing."
new_review_sequence_2 = [word_index.get(word, 2) + 3 for word in new_review_text_2.lower().split()]

# Pad the new sequences
new_sequences = pad_sequences(
    [new_review_sequence_1, new_review_sequence_2], 
    maxlen=SEQUENCE_LENGTH, 
    padding='post', 
    truncating='post')

In [13]:
predictions = model.predict(new_sequences)

print("\n--- Prediction on New Reviews ---")
print(f"Review 1: '{new_review_text_1}'")
print(f"Prediction: {'Positive' if predictions[0][0] > 0.5 else 'Negative'} (Confidence: {predictions[0][0]:.4f})")

print(f"\nReview 2: '{new_review_text_2}'")
print(f"Prediction: {'Positive' if predictions[1][0] > 0.5 else 'Negative'} (Confidence: {predictions[1][0]:.4f})")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step

--- Prediction on New Reviews ---
Review 1: 'This movie is truly one of the greatest cinematic achievements of the decade.'
Prediction: Positive (Confidence: 0.9657)

Review 2: 'What a dull and pointless film. The plot was thin and the characters were unconvincing.'
Prediction: Negative (Confidence: 0.0000)
