## **Install and Import libraries**

In [15]:
!pip install tensorflow




In [16]:
import tensorflow as tf
from tensorflow.keras import layers
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt


## **Load the IMDB Dataset**

In [17]:
# Load IMDB dataset with supervised labels
(train_data, test_data), info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True
)

## **Preprocess Text: Tokenization + Padding**

In [18]:
# Extract raw text and labels
train_texts, train_labels = [], []
test_texts, test_labels = [], []

for text, label in tfds.as_numpy(train_data):
    train_texts.append(text.decode('utf-8'))
    train_labels.append(label)

for text, label in tfds.as_numpy(test_data):
    test_texts.append(text.decode('utf-8'))
    test_labels.append(label)

# Tokenize texts
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(train_texts)

# Convert to sequences
train_sequences = tokenizer.texts_to_sequences(train_texts)
test_sequences = tokenizer.texts_to_sequences(test_texts)

# Pad sequences
train_padded = pad_sequences(train_sequences, maxlen=200, padding='post')
test_padded = pad_sequences(test_sequences, maxlen=200, padding='post')

# Convert labels to numpy arrays
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)


## **Build and Compile the Model**

In [19]:
model = tf.keras.Sequential([
    layers.Embedding(10000, 64, input_length=200),
    layers.Bidirectional(layers.LSTM(64)),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.summary()


## **Train the Model**

In [20]:
history = model.fit(
    train_padded,
    train_labels,
    epochs=10,
    batch_size=64,
    validation_data=(test_padded, test_labels)
)

Epoch 1/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m153s[0m 379ms/step - accuracy: 0.6619 - loss: 0.5780 - val_accuracy: 0.8500 - val_loss: 0.3490
Epoch 2/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 398ms/step - accuracy: 0.9062 - loss: 0.2481 - val_accuracy: 0.8671 - val_loss: 0.3149
Epoch 3/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 363ms/step - accuracy: 0.9354 - loss: 0.1779 - val_accuracy: 0.8574 - val_loss: 0.3806
Epoch 4/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 399ms/step - accuracy: 0.9531 - loss: 0.1303 - val_accuracy: 0.8536 - val_loss: 0.4023
Epoch 5/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 386ms/step - accuracy: 0.9719 - loss: 0.0840 - val_accuracy: 0.8478 - val_loss: 0.5637
Epoch 6/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m152s[0m 389ms/step - accuracy: 0.9749 - loss: 0.0743 - val_accuracy: 0.8518 - val_loss: 0.6002
Epoc

## **Evaluate Accuracy**

In [21]:
loss, accuracy = model.evaluate(test_padded, test_labels)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 57ms/step - accuracy: 0.8440 - loss: 0.7461
Test Accuracy: 84.47%


## **Predict on Custom Reviews**

In [22]:
def predict_sentiment(sentence):
    sentence = sentence.lower().strip()
    seq = tokenizer.texts_to_sequences([sentence])
    padded = pad_sequences(seq, maxlen=200, padding='post')
    pred = model.predict(padded)[0][0]
    label = "Positive" if pred > 0.5 else "Negative"
    print(f"{label} ({pred * 100:.2f}% confidence)")

In [23]:
# Try it out
predict_sentiment("I loved the product! Very useful and easy to use.")
predict_sentiment("Terrible experience. It broke after one day.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 672ms/step
Positive (50.08% confidence)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step
Negative (33.93% confidence)
