# **Practical No.: 2**

# Building a natural language processing (NLP) model for sentiment analysis or text classification.

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
import re

In [None]:
# Dataset configuration:
#   num_words - vocabulary cap passed to imdb.load_data (top 10,000 words kept)
#   maxlen    - fixed sequence length used when padding the reviews
num_words, maxlen = 10000, 256

# imdb.load_data returns a (train, test) pair of (sequences, labels) tuples.
train_split, test_split = imdb.load_data(num_words=num_words)
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
# Bring every review in both splits to exactly `maxlen` tokens so they can be
# batched into a single 2-D array.
x_train, x_test = (
    pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)

In [None]:
# Build a simple model: embedding -> average over the sequence -> dense classifier.

# Seed the Python, NumPy and TensorFlow RNGs before any weights are created so
# that re-running this cell starts from the same initialisation (exact
# repeatability of the metrics still depends on the backend/hardware).
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Declare the input shape explicitly. Embedding's `input_length` argument
    # is deprecated in Keras 3 (it is ignored and only emits a warning), so the
    # sequence length is stated here instead.
    tf.keras.Input(shape=(maxlen,), dtype="int32"),
    # One 16-dimensional vector per vocabulary id in [0, num_words).
    Embedding(input_dim=num_words, output_dim=16),
    # Average the word vectors over the sequence axis -> one vector per review.
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    # Single sigmoid unit: output is read as the probability of the positive class.
    Dense(1, activation='sigmoid'),
])



In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss for the
# single sigmoid output, and accuracy reported as the tracked metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the model for 10 epochs, holding out 20% of the training data for
# validation (validation_split=0.2).
# The returned History object is kept so the per-epoch metrics remain available
# afterwards via `history.history`, and so the cell does not end with a bare
# `<...History at 0x...>` repr as its output.
history = model.fit(
    x_train, y_train,
    epochs=10,
    batch_size=512,
    validation_split=0.2,
    verbose=2,  # one summary line per epoch
)

Epoch 1/10
40/40 - 3s - 76ms/step - accuracy: 0.5899 - loss: 0.6894 - val_accuracy: 0.6732 - val_loss: 0.6823
Epoch 2/10
40/40 - 1s - 26ms/step - accuracy: 0.6779 - loss: 0.6726 - val_accuracy: 0.7354 - val_loss: 0.6560
Epoch 3/10
40/40 - 1s - 29ms/step - accuracy: 0.7438 - loss: 0.6368 - val_accuracy: 0.7690 - val_loss: 0.6093
Epoch 4/10
40/40 - 1s - 30ms/step - accuracy: 0.7843 - loss: 0.5797 - val_accuracy: 0.7952 - val_loss: 0.5462
Epoch 5/10
40/40 - 1s - 31ms/step - accuracy: 0.8195 - loss: 0.5123 - val_accuracy: 0.8232 - val_loss: 0.4842
Epoch 6/10
40/40 - 2s - 48ms/step - accuracy: 0.8316 - loss: 0.4531 - val_accuracy: 0.7940 - val_loss: 0.4525
Epoch 7/10
40/40 - 2s - 44ms/step - accuracy: 0.8458 - loss: 0.4080 - val_accuracy: 0.8488 - val_loss: 0.3994
Epoch 8/10
40/40 - 1s - 31ms/step - accuracy: 0.8669 - loss: 0.3669 - val_accuracy: 0.8556 - val_loss: 0.3724
Epoch 9/10
40/40 - 1s - 28ms/step - accuracy: 0.8758 - loss: 0.3375 - val_accuracy: 0.8586 - val_loss: 0.3522
Epoch 10/10

<keras.src.callbacks.history.History at 0x7d2c3e16fb50>

In [None]:
# Score the trained model on the held-out test split. evaluate() returns the
# loss followed by the compiled metrics (here: accuracy).
test_metrics = model.evaluate(x_test, y_test, verbose=2)
loss, accuracy = test_metrics

for caption, value in (("Loss:", loss), ("Accuracy:", accuracy)):
    print(caption, value)

782/782 - 2s - 3ms/step - accuracy: 0.8506 - loss: 0.3507
Loss: 0.3507475256919861
Accuracy: 0.8506399989128113


## Predicting the sentiment of new reviews

In [None]:
# Load the word index (word -> integer rank used by the IMDB dataset)
word_index = imdb.get_word_index()

# Token ids produced by imdb.load_data() with its default arguments:
# 1 marks the start of a review, 2 replaces out-of-vocabulary words, and every
# real word's rank is shifted by 3 to make room for the reserved ids.
START_ID = 1
OOV_ID = 2
INDEX_OFFSET = 3


def encode_review(text, word_index, num_words):
    """Encode raw review text the way imdb.load_data() encodes the training data.

    Args:
        text: The review as a plain string.
        word_index: Mapping from word to its dataset rank (imdb.get_word_index()).
        num_words: Vocabulary cap used when the data was loaded; any id that is
            not strictly below this value is replaced by the OOV id.

    Returns:
        A list of integer token ids, beginning with the start-of-review id.
    """
    # Clean the text: lowercase, then drop everything that is not a word
    # character or whitespace (i.e. punctuation).
    cleaned = re.sub(r"[^\w\s]", "", text.lower())

    encoded = [START_ID]
    for word in cleaned.split():
        rank = word_index.get(word)
        # Apply the offset only to words that exist in the index; adding it to
        # a fallback value would turn "unknown" into the id of a real word.
        token_id = OOV_ID if rank is None else rank + INDEX_OFFSET
        # Words outside the top-`num_words` vocabulary were never seen by the
        # model and would fall outside the embedding table.
        if token_id >= num_words:
            token_id = OOV_ID
        encoded.append(token_id)
    return encoded


# New Reviews to classify
reviews = [
    "This movie was amazing! I liked and enjoyed every moment of it",
    "This movie was terrible. The acting was awful and the plot was worst."
]

# Preprocess: encode each review with the training-time scheme
encoded_reviews = [encode_review(review, word_index, num_words) for review in reviews]

# Pad the sequences to the same fixed length used for training
padded_reviews = pad_sequences(encoded_reviews, maxlen=maxlen)

# Make predictions (one sigmoid score per review)
predictions = model.predict(padded_reviews).flatten()

# Convert predictions to output_labels (y) using a 0.5 decision threshold
output_labels = ["Positive" if sentiment >= 0.5 else "Negative" for sentiment in predictions]

# Output results
for review, score, label in zip(reviews, predictions, output_labels):
    print("\nReview:", review)
    print(f"Sentiment score: {score:.4f}")
    print("Predicted sentiment:", label)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 52ms/step

Review: This movie was amazing! I liked and enjoyed every moment of it
Sentiment score: 0.5033
Predicted sentiment: Positive

Review: This movie was terrible. The acting was awful and the plot was worst.
Sentiment score: 0.0681
Predicted sentiment: Negative
