# **Practical No.: 2**

# Building a natural language processing (NLP) model for sentiment analysis or text classification.

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense
import numpy as np

In [None]:
# Tiny hand-labelled corpus, kept as (sentence, label) pairs so that each
# label sits next to the sentence it belongs to.
labelled_samples = [
    ("I love this product", 1),
    ("This is the worst movie", 0),
    ("I am so happy", 1),
    ("I hate this book", 0),
    ("What a great day", 1),
    ("I am very disappointed", 0),
]

# Split the pairs back into the two parallel lists used by the later cells.
texts = [sentence for sentence, _ in labelled_samples]
labels = [label for _, label in labelled_samples]  # 1 = Positive, 0 = Negative

In [None]:
# Initialize tokenizer.
# oov_token="<OOV>" gives words that were not in the fitted vocabulary a
# dedicated placeholder token instead of dropping them from the sequence.
tokenizer = Tokenizer(oov_token="<OOV>")
# Build the word index from the training sentences only.
tokenizer.fit_on_texts(texts)

In [None]:
# Encode every training sentence as a list of word indices, then bring all
# rows to a fixed width of 5 with the padding appended after the tokens.
sequences = tokenizer.texts_to_sequences(texts)
padded = pad_sequences(
    sequences,
    maxlen=5,
    padding='post',
)

In [None]:
# Number of rows the embedding table needs: one per entry in the tokenizer's
# word index, plus one extra because Keras word indices start at 1 and
# index 0 is left for padding.
vocab_size = 1 + len(tokenizer.word_index)

In [None]:
# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation
# (and therefore the training run) is repeatable after Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Small feed-forward sentiment classifier:
#   Input     -> one row of word indices, same width as the padded training matrix
#   Embedding -> an 8-dimensional vector per word index
#   Flatten   -> concatenates the per-token vectors into a single feature vector
#   Dense(6)  -> hidden layer with ReLU
#   Dense(1)  -> sigmoid score, thresholded later into Positive / Negative
model = Sequential([
    # The input shape is declared with an explicit Input layer: the Embedding
    # `input_length` argument is deprecated in Keras 3. Reading the width from
    # `padded` keeps the model in sync with the padding step.
    tf.keras.Input(shape=(padded.shape[1],)),
    Embedding(input_dim=vocab_size, output_dim=8),
    Flatten(),
    Dense(6, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Binary cross-entropy matches the single sigmoid output and the 0/1 labels.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Turn the Python list of 0/1 labels into a NumPy array before it is passed
# to model.fit alongside the padded NumPy input matrix.
labels = np.asarray(labels)

In [None]:
# Train on the whole six-sentence corpus for 50 epochs.
# verbose=1 prints one progress line per epoch; use verbose=0 to silence it.
model.fit(
    padded,
    labels,
    epochs=50,
    verbose=1,
)

Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.3333 - loss: 0.6996
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 185ms/step - accuracy: 0.3333 - loss: 0.6972
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step - accuracy: 0.3333 - loss: 0.6950
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.3333 - loss: 0.6929
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.6667 - loss: 0.6914
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - accuracy: 0.6667 - loss: 0.6899
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step - accuracy: 0.8333 - loss: 0.6885
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - accuracy: 0.8333 - loss: 0.6871
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

<keras.src.callbacks.history.History at 0x7b8fc8b95b50>

In [None]:
# Sentences that do not appear verbatim in the training corpus.
# NOTE(review): the following cell assigns `test_texts` again, so this list is
# replaced before it is ever tokenized or scored.
test_texts = [
    "so happy",
    "I hate this taste",
    "What a worst day",
    "Great movie",
]

In [None]:
# Sentences to score. Four of the six are verbatim training sentences and the
# other two ("worst movie", "great day") are shortened training sentences, so
# the results below show fit on seen data rather than generalisation.
test_texts = [
    "I love this product", "worst movie", "I am so happy",
    "I hate this book", "great day", "I am very disappointed",
]

In [None]:
# Encode the evaluation sentences with the tokenizer that was fitted on the
# training texts, so both sets share the same word indices.
test_seq = tokenizer.texts_to_sequences(test_texts)
# Pad to the width of the training matrix rather than repeating the literal 5,
# so the inference input cannot drift from the shape the model was trained on.
test_pad = pad_sequences(test_seq, maxlen=padded.shape[1], padding='post')

In [None]:
# Score the padded test batch; the cells below read predictions[i][0] as the
# sigmoid output for the i-th sentence in test_texts.
predictions = model.predict(test_pad)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step


In [None]:
# Label every sentence from its sigmoid score and print it with the score.
# NOTE(review): 0.51 is not the usual 0.5 cut-off for a sigmoid output, and the
# scores are rounded to two decimals when printed, so a value shown as 0.51 can
# land on either side of the threshold — confirm the cut-off is intentional.
scores = predictions[:, 0]        # one sigmoid score per sentence
positive_mask = scores > 0.51     # thresholded for the whole batch at once
for text, score, is_positive in zip(test_texts, scores, positive_mask):
    sentiment = "Positive" if is_positive else "Negative"
    print(f"{text} → {sentiment} ({score:.2f})")

I love this product → Positive (0.53)
worst movie → Negative (0.51)
I am so happy → Positive (0.56)
I hate this book → Negative (0.50)
great day → Positive (0.51)
I am very disappointed → Negative (0.45)


In [None]:
# NOTE(review): this cell repeats the reporting loop of the previous cell
# (same 0.51 threshold, same printed lines); one of the two can be removed.
for idx in range(len(test_texts)):
    score = predictions[idx][0]  # sigmoid output for sentence idx
    sentiment = "Positive" if score > 0.51 else "Negative"
    print(f"{test_texts[idx]} → {sentiment} ({score:.2f})")

I love this product → Positive (0.53)
worst movie → Negative (0.51)
I am so happy → Positive (0.56)
I hate this book → Negative (0.50)
great day → Positive (0.51)
I am very disappointed → Negative (0.45)
