## Sentiment Analysis

# Without Embedding Layer


In [28]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np

In [29]:
# Fetch the IMDB review dataset, restricting the vocabulary to 10,000 word indices,
# then unpack the train/test splits into sequences (x) and sentiment labels (y).
imdb_splits = tf.keras.datasets.imdb.load_data(num_words=10000)
(x_train, y_train), (x_test, y_test) = imdb_splits

In [4]:
# IMDB reviews vary in length, but a Dense layer needs a fixed-size input vector.
# Padding (or truncating) each review ensures every input is exactly 200 tokens long.

In [30]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Bring every review to a fixed length of 200 tokens so the model always
# receives inputs of the same size; both splits get the same treatment.
x_train, x_test = (
    pad_sequences(reviews, maxlen=200)
    for reviews in (x_train, x_test)
)


In [42]:
# Build a baseline model from Dense layers only (no Embedding layer).
# The padded word indices are fed to the network directly as numbers, so the
# model sees each index as a magnitude rather than as a word identity.
#
# An explicit Input layer declares the per-sample shape; passing `input_shape=`
# to a layer is deprecated in Keras 3. No Flatten is needed here because each
# padded review is already a flat vector of 200 values.
model = models.Sequential([
    layers.Input(shape=(200,)),            # One padded review: 200 word indices
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # Binary classification
])

In [43]:
# Configure training: Adam optimizer, binary cross-entropy loss (matches the
# single sigmoid output), and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [48]:
# Fit on the training split: 5 epochs, mini-batches of 512 reviews.
# Left as the cell's last expression, so the returned History object is displayed.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=512,
    epochs=5,
)

Epoch 1/5
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5044 - loss: 0.6916
Epoch 2/5
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5021 - loss: 0.6913
Epoch 3/5
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5021 - loss: 0.6906
Epoch 4/5
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5073 - loss: 0.6904
Epoch 5/5
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5005 - loss: 0.6902


<keras.src.callbacks.history.History at 0x78ed46d1cfe0>

In [49]:
# Score the trained model on the held-out test split; the two returned values
# are unpacked as (loss, accuracy), and accuracy is reported as a percentage.
test_loss, test_accuracy = model.evaluate(x=x_test, y=y_test)
print(f'Test Accuracy: {test_accuracy * 100:.2f}')

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4930 - loss: 0.7020
Test Accuracy: 50.02


In [53]:
# Predict the sentiment of the first test review.
# Keras models expect batched input, i.e. a 2D array (batch_size, sequence_length),
# so slice with 0:1 to keep the batch axis: x_test[0:1] has shape (1, 200).
first_review_batch = x_test[0:1]
predicted_sentiment = model.predict(first_review_batch)

# The model emits one sigmoid score per sample; above 0.5 is read as positive.
sentiment_score = predicted_sentiment[0][0]
if sentiment_score > 0.5:
    sentiment_label = "Positive"
else:
    sentiment_label = "Negative"
print("Predicted Sentiment:", sentiment_label)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Predicted Sentiment: Positive


# With Embedding Layer

The Embedding layer **transforms integer word indices into dense vector representations.**

***Example:***
Original Sentence:
"The movie was amazing, I loved it!"
Tokenized & Indexed Representation:
[10, 482, 21, 345, 7, 99, 4]

***What Embedding Does***
Instead of treating 10, 482, 21, etc., as just numbers, an embedding layer converts each of them into a dense vector of real numbers.

For example, the word index 10 (which represents "The") might be mapped to:
[0.12, -0.25, 0.78, ..., 0.05]  # A 128-dimensional vector
Each word in the sentence gets a similar vector.

***Why Is This Useful?***
- **Captures word meaning** → After training, words that play similar roles tend to end up with similar vectors.
- **Avoids numeric misinterpretation** → Without embeddings, the model treats word indices as magnitudes and might think 99 > 10, which makes no sense for words.
- **Enables word relationships** → Related words like "king" and "queen" can end up with similar embeddings.

***Example: Before vs After Embedding***
Before Embedding (Word Indices)

[10, 482, 21, 345, 7, 99, 4]  # Just numbers
After Embedding (Word Vectors, Each of Size 128)

[
  [0.12, -0.25, 0.78, ..., 0.05],   # Word 10
  [0.34, 0.67, -0.12, ..., -0.89],  # Word 482
  [0.08, 0.15, -0.32, ..., 0.40],   # Word 21
  ...
]
Each word now has a dense vector representation that captures its meaning.

In [14]:
import tensorflow as tf
from tensorflow.keras import layers,models
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [15]:
# Reload the IMDB reviews with the vocabulary limited to 10,000 word indices,
# then unpack each split into its sequences (x) and sentiment labels (y).
train_split, test_split = imdb.load_data(num_words=10000)
x_train, y_train = train_split
x_test, y_test = test_split

# Number of training reviews
len(x_train)

25000

In [16]:
# Give every review the same fixed length (200 tokens) so the network
# always receives inputs of identical size.
x_train, x_test = [pad_sequences(reviews, maxlen=200) for reviews in (x_train, x_test)]

In [17]:
# Build the same Dense classifier, this time on top of an Embedding layer.
#
# An explicit Input layer declares the per-sample shape; the Embedding layer's
# `input_length=` argument is deprecated in Keras 3, so it is not used here.
# Declaring the input also builds the model immediately, so model.summary()
# works before training.
model = models.Sequential([
    layers.Input(shape=(200,)),                         # One padded review: 200 word indices
    layers.Embedding(input_dim=10000, output_dim=128),  # Converts each word index to a 128-dim vector
    layers.Flatten(),                                   # Flatten the (200, 128) embeddings into a 1D vector
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')               # Binary classification
])

In [18]:
# Configure training: Adam optimizer, binary cross-entropy loss (matches the
# single sigmoid output), and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [19]:
# Fit on the training split: 10 epochs, mini-batches of 512 reviews.
# Left as the cell's last expression, so the returned History object is displayed.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=512,
    epochs=10,
)

Epoch 1/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 390ms/step - accuracy: 0.5926 - loss: 0.6453
Epoch 2/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 365ms/step - accuracy: 0.9278 - loss: 0.1841
Epoch 3/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 393ms/step - accuracy: 0.9912 - loss: 0.0380
Epoch 4/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 368ms/step - accuracy: 0.9987 - loss: 0.0072
Epoch 5/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 393ms/step - accuracy: 0.9998 - loss: 0.0019
Epoch 6/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 364ms/step - accuracy: 1.0000 - loss: 3.5102e-04
Epoch 7/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 387ms/step - accuracy: 1.0000 - loss: 2.0582e-04
Epoch 8/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 365ms/step - accuracy: 1.0000 - loss: 1.2618e-04
Epoch 9/10
[1m49/49

<keras.src.callbacks.history.History at 0x7bdd5280f620>

In [20]:
# Score the trained model on the held-out test split; the two returned values
# are unpacked as (loss, accuracy), and accuracy is reported as a percentage.
test_loss, test_accuracy = model.evaluate(x=x_test, y=y_test)
print(f'Test accuracy:{test_accuracy * 100:.2f}')

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 0.8498 - loss: 0.6821
Test accuracy:84.97


In [21]:
# Predict the sentiment of the first test review.
# Slicing with 0:1 (rather than indexing with 0) keeps the leading batch axis.
first_review_batch = x_test[0:1]
predicted_sentiment = model.predict(first_review_batch)

# The model emits one sigmoid score per sample; above 0.5 is read as positive.
sentiment_score = predicted_sentiment[0][0]
if sentiment_score > 0.5:
    sentiment_label = "Positive"
else:
    sentiment_label = "Negative"
print("Predicted Sentiment:", sentiment_label)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
Predicted Sentiment: Negative
