<a href="https://colab.research.google.com/github/rushen-rai/Sentiment_Analysis_NLP/blob/main/Sentiment_Analysis_NLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import numpy as np

# Fetch the IMDB sentiment dataset (train/test splits of encoded reviews and labels)
imdb = keras.datasets.imdb
(x_train, y_train), (x_test, y_test) = imdb.load_data()

# Invert the word -> id vocabulary so ids can be mapped back to words
word_index = imdb.get_word_index()
reverse_word_index = dict(zip(word_index.values(), word_index.keys()))

def decode_review(encoded, index_to_word=None, index_from=3, unknown='?'):
    """Turn a sequence of integer word ids back into a space-separated string.

    Args:
        encoded: Iterable of integer word ids for a single review.
        index_to_word: Mapping from vocabulary id to word. When omitted, the
            module-level ``reverse_word_index`` is used.
        index_from: Offset subtracted from every id before the lookup. The
            default of 3 matches the default ``index_from`` of
            ``keras.datasets.imdb.load_data``, which shifts real word ids to
            make room for reserved ids.
        unknown: Placeholder emitted for ids that have no entry in the
            mapping after the offset is removed.

    Returns:
        The decoded review as one string; an empty string for empty input.
    """
    if index_to_word is None:
        # Resolved at call time so the default tracks the module-level table.
        index_to_word = reverse_word_index
    return ' '.join(index_to_word.get(i - index_from, unknown) for i in encoded)

# Decode every encoded review in both splits back to plain text
x_train_text = list(map(decode_review, x_train))
x_test_text = list(map(decode_review, x_test))

# ===== BAG-OF-WORDS APPROACH =====
print("Training Bag-of-Words model...")

# Create BoW features: binary presence/absence flags for at most 10,000 terms.
# float32 is requested up front because .toarray() materialises a dense
# (samples x features) matrix; float32 halves its size compared with the
# vectorizer's default int64 and matches Keras' default float type.
bow_vectorizer = CountVectorizer(max_features=10000, binary=True, dtype=np.float32)
x_train_bow = bow_vectorizer.fit_transform(x_train_text).toarray()
x_test_bow = bow_vectorizer.transform(x_test_text).toarray()

# Build BoW model.
# The input width is read from the feature matrix instead of being hard-coded,
# so the network still matches the data if the vocabulary ends up smaller than
# max_features. An explicit keras.Input replaces `input_shape=` on the first
# Dense layer, which Keras warns against in Sequential models.
bow_num_features = x_train_bow.shape[1]
bow_model = keras.Sequential([
    keras.Input(shape=(bow_num_features,)),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')  # single probability for the binary label
])

bow_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Hold out the last 20% of the training data for validation during fitting
bow_history = bow_model.fit(
    x_train_bow, y_train,
    epochs=5,
    batch_size=512,
    validation_split=0.2,
    verbose=1
)

# Final evaluation on the untouched test split
bow_test_loss, bow_test_acc = bow_model.evaluate(x_test_bow, y_test)
print(f"BoW Test Accuracy: {bow_test_acc:.4f}")

# ===== TF-IDF APPROACH =====
print("\nTraining TF-IDF model...")

# Create TF-IDF features for at most 10,000 terms.
# float32 is requested up front because .toarray() materialises a dense
# (samples x features) matrix; float32 halves its size compared with the
# vectorizer's default float64 and matches Keras' default float type.
tfidf_vectorizer = TfidfVectorizer(max_features=10000, dtype=np.float32)
x_train_tfidf = tfidf_vectorizer.fit_transform(x_train_text).toarray()
x_test_tfidf = tfidf_vectorizer.transform(x_test_text).toarray()

# Build TF-IDF model.
# The input width is read from the feature matrix instead of being hard-coded,
# so the network still matches the data if the vocabulary ends up smaller than
# max_features. An explicit keras.Input replaces `input_shape=` on the first
# Dense layer, which Keras warns against in Sequential models.
tfidf_num_features = x_train_tfidf.shape[1]
tfidf_model = keras.Sequential([
    keras.Input(shape=(tfidf_num_features,)),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')  # single probability for the binary label
])

tfidf_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Hold out the last 20% of the training data for validation during fitting
tfidf_history = tfidf_model.fit(
    x_train_tfidf, y_train,
    epochs=5,
    batch_size=512,
    validation_split=0.2,
    verbose=1
)

# Final evaluation on the untouched test split
tfidf_test_loss, tfidf_test_acc = tfidf_model.evaluate(x_test_tfidf, y_test)
print(f"TF-IDF Test Accuracy: {tfidf_test_acc:.4f}")

# ===== PREDICTION EXAMPLE =====
# Score one hand-written review with both trained models
sample_review = "This movie was absolutely fantastic! Great acting."

# The vectorizers expect an iterable of documents, so wrap the review once
sample_batch = [sample_review]
sample_bow = bow_vectorizer.transform(sample_batch).toarray()
sample_tfidf = tfidf_vectorizer.transform(sample_batch).toarray()

# predict() returns one row per input with a single sigmoid output each
bow_pred = bow_model.predict(sample_bow)[0][0]
tfidf_pred = tfidf_model.predict(sample_tfidf)[0][0]

# Scores above 0.5 are reported as positive sentiment
print(f"\nSample: '{sample_review}'")
print(f"BoW Prediction: {bow_pred:.4f} ({'Positive' if bow_pred > 0.5 else 'Negative'})")
print(f"TF-IDF Prediction: {tfidf_pred:.4f} ({'Positive' if tfidf_pred > 0.5 else 'Negative'})")

Training Bag-of-Words model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 109ms/step - accuracy: 0.6183 - loss: 0.6394 - val_accuracy: 0.8662 - val_loss: 0.3597
Epoch 2/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 70ms/step - accuracy: 0.8600 - loss: 0.3632 - val_accuracy: 0.8852 - val_loss: 0.2828
Epoch 3/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 86ms/step - accuracy: 0.9138 - loss: 0.2415 - val_accuracy: 0.8862 - val_loss: 0.2743
Epoch 4/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 100ms/step - accuracy: 0.9378 - loss: 0.1861 - val_accuracy: 0.8850 - val_loss: 0.2892
Epoch 5/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 70ms/step - accuracy: 0.9497 - loss: 0.1479 - val_accuracy: 0.8824 - val_loss: 0.3129
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.8763 - loss: 0.3235
BoW Test Accuracy: 0.8758

Training TF-IDF model...
Epoch 1/5
[1m40/40[0m [32m━━━━━━━━