In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from sklearn.metrics import classification_report, confusion_matrix

# Hyperparameters: vocabulary cap, embedding width and fixed review length.
vocab_size = 10000
embedding_dim = 128
max_len = 200

# Fetch the IMDb train/test splits with the vocabulary limited via num_words.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# Bring every review in both splits to the same length (max_len tokens).
X_train, X_test = (
    pad_sequences(reviews, maxlen=max_len) for reviews in (X_train, X_test)
)

#build model
# The fixed sequence length is declared through an explicit Input layer
# instead of Embedding's `input_length` argument, which current Keras
# releases deprecate (it only triggers a warning there).
model = Sequential([
    tf.keras.Input(shape=(max_len,), dtype="int32"),
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    LSTM(units=64),
    Dense(1, activation='sigmoid')
])

#compile model
# Single sigmoid output + binary cross-entropy: two-class sentiment problem.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

#train model
# NOTE: the test split is also passed as validation data, so the val_*
# numbers printed per epoch are test-set metrics. Do not tune on them;
# carve a validation split out of the training data if tuning is needed.
model.fit(X_train, y_train, batch_size=128, epochs=3, validation_data=(X_test, y_test))

# Overall loss/accuracy on the held-out test split.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"\nTest Accuracy: {accuracy:.4f}")

# Per-class report: threshold the predicted probabilities at 0.5 to obtain
# hard 0/1 labels in a flat vector that lines up with y_test.
y_pred_probs = model.predict(X_test)
y_pred = (y_pred_probs.ravel() > 0.5).astype(int)

print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))



#Raw Text Sentiment Prediction


#load word index mapping
word_index = imdb.get_word_index()

# Adjust word index for special tokens: ids 0-3 are reserved below, so every
# real word is shifted up by 3.
# Words whose shifted id would be >= vocab_size are dropped on purpose. The
# Embedding layer is built with input_dim=vocab_size, so such ids would be out
# of range for it; once dropped, encode_review's `.get(word, 2)` fallback maps
# those words to <UNK> instead.
word_index = {
    k: (v + 3) for k, v in word_index.items() if (v + 3) < vocab_size
}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2
word_index["<UNUSED>"] = 3

# Characters treated as token separators when encoding raw text.
# NOTE(review): this set mirrors the default `filters` of Keras'
# text_to_word_sequence, which is presumably how the IMDb word index was
# tokenised (apostrophes are kept, so "don't" stays one token) - confirm.
_TOKEN_SEPARATORS = str.maketrans(
    {ch: " " for ch in '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'}
)


#function to encode and pad raw text
def encode_review(text, word_index, max_len):
    """Convert a raw review string into a padded id sequence for the model.

    The text is lower-cased, punctuation is replaced by spaces so that
    tokens such as "wonderful," or "it." still match their dictionary
    entries, and the result is split on whitespace.

    Args:
        text: Raw review text.
        word_index: Mapping from word to integer id.
        max_len: Sequence length passed to ``pad_sequences`` as ``maxlen``.

    Returns:
        The output of ``pad_sequences`` for a single-sequence batch; the
        sequence starts with id 1 and uses id 2 for words missing from
        ``word_index``.
    """
    tokens = text.lower().translate(_TOKEN_SEPARATORS).split()
    # 1 is the start-of-review marker; 2 stands in for unknown words.
    encoded = [1] + [word_index.get(word, 2) for word in tokens]
    return pad_sequences([encoded], maxlen=max_len)

#function to predict sentiment from raw text
def predict_sentiment(text, model, word_index, max_len):
    """Classify one raw review, print the verdict, and return it.

    Args:
        text: Raw review text.
        model: Object whose ``predict`` returns a nested result from which
            ``[0][0]`` is read as the positive-class score.
        word_index: Word-to-id mapping forwarded to ``encode_review``.
        max_len: Sequence length forwarded to ``encode_review``.

    Returns:
        A ``(sentiment, confidence)`` pair: the label is "Positive" when the
        score exceeds 0.5 and "Negative" otherwise; the confidence is the
        score for a positive label and ``1 - score`` for a negative one.
    """
    model_input = encode_review(text, word_index, max_len)
    score = model.predict(model_input)[0][0]

    if score > 0.5:
        sentiment, confidence = "Positive", score
    else:
        sentiment, confidence = "Negative", 1 - score

    print(f"Review: {text}")
    print(f"Predicted Sentiment: {sentiment} (Confidence: {confidence:.2f})")
    return sentiment, confidence




# Hand-written smoke-test inputs: one clearly positive review, one clearly
# negative review, and one mixed review.
sample_reviews = [
    "The movie was absolutely wonderful, touching, and well-acted",
    "Terrible plot, horrible acting. I regret watching it.",
    "It was okay, not great but not bad either.",
]

# Run each sample through the trained model; predict_sentiment prints the
# review and its predicted label, and its return value is ignored here.
for review in sample_reviews:
    predict_sentiment(review, model, word_index, max_len)



Epoch 1/3




196/196 ━━━━━━━━━━━━━━━━━━━━ 6s 20ms/step - accuracy: 0.7048 - loss: 0.5330 - val_accuracy: 0.8624 - val_loss: 0.3220
Epoch 2/3
196/196 ━━━━━━━━━━━━━━━━━━━━ 4s 18ms/step - accuracy: 0.9038 - loss: 0.2498 - val_accuracy: 0.8628 - val_loss: 0.3245
Epoch 3/3
196/196 ━━━━━━━━━━━━━━━━━━━━ 7s 26ms/step - accuracy: 0.9296 - loss: 0.1857 - val_accuracy: 0.8566 - val_loss: 0.3598
782/782 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - accuracy: 0.8562 - loss: 0.3570

Test Accuracy: 0.8566
782/782 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step
              precision    recall  f1-score   support

           0       0.81      0.93      0.87     12500
           1       0.91      0.79      0.85     12500

    accuracy                           0.86     25000
   macro avg       0.86      0.86      0.86     25000
weighted avg       0.86      0.86      0.86  