# In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences

# 1️⃣ Load IMDb dataset
dataset, info = tfds.load(
    "imdb_reviews",
    split=["train", "test"],
    as_supervised=True,
    with_info=True,
)
train_data, test_data = dataset


def _split_to_lists(split):
    """Materialize a supervised split as (decoded review strings, labels).

    Each element of ``split`` is unpacked as a ``(text, label)`` pair; the
    text is decoded from UTF-8 bytes and the label is converted with
    ``.numpy()``.
    """
    sentences, labels = [], []
    for raw_text, raw_label in split:
        sentences.append(raw_text.numpy().decode("utf-8"))
        labels.append(raw_label.numpy())
    return sentences, labels


# Convert both splits to plain Python lists of sentences and labels
train_sentences, train_labels = _split_to_lists(train_data)
test_sentences, test_labels = _split_to_lists(test_data)

# 2️⃣ Tokenization and Padding
vocab_size = 10000  # Vocabulary cap: only the 10,000 most frequent words are kept
max_length = 200  # Every review is padded/truncated to this many tokens

# The vocabulary is fitted on the training sentences only; the test
# sentences are encoded with that same fitted tokenizer.
tokenizer = tf.keras.preprocessing.text.Tokenizer(
    num_words=vocab_size,
    oov_token="<OOV>",
)
tokenizer.fit_on_texts(train_sentences)


def _encode(sentences):
    """Turn raw sentences into integer sequences padded/truncated at the end."""
    return pad_sequences(
        tokenizer.texts_to_sequences(sentences),
        maxlen=max_length,
        padding="post",
        truncating="post",
    )


X_train = _encode(train_sentences)
X_test = _encode(test_sentences)

# Labels are converted to tensors for Keras
y_train, y_test = (
    tf.convert_to_tensor(labels) for labels in (train_labels, test_labels)
)

# 3️⃣ Define LSTM Model
# The input sequences are post-padded with zeros, so the embedding masks
# index 0 (mask_zero=True). Without the mask the LSTM keeps stepping over the
# padding after the last real word, and its final state (the only thing the
# classifier sees, since return_sequences=False) is dominated by padding.
model = Sequential([
    # Explicit input spec; replaces the deprecated `input_length` Embedding argument
    tf.keras.Input(shape=(max_length,), dtype="int32"),
    Embedding(input_dim=vocab_size, output_dim=64, mask_zero=True),  # Word Embedding Layer (index 0 = padding)
    LSTM(64, return_sequences=False),  # LSTM Layer with 64 units, emits only the final state
    Dense(1, activation="sigmoid"),  # Output layer (binary classification)
])

# 4️⃣ Compile the model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# 5️⃣ Train the model
# NOTE: the test split is also passed as validation data, so the per-epoch
# validation metrics and the final evaluation below use the same examples.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
)

# 6️⃣ Evaluate the model
test_metrics = model.evaluate(x=X_test, y=y_test)
loss, accuracy = test_metrics  # order follows compile(): loss first, then accuracy
print(f"Test Accuracy: {accuracy:.4f}")

# 7️⃣ Make Predictions
sample_text = [
    "The movie was fantastic! I loved it.",
    "It was a terrible film, I hated it.",
]

# Encode the samples with the same tokenizer and padding settings as the training data
sample_sequences = tokenizer.texts_to_sequences(sample_text)
sample_padded = pad_sequences(
    sample_sequences,
    maxlen=max_length,
    padding="post",
    truncating="post",
)

predictions = model.predict(sample_padded)

# Flatten to one scalar score per review, then threshold at 0.5
predicted_labels = [
    "Positive" if score > 0.5 else "Negative"
    for score in predictions.ravel()
]

for review, sentiment in zip(sample_text, predicted_labels):
    print(f"Review: {review} → Sentiment: {sentiment}")
