<a href="https://colab.research.google.com/github/nivetharaja26/Google_Colab/blob/main/LSTM_IMDB_SentimentAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# -----------------------------
# 1. Load the IMDB dataset
# -----------------------------
max_features = 10000  # consider only the top 10,000 words
maxlen = 200          # cut reviews after 200 words
batch_size = 32

print("Loading data...")
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), "training samples")
print(len(x_test), "testing samples")

# -----------------------------
# 2. Pad sequences
# -----------------------------
print("Pad sequences (samples x time)...")
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print("x_train shape:", x_train.shape)
print("x_test shape:", x_test.shape)

# -----------------------------
# 3. Build the LSTM model
# -----------------------------
model = Sequential()
model.add(Embedding(max_features, 128, input_length=maxlen))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation="sigmoid"))

# -----------------------------
# 4. Compile the model
# -----------------------------
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"]
)

print(model.summary())

# -----------------------------
# 5. Train the model
# -----------------------------
print("Training...")
history = model.fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=3,
    validation_data=(x_test, y_test)
)

# -----------------------------
# 6. Evaluate the model
# -----------------------------
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print("Test loss:", score)
print("Test accuracy:", acc)


Loading data...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
25000 training samples
25000 testing samples
Pad sequences (samples x time)...
x_train shape: (25000, 200)
x_test shape: (25000, 200)




None
Training...
Epoch 1/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m468s[0m 592ms/step - accuracy: 0.7088 - loss: 0.5469 - val_accuracy: 0.8537 - val_loss: 0.3490
Epoch 2/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m501s[0m 590ms/step - accuracy: 0.8581 - loss: 0.3472 - val_accuracy: 0.8560 - val_loss: 0.3458
Epoch 3/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m458s[0m 586ms/step - accuracy: 0.8762 - loss: 0.3037 - val_accuracy: 0.8626 - val_loss: 0.3402
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 76ms/step - accuracy: 0.8607 - loss: 0.3451
Test loss: 0.34020358324050903
Test accuracy: 0.8626400232315063


In [19]:
import numpy as np

sample = x_test[45]  # take one review
pred = model.predict(np.array([sample]))
print("Prediction:", pred)
print("Label:", y_test[3400])  # actual answer


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
Prediction: [[0.49170792]]
Label: 0
