<a href="https://colab.research.google.com/github/rohithsing/SAP/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# 1. Imports
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

In [None]:
# 2. Load Dataset
VOCAB_SIZE = 10_000  # keep only the 10k most frequent words
MAX_LEN = 200        # number of tokens every review is padded/truncated to

# Reviews come back already integer-encoded, split into train and test sets.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=VOCAB_SIZE)

# Bring both splits to a common sequence length so they can be batched.
X_train, X_test = (
    pad_sequences(split, maxlen=MAX_LEN) for split in (X_train, X_test)
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
17464789/17464789 ━━━━━━━━━━━━━━━━━━━━ 2s 0us/step


In [None]:
# 3. Build LSTM Model
# Seed Python, NumPy and TensorFlow before any weights are created so that
# weight initialisation (and the shuffling done later during training) is
# repeatable on Restart & Run All.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

EMBEDDING_DIM = 128  # size of each learned word vector
LSTM_UNITS = 128     # size of the LSTM hidden state

model = Sequential([
    # Declare the input with an explicit Input object: passing `input_shape`
    # to a layer is deprecated in Keras 3 and emits a warning.
    tf.keras.Input(shape=(MAX_LEN,)),
    Embedding(VOCAB_SIZE, EMBEDDING_DIM),
    LSTM(LSTM_UNITS),
    Dense(1, activation="sigmoid"),  # single score in [0, 1]
])

model.summary()

In [None]:
# 4. Compile Model
# Binary cross-entropy is the loss being minimised by Adam; accuracy is
# reported alongside it.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
# 5. Train Model
EPOCHS = 5
BATCH_SIZE = 64
VALIDATION_SPLIT = 0.2  # fraction of the training data held out for validation

# Keep the History object returned by fit(): history.history holds the
# per-epoch training and validation metrics, which are needed to check for
# over-fitting. Assigning it also stops the bare History repr from being
# echoed as the cell's output.
history = model.fit(
    X_train,
    y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
)

In [None]:
# 6. Evaluate Model
# evaluate() yields the loss first, followed by the compiled metric.
test_loss, test_accuracy = model.evaluate(X_test, y_test)

for label, value in (("\nTest Loss:", test_loss), ("Test Accuracy:", test_accuracy)):
    print(label, value)

In [None]:
# 7. Correct Review Encoding
word_index = imdb.get_word_index()

# Conventions used by imdb.load_data() with its default arguments.
START_TOKEN = 1  # prepended to every review
OOV_TOKEN = 2    # replaces any word outside the vocabulary
INDEX_FROM = 3   # offset added to word_index ranks to get dataset ids


def encode_review(text):
    """
    Encode raw text the same way the IMDB training data is encoded.

    The text is lower-cased and split on whitespace. Each word is looked up
    in ``word_index`` and shifted by ``INDEX_FROM``; words that are missing
    from the index, or whose shifted id falls outside the vocabulary, become
    ``OOV_TOKEN``. Punctuation is not stripped, so a word with punctuation
    attached only matches if that exact string is a key of ``word_index``.

    Args:
        text: The review as a plain string. An empty string yields a
            sequence containing only the start token (plus padding).

    Returns:
        The array produced by ``pad_sequences`` for this single review,
        i.e. a batch of one sequence of length ``MAX_LEN``.
    """
    encoded = [START_TOKEN]

    for word in text.lower().split():
        rank = word_index.get(word)
        token_id = None if rank is None else rank + INDEX_FROM
        # The vocabulary limit applies to the *shifted* id, exactly as
        # imdb.load_data(num_words=...) applies it. Checking the raw rank
        # instead lets ids VOCAB_SIZE .. VOCAB_SIZE + 2 through, which are
        # out of range for the Embedding layer and never occur in training.
        if token_id is not None and token_id < VOCAB_SIZE:
            encoded.append(token_id)
        else:
            encoded.append(OOV_TOKEN)

    return pad_sequences([encoded], maxlen=MAX_LEN)

In [None]:
# 8. Predictions
reviews = [
    "this movie was good and enjoyable",
    "this movie was bad and boring",
    "i loved this film it was amazing",
    "i hated this movie it was terrible"
]

for review in reviews:
    # verbose=0 suppresses the per-call progress bar, which would otherwise be
    # interleaved with the printed results. predict() returns one row per
    # input with a single sigmoid output, hence [0][0].
    score = model.predict(encode_review(review), verbose=0)[0][0]
    print("\nReview:", review)
    print("Sentiment Score:", score)
    # Scores above 0.5 are reported as positive, everything else as negative.
    print("Prediction:", "Positive 😊" if score > 0.5 else "Negative 😞")