### LSTM for sequence classification in the IMDB dataset

In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing import sequence

In [2]:
# Fix the random seeds for reproducibility.
# tf.random.set_seed() on its own only seeds TensorFlow's global generator;
# tf.keras.utils.set_random_seed() seeds Python's `random`, NumPy and
# TensorFlow together, so every source of randomness starts from SEED.
SEED = 7
tf.keras.utils.set_random_seed(SEED)

In [3]:
# Load the IMDB review dataset, keeping only the `top_words` most frequent
# words (top_words is passed to load_data as num_words).
# NOTE(review): words outside that vocabulary are not zeroed -- per the Keras
# imdb.load_data docs they are replaced by the out-of-vocabulary marker
# (oov_char, default 2), while 0 is reserved for padding. Confirm against the
# installed Keras version.
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

In [4]:
# Bring every review to exactly `max_review_length` tokens so the reviews can
# be fed to the network as one rectangular array. Longer reviews are truncated
# and shorter ones padded by pad_sequences (at the front by default, per the
# Keras docs). Both splits get the same treatment.
max_review_length = 500
X_train, X_test = (
    sequence.pad_sequences(reviews, maxlen=max_review_length)
    for reviews in (X_train, X_test)
)

In [5]:
# Build and train the classifier: Embedding -> LSTM -> single sigmoid unit.
embedding_vector_length = 32
# Backward-compatible alias: this name was originally spelled with a typo, so
# it stays defined in case any other cell still refers to it.
embedding_vecor_length = embedding_vector_length

model = Sequential([
    # Maps each of the `top_words` word ids to a 32-dimensional vector.
    Embedding(top_words, embedding_vector_length, input_length=max_review_length),
    # Reads the embedded review and emits one 100-dimensional summary vector.
    LSTM(100),
    # A single sigmoid unit gives the binary sentiment score.
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

# Keep the History object: assigning it avoids the bare object repr in the
# cell output and leaves per-epoch loss/accuracy available for inspection.
history = model.fit(X_train, y_train, epochs=3, batch_size=64)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 32)           160000    
                                                                 
 lstm (LSTM)                 (None, 100)               53200     
                                                                 
 dense (Dense)               (None, 1)                 101       
                                                                 
Total params: 213301 (833.21 KB)
Trainable params: 213301 (833.21 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x20b0373d3c0>

In [6]:
# Score the trained model on the held-out test split.
# The model was compiled with metrics=['accuracy'], so the value at index 1 of
# the result is reported as the accuracy (as a fraction; converted to percent).
scores = model.evaluate(X_test, y_test, verbose=0)
accuracy_percent = scores[1] * 100
print("Accuracy: %.2f%%" % accuracy_percent)

Accuracy: 87.58%


In [7]:
# Save the trained model to disk. The load cell further down reads this exact
# path ('lstm_imdb_model.h5'), so the two must be kept in sync.
# NOTE(review): this cell's output shows a warning raised from
# saving_api.save_model -- presumably Keras' notice that the HDF5 ('.h5')
# format is legacy. Consider switching to the native '.keras' format, changing
# the path in the load cell at the same time.
model.save('lstm_imdb_model.h5')

  saving_api.save_model(


In [8]:
# Load the saved model
imdb_model = tf.keras.models.load_model('lstm_imdb_model.h5')

# Encoding conventions of imdb.load_data() when called with its defaults, as
# the data-loading cell does (see the Keras imdb.load_data documentation):
# id 0 is padding, 1 marks the start of a review, 2 replaces out-of-vocabulary
# words, and every rank returned by imdb.get_word_index() is shifted up by 3.
IMDB_START_CHAR = 1
IMDB_OOV_CHAR = 2
IMDB_INDEX_FROM = 3

# Filled on first use so the word-index file is fetched and parsed only once
# instead of on every prediction.
_imdb_word_index = None


def _get_imdb_word_index():
    """Return the IMDB word -> frequency-rank mapping, loading it at most once."""
    global _imdb_word_index
    if _imdb_word_index is None:
        _imdb_word_index = imdb.get_word_index()
    return _imdb_word_index


def _encode_review(review):
    """Convert raw review text into the integer ids used by the training data."""
    # Lower-case the text and turn punctuation into spaces so that a token such
    # as "fantastic!" is looked up as "fantastic". Apostrophes are kept so that
    # contractions stay a single token (assumes the IMDB vocabulary stores them
    # that way -- TODO confirm).
    cleaned = ''.join(
        ch if ch.isalnum() or ch == "'" else ' ' for ch in review.lower()
    )
    word_index = _get_imdb_word_index()

    encoded = [IMDB_START_CHAR]
    for token in cleaned.split():
        rank = word_index.get(token)
        token_id = IMDB_OOV_CHAR if rank is None else rank + IMDB_INDEX_FROM
        # Mirror num_words=top_words: ids at or beyond the cut-off become OOV.
        encoded.append(token_id if token_id < top_words else IMDB_OOV_CHAR)
    return encoded


# Function to predict sentiment for a given review
def predict_sentiment(review, threshold=0.5):
    """Classify a raw text review as "Positive" or "Negative".

    The text is encoded with the same conventions as the training data (start
    marker, +3 index offset, OOV marker for unknown or rare words), padded to
    `max_review_length`, and scored by the reloaded `imdb_model`.

    Args:
        review: The review as a plain string.
        threshold: Score above which the review counts as positive.
            Defaults to 0.5.

    Returns:
        "Positive" if the model's score is greater than `threshold`,
        otherwise "Negative".
    """
    encoded = _encode_review(review)
    padded = sequence.pad_sequences([encoded], maxlen=max_review_length)
    # predict() returns one row per input with a single sigmoid output, so take
    # the scalar explicitly instead of comparing a whole array.
    probability = float(imdb_model.predict(padded)[0][0])
    return "Positive" if probability > threshold else "Negative"


In [9]:
# Smoke-test the prediction helper on one hand-written review and show the
# review text together with the predicted label.
example_review = "This movie was fantastic! I loved every bit of it."
prediction_result = predict_sentiment(example_review)
report = f"Review: {example_review} \nThe sentiment is predicted as: {prediction_result}"
print(report)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
Review: This movie was fantastic! I loved every bit of it. 
The sentiment is predicted as: Positive
