# In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from gensim.models import KeyedVectors
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.datasets import imdb
from keras.layers import LSTM, Dense, Embedding, Dropout, BatchNormalization
from keras.models import Sequential
from keras.optimizers import Adam
from keras.preprocessing.text import Tokenizer
from keras.utils import plot_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Set the random seeds for reproducibility.
# NumPy and TensorFlow keep separate global RNGs: seeding only NumPy leaves
# the Keras weight initialisation and dropout masks unseeded, so both are
# seeded here with the same value.
np.random.seed(42)
tf.random.set_seed(42)

# Load the GloVe embedding
# Each line of the file is split on whitespace: the first field is the token,
# the remaining fields are parsed as its float32 vector components.
embedding_file = 'glove.6B.300d.txt'
with open(embedding_file, encoding='utf8') as glove_file:
    word_vectors = {
        fields[0]: np.asarray(fields[1:], dtype='float32')
        for fields in map(str.split, glove_file)
    }

# Load the IMDB dataset
# imdb.load_data returns reviews that are ALREADY encoded as lists of integer
# word ids, so they are used directly. Re-tokenising their string form (as
# this script used to do) turns the ids into digit "words" and destroys the
# link between ids and the real vocabulary.
vocab_size = 50000
maxlen = 250
# Offset between the ranks in imdb.get_word_index() and the ids found in the
# loaded sequences; ids below it are reserved (padding / start / OOV).
# 3 is the documented Keras default, passed explicitly so both sides agree.
index_from = 3
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    num_words=vocab_size, index_from=index_from
)

# Pad the sequences to a fixed length
X_train = pad_sequences(X_train, maxlen=maxlen)
X_test = pad_sequences(X_test, maxlen=maxlen)

# Create the embedding matrix
# Row i holds the GloVe vector of the word whose id in the loaded sequences
# is i. Rows for reserved ids and for words missing from GloVe stay zero.
embedding_dim = 300
embedding_matrix = np.zeros((vocab_size, embedding_dim))
word_index = imdb.get_word_index()
for word, rank in word_index.items():
    i = rank + index_from
    if i >= vocab_size:
        # load_data(num_words=vocab_size) never emits ids this large.
        continue
    embedding_vector = word_vectors.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

# Define the model
# Frozen pre-trained embeddings feed two stacked LSTMs; a small dense head
# then produces a single sigmoid output for the binary label.
embedding_layer = Embedding(
    vocab_size,
    embedding_dim,
    weights=[embedding_matrix],
    input_length=maxlen,
    trainable=False,
)
model = Sequential([
    embedding_layer,
    Dropout(0.3),
    BatchNormalization(),
    LSTM(128, return_sequences=True),  # full sequence out, consumed by the next LSTM
    LSTM(64),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])

# Compile the model
optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Define early stopping and learning rate reduction callbacks
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss; without it the weights evaluated below would be those of the last
# epoch run, i.e. `patience` epochs past the best one.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    verbose=1,
    mode='min',
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=1, verbose=1, mode='min')

# Train the model
# Validation data is carved out of the training set. Using the test set here
# would let early stopping and the LR schedule tune on it, which inflates the
# test accuracy reported below.
history = model.fit(
    X_train, y_train,
    batch_size=128,
    epochs=15,
    validation_split=0.2,
    callbacks=[early_stopping, reduce_lr]
)

# Evaluate the model on the test set (never seen during training/selection)
score, acc = model.evaluate(X_test, y_test, batch_size=128)
print('Test score =', score)
print('Test accuracy =', acc)

