In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

# Load the IMDB movie review dataset once. num_words=10000 restricts the
# vocabulary to the 10,000 most frequent words (see the Keras
# imdb.load_data documentation).
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=10000)

## The preprocess(x, y) function defined below prepares the data for training:
##
## * x is passed through the Keras pad_sequences() helper with maxlen=max_length, so every
##   sequence ends up with the same fixed length (256 here). A fixed length is required so
##   that the reviews can be batched together when training the neural network.
## * y is converted to a NumPy array of 32-bit floating-point numbers, so that the label
##   dtype is compatible with the model output and the loss function.
## * The processed x and y arrays are returned as a tuple (x, y).

# Preprocessing configuration
# Define the maximum sequence length (in tokens) used when padding the reviews
max_length = 256

# Preprocess the data
def preprocess(x, y):
    """Pad the input sequences and cast the labels to float32.

    Args:
        x: Sequences to pad; passed to Keras ``pad_sequences`` with
            ``maxlen`` set to the module-level ``max_length``.
        y: Labels matching ``x``; converted to a float32 NumPy array.

    Returns:
        A tuple ``(padded_x, float_labels)``.
    """
    # Bring every sequence to the same fixed length so they can be batched.
    padded = keras.preprocessing.sequence.pad_sequences(x, maxlen=max_length)
    # Build the float32 label array in a single conversion step.
    labels = np.array(y, dtype=np.float32)
    return padded, labels

# Pad the reviews and cast the labels for both the training and test splits.
x_train, y_train = preprocess(x_train, y_train)
x_test, y_test = preprocess(x_test, y_test)

# Build the model: embedding -> LSTM -> single sigmoid unit for binary output.
# input_dim must match the num_words value used when loading the dataset.
# The deprecated `input_length` argument is intentionally not passed: the
# sequence length is inferred from the padded input, and Keras 3 warns when
# the argument is supplied.
model = keras.Sequential([
    keras.layers.Embedding(input_dim=10000, output_dim=32),
    keras.layers.LSTM(32),
    keras.layers.Dense(1, activation='sigmoid'),
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model, holding out 10% of the training data for validation.
# NOTE(review): batch_size=6 is unusually small and makes each epoch slow;
# confirm it is intentional (e.g. not a typo for 64) before changing it.
history = model.fit(x_train, y_train, batch_size=6, epochs=5, validation_split=0.1)

# Evaluate the model on the held-out test split
test_loss, test_acc = model.evaluate(x_test, y_test)
print("Test Accuracy:", test_acc)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
