In [1]:
from tqdm.auto import tqdm
import numba as numba
import numpy as np

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, LSTM
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam

2023-06-28 20:01:19.234202: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
class CustomLSTMCell(layers.Layer):
    """LSTM-style recurrent cell extended with a third, additive long-term state.

    One fused projection of the current input and the previous hidden state is
    split into five equal slices: forget gate, input gate, candidate values,
    output gate and the long-term increment. The cell therefore carries three
    states per sample, each ``units`` wide: ``[h, c, long_term]``.

    The ``call(inputs, states)`` / ``state_size`` contract is the one expected
    by ``layers.RNN``, which invokes ``call`` once per timestep.

    Args:
        units: Positive integer, width of the output and of every state.
        **kwargs: Forwarded unchanged to the base ``Layer`` constructor.

    Raises:
        ValueError: If ``units`` is not positive.
    """

    def __init__(self, units, **kwargs):
        if units <= 0:
            raise ValueError(
                f"`units` must be a positive integer, received: {units!r}")
        # The base Layer must be initialised before any attribute is assigned
        # so that Keras' attribute tracking is already set up.
        super().__init__(**kwargs)
        self.units = units
        self.state_size = [units, units, units]  # h, c and the long-term state
        self.output_size = units

    def build(self, input_shape):
        """Create the fused input kernel, recurrent kernel and bias (5 slices each)."""
        self.kernel = self.add_weight(shape=(input_shape[-1], self.units * 5),
                                      initializer='glorot_uniform',
                                      name='kernel')
        self.recurrent_kernel = self.add_weight(
            shape=(self.units, self.units * 5),
            initializer='orthogonal',
            name='recurrent_kernel')
        self.bias = self.add_weight(shape=(self.units * 5,),
                                    initializer='zeros',
                                    name='bias')

    def call(self, inputs, states):
        """Advance the cell by one timestep.

        Args:
            inputs: Input at the current timestep; its last axis must match
                the ``input_shape[-1]`` seen in ``build``.
            states: Sequence whose first three entries are the previous
                hidden, carry and long-term states.

        Returns:
            Tuple ``(h, [h, c, long_term])``: the step output followed by the
            updated states.
        """
        h_tm1 = states[0]     # previous hidden (output) state
        c_tm1 = states[1]     # previous carry state
        long_tm1 = states[2]  # previous long-term state

        z = tf.matmul(inputs, self.kernel) + tf.matmul(h_tm1, self.recurrent_kernel) + self.bias
        z_forget, z_input, z_candidate, z_output, z_long = tf.split(z, 5, axis=-1)

        f = tf.nn.sigmoid(z_forget)
        # NOTE(review): the +1 offset sits on the input gate; Keras' built-in
        # LSTM biases the forget gate instead -- confirm this is intentional.
        i = tf.nn.sigmoid(z_input + 1.)
        c = f * c_tm1 + i * tf.nn.tanh(z_candidate)
        o = tf.nn.sigmoid(z_output)

        # The increment is a sigmoid (always positive), so this accumulator can
        # only grow from step to step.
        # NOTE(review): on long sequences this may push tanh(c + long_term)
        # into saturation -- confirm this is the intended design.
        long_term = long_tm1 + tf.nn.sigmoid(z_long)

        # The output depends on the long-term state as well as the carry state.
        h = o * tf.nn.tanh(c + long_term)

        return h, [h, c, long_term]

    def get_config(self):
        """Return the constructor arguments so the cell can be serialized/cloned."""
        config = super().get_config()
        config.update({'units': self.units})
        return config


In [3]:
class CustomLSTMLayer(layers.Layer):
    """Sequence layer that runs a ``CustomLSTMCell`` over time via ``layers.RNN``.

    Args:
        units: Width of the cell's output and states.
        return_sequences: Keyword-only (taken from ``**kwargs``). When true the
            wrapped RNN returns the output of every timestep instead of only
            the last one. Defaults to ``False``.
        **kwargs: Options listed in ``_RNN_ONLY_OPTIONS`` are forwarded to the
            wrapped ``layers.RNN`` only. Every remaining option is passed both
            to the wrapped RNN and to the base ``Layer`` constructor.

    Note:
        ``call`` forwards only ``inputs`` to the wrapped RNN.
    """

    # Options that are only forwarded to `layers.RNN`; the base `Layer`
    # constructor would reject them as unknown keyword arguments.
    _RNN_ONLY_OPTIONS = ('return_state', 'go_backwards', 'stateful', 'unroll',
                         'time_major', 'zero_output_for_mask')

    def __init__(self, units, **kwargs):
        return_sequences = kwargs.pop('return_sequences', False)
        rnn_options = {key: kwargs.pop(key)
                       for key in self._RNN_ONLY_OPTIONS if key in kwargs}

        # Initialise the base Layer before assigning any attribute (sub-layers
        # in particular) so that Keras can track them.
        super().__init__(**kwargs)

        self.units = units
        self.return_sequences = return_sequences
        self._rnn_options = rnn_options
        self.custom_lstm_cell = CustomLSTMCell(units)
        # Mirror the cell's three states (h, c, long-term).
        self.state_size = list(self.custom_lstm_cell.state_size)
        self.rnn_layer = layers.RNN(self.custom_lstm_cell,
                                    return_sequences=return_sequences,
                                    **rnn_options,
                                    **kwargs)

    def call(self, inputs):
        """Run the wrapped RNN over ``inputs`` and return its result."""
        return self.rnn_layer(inputs)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized/cloned."""
        config = super().get_config()
        config.update({'units': self.units,
                       'return_sequences': self.return_sequences})
        config.update(dict(self._rnn_options))
        return config

In [4]:
# IMDB reviews: cap the vocabulary at `num_words` and pad/truncate every
# review to `maxlen` token ids.
num_words, maxlen = 10000, 500

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)
x_train, x_test = (pad_sequences(reviews, maxlen=maxlen)
                   for reviews in (x_train, x_test))

In [5]:
def build_and_train_model(recurrent_layer, x_train, y_train, x_test, y_test,
                          vocab_size=None, embedding_dim=32, batch_size=32,
                          epochs=3):
    """Build an Embedding -> recurrent layer -> sigmoid classifier and train it.

    Args:
        recurrent_layer: Layer instance placed between the embedding and the
            single-unit sigmoid output.
        x_train: Training inputs passed to ``model.fit``.
        y_train: Training targets passed to ``model.fit``.
        x_test: Inputs used as validation data during training.
        y_test: Targets used as validation data during training.
        vocab_size: First argument of ``Embedding`` (number of distinct token
            ids). ``None`` falls back to the notebook-level ``num_words``.
        embedding_dim: Second argument of ``Embedding`` (embedding width).
        batch_size: Batch size passed to ``model.fit``.
        epochs: Number of epochs passed to ``model.fit``.

    Returns:
        The history object returned by ``model.fit``.
    """
    if vocab_size is None:
        # Backward-compatible default: the vocabulary limit defined alongside
        # the data-loading code.
        vocab_size = num_words

    # Build the model
    model = Sequential([
        Embedding(vocab_size, embedding_dim),
        recurrent_layer,
        Dense(1, activation='sigmoid')
    ])

    # Compile the model
    model.compile(loss=BinaryCrossentropy(), optimizer=Adam(), metrics=['accuracy'])

    # Train the model, validating on the held-out split after every epoch
    history = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(x_test, y_test))

    # Return the history object for later comparison
    return history

In [None]:
# Both runs share the same data; only the recurrent layer differs.
train_eval_data = (x_train, y_train, x_test, y_test)

# Baseline: the built-in Keras LSTM layer
lstm_history = build_and_train_model(LSTM(100), *train_eval_data)

# Candidate: the custom LSTM layer defined above
custom_lstm_history = build_and_train_model(CustomLSTMLayer(100), *train_eval_data)

2023-06-28 20:01:23.854394: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-06-28 20:01:23.896503: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2023-06-28 20:01:24.089121: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gra

Epoch 1/3


2023-06-28 20:01:24.357030: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-28 20:01:24.358266: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-28 20:01:24.359173: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



2023-06-28 20:04:06.908548: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-28 20:04:06.909671: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-28 20:04:06.910533: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [None]:
import matplotlib.pyplot as plt

# Compare per-epoch validation accuracy of the two training runs on one axes
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(lstm_history.history['val_accuracy'], label='LSTM')
ax.plot(custom_lstm_history.history['val_accuracy'], label='Custom LSTM')
ax.set_title('Validation Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()