# Standard LSTM (TensorFlow)

In [10]:
import numpy as np
import tensorflow as tf

# Hyperparameters shared by the model definition, data generation and training below.
HIDDEN_SIZE = 50  # Passed to LSTMModel as hidden_size (embedding width and LSTM units)
BATCH_SIZE = 32  # Mini-batch size handed to model.fit
LEARNING_RATE = 0.001  # Learning rate for the Adam optimizer
SEQ_LENGTH = 20  # Inclusive upper bound on the number of digits per generated sequence
VOCAB_SIZE = 10  # Digits 0-9; the id VOCAB_SIZE itself is appended to inputs as the delimiter token
DELIMITER = "="  # Printable delimiter symbol; not referenced by the visible code (presumably for display — confirm)

class LSTMCell(tf.keras.layers.Layer):
    """Single-time-step LSTM cell with one explicit weight set per gate.

    For every gate (input, forget, output) and for the candidate cell state
    the cell owns an input kernel ``W_*`` of shape ``(input_dim, units)``, a
    recurrent kernel ``U_*`` of shape ``(units, units)`` and a bias ``b_*`` of
    shape ``(units,)``.

    ``state_size`` and ``output_size`` are exposed so that the cell can be
    wrapped by ``tf.keras.layers.RNN``, which reads them to create the initial
    ``[hidden, cell]`` state and to infer the output shape.

    Args:
        units: Width of the hidden state and of the cell state.
        **kwargs: Standard ``tf.keras.layers.Layer`` keyword arguments
            (for example ``name`` or ``dtype``).
    """

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        # Required by tf.keras.layers.RNN: one entry per state tensor
        # (hidden state, cell state), plus the per-step output width.
        self.state_size = [units, units]
        self.output_size = units

    def _add_gate_weights(self, input_dim, gate):
        """Create the (input kernel, recurrent kernel, bias) triple for one gate."""
        kernel = self.add_weight(
            name=f"W_{gate}",
            shape=(input_dim, self.units),
            initializer='random_normal',
            trainable=True,
        )
        recurrent_kernel = self.add_weight(
            name=f"U_{gate}",
            shape=(self.units, self.units),
            initializer='random_normal',
            trainable=True,
        )
        bias = self.add_weight(
            name=f"b_{gate}",
            shape=(self.units,),
            initializer='zeros',
            trainable=True,
        )
        return kernel, recurrent_kernel, bias

    def build(self, input_shape):
        """Create all trainable weights; ``input_shape[-1]`` is the feature size."""
        input_dim = input_shape[-1]
        # Creation order (input, forget, output, candidate) is kept stable so
        # that the ordering of ``self.weights`` does not change.
        self.W_input, self.U_input, self.b_input = self._add_gate_weights(input_dim, "input")
        self.W_forget, self.U_forget, self.b_forget = self._add_gate_weights(input_dim, "forget")
        self.W_output, self.U_output, self.b_output = self._add_gate_weights(input_dim, "output")
        (
            self.W_c_compliment,
            self.U_c_compliment,
            self.b_c_compliment,
        ) = self._add_gate_weights(input_dim, "c_compliment")

    def call(self, x, states):
        """Advance the cell by one time step.

        Args:
            x: Input for the current step; its last dimension must match the
                ``input_dim`` seen in ``build``.
            states: Pair ``(hidden_prev, cell_prev)`` from the previous step.

        Returns:
            ``(hidden_state, [hidden_state, cell_state])`` — the step output
            followed by the new state list.
        """
        hidden_prev, cell_prev = states

        input_gate = tf.sigmoid(tf.matmul(x, self.W_input) + tf.matmul(hidden_prev, self.U_input) + self.b_input)
        forget_gate = tf.sigmoid(tf.matmul(x, self.W_forget) + tf.matmul(hidden_prev, self.U_forget) + self.b_forget)
        output_gate = tf.sigmoid(tf.matmul(x, self.W_output) + tf.matmul(hidden_prev, self.U_output) + self.b_output)
        # Candidate cell state (called "c_compliment" throughout this file).
        c_compliment = tf.tanh(tf.matmul(x, self.W_c_compliment) + tf.matmul(hidden_prev, self.U_c_compliment) + self.b_c_compliment)

        # Keep part of the old cell state and blend in the gated candidate.
        cell_state = forget_gate * cell_prev + input_gate * c_compliment
        hidden_state = output_gate * tf.tanh(cell_state)

        return hidden_state, [hidden_state, cell_state]

class LSTMModel(tf.keras.Model):
    """Embedding -> LSTM -> per-step softmax classifier.

    Args:
        vocab_size: Number of output classes; the embedding table holds
            ``vocab_size + 2`` rows.
        hidden_size: Embedding width and number of LSTM units.
    """

    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        # +2 for delimiter and padding
        embedding_rows = vocab_size + 2
        self.embedding = tf.keras.layers.Embedding(embedding_rows, hidden_size)
        self.lstm = tf.keras.layers.LSTM(
            hidden_size,
            return_sequences=True,
            return_state=True,
        )
        self.dense = tf.keras.layers.Dense(vocab_size, activation='softmax')

    def call(self, inputs, states=None):
        """Run the network on a batch of token ids.

        Args:
            inputs: Integer token ids fed to the embedding layer.
            states: Optional initial LSTM state, forwarded as ``initial_state``.

        Returns:
            ``(output, [hidden_state, cell_state])`` where ``output`` is the
            softmax distribution produced for every time step.
        """
        embedded = self.embedding(inputs)
        lstm_result = self.lstm(embedded, initial_state=states)
        sequence_output, hidden_state, cell_state = lstm_result
        probabilities = self.dense(sequence_output)
        return probabilities, [hidden_state, cell_state]

# Data Generation
def generate_data(num_samples=10000, pad_token=None, target_pad_value=0):
    """Generate random digit sequences as fixed-width input/target arrays.

    Each sample is a random run of 1..SEQ_LENGTH digits in [0, VOCAB_SIZE).
    The input row holds the digits, then the delimiter id (``VOCAB_SIZE``),
    then ``pad_token`` up to the common width ``SEQ_LENGTH + 1``. The target
    row holds the same digits at the same positions and ``target_pad_value``
    everywhere else.

    Rows are padded because the sequence lengths differ and NumPy cannot
    build a rectangular int32 array from rows of unequal length. Targets
    share the input width so they line up position by position with a model
    that emits one prediction per input step.

    Args:
        num_samples: Number of sequences to generate.
        pad_token: Id used to pad inputs; defaults to ``VOCAB_SIZE + 1``
            (the id right after the delimiter).
        target_pad_value: Label written at delimiter/padding positions of the
            targets. The default 0 stays inside [0, VOCAB_SIZE) so it is a
            valid class id, but it is indistinguishable from the digit 0;
            mask those positions if they must not contribute to the loss.

    Returns:
        Tuple ``(X, Y)`` of int32 arrays, both of shape
        ``(num_samples, SEQ_LENGTH + 1)``.
    """
    delimiter_token = VOCAB_SIZE
    if pad_token is None:
        pad_token = VOCAB_SIZE + 1
    width = SEQ_LENGTH + 1  # longest digit run plus the delimiter

    X_data = np.full((num_samples, width), pad_token, dtype=np.int32)
    Y_data = np.full((num_samples, width), target_pad_value, dtype=np.int32)
    for row in range(num_samples):
        seq_length = np.random.randint(1, SEQ_LENGTH + 1)
        digits = np.random.randint(0, VOCAB_SIZE, size=(seq_length,))
        X_data[row, :seq_length] = digits
        X_data[row, seq_length] = delimiter_token  # Append delimiter
        Y_data[row, :seq_length] = digits  # Expected output (sequence before delimiter)
    return X_data, Y_data

# Prepare dataset: generate the synthetic sequences, then split them in
# generation order into 70% training and 30% validation samples.
X, Y = generate_data()
split_idx = int(0.7 * len(X))  # Index of the first validation sample
X_train, Y_train = X[:split_idx], Y[:split_idx]
X_val, Y_val = X[split_idx:], Y[split_idx:]

# Model Training
model = LSTMModel(VOCAB_SIZE, HIDDEN_SIZE)
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
# Targets are integer class ids, hence the sparse cross-entropy variant.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

# NOTE(review): LSTMModel.call returns (output, [hidden_state, cell_state]);
# confirm that compile/fit applies the loss and the accuracy metric only to
# the softmax output and not to the returned LSTM states.
# NOTE(review): assumes Y_* lines up step by step with the model's
# per-time-step output — TODO confirm against generate_data.
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])
history = model.fit(X_train, Y_train, validation_data=(X_val, Y_val), batch_size=BATCH_SIZE, epochs=10, verbose=1)

# Print training statistics: one summary line per epoch, read back from the
# History object returned by fit.
for epoch in range(len(history.history['loss'])):
    print(f"Epoch {epoch+1}/{len(history.history['loss'])} - Loss: {history.history['loss'][epoch]:.4f}, Accuracy: {history.history['accuracy'][epoch]:.4f}, Val Loss: {history.history['val_loss'][epoch]:.4f}, Val Accuracy: {history.history['val_accuracy'][epoch]:.4f}")


Loss after one training step: 2.393489122390747

Test input sequence: 3541=
Predicted output: 83898
