In [7]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.optimizers import Adam

# Generate a dataset
def generate_sequence(length, num_sequences):
    """Build overlapping runs of consecutive integers.

    Row ``i`` holds the values ``i, i + 1, ..., i + length - 1``.

    Args:
        length: Number of values in each row.
        num_sequences: Number of rows to generate; row ``i`` starts at ``i``.

    Returns:
        Array of shape ``(num_sequences, length)``.
    """
    starts = np.arange(num_sequences)[:, np.newaxis]
    offsets = np.arange(length)[np.newaxis, :]
    # Broadcasting keeps the result 2-D even when num_sequences == 0, so
    # column slicing such as ``sequences[:, :-1]`` still works on it.
    return starts + offsets

# Dataset dimensions
sequence_length = 5   # values per generated row
num_sequences = 1000  # number of rows

# Build the dataset; row i counts upward from i
sequences = generate_sequence(length=sequence_length, num_sequences=num_sequences)

In [8]:
sequences

array([[   0,    1,    2,    3,    4],
       [   1,    2,    3,    4,    5],
       [   2,    3,    4,    5,    6],
       ...,
       [ 997,  998,  999, 1000, 1001],
       [ 998,  999, 1000, 1001, 1002],
       [ 999, 1000, 1001, 1002, 1003]])

In [9]:
# Split every row into the model input (all values but the last) and the
# target (all values but the first), i.e. the input shifted by one step.
X, y = sequences[:, :-1], sequences[:, 1:]

In [10]:
X

array([[   0,    1,    2,    3],
       [   1,    2,    3,    4],
       [   2,    3,    4,    5],
       ...,
       [ 997,  998,  999, 1000],
       [ 998,  999, 1000, 1001],
       [ 999, 1000, 1001, 1002]])

In [11]:
y

array([[   1,    2,    3,    4],
       [   2,    3,    4,    5],
       [   3,    4,    5,    6],
       ...,
       [ 998,  999, 1000, 1001],
       [ 999, 1000, 1001, 1002],
       [1000, 1001, 1002, 1003]])

In [12]:
# Add a trailing feature axis of size 1 so the arrays are
# [samples, time steps, features], the layout the LSTM layers are given.
X = X.reshape(X.shape[:2] + (1,))
y = y.reshape(y.shape[:2] + (1,))


In [13]:
X

array([[[   0],
        [   1],
        [   2],
        [   3]],

       [[   1],
        [   2],
        [   3],
        [   4]],

       [[   2],
        [   3],
        [   4],
        [   5]],

       ...,

       [[ 997],
        [ 998],
        [ 999],
        [1000]],

       [[ 998],
        [ 999],
        [1000],
        [1001]],

       [[ 999],
        [1000],
        [1001],
        [1002]]])

In [14]:
y

array([[[   1],
        [   2],
        [   3],
        [   4]],

       [[   2],
        [   3],
        [   4],
        [   5]],

       [[   3],
        [   4],
        [   5],
        [   6]],

       ...,

       [[ 998],
        [ 999],
        [1000],
        [1001]],

       [[ 999],
        [1000],
        [1001],
        [1002]],

       [[1000],
        [1001],
        [1002],
        [1003]]])

None: This dimension represents the time steps of the input sequences. Setting it to **None** means that the length of the input sequences can vary. This makes the model flexible to handle input sequences of different lengths. In sequence-to-sequence models, the length of the input sequence can vary depending on the specific application or dataset.


1: This dimension represents the number of features at each time step. Here **1** indicates that at each time step, there is only one feature. This is because our data consists of single numerical values (e.g., a sequence of numbers).

# Encoder

In [None]:
# Encoder: reads a variable-length, single-feature sequence and exposes the
# LSTM states that the decoder is initialised with.
encoder_inputs = Input(shape=(None, 1))
encoder_lstm = LSTM(units=50, return_state=True)
encoder_outputs, *encoder_states = encoder_lstm(encoder_inputs)
state_h, state_c = encoder_states  # hidden state, cell state

# Decoder

In [None]:
# Decoder: starts from the encoder states and emits one value per time step.
decoder_inputs = Input(shape=(None, 1))
decoder_lstm = LSTM(units=50, return_sequences=True, return_state=True)
# Only the per-step outputs are used for training; the returned states are dropped.
decoder_outputs = decoder_lstm(decoder_inputs, initial_state=encoder_states)[0]
decoder_dense = Dense(units=1)
decoder_outputs = decoder_dense(decoder_outputs)

The Dense(1) layer in the decoder part of an LSTM Encoder-Decoder model is used to transform the high-dimensional output from the LSTM to a single value per time step.

This transformation is necessary to match the desired output shape, where each time step in the output sequence corresponds to a single predicted value.

It simplifies the task of the model by focusing on generating the final predicted values in the appropriate format.

In [17]:
# Decoder input for training: the target sequences shifted right by one step,
# with a 0 placed in the first position of every sequence.
start_column = np.zeros_like(y[:, :1, :])
decoder_input_data = np.concatenate([start_column, y[:, :-1, :]], axis=1)
decoder_input_data

array([[[   0],
        [   1],
        [   2],
        [   3]],

       [[   0],
        [   2],
        [   3],
        [   4]],

       [[   0],
        [   3],
        [   4],
        [   5]],

       ...,

       [[   0],
        [ 998],
        [ 999],
        [1000]],

       [[   0],
        [ 999],
        [1000],
        [1001]],

       [[   0],
        [1000],
        [1001],
        [1002]]])

In [None]:
# Training model: (encoder input, decoder input) -> decoder prediction
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)

**Input Tensors:** It takes both **encoder_inputs** and **decoder_inputs** as the inputs to the model.
This helps the model to learn the mapping from input sequences (encoder inputs) to output sequences (decoder outputs) with the help of decoder inputs during training.


**Output Tensor:** The decoder_outputs tensor is the output of the model, representing the predicted sequence.

In [None]:
# Configure training: Adam with its default settings and a mean-squared-error loss
model.compile(loss='mse', optimizer=Adam())

In [None]:
# Training hyperparameters
epochs = 20
batch_size = 64

# Fit the model: the encoder receives X, the decoder receives the shifted
# targets, and the loss compares the decoder output against y.
# NOTE(review): the rows are ordered by start value, and Keras is documented to
# take `validation_split` from the end of the arrays, so validation presumably
# covers only the largest values -- confirm this is intended.
history = model.fit(
    x=[X, decoder_input_data],
    y=y,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
)


**X:** This is the input data for the encoder.
It consists of sequences that the encoder will process. Its shape is (samples, time_steps, features):

**samples** is the number of sequences.

**time_steps** is the length of each sequence.

**features** is the number of features per time step (usually 1 in simple sequence tasks).


**Role:** The encoder processes these sequences to compress the information into a context vector (a set of states), which captures the input sequence's important features. This **context vector** is then passed to the decoder as its initial state.

**decoder_input_data** contains the sequences fed into the decoder during training.

During training, the decoder requires input sequences to learn the mapping from the encoder states to the target sequences. These inputs are usually the ground truth shifted by one time step.

**Teacher Forcing:** This technique feeds the decoder the ground-truth value from the previous time step as its input at each step, rather than the decoder's own previous prediction. This helps the model learn to predict the next time step more effectively.


**WORKING**

Training with Encoder and Decoder:

During training, the model learns to map input sequences to output sequences.

**Encoder:** The encoder takes X as input and processes it through its LSTM layers, resulting in a set of states (hidden state and cell state). These states encapsulate the information from the input sequence.

**Decoder:** The decoder takes two inputs during training:
Initial States: The states generated by the encoder.

**Input Sequences:** decoder_input_data, which are sequences that help the decoder learn the correct mapping to the target sequences.


**What if we use only X?**

If we only used X, the model would have no way of knowing the actual sequence
it is supposed to generate during training.
The decoder needs **guidance** in the form of **decoder_input_data** to learn the correct sequence.

The encoder compresses the input sequence into states, but the decoder still needs the sequence context provided by decoder_input_data to predict the next elements in the sequence correctly.


# INFERENCE

In [None]:
# Inference encoder: input sequence -> [hidden state, cell state]
encoder_model = Model(inputs=encoder_inputs, outputs=encoder_states)

encoder_model is used for Inference

The primary function of the encoder during inference is to convert the input sequence into a set of initial states (hidden state and cell state) for the decoder.

# Decoder State Inputs

In [None]:
# Inference-time inputs that carry the decoder LSTM's hidden and cell state
# between decoding steps; the width of 50 matches the LSTM layers.
decoder_states_inputs = [Input(shape=(50,)), Input(shape=(50,))]
decoder_state_input_h, decoder_state_input_c = decoder_states_inputs

These inputs are placeholders for the decoder's initial states (hidden state and cell state) during inference.


The decoder requires the initial states from the encoder to start generating the output sequence. These placeholders will be filled with the actual state values during each step of the inference process.


In [None]:
# Apply the same decoder_lstm layer again, this time seeded from the state
# inputs instead of the encoder states, and keep the states it returns.
# Note: this rebinds decoder_outputs, state_h and state_c, which earlier cells
# bound to the tensors of the training graph.
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)

During each step of inference, the decoder takes the current input and the previous states to produce the next output and update the states.

By specifying **initial_state=decoder_states_inputs**, we allow the LSTM to use the provided states as its starting point.

In [None]:
# Project the LSTM outputs to one value per step with the shared Dense layer,
# and bundle the returned states so they can be passed back in on the next step.
decoder_outputs = decoder_dense(decoder_outputs)
decoder_states = [state_h, state_c]

**Purpose**: This line groups the updated states into a list. These updated states will be fed back into the decoder in the next time step.


**Explanation**: The states need to be updated after each time step to accurately reflect the context of the sequence generated so far. This list of updated states will be used as the initial state for the next decoding step.

In [None]:
# Inference decoder: (step input, previous h, previous c) -> (prediction, new h, new c)
decoder_model = Model(
    inputs=[decoder_inputs, *decoder_states_inputs],
    outputs=[decoder_outputs, *decoder_states],
)

This is the decoder model used for inference in the sequence-to-sequence LSTM.

**[decoder_inputs] + decoder_states_inputs** --> Input to Decoder Model

Concatenating **[decoder_inputs] and decoder_states_inputs** specifies all the inputs the decoder model requires to start generating the output sequence.


**[decoder_outputs] + decoder_states** --> Output from Model

By concatenating **[decoder_outputs] and decoder_states**, we specify all the outputs returned by the decoder model after processing the input sequence.

**During training**, the decoder is part of the larger model that includes the encoder and is trained end-to-end.


**During inference**, however, we need a separate model that can generate sequences based on previously unseen inputs and states.


**The decoder_model** allows us to initialize the LSTM states (decoder_states_inputs) and update them (decoder_states) across multiple time steps as the sequence is generated.


Passing the states explicitly from one decoding step to the next ensures that the context of the sequence is preserved and contributes to accurate sequence generation.

In [15]:





# Function to predict the next sequence
def predict_sequence(input_seq, n_steps=None, encoder=None, decoder=None):
    """Generate a sequence one value at a time from an encoded input.

    The input is encoded into LSTM states once; the decoder is then called
    repeatedly, each time receiving its own previous prediction and states.

    Args:
        input_seq: Array of shape (1, time steps, 1) passed to the encoder.
        n_steps: Number of values to generate. Defaults to
            ``sequence_length - 1``, the length of the training targets.
        encoder: Object whose ``predict`` returns the two initial decoder
            states. Defaults to the notebook-level ``encoder_model``.
        decoder: Object whose ``predict`` maps ``[step_input, h, c]`` to
            ``(outputs, h, c)``. Defaults to the notebook-level
            ``decoder_model``.

    Returns:
        List with one predicted value per step (empty when ``n_steps <= 0``).
    """
    if encoder is None:
        encoder = encoder_model
    if decoder is None:
        decoder = decoder_model
    if n_steps is None:
        n_steps = sequence_length - 1

    # Encode the input as the decoder's initial [h, c] states.
    states_value = list(encoder.predict(input_seq, verbose=0))

    # The first decoder input is 0, exactly as in decoder_input_data during
    # training; feeding anything else makes inference differ from training.
    target_seq = np.zeros((1, 1, 1))

    decoded_seq = []
    # A bounded loop: generating zero steps returns [] instead of never ending.
    for _ in range(n_steps):
        output_tokens, h, c = decoder.predict(
            [target_seq] + states_value, verbose=0)

        # Take the prediction for the last (only) time step.
        sampled_value = output_tokens[0, -1, 0]
        decoded_seq.append(sampled_value)

        # Feed the prediction and the updated states into the next step.
        target_seq = np.zeros((1, 1, 1))
        target_seq[0, 0, 0] = sampled_value
        states_value = [h, c]

    return decoded_seq



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [16]:
# Sample input for the demo: the values 0..3 as one sequence of four
# single-feature time steps.
test_seq = np.arange(4).reshape((1, 4, 1))

In [None]:
# Decode from the sample input and show the input next to the prediction.
predicted_seq = predict_sequence(test_seq)
print("Input sequence:", test_seq.ravel())
print("Predicted sequence:", predicted_seq)
