# Data to sequence

## V1

기준이 $t-N$ 에서 시작

In [None]:
def data2sequence(dataframe,
                  set_points, process_vars,
                  history_size, prediction_size,
                  step=1, start_idx=0, end_idx=None,
                  stride=1):
    """Cut a time-series frame into sliding (history, set point, target) windows.

    The loop index ``i`` is the first history row of a window: history is
    taken from rows ``[i, i + history_size)`` and the prediction horizon from
    rows ``[i + history_size, i + history_size + prediction_size)``.

    Args:
        dataframe: Table indexable by a list of column names whose result
            exposes ``.values`` (e.g. a pandas DataFrame).
        set_points: List of set-point column names.
        process_vars: List of process-variable column names.
        history_size: Number of rows spanned by each history window.
        prediction_size: Number of rows spanned by each prediction horizon.
        step: Sampling step applied inside the history window and, when
            ``prediction_size != 1``, inside the prediction horizon.
        start_idx: First history row of the first window (must be >= 0).
        end_idx: Exclusive upper bound on the rows a window may use;
            defaults to ``len(dataframe)``.
        stride: Offset between the starts of consecutive windows (>= 1).

    Returns:
        Tuple ``(encoder_input, decoder_input, decoder_output)`` of numpy
        arrays: the history rows of ``set_points + process_vars``, the
        set-point rows of the horizon, and the process-variable rows of the
        horizon. When ``prediction_size == 1`` the horizon is a single row,
        so the last two arrays have no time axis.

    Raises:
        AssertionError: If ``start_idx`` is negative or the data is shorter
            than ``history_size + prediction_size``.
        ValueError: If ``stride`` is not a positive integer.
    """
    # Raised explicitly (not via ``assert``) so the checks survive
    # ``python -O`` while keeping the exception type the function used before.
    if start_idx < 0:
        # A negative start would make numpy wrap around and silently mix rows
        # from the end of the series into the first history windows.
        raise AssertionError('start_idx is under 0')
    if stride < 1:
        # A zero or negative stride never advances past the last window.
        raise ValueError('stride must be a positive integer')

    history_data = dataframe[set_points + process_vars].values
    target_data = dataframe[process_vars].values
    sp_data = dataframe[set_points].values

    if end_idx is None:
        end_idx = len(dataframe)
    # Last admissible window start: the horizon must end at or before end_idx.
    last_start = end_idx - history_size - prediction_size
    if last_start < 0:
        raise AssertionError('time-series dataset is not long enough')

    history_sequence = []
    target_sequence = []
    sp_sequence = []

    for i in range(start_idx, last_start + 1, stride):
        target_start = i + history_size
        # Index with a range (not a slice) so out-of-bounds rows raise
        # instead of being silently truncated.
        history_sequence.append(history_data[range(i, target_start, step)])
        if prediction_size == 1:
            # Single-step horizon: keep one row, without a time axis.
            target_sequence.append(target_data[target_start])
            sp_sequence.append(sp_data[target_start])
        else:
            horizon = slice(target_start, target_start + prediction_size, step)
            target_sequence.append(target_data[horizon])
            sp_sequence.append(sp_data[horizon])

    encoder_input = np.array(history_sequence)
    decoder_input = np.array(sp_sequence)
    decoder_output = np.array(target_sequence)

    return encoder_input, decoder_input, decoder_output

## V2

기준이 $t$에서 시작

In [None]:
def data2sequence(dataframe,
                  set_points, process_vars,
                  history_size, prediction_size,
                  step=1, start_idx=0, end_idx=None,
                  stride=1):
    """Cut a time-series frame into sliding (history, set point, target) windows.

    The loop index ``i`` is the first row of the prediction horizon: history
    is taken from rows ``[i - history_size, i)`` and the horizon from rows
    ``[i, i + prediction_size)``. The first horizon starts at
    ``start_idx + history_size``.

    Args:
        dataframe: Table indexable by a list of column names whose result
            exposes ``.values`` (e.g. a pandas DataFrame).
        set_points: List of set-point column names.
        process_vars: List of process-variable column names.
        history_size: Number of rows spanned by each history window.
        prediction_size: Number of rows spanned by each prediction horizon.
        step: Sampling step applied inside the history window and, when
            ``prediction_size != 1``, inside the prediction horizon.
        start_idx: First history row of the first window (must be >= 0).
        end_idx: Exclusive upper bound on the rows a horizon may use;
            defaults to ``len(dataframe)``.
        stride: Offset between consecutive windows (>= 1).

    Returns:
        Tuple ``(encoder_input, decoder_input, decoder_output)`` of numpy
        arrays: the history rows of ``set_points + process_vars``, the
        set-point rows of the horizon, and the process-variable rows of the
        horizon. When ``prediction_size == 1`` the horizon is a single row,
        so the last two arrays have no time axis.

    Raises:
        AssertionError: If ``start_idx`` (before or after the history shift)
            or ``end_idx - prediction_size`` is negative.
        ValueError: If ``stride`` is not a positive integer.
    """
    # Index of the first target row (step t+1 of the first window).
    first_target = start_idx + history_size

    # Raised explicitly (not via ``assert``) so the checks survive
    # ``python -O`` while keeping the exception type the function used before.
    # The caller's start_idx is checked *before* the shift as well: otherwise
    # a value in (-history_size, 0) passes and numpy wraps the history window
    # around to the end of the series.
    if start_idx < 0 or first_target < 0:
        raise AssertionError('start_idx is under 0')
    if stride < 1:
        # A zero or negative stride never advances past the last window.
        raise ValueError('stride must be a positive integer')

    history_data = dataframe[set_points + process_vars].values
    target_data = dataframe[process_vars].values
    sp_data = dataframe[set_points].values

    if end_idx is None:
        end_idx = len(dataframe)
    # Last admissible horizon start: the horizon must end at or before end_idx.
    last_target = end_idx - prediction_size
    if last_target < 0:
        raise AssertionError('end_idx is under 0')

    history_sequence = []
    target_sequence = []
    sp_sequence = []

    for i in range(first_target, last_target + 1, stride):
        # Index with a range (not a slice) so out-of-bounds rows raise
        # instead of being silently truncated.
        history_sequence.append(history_data[range(i - history_size, i, step)])
        if prediction_size == 1:
            # Single-step horizon: keep one row, without a time axis.
            target_sequence.append(target_data[i])
            sp_sequence.append(sp_data[i])
        else:
            horizon = slice(i, i + prediction_size, step)
            target_sequence.append(target_data[horizon])
            sp_sequence.append(sp_data[horizon])

    encoder_input = np.array(history_sequence)
    decoder_input = np.array(sp_sequence)
    decoder_output = np.array(target_sequence)

    return encoder_input, decoder_input, decoder_output

# RNN model

## V1

**input**: process variables and set points from $t-N+1$ to $t$ and set point at $t+1$

**output**: process variables at $t+1$

In [None]:
def Seq2VecLSTM(
    history_size, history_dim, 
    prediction_size, prediction_dim,
    rnn_neurons = [100], dense_neurons = [100]
):
    """Build and compile an LSTM encoder followed by a dense head.

    The history sequence is encoded by a stack of LSTM layers; the output of
    the last LSTM is concatenated with a second (set-point) input and passed
    through a stack of Dense layers ending in a ``prediction_dim``-wide layer.

    Args:
        history_size: Number of time steps in the encoder input.
        history_dim: Number of features per time step in the encoder input.
        prediction_size: Not used by this builder; kept in the signature.
        prediction_dim: Width of the model output. The second input has
            ``history_dim - prediction_dim`` features.
        rnn_neurons: Units of each LSTM layer, in order (at least one).
            The default list is only read, never mutated.
        dense_neurons: Units of each hidden Dense layer, in order (at least
            one). The default list is only read, never mutated.

    Returns:
        A compiled Keras ``Model`` taking ``[encoder_input, sp_input]``,
        with MSE loss and an Adam optimizer.

    Raises:
        ValueError: If ``rnn_neurons`` or ``dense_neurons`` is empty.
    """
    if len(rnn_neurons) == 0:
        raise ValueError('rnn_neurons must contain at least one layer size')
    if len(dense_neurons) == 0:
        raise ValueError('dense_neurons must contain at least one layer size')

    encoder_input = Input(shape=(history_size, history_dim))

    # encoder module
    # Layer names depend on the stack depth and position; they are kept
    # exactly as before so name-based weight loading still matches.
    num_rnn_layers = len(rnn_neurons)
    encoder_output = encoder_input
    for i, neurons in enumerate(rnn_neurons):
        is_last = i == num_rnn_layers - 1
        if num_rnn_layers == 1:
            layer_name = 'encoder'
        elif i == 0:
            layer_name = 'first_encoder'
        elif not is_last:
            layer_name = f'encoder_{i+1}'
        else:
            layer_name = 'last_encoder'

        if is_last:
            # The last LSTM emits only its final output; the returned states
            # are not used further.
            encoder_output, _, _ = LSTM(
                neurons, return_state=True, name=layer_name
            )(encoder_output)
        else:
            # Intermediate LSTMs must emit the full sequence for the next one.
            encoder_output = LSTM(
                neurons, return_sequences=True, name=layer_name
            )(encoder_output)

    # context + set_point
    # ``shape`` must be a tuple: the trailing comma matters, otherwise a bare
    # int is passed.
    sp_input = Input(shape=(history_dim - prediction_dim,))
    combined_input = concatenate([encoder_output, sp_input])

    # dense module
    # Every hidden layer uses its configured width, including the
    # single-layer case (which previously ignored ``dense_neurons[0]``).
    num_dense_layers = len(dense_neurons)
    dense_output = combined_input
    for i, neurons in enumerate(dense_neurons):
        if num_dense_layers == 1:
            layer_name = 'dense'
        elif i == 0:
            layer_name = 'first_dense'
        else:
            layer_name = f'dense_{i+1}'
        dense_output = Dense(neurons, name=layer_name)(dense_output)
    dense_output = Dense(prediction_dim, name='last_dense')(dense_output)

    # model compile
    model = Model([encoder_input, sp_input], dense_output)
    optimizer = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
    model.compile(loss='mse', optimizer=optimizer)

    return model

## V2

**input**: process variables and set points from $t-N+1$ to $t$

**output**: process variables at $t+1$