Reference article: [Building Seq2Seq LSTM with Luong Attention in Keras for Time Series Forecasting](https://levelup.gitconnected.com/building-seq2seq-lstm-with-luong-attention-in-keras-for-time-series-forecasting-1ee00958decb)

In [5]:
# Switch to the Kaggle input directory so the relative CSV path read below resolves
%cd /kaggle/input/ann-time-series/

In [6]:
# Dependencies used throughout the notebook
import random
import tensorflow as tf
import numpy as np
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Use a larger default font size for every matplotlib figure
plt.rc('font', size=16)
from sklearn.preprocessing import MinMaxScaler
import warnings
# Silence all Python warnings and TensorFlow log messages below ERROR level
warnings.filterwarnings('ignore')
tf.get_logger().setLevel('ERROR')

# Short aliases for the Keras API and its layers module
tfk = tf.keras
tfkl = tf.keras.layers

In [7]:
# Seed every random number generator the notebook relies on with one shared value
seed = 42

os.environ['PYTHONHASHSEED'] = str(seed)
# Python's `random`, NumPy and both TensorFlow seeding entry points
for set_seed in (random.seed, np.random.seed, tf.random.set_seed, tf.compat.v1.set_random_seed):
    set_seed(seed)

In [8]:
# Load the training data (path is relative to the current working directory)
dataset = pd.read_csv('Training.csv')
print(dataset.shape)
# Last expression of the cell: rendered as a preview of the first rows
dataset.head()

In [9]:
def inspect_dataframe(df, columns):
    """Plot each requested column of `df` in its own vertically stacked subplot.

    Args:
        df: DataFrame (or any mapping of column name -> sequence) to plot.
        columns: Iterable of column names; one subplot is drawn per name,
            all sharing the same x axis.
    """
    columns = list(columns)
    # squeeze=False keeps `axs` two-dimensional even when only one column is
    # requested; otherwise plt.subplots returns a bare Axes object and
    # indexing it fails.
    fig, axs = plt.subplots(len(columns), 1, sharex=True, figsize=(17,17), squeeze=False)
    for ax, col in zip(axs[:, 0], columns):
        ax.plot(df[col])
        ax.set_title(col)
    plt.show()
inspect_dataframe(dataset, dataset.columns)

In [10]:
# Work on a copy so the raw `dataset` frame stays untouched
X_train_raw = dataset.copy()
print(X_train_raw.shape)

# Min-max scale every column (features and labels alike) using per-column extrema
X_min, X_max = X_train_raw.min(), X_train_raw.max()
X_train_raw = (X_train_raw - X_min) / (X_max - X_min)

# Visual check of one normalized series
fig, ax = plt.subplots(figsize=(17,5))
ax.plot(X_train_raw['Sponginess'], label='Train (Sponginess)')
ax.set_title('Data Normalized')
ax.legend()
plt.show()

In [11]:
# Number of consecutive timesteps in each input sequence
window = 400
# Step, in timesteps, between the starts of consecutive sequences
stride = 10

In [12]:
# Most recent `window` rows of the raw data (positional slice of the tail)
last_window = dataset.iloc[-window:]
# Scale with the same per-column min/max that was applied to the training data
last_window_scaled = (last_window - X_min) / (X_max - X_min)
# Prepend a batch axis -> shape (1, window, n_columns)
future = np.expand_dims(last_window_scaled, axis=0)
future.shape

In [13]:
def build_sequences(df, target_labels, window, stride, telescope):
    """Cut a multivariate time series into (input window, future target) pairs.

    If ``len(df)`` is not a multiple of ``window``, the series is left-padded
    with zeros (at the BEGINNING of the data) up to the next multiple.

    Args:
        df: DataFrame holding the series, one row per timestep.
        target_labels: Column labels (list-like) of the series to predict.
        window: Number of timesteps in each input sequence.
        stride: Step between the starts of consecutive sequences; must divide
            ``window``.
        telescope: Number of timesteps to predict after each input window.

    Returns:
        Tuple ``(dataset, labels)`` of NumPy arrays with shapes
        ``(n_sequences, window, n_columns)`` and
        ``(n_sequences, telescope, n_targets)``.

    Raises:
        AssertionError: if ``window`` is not a multiple of ``stride``.
    """
    # Sanity check to avoid runtime errors
    assert window % stride == 0
    # .values converts to NumPy arrays; the copy decouples them from `df`
    temp_df = df.copy().values
    # Columns to forecast at every step
    temp_label = df[target_labels].copy().values

    remainder = len(df) % window
    if remainder != 0:
        # Zero-pad at the BEGINNING so the padded length is a multiple of `window`
        padding_len = window - remainder
        padding = np.zeros((padding_len, temp_df.shape[1]), dtype='float64')
        temp_df = np.concatenate((padding, temp_df))
        padding = np.zeros((padding_len, temp_label.shape[1]), dtype='float64')
        temp_label = np.concatenate((padding, temp_label))
        assert len(temp_df) % window == 0

    dataset = []
    labels = []
    # The last valid start index is len - window - telescope (inclusive), hence
    # the +1 on the exclusive stop; without it the final pair is always dropped.
    last_start = len(temp_df) - window - telescope
    for idx in np.arange(0, last_start + 1, stride):
        dataset.append(temp_df[idx:idx+window])
        labels.append(temp_label[idx+window:idx+window+telescope])

    return np.array(dataset), np.array(labels)

In [14]:
def inspect_multivariate(X, y, columns, telescope, idx=None):
    """Plot one input window and its future target values, one subplot per column.

    Args:
        X: Array indexed as ``X[sample, timestep, column]`` with the input windows.
        y: Array indexed as ``y[sample, step, column]`` with ``telescope``
            target steps per sample.
        columns: Column names; the i-th name titles the i-th subplot.
        telescope: Number of future steps stored in ``y`` for each sample.
        idx: Index of the sample to plot; a random sample is drawn when None.
    """
    if idx is None:
        idx = np.random.randint(0, len(X))

    columns = list(columns)
    # Window length comes from the `X` argument itself, not from a notebook
    # global, so the helper works for any array passed in.
    window_len = X.shape[1]
    past_steps = np.arange(window_len)
    future_steps = np.arange(window_len, window_len + telescope)

    # squeeze=False keeps `axs` indexable even with a single column
    fig, axs = plt.subplots(len(columns), 1, sharex=True, figsize=(17,17), squeeze=False)
    for i, (ax, col) in enumerate(zip(axs[:, 0], columns)):
        ax.plot(past_steps, X[idx,:,i])
        ax.scatter(future_steps, y[idx,:,i], color='orange')
        ax.set_title(col)
        # Fixed y range: the data is expected to be min-max normalized to [0, 1]
        ax.set_ylim(0,1)
    plt.show()

In [15]:
def inspect_multivariate_prediction(X, y, pred, columns, telescope, idx=None):
    """Plot one input window with its true and predicted future, one subplot per column.

    Args:
        X: Array indexed as ``X[sample, timestep, column]`` with the input windows.
        y: Array indexed as ``y[sample, step, column]`` with the true future
            values (``telescope`` steps per sample), drawn in orange.
        pred: Array with the same layout as ``y`` holding the predictions,
            drawn in green.
        columns: Column names; the i-th name titles the i-th subplot.
        telescope: Number of future steps stored in ``y`` / ``pred`` per sample.
        idx: Index of the sample to plot; a random sample is drawn when None.
    """
    if idx is None:
        idx = np.random.randint(0, len(X))

    columns = list(columns)
    # Window length comes from the `X` argument itself, not from a notebook
    # global, so the helper works for any array passed in.
    window_len = X.shape[1]
    past_steps = np.arange(window_len)
    future_steps = np.arange(window_len, window_len + telescope)

    # squeeze=False keeps `axs` indexable even with a single column
    fig, axs = plt.subplots(len(columns), 1, sharex=True, figsize=(17,17), squeeze=False)
    for i, (ax, col) in enumerate(zip(axs[:, 0], columns)):
        ax.plot(past_steps, X[idx,:,i])
        ax.plot(future_steps, y[idx,:,i], color='orange')
        ax.plot(future_steps, pred[idx,:,i], color='green')
        ax.set_title(col)
        # Fixed y range: the data is expected to be min-max normalized to [0, 1]
        ax.set_ylim(0,1)
    plt.show()

In [16]:
# Forecast every column of the dataset (all series are both inputs and targets)
target_labels = dataset.columns
# Number of future timesteps to predict after each input window
telescope = 20

In [17]:
# Slice the normalized series into (input window, future target) training pairs
X_train, y_train = build_sequences(X_train_raw, target_labels, window, stride, telescope)
X_train.shape, y_train.shape

In [18]:
# Number of units in the encoder and decoder LSTM layers
n_hidden = 100

In [19]:
# Symbolic model input: one window of shape (timesteps, features)
input_train = tfkl.Input(shape=(X_train.shape[1], X_train.shape[2]))
# Placeholder mirroring the target shape (telescope, features); the following
# cells only read its .shape to size the decoder and the output layer
output_train = tfkl.Input(shape=(y_train.shape[1], y_train.shape[2]))
print(input_train)
print(output_train)

In [21]:
# Encoder: returns the hidden state at every input timestep (needed by the
# attention step) plus the final hidden and cell states (used to seed the decoder)
encoder_lstm = tfkl.LSTM(
    units=n_hidden,
    activation='elu',
    dropout=0.2,
    recurrent_dropout=0.2,
    return_sequences=True,
    return_state=True,
)
encoder_stack_h, encoder_last_h, encoder_last_c = encoder_lstm(input_train)
for encoder_tensor in (encoder_stack_h, encoder_last_h, encoder_last_c):
    print(encoder_tensor)

In [22]:
# Batch-normalize the encoder's final hidden and cell states before they seed the decoder.
# NOTE: both names are rebound here, so re-running this cell on its own stacks
# another BatchNormalization layer on top of the previous result.
encoder_last_h = tfkl.BatchNormalization(momentum=0.6)(encoder_last_h)
encoder_last_c = tfkl.BatchNormalization(momentum=0.6)(encoder_last_c)

In [24]:
# Repeat the final encoder hidden state once per forecast step to form the decoder input
decoder_input = tfkl.RepeatVector(output_train.shape[1])(encoder_last_h)
print(decoder_input)

In [25]:
# Decoder: starts from the encoder's final states and emits one hidden state
# per forecast step
decoder_lstm = tfkl.LSTM(
    units=n_hidden,
    activation='elu',
    dropout=0.2,
    recurrent_dropout=0.2,
    return_sequences=True,
    return_state=False,
)
decoder_stack_h = decoder_lstm(
    decoder_input,
    initial_state=[encoder_last_h, encoder_last_c],
)
print(decoder_stack_h)

In [28]:
# Alignment scores: dot product over the hidden-unit axis between every decoder
# state and every encoder state
attention_scores = tfkl.dot([decoder_stack_h, encoder_stack_h], axes=[2, 2])
# Softmax over the last axis turns the scores for each decoder step into
# weights over the encoder timesteps
attention = tfkl.Activation('softmax')(attention_scores)
print(attention)

In [29]:
# Context vector per decoder step: attention-weighted sum of the encoder states
weighted_encoder_states = tfkl.dot([attention, encoder_stack_h], axes=[2,1])
context = tfkl.BatchNormalization(momentum=0.6)(weighted_encoder_states)
print(context)

In [31]:
# Join the attention context with the decoder states along the last (feature) axis
decoder_combined_context = tfkl.concatenate([context, decoder_stack_h])
print(decoder_combined_context)

In [34]:
# Apply the same Dense layer at every forecast step, producing one value per target column
out = tfkl.TimeDistributed(tfkl.Dense(output_train.shape[2]))(decoder_combined_context)
print(out)

In [36]:
# Assemble the encoder-decoder graph into a trainable model
model = tfk.Model(inputs=input_train, outputs=out)
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
# clipnorm=1 rescales each gradient so its norm never exceeds 1.
opt = tfk.optimizers.Adam(learning_rate=0.01, clipnorm=1)
model.compile(loss='mean_squared_error', optimizer=opt, metrics=['mae'])
model.summary()

In [37]:
# Number of samples per gradient update
batch_size = 32
# Maximum number of training epochs passed to model.fit
epochs = 200

In [38]:
# Stop once val_loss has not improved for 10 epochs and roll back to the best weights
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_loss', mode='min', patience=10, restore_best_weights=True)
# Halve the learning rate after 5 epochs without val_loss improvement (floor 1e-5)
reduce_lr = tfk.callbacks.ReduceLROnPlateau(
    monitor='val_loss', mode='min', patience=5, factor=0.5, min_lr=1e-5)

# Train with 20% of the samples held out for validation; only the
# per-epoch metrics dictionary of the returned History object is kept
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_split=0.2,
    callbacks=[early_stopping, reduce_lr],
).history

In [41]:
# Move to Kaggle's writable output directory before writing any files
%cd /kaggle/working/
# Save the trained model under the name 'AR_TS_4' (the next cell zips it as a directory)
model.save('AR_TS_4')
# NOTE(review): `tl_model` is not defined in any visible cell -- this looks like a leftover
#del tl_model  # To avoid filling the memory

In [42]:
# Bundle the saved model directory into a single archive for download
!zip -r ar4.zip AR_TS_4/

<a href="ar4.zip"> Download Zip </a>