[Source](https://keras.io/examples/timeseries/timeseries_transformer_classification/)

In [16]:
# Switch the working directory to the dataset folder; 'Training.csv' is later read relative to it.
%cd /kaggle/input/ann-time-series/

In [17]:
import random
import tensorflow as tf
import numpy as np
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
plt.rc('font', size=16)
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')
tf.get_logger().setLevel('ERROR')

tfk = tf.keras
tfkl = tf.keras.layers

In [18]:
# Random seed for reproducibility
seed = 42

# Seed each source of randomness used here: Python's `random`, NumPy's global generator and TensorFlow.
random.seed(seed)
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so setting it
# here most likely only affects child processes — confirm.
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
# TF1-compat seeding call, kept alongside the TF2 call above.
tf.compat.v1.set_random_seed(seed)

In [19]:
# Load the training series; the path is relative to the current working directory.
dataset = pd.read_csv('Training.csv')
print(dataset.shape)
# Last expression of the cell: renders the first rows as a table.
dataset.head()

In [20]:
def inspect_dataframe(df, columns):
    """Plot each requested column of ``df`` in its own vertically stacked subplot.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot; indexed by column label.
    columns : sequence of column labels
        Columns to draw, one subplot per entry (top to bottom). All subplots
        share the x axis.
    """
    # squeeze=False always yields a 2-D array of Axes, so a single column works too
    # (with the default, one subplot comes back as a bare Axes that cannot be indexed).
    fig, axs = plt.subplots(len(columns), 1, sharex=True, figsize=(17,17), squeeze=False)
    for ax, col in zip(axs[:, 0], columns):
        ax.plot(df[col])
        ax.set_title(col)
    plt.show()
inspect_dataframe(dataset, dataset.columns)

In [21]:
# Work on a copy so the `dataset` frame itself is not modified.
X_train_raw = dataset.copy()
print(X_train_raw.shape)

# Normalize both features and labels
# Min-max scaling with per-column statistics computed over the whole frame.
# NOTE(review): a constant column gives X_max - X_min == 0 and would turn into NaN — confirm none exists.
X_min = X_train_raw.min()
X_max = X_train_raw.max()

X_train_raw = (X_train_raw-X_min)/(X_max-X_min)

inspect_dataframe(X_train_raw, X_train_raw.columns)

In [22]:
# Sequence-building hyper-parameters (consumed by build_sequences and the forecasting loop).
window = 800  # number of past time steps in each input sequence
telescope = 80  # number of future time steps used as target / predicted per forward pass
stride = 4  # step between the starts of consecutive training windows

In [23]:
def build_sequences(df, target_labels, window, stride, telescope):
    """Slice a time series into (input window, future target) pairs.

    Parameters
    ----------
    df : pandas.DataFrame
        Time series, one row per time step and one column per feature.
    target_labels : list-like of column labels
        Columns of ``df`` to forecast (must select a 2-D block of ``df``).
    window : int
        Number of consecutive time steps in each input sequence.
    stride : int
        Step between the starts of consecutive sequences; must divide ``window``.
    telescope : int
        Number of time steps right after each window that form its target.

    Returns
    -------
    dataset : numpy.ndarray
        Stacked input windows, shape ``(n_sequences, window, n_features)``.
    labels : numpy.ndarray
        Stacked targets, shape ``(n_sequences, telescope, n_targets)``.
        Both arrays are empty when the (padded) series is shorter than
        ``window + telescope``.

    Raises
    ------
    AssertionError
        If ``window`` is not a multiple of ``stride``.
    """
    # Sanity check to avoid runtime errors
    assert window % stride == 0

    # Copy so the arrays are decoupled from df; .values turns the frame into a NumPy array
    values = df.copy().values
    targets = df[target_labels].copy().values

    remainder = len(values) % window
    if remainder != 0:
        # Zero-pad at the BEGINNING so the total length becomes a multiple of `window`
        padding_len = window - remainder
        values = np.concatenate(
            (np.zeros((padding_len, values.shape[1]), dtype='float64'), values)
        )
        targets = np.concatenate(
            (np.zeros((padding_len, targets.shape[1]), dtype='float64'), targets)
        )
        assert len(values) % window == 0

    dataset = []
    labels = []
    # Last start index whose window AND telescope still fit inside the series.
    # The bound is inclusive (+1): otherwise the final valid pair, the one whose
    # target ends exactly at the last sample, would be silently dropped.
    last_start = len(values) - window - telescope
    for idx in range(0, last_start + 1, stride):
        dataset.append(values[idx:idx+window])
        labels.append(targets[idx+window:idx+window+telescope])

    dataset = np.array(dataset)
    labels = np.array(labels)
    return dataset, labels

In [24]:
def inspect_multivariate(X, y, columns, telescope, idx=None):
    """Plot one input window and its target for every feature.

    Parameters
    ----------
    X : array indexed as ``X[sample, time, feature]``
        Input windows.
    y : array indexed as ``y[sample, time, feature]``
        Targets; ``y[idx]`` must hold ``telescope`` time steps.
    columns : sequence of labels
        Subplot titles; feature ``i`` is drawn in subplot ``i``.
    telescope : int
        Number of target time steps drawn after the window.
    idx : int, optional
        Sample to draw; a random sample is picked when omitted.
    """
    if idx is None:
        idx = np.random.randint(0, len(X))

    # Window length comes from the array that was passed in, not from a notebook-level variable
    window_len = X.shape[1]
    past_steps = np.arange(window_len)
    future_steps = np.arange(window_len, window_len + telescope)

    # squeeze=False keeps `axs` a 2-D array even when there is a single column
    fig, axs = plt.subplots(len(columns), 1, sharex=True, figsize=(17,17), squeeze=False)
    for i, (ax, col) in enumerate(zip(axs[:, 0], columns)):
        ax.plot(past_steps, X[idx,:,i])
        ax.scatter(future_steps, y[idx,:,i], color='orange')
        ax.set_title(col)
        # y axis fixed to [0, 1]
        ax.set_ylim(0,1)
    plt.show()

In [25]:
def inspect_multivariate_prediction(X, y, pred, columns, telescope, idx=None):
    """Plot one input window with its true target and the model prediction.

    Parameters
    ----------
    X : array indexed as ``X[sample, time, feature]``
        Input windows.
    y : array indexed as ``y[sample, time, feature]``
        True targets (drawn in orange).
    pred : array indexed like ``y``
        Predicted targets (drawn in green).
    columns : sequence of labels
        Subplot titles; feature ``i`` is drawn in subplot ``i``.
    telescope : int
        Number of target time steps drawn after the window.
    idx : int, optional
        Sample to draw; a random sample is picked when omitted.
    """
    if idx is None:
        idx = np.random.randint(0, len(X))

    # Window length comes from the array that was passed in, not from a notebook-level variable
    window_len = X.shape[1]
    past_steps = np.arange(window_len)
    future_steps = np.arange(window_len, window_len + telescope)

    # squeeze=False keeps `axs` a 2-D array even when there is a single column
    fig, axs = plt.subplots(len(columns), 1, sharex=True, figsize=(17,17), squeeze=False)
    for i, (ax, col) in enumerate(zip(axs[:, 0], columns)):
        ax.plot(past_steps, X[idx,:,i])
        ax.plot(future_steps, y[idx,:,i], color='orange')
        ax.plot(future_steps, pred[idx,:,i], color='green')
        ax.set_title(col)
        # y axis fixed to [0, 1]
        ax.set_ylim(0,1)
    plt.show()

In [26]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one encoder block: self-attention, then a per-time-step feed-forward net.

    Each of the two sub-layers normalizes its input first and adds that
    (un-normalized) input back to its output, so the result has the same
    last dimension as ``inputs``.

    Parameters
    ----------
    inputs : Keras tensor
        Sequence to encode.
    head_size : int
        ``key_dim`` of every attention head.
    num_heads : int
        Number of attention heads.
    ff_dim : int
        Number of filters of the hidden feed-forward layer.
    dropout : float
        Rate used inside the attention layer and by both Dropout layers.
    """
    # Self-attention sub-layer
    normed_inputs = tfkl.LayerNormalization(epsilon=1e-6)(inputs)
    self_attention = tfkl.MultiHeadAttention(
        num_heads=num_heads, key_dim=head_size, dropout=dropout
    )
    attended = self_attention(normed_inputs, normed_inputs)
    attended = tfkl.Dropout(dropout)(attended)
    attention_out = attended + inputs

    # Feed-forward sub-layer (kernel_size=1 convolutions act on each time step independently)
    n_channels = inputs.shape[-1]
    hidden = tfkl.LayerNormalization(epsilon=1e-6)(attention_out)
    hidden = tfkl.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(hidden)
    hidden = tfkl.Dropout(dropout)(hidden)
    projected = tfkl.Conv1D(filters=n_channels, kernel_size=1)(hidden)
    return projected + attention_out

In [27]:
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    dropout=0,
    output_shape=None,
):
    """Build a Transformer-encoder forecaster.

    The input goes through ``num_transformer_blocks`` encoder blocks, is
    flattened, projected by a Dense layer and reshaped to
    ``(output_shape[-2], output_shape[-1])``.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample (no batch axis).
    head_size, num_heads, ff_dim, dropout
        Forwarded unchanged to ``transformer_encoder``.
    num_transformer_blocks : int
        Number of stacked encoder blocks.
    output_shape : tuple, optional
        Shape of one target sample (no batch axis); its last two entries are
        used as (time steps, targets). When omitted, the notebook-level
        ``output_shape`` is used, which is what earlier callers relied on.

    Returns
    -------
    An uncompiled ``tfk.Model``.

    Raises
    ------
    NameError
        If ``output_shape`` is neither passed nor defined at notebook level.
    """
    if output_shape is None:
        # Backward-compatible fallback to the notebook-level variable; resolved
        # up front so a missing definition fails before any layer is created.
        try:
            output_shape = globals()["output_shape"]
        except KeyError:
            raise NameError(
                "build_model() needs `output_shape`: pass it explicitly "
                "or define it at notebook level"
            ) from None
    n_steps, n_targets = output_shape[-2], output_shape[-1]

    inputs = tfk.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = tfkl.Flatten()(x)
    # ReLU output layer: predictions are non-negative.
    # NOTE(review): assumes the targets are scaled to be >= 0 — confirm.
    dense = tfkl.Dense(n_steps * n_targets, activation='relu')(x)
    output_layer = tfkl.Reshape((n_steps, n_targets))(dense)
    return tfk.Model(inputs, output_layer)

In [28]:
# Forecast every column of the dataset: all columns are selected as targets.
target_labels = dataset.columns

In [29]:
# Cut the normalized series into (input window, following `telescope` steps) pairs.
X_train, y_train = build_sequences(X_train_raw, target_labels, window, stride, telescope)
# Last expression of the cell: displays both array shapes.
X_train.shape, y_train.shape

In [30]:
# Per-sample shapes (batch axis dropped) determine the model's input and output sizes.
input_shape = X_train.shape[1:]
# build_model() reads `output_shape` as a notebook-level name, so it must be defined before the call.
output_shape = y_train.shape[1:]

model = build_model(
    input_shape,
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=4,
    dropout=0.25,
)

# Regression set-up: mean squared error as the loss, mean absolute error reported as a metric.
model.compile(
    loss=tfk.losses.MeanSquaredError(),
    optimizer=tfk.optimizers.Adam(learning_rate=2e-4),
    metrics=['mae'],
)
model.summary()

# EarlyStopping: stop after 10 epochs without val_loss improvement and restore the best weights.
# ReduceLROnPlateau: halve the learning rate after 5 stagnant epochs, never going below 1e-5.
callbacks = [
                tfk.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=10, restore_best_weights=True),
                tfk.callbacks.ReduceLROnPlateau(monitor='val_loss', mode='min', patience=5, factor=0.5, min_lr=1e-5)
            ]

# NOTE(review): with stride=4 and window=800 consecutive windows overlap heavily, so the
# samples held out by validation_split share most of their time steps with training
# samples — confirm this leakage is acceptable.
model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=60,
    batch_size=8,
    callbacks=callbacks,
)

In [31]:
%cd /kaggle/working/
# Save the trained model under 'AR_TS_15' in the current working directory.
# NOTE(review): whether these are the best-val_loss weights depends on how EarlyStopping ended the run — confirm.
model.save('AR_TS_15')
#del tl_model  # To avoid filling the memory  -- NOTE(review): no `tl_model` is defined in this notebook; likely a leftover

In [32]:
# Pack the saved 'AR_TS_15' model into a single archive, ar15.zip.
!zip -r ar15.zip AR_TS_15

<a href="ar15.zip"> Download Zip </a>

In [33]:
# Total number of future time steps to forecast autoregressively.
reg_telescope = 864

In [34]:
# Convert the dataset to a float32 tensor to mimic the input format.
# The shape is taken from the data rather than hard-coded, so any dataset size works.
X = tf.constant(dataset, dtype=tf.float32, shape=dataset.shape)

In [35]:
# Back to a NumPy array, then per-column (axis=0) minimum and maximum for min-max scaling.
# These rebind X_min / X_max, which were previously computed with pandas on the training frame.
# NOTE(review): this cell cannot be re-run on its own — after the first run X is already
# an ndarray and no longer has a .numpy() method.
X = X.numpy()
X_min = X.min(axis=0)
X_max = X.max(axis=0)

In [36]:
# Take the last `window` time steps (row slicing, like .iloc[-window:] on a DataFrame).
future = X[-window:]
# Last expression of the cell: displays the shape of the slice.
future.shape

In [37]:
# Min-max normalize the last `window` rows with the column statistics.
# Derived from X (not from `future` itself) so that re-running this cell cannot
# normalize twice or stack a second batch axis.
future = (X[-window:] - X_min) / (X_max - X_min)
# Add a leading batch axis of size 1
future = np.expand_dims(future, axis=0)
print(future.shape)
type(future)

In [38]:
# Autoregressive forecasting: predict `telescope` steps per round and feed every
# prediction back as the newest part of the input window.
# Ceil division gives just enough rounds to cover `reg_telescope` steps
# (at least one round, so there is always something to concatenate).
n_rounds = max(1, -(-reg_telescope // telescope))
X_temp = future
pred_chunks = []
for _ in range(n_rounds):
    pred_temp = model.predict(X_temp)
    pred_chunks.append(pred_temp)
    # Slide the window: drop the oldest `telescope` steps and append the new prediction
    X_temp = np.concatenate((X_temp[:,telescope:,:],pred_temp), axis=1)

# Join all rounds along the time axis and cut the surplus steps of the last round
reg_predictions = np.concatenate(pred_chunks, axis=1)[:,:reg_telescope,:]

In [39]:
print(reg_predictions.shape)
# `None in <float array>` is always False, so that check could never fail;
# what can actually go wrong in a float forecast is NaN, so test for that.
assert not np.isnan(reg_predictions).any(), "forecast contains NaN values"

In [40]:
# Normalized view: the input window first, then the forecast (orange) on the following time steps.
n_past = future.shape[1]
past_axis = np.arange(n_past)
forecast_axis = np.arange(n_past, n_past + reg_telescope)

figs, axs = plt.subplots(len(target_labels), 1, sharex=True, figsize=(17,17))
for i, col in enumerate(target_labels):
    ax = axs[i]
    ax.plot(past_axis, future[0,:,i])
    ax.plot(forecast_axis, reg_predictions[0,:,i], color='orange')
    ax.set_title(col)
    # y axis fixed to [0, 1]
    ax.set_ylim(0,1)
plt.show()

In [41]:
# Invert the min-max scaling: multiply by the column range and add the column minimum.
# NOTE(review): this rebinds reg_predictions, so re-running the cell rescales it a second time.
reg_predictions = reg_predictions * (X_max - X_min) + X_min

In [42]:
# De-normalize the input window into a NEW array: `future` itself stays normalized,
# so this cell can be re-run, and the forecasting cell can be re-run after it,
# without rescaling the data twice.
future_denorm = future * (X_max - X_min) + X_min

n_past = future_denorm.shape[1]
past_axis = np.arange(n_past)
forecast_axis = np.arange(n_past, n_past + reg_telescope)

# Input window first, then the forecast (orange) on the following time steps
figs, axs = plt.subplots(len(target_labels), 1, sharex=True, figsize=(17,17))
for i, col in enumerate(target_labels):
    axs[i].plot(past_axis, future_denorm[0,:,i])
    axs[i].plot(forecast_axis, reg_predictions[0,:,i], color='orange')
    axs[i].set_title(col)
plt.show()

In [43]:
# Prints "ciao" every 30 seconds, up to 100000 times; the kernel stays busy until interrupted.
# NOTE(review): presumably a keep-alive so the hosted session does not idle out — confirm it is still needed.
import time
for _ in range (100000):
    print("ciao")    
    time.sleep(30)