# Imports and Colab Mount

In [1]:
import datetime
import seaborn as sn
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import scipy.stats as stats
import glob
from math import sqrt
from tqdm import tqdm

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import keras
from tensorflow.keras.optimizers import Adam
from keras.layers import Dense, LSTM, LeakyReLU, Dropout, GRU, SimpleRNN, Input, LSTM, Dense, Bidirectional, Concatenate, Reshape, Lambda, Bidirectional
from keras.models import Model, Sequential
from keras import backend as K
from tensorflow.keras import layers
from keras.callbacks import Callback, ReduceLROnPlateau, EarlyStopping
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
import seaborn as sns

from numpy.random import seed
#from tensorflow import set_random_seed

%matplotlib inline

In [2]:
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


# Load Data

In [3]:
train_h = pd.read_csv("/content/drive/MyDrive/Datasets/solar_train.csv", index_col=0, parse_dates=True)
valid_h = pd.read_csv("/content/drive/MyDrive/Datasets/solar_valid.csv", index_col=0, parse_dates=True)
test_h = pd.read_csv("/content/drive/MyDrive/Datasets/solar_test.csv", index_col=0, parse_dates=True)
hourly = pd.read_csv("/content/drive/MyDrive/Datasets/solar_all.csv", index_col=0, parse_dates=True)

In [4]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
	n_vars = 1 if type(data) is list else data.shape[1]
	df = pd.DataFrame(data)
	cols, names = list(), list()
	# input sequence (t-n, ... t-1)
	for i in range(n_in, 0, -1):
		cols.append(df.shift(i))
		names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
	# forecast sequence (t, t+1, ... t+n)
	for i in range(0, n_out):
		cols.append(df.shift(-i))
		if i == 0:
			names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
		else:
			names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
	# put it all together
	agg = pd.concat(cols, axis=1)
	agg.columns = names
	# drop rows with NaN values
	if dropnan:
		agg.dropna(inplace=True)
	return agg

In [5]:
def make_data(data, timestep, resid_check=False):
  values = data.values
  values = values.astype('float32')
  #scaler = MinMaxScaler(feature_range=(0, 1))
  #scaled = scaler.fit_transform(values)
  
  timestep = timestep
  n_features = 39
  n_obs = timestep * n_features
  reframed = series_to_supervised(values, timestep, 1)
  reframed = reframed.iloc[: , :-38]

  values = reframed.values
  indice1 = train_h.shape[0]
  indice2 = valid_h.shape[0]

  train = values[:indice1, :]
  valid = values[indice1:indice1+indice2, :]
  test = values[indice1+indice2:, :]

  train_X, train_y = train[:, :-1], train[:, -1]
  valid_X, valid_y = valid[:, :-1], valid[:, -1]
  test_X, test_y = test[:, :-1], test[:, -1]

  if (resid_check==True):
    train_y = resid_train.values[:2184]
    valid_y = resid_train.values[2184:]

  scaler = MinMaxScaler(feature_range=(0, 1)).fit(train_X)
  train_X = scaler.transform(train_X)
  valid_X = scaler.transform(valid_X)
  test_X = scaler.transform(test_X)

  scaler_y = MinMaxScaler(feature_range=(0, 1)).fit(train_y.reshape(-1,1))
  train_y = scaler_y.transform(train_y.reshape(-1,1))
  valid_y = scaler_y.transform(valid_y.reshape(-1,1))
  test_y = scaler_y.transform(test_y.reshape(-1,1))

  train_X = train_X.reshape((train_X.shape[0], timestep, n_features))
  valid_X = valid_X.reshape((valid_X.shape[0], timestep, n_features))
  test_X = test_X.reshape((test_X.shape[0], timestep, n_features))
  return train_X, train_y, valid_X, valid_y, test_X, test_y

In [6]:
def make_data2(data, timestep, resid_check=False):
  values = data.values
  values = values.astype('float32')
  #scaler = MinMaxScaler(feature_range=(0, 1))
  #scaled = scaler.fit_transform(values)
  
  timestep = timestep
  n_features = 39
  n_obs = timestep * n_features
  reframed = series_to_supervised(values, timestep, 1)
  reframed = reframed.iloc[: , :-38]

  values = reframed.values
  indice1 = train_h.shape[0]
  indice2 = valid_h.shape[0]

  train = values[:indice1, :]
  valid = values[indice1:indice1+indice2, :]
  test = values[indice1+indice2:, :]

  train_X, train_y = train[:, :-1], train[:, -1]
  valid_X, valid_y = valid[:, :-1], valid[:, -1]
  test_X, test_y = test[:, :-1], test[:, -1]

  if (resid_check==True):
    train_y = resid_train.values[:2184]
    valid_y = resid_train.values[2184:]

  scaler = MinMaxScaler(feature_range=(0, 1)).fit(train_X)
  train_X = scaler.transform(train_X)
  valid_X = scaler.transform(valid_X)
  test_X = scaler.transform(test_X)

  scaler_y = MinMaxScaler(feature_range=(0, 1)).fit(train_y.reshape(-1,1))
  train_y = scaler_y.transform(train_y.reshape(-1,1))
  valid_y = scaler_y.transform(valid_y.reshape(-1,1))
  test_y = scaler_y.transform(test_y.reshape(-1,1))

  train_X = train_X.reshape((train_X.shape[0], timestep, n_features))
  valid_X = valid_X.reshape((valid_X.shape[0], timestep, n_features))
  test_X = test_X.reshape((test_X.shape[0], timestep, n_features))
  return train_X, train_y, valid_X, valid_y, test_X, test_y, scaler, scaler_y

# WanDB

In [None]:
!pip install wandb -qq
import wandb
from wandb.keras import WandbCallback
!wandb login

[K     |████████████████████████████████| 1.7 MB 5.4 MB/s 
[K     |████████████████████████████████| 144 kB 60.6 MB/s 
[K     |████████████████████████████████| 181 kB 29.7 MB/s 
[K     |████████████████████████████████| 63 kB 1.2 MB/s 
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


# Test

# RNN

In [None]:
rmse_list = []
mae_list = []
mape_list = []

timestep=6
layers=2
num_units=256
dropout=0
lr=0.01
batch_size=128

for z in tqdm(range(10)):
  train_X, train_y, valid_X, valid_y, test_X, test_y, scaler, scaler_y = make_data2(hourly, 6, resid_check=False)
  model = Sequential()

  if layers > 1:
    model.add(SimpleRNN(units = num_units, input_shape=(train_X.shape[1], train_X.shape[2]), dropout=dropout, return_sequences=True))

    for i in range(layers-2):
      model.add(SimpleRNN(units = num_units, dropout=dropout, return_sequences=True))

    model.add(SimpleRNN(units = num_units, dropout=dropout))

  else:
    model.add(SimpleRNN(units = num_units, input_shape=(train_X.shape[1], train_X.shape[2]), dropout=dropout))

  model.add(Dense(units = 1))

  model.compile(
    loss="mse",
    optimizer=Adam(learning_rate=lr)
  )

  model.fit(train_X, train_y, batch_size=batch_size,
          epochs=200, verbose=0, shuffle=False,
          validation_data=(valid_X, valid_y),
          callbacks=[EarlyStopping(patience=10, restore_best_weights=True)])
  
  # make a prediction
  yhat = model.predict(test_X)
  yhat_inv = scaler_y.inverse_transform(yhat)
  #resid_sum = (yhat_inv+resid_test.values[3:])

  rmse_list.append(sqrt(mean_squared_error(yhat_inv, scaler_y.inverse_transform(test_y))))
  mae_list.append(mean_absolute_error(yhat_inv, scaler_y.inverse_transform(test_y)))
  mape_list.append(mean_absolute_percentage_error(yhat_inv, scaler_y.inverse_transform(test_y)))
  print()
print(f"RNN LIST OF RMSE: {rmse_list}")
print(f'RNN RMSE:  {sum(rmse_list)/len(rmse_list)}')

print(f"RNN LIST OF MAE: {mae_list}")
print(f'RNN MAE:  {sum(mae_list)/len(mae_list)}')

print(f"RNN LIST OF MAPE: {mape_list}")
print(f'RNN MAPE:  {sum(mape_list)/len(mape_list)}')

 10%|█         | 1/10 [00:49<07:29, 49.95s/it]




 20%|██        | 2/10 [01:33<06:07, 46.00s/it]




 30%|███       | 3/10 [02:16<05:12, 44.68s/it]




 40%|████      | 4/10 [03:14<04:59, 49.98s/it]




 50%|█████     | 5/10 [03:58<03:58, 47.73s/it]




 60%|██████    | 6/10 [04:37<03:00, 45.05s/it]




 70%|███████   | 7/10 [05:20<02:12, 44.14s/it]




 80%|████████  | 8/10 [06:02<01:27, 43.68s/it]




 90%|█████████ | 9/10 [06:31<00:39, 39.00s/it]




100%|██████████| 10/10 [07:04<00:00, 42.46s/it]


RNN LIST OF RMSE: [1068.5556840895097, 1159.240268451713, 1379.9390385085858, 1176.7722273235377, 1161.6255851176832, 1528.136610385341, 1112.091891437034, 1141.5379976154977, 1790.3627425748114, 1128.1197853065073]
RNN RMSE:  1264.6381830810221
RNN LIST OF MAE: [865.2617, 861.32086, 1092.1304, 928.2128, 881.0841, 1299.1615, 821.06195, 875.2505, 1478.8761, 841.22095]
RNN MAE:  994.3580871582031
RNN LIST OF MAPE: [0.3519668, 0.2943436, 0.34910986, 0.32278258, 0.30088556, 0.45316443, 0.2896887, 0.31686652, 0.38811088, 0.29890344]
RNN MAPE:  0.33658223450183866





# LSTM

In [10]:
rmse_list = []
mae_list = []
mape_list = []

timestep=6
layers=2
num_units=64
dropout=0.1
lr=0.01
batch_size=128

for z in tqdm(range(10)):
  train_X, train_y, valid_X, valid_y, test_X, test_y, scaler, scaler_y = make_data2(hourly, timestep, resid_check=False)
  model = Sequential()

  if layers > 1:
    model.add(LSTM(units = num_units, input_shape=(train_X.shape[1], train_X.shape[2]), dropout=dropout, return_sequences=True))

    for i in range(layers-2):
      model.add(LSTM(units = num_units, dropout=dropout, return_sequences=True))

    model.add(LSTM(units = num_units, dropout=dropout))

  else:
    model.add(LSTM(units = num_units, input_shape=(train_X.shape[1], train_X.shape[2]), dropout=dropout))

  model.add(Dense(units = 1))

  model.compile(
    loss="mse",
    optimizer=Adam(learning_rate=lr)
  )

  model.fit(train_X, train_y, batch_size=batch_size,
          epochs=200, verbose=0, shuffle=False,
          validation_data=(valid_X, valid_y),
          callbacks=[EarlyStopping(patience=10, restore_best_weights=True)])
  
  # make a prediction
  yhat = model.predict(test_X)
  yhat_inv = scaler_y.inverse_transform(yhat)
  #resid_sum = (yhat_inv+resid_test.values[3:])

  rmse_list.append(sqrt(mean_squared_error(yhat_inv, scaler_y.inverse_transform(test_y))))
  mae_list.append(mean_absolute_error(yhat_inv, scaler_y.inverse_transform(test_y)))
  mape_list.append(mean_absolute_percentage_error(yhat_inv, scaler_y.inverse_transform(test_y)))
  print()
print(f"LSTM LIST OF RMSE: {rmse_list}")
print(f'LSTM RMSE:  {sum(rmse_list)/len(rmse_list)}')

print(f"LSTM LIST OF MAE: {mae_list}")
print(f'LSTM MAE:  {sum(mae_list)/len(mae_list)}')

print(f"LSTM LIST OF MAPE: {mape_list}")
print(f'LSTM MAPE:  {sum(mape_list)/len(mape_list)}')

 10%|█         | 1/10 [00:15<02:20, 15.64s/it]




 20%|██        | 2/10 [00:41<02:51, 21.42s/it]




 30%|███       | 3/10 [00:56<02:12, 18.89s/it]




 40%|████      | 4/10 [01:11<01:42, 17.08s/it]




 50%|█████     | 5/10 [01:27<01:22, 16.60s/it]




 60%|██████    | 6/10 [01:52<01:19, 19.77s/it]




 70%|███████   | 7/10 [02:08<00:54, 18.27s/it]




 80%|████████  | 8/10 [02:22<00:33, 17.00s/it]




 90%|█████████ | 9/10 [02:36<00:15, 15.96s/it]




100%|██████████| 10/10 [02:51<00:00, 17.19s/it]


LSTM LIST OF RMSE: [963.6882600198054, 929.7355470777699, 928.8288997441887, 1008.1366164860792, 973.3494362252438, 891.9060698862858, 886.3810904458646, 1015.9572456555443, 1006.7303511864535, 963.3463486721689]
LSTM RMSE:  956.8059865399404
LSTM LIST OF MAE: [714.864, 660.08813, 712.111, 757.5335, 713.8564, 642.4371, 636.3678, 730.4109, 771.0856, 714.8614]
LSTM MAE:  705.3615783691406
LSTM LIST OF MAPE: [0.27761814, 2.8527899, 0.32956073, 0.2855545, 0.2899234, 0.32398516, 0.30619875, 0.38147333, 0.28862876, 0.28174567]
LSTM MAPE:  0.5617478311061859





In [11]:
rmse_list = []
mae_list = []
mape_list = []

timestep=6
layers=2
num_units=64
dropout=0.1
lr=0.01
batch_size=128

for z in tqdm(range(10)):
  train_X, train_y, valid_X, valid_y, test_X, test_y, scaler, scaler_y = make_data2(hourly, timestep, resid_check=False)
  model = Sequential()

  if layers > 1:
    model.add(LSTM(units = num_units, input_shape=(train_X.shape[1], train_X.shape[2]), dropout=dropout, return_sequences=True))

    for i in range(layers-2):
      model.add(LSTM(units = num_units, dropout=dropout, return_sequences=True))

    model.add(LSTM(units = num_units, dropout=dropout))

  else:
    model.add(LSTM(units = num_units, input_shape=(train_X.shape[1], train_X.shape[2]), dropout=dropout))

  model.add(Dense(units = 1))

  model.compile(
    loss="mse",
    optimizer=Adam(learning_rate=lr)
  )

  model.fit(train_X, train_y, batch_size=batch_size,
          epochs=200, verbose=0, shuffle=False,
          validation_data=(valid_X, valid_y),
          callbacks=[EarlyStopping(patience=20, restore_best_weights=True), ReduceLROnPlateau(monitor='val_loss',patience=5, min_lr=1e-6)])
  
  # make a prediction
  yhat = model.predict(test_X)
  yhat_inv = scaler_y.inverse_transform(yhat)
  #resid_sum = (yhat_inv+resid_test.values[3:])

  rmse_list.append(sqrt(mean_squared_error(yhat_inv, scaler_y.inverse_transform(test_y))))
  mae_list.append(mean_absolute_error(yhat_inv, scaler_y.inverse_transform(test_y)))
  mape_list.append(mean_absolute_percentage_error(yhat_inv, scaler_y.inverse_transform(test_y)))
  print()
print(f"LSTM LIST OF RMSE: {rmse_list}")
print(f'LSTM RMSE:  {sum(rmse_list)/len(rmse_list)}')

print(f"LSTM LIST OF MAE: {mae_list}")
print(f'LSTM MAE:  {sum(mae_list)/len(mae_list)}')

print(f"LSTM LIST OF MAPE: {mape_list}")
print(f'LSTM MAPE:  {sum(mape_list)/len(mape_list)}')

 10%|█         | 1/10 [00:17<02:40, 17.81s/it]




 20%|██        | 2/10 [00:43<03:01, 22.74s/it]




 30%|███       | 3/10 [01:09<02:49, 24.19s/it]




 40%|████      | 4/10 [01:35<02:28, 24.69s/it]




 50%|█████     | 5/10 [02:01<02:06, 25.33s/it]




 60%|██████    | 6/10 [02:27<01:41, 25.36s/it]




 70%|███████   | 7/10 [02:53<01:16, 25.56s/it]




 80%|████████  | 8/10 [03:19<00:51, 25.69s/it]




 90%|█████████ | 9/10 [03:36<00:23, 23.11s/it]




100%|██████████| 10/10 [04:02<00:00, 24.28s/it]


LSTM LIST OF RMSE: [976.9073011294367, 968.8100949102461, 1059.009855950359, 985.4152360807093, 986.2787068065497, 982.8246600996538, 957.82458597595, 1008.2015857456286, 922.5306905463905, 1012.7413601211318]
LSTM RMSE:  986.0544077366055
LSTM LIST OF MAE: [723.0525, 693.693, 786.77966, 707.9397, 707.80066, 715.1701, 688.6474, 748.45184, 656.5426, 771.4806]
LSTM MAE:  719.9558044433594
LSTM LIST OF MAPE: [0.30149546, 0.28810877, 0.28271756, 0.31732655, 0.28096387, 0.28739175, 0.32207233, 0.281074, 0.35629702, 0.3013806]
LSTM MAPE:  0.3018827885389328





# GRU

In [None]:
rmse_list = []
mae_list = []
mape_list = []

timestep=12
layers=8
num_units=64
dropout=0.1
lr=0.01
batch_size=32

for z in tqdm(range(10)):
  train_X, train_y, valid_X, valid_y, test_X, test_y, scaler, scaler_y = make_data2(hourly, timestep, resid_check=False)
  model = Sequential()

  if layers > 1:
    model.add(GRU(units = num_units, input_shape=(train_X.shape[1], train_X.shape[2]), dropout=dropout, return_sequences=True))

    for i in range(layers-2):
      model.add(GRU(units = num_units, dropout=dropout, return_sequences=True))

    model.add(GRU(units = num_units, dropout=dropout))

  else:
    model.add(GRU(units = num_units, input_shape=(train_X.shape[1], train_X.shape[2]), dropout=dropout))

  model.add(Dense(units = 1))

  model.compile(
    loss="mse",
    optimizer=Adam(learning_rate=lr)
  )

  model.fit(train_X, train_y, batch_size=batch_size,
          epochs=200, verbose=0, shuffle=False,
          validation_data=(valid_X, valid_y),
          callbacks=[EarlyStopping(patience=10, restore_best_weights=True)])
  
  # make a prediction
  yhat = model.predict(test_X)
  yhat_inv = scaler_y.inverse_transform(yhat)
  #resid_sum = (yhat_inv+resid_test.values[3:])

  rmse_list.append(sqrt(mean_squared_error(yhat_inv, scaler_y.inverse_transform(test_y))))
  mae_list.append(mean_absolute_error(yhat_inv, scaler_y.inverse_transform(test_y)))
  mape_list.append(mean_absolute_percentage_error(yhat_inv, scaler_y.inverse_transform(test_y)))
print(f"GRU LIST OF RMSE: {rmse_list}")
print(f'GRU RMSE:  {sum(rmse_list)/len(rmse_list)}')

print(f"GRU LIST OF MAE: {mae_list}")
print(f'GRU MAE:  {sum(mae_list)/len(mae_list)}')

print(f"GRU LIST OF MAPE: {mape_list}")
print(f'GRU MAPE:  {sum(mape_list)/len(mape_list)}')

100%|██████████| 10/10 [36:47<00:00, 220.78s/it]

GRU LIST OF RMSE: [1047.4952267194346, 960.2672154145429, 1004.9950248633074, 1017.5268423977817, 1005.5185540307051, 923.7623476847278, 1195.4631947492153, 997.4431688071255, 971.634705020359, 955.6800262117023]
GRU RMSE:  1007.9786305898903
GRU LIST OF MAE: [827.2577, 733.55176, 804.21875, 722.3083, 713.299, 645.6095, 960.50946, 761.9838, 706.05237, 643.83325]
GRU MAE:  751.8623901367188
GRU LIST OF MAPE: [0.30466995, 0.29843897, 0.37575185, 0.2929131, 0.24875705, 0.43719256, 0.33111233, 0.3117522, 0.29438516, 0.33002964]
GRU MAPE:  0.3225002810359001





# Transformer

In [None]:
from tensorflow.keras import layers

class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, feat_dim, num_heads, ff_dim, rate = 0.1):
        super(TransformerBlock, self).__init__()
        self.att = layers.MultiHeadAttention(num_heads = num_heads, key_dim = embed_dim)
        self.ffn = keras.Sequential( [layers.Dense(ff_dim, activation = "gelu"), layers.Dense(feat_dim),] )
        self.layernorm1 = layers.BatchNormalization()
        self.layernorm2 = layers.BatchNormalization()
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)
        self.embed_dim = embed_dim
        self.feat_dim = feat_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

    def call(self, inputs, training):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training = training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training = training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):

        config = super().get_config()
        config.update({
            'embed_dim': self.embed_dim,
            'feat_dim': self.feat_dim,
            'num_heads': self.num_heads,
            'ff_dim': self.ff_dim,
            'rate': self.rate,
        })
        return config

In [None]:
class Time2Vec(keras.layers.Layer):
    def __init__(self, kernel_size = 1):
        super(Time2Vec, self).__init__(trainable = True, name = 'Time2VecLayer')
        self.k = kernel_size

    def build(self, input_shape):
        # trend
        self.wb = self.add_weight(name = 'wb', shape = (input_shape[1],), initializer = 'uniform', trainable = True)
        self.bb = self.add_weight(name = 'bb', shape = (input_shape[1],), initializer = 'uniform', trainable = True)
        # periodic
        self.wa = self.add_weight(name = 'wa', shape = (1, input_shape[1], self.k), initializer = 'uniform', trainable = True)
        self.ba = self.add_weight(name = 'ba', shape = (1, input_shape[1], self.k), initializer = 'uniform', trainable = True)
        super(Time2Vec, self).build(input_shape)

    def call(self, inputs, **kwargs):
        bias = self.wb * inputs + self.bb
        dp = K.dot(inputs, self.wa) + self.ba
        wgts = K.sin(dp) # or K.cos(.)
        ret = K.concatenate([K.expand_dims(bias, -1), wgts], -1)
        ret = K.reshape(ret, (-1, inputs.shape[1] * (self.k + 1)))
        return ret

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1] * (self.k + 1))

    def get_config(self):

        config = super().get_config()
        config.update({
            'kernel_size': self.k,
        })
        return config

In [None]:
EPOCHS = 50
N_HEADS = 8
N_FOLDS = 10
FF_DIM = 256
N_BLOCKS = 6
EMBED_DIM = 64
BATCH_SIZE = 16
WINDOW_SIZE = 65
DROPUT_RATE = 0.0
TIME_2_VEC_DIM = 3
TRAIN_MODEL = True
SKIP_CONNECTION_STRENGTH = 0.9

In [None]:
rmse_list = []
mae_list = []
mape_list = []

batch_size=32
lr=0.001
N_HEADS = 4
FF_DIM = 256
N_BLOCKS = 2
EMBED_DIM = 128
DROPUT_RATE = 0.3
time2vec_dim = 2
timestep = 24

for z in tqdm(range(10)):
  train_X, train_y, valid_X, valid_y, test_X, test_y, scaler, scaler_y = make_data2(hourly, timestep, resid_check=False)


  input_shape = train_X.shape[1:]
  inp = Input(input_shape)
  x = inp

  time_embedding = keras.layers.TimeDistributed(Time2Vec(time2vec_dim - 1))(x)
  x = Concatenate(axis = -1)([x, time_embedding])
  x = layers.LayerNormalization(epsilon = 1e-6)(x)

  for k in range(N_BLOCKS):
    x_old = x
    transformer_block = TransformerBlock(EMBED_DIM, input_shape[-1] + ( input_shape[-1] * time2vec_dim), N_HEADS, FF_DIM, DROPUT_RATE)
    x = transformer_block(x)
    x = ((1.0 - SKIP_CONNECTION_STRENGTH) * x) + (SKIP_CONNECTION_STRENGTH * x_old)

  x = layers.Flatten()(x)

  x = layers.Dense(128, activation = "relu")(x)
  x = layers.Dropout(DROPUT_RATE)(x)
  x = Dense(1, activation = 'linear')(x)

  out = x
  model = Model(inp, out)

  model.compile(
    loss="mse",
    optimizer=Adam(learning_rate=lr)
              )

  model.fit(train_X, train_y, batch_size=batch_size,
          epochs=200, verbose=0, shuffle=False,
          validation_data=(valid_X, valid_y),
          callbacks=[EarlyStopping(patience=10, restore_best_weights=True)]
          )
  
  # make a prediction
  yhat = model.predict(test_X)
  yhat_inv = scaler_y.inverse_transform(yhat)
  #resid_sum = (yhat_inv+resid_test.values[3:])

  rmse_list.append(sqrt(mean_squared_error(yhat_inv, scaler_y.inverse_transform(test_y))))
  mae_list.append(mean_absolute_error(yhat_inv, scaler_y.inverse_transform(test_y)))
  mape_list.append(mean_absolute_percentage_error(yhat_inv, scaler_y.inverse_transform(test_y)))
print(f"Transformer LIST OF RMSE: {rmse_list}")
print(f'Transformer RMSE:  {sum(rmse_list)/len(rmse_list)}')

print(f"Transformer LIST OF MAE: {mae_list}")
print(f'Transformer MAE:  {sum(mae_list)/len(mae_list)}')

print(f"Transformer LIST OF MAPE: {mape_list}")
print(f'Transformer MAPE:  {sum(mape_list)/len(mape_list)}')

100%|██████████| 10/10 [1:13:09<00:00, 438.94s/it]

Transformer LIST OF RMSE: [1069.345594277173, 1180.647280096812, 1145.9040099414958, 974.9583003903296, 1018.4296121971316, 1091.3377112516546, 1031.2739088137546, 1144.9281636853902, 1080.3077107935499, 1164.1378784319322]
Transformer RMSE:  1090.1270169879222
Transformer LIST OF MAE: [809.22424, 946.4375, 906.5738, 720.68304, 763.6714, 869.78345, 776.4992, 913.0993, 836.0788, 951.6148]
Transformer MAE:  849.366552734375
Transformer LIST OF MAPE: [0.62974226, 0.30611587, 0.30105332, 0.27899137, 0.3038196, 0.3025636, 0.5804642, 0.30953628, 0.28656432, 0.31644234]
Transformer MAPE:  0.3615293145179749



