In [1]:
%matplotlib widget

import os

if not os.path.exists("./models/"):
  os.makedirs("./models/")
if not os.path.exists("./report/figures/"):
  os.makedirs("./report/figures/")

# Data Import

In [2]:
import pandas as pd

solar_energy_df = pd.read_csv("../ML_week3/solarenergy.csv",
                              delimiter=",",
                              index_col=0,
                              date_format="%d/%m/%Y %H:%M",
                              parse_dates=True).sort_index()

"""
solar_energy_df["Datetime"] = pd.to_datetime(solar_energy_df["Datetime"],
                                             format="%d/%m/%Y %H:%M")
solar_energy_df = solar_energy_df.set_index("Datetime").sort_index()
"""

solar_energy_df = solar_energy_df.dropna()
solar_energy_df = solar_energy_df.resample("1H").interpolate("linear")
solar_energy_df = \
  (solar_energy_df - solar_energy_df.mean()) / solar_energy_df.std()

training_ratio = 0.7

training_limit = \
  solar_energy_df.index[
      int(training_ratio*solar_energy_df.shape[0])
    ]

y_train_df = solar_energy_df.loc[:training_limit-pd.Timedelta(hours=1),
                                  "solar_mw"]
y_test_df = solar_energy_df.loc[training_limit:, "solar_mw"]

# Tensor Creation

In [3]:
from typing import Optional
from pydantic import NonNegativeInt, PositiveInt

import numpy as np
import tensorflow as tf
from tensorflow import keras

n_feats = 1
p=10
q=48

def tensor_memory_reshaper(in_np:np.ndarray, out_np:Optional[np.ndarray],
                           memory:NonNegativeInt,
                           n_feats_internal:PositiveInt = n_feats):
  in_np = in_np.reshape((-1,1,n_feats_internal))
  for _ in range(memory):
    next_np = in_np[1:,-1,:].reshape(((-1,1,n_feats_internal)))
    in_np = np.concatenate((in_np[:-1,:,:], next_np), axis=1)

  if out_np is not None:
    return (tf.convert_to_tensor(in_np),
            tf.convert_to_tensor(out_np[memory:]))
  else:
    return (tf.convert_to_tensor(in_np), None)
  
x_train_np = np.pad(
    y_train_df[:-1].to_numpy().reshape((-1,1)), ((q+1,0),(0,0)), mode="edge"
  )
y_train_np = np.pad(y_train_df.to_numpy(), ((q,0),), mode="edge")

x_test_np = pd.concat(
    (y_train_df.iloc[-(q+1):], y_test_df[:-1])
  ).to_numpy().reshape((-1,1))
y_test_np = pd.concat((y_train_df.iloc[-q:], y_test_df)).to_numpy()

x_train_tensor,y_train_tensor = \
  tensor_memory_reshaper(x_train_np, y_train_np, q)
_,y_test_tensor = \
  tensor_memory_reshaper(x_test_np, y_test_np, q)

class StateResetCallback(keras.callbacks.Callback):
  def on_epoch_begin(self, epoch, logs=None):
    self.model.reset_states()

2023-11-23 16:15:05.914865: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-23 16:15:06.021106: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-23 16:15:06.021183: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-23 16:15:06.021203: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-23 16:15:06.071570: I tensorflow/core/platform/cpu_feature_g

# Model instantiation

In [4]:
from datetime import datetime
import transformer_SV

import matplotlib.pyplot as plt

model_dict = {}
callbacks = [
    keras.callbacks.EarlyStopping(monitor="val_loss",patience=10,
                                  restore_best_weights=True),
    StateResetCallback()
  ]

date_format = "%Y%m%d%H%M%S"

try:
  for head_size in [2,5,10]:
    for num_heads in [5,10,20]:
      for ff_dim in [10,20,50]:
        for mlp_units in [32,64,128]:
          serial = "model_" + datetime.now().strftime(date_format)
          
          transformer_model = transformer_SV.build_model(
              input_shape=x_train_tensor.shape[1:],
              head_size=head_size,
              num_heads=num_heads,
              ff_dim=ff_dim,
              num_transformer_blocks=1,
              mlp_units=[mlp_units],
              mlp_dropout=0.5,
              dropout=0.5
            )

          transformer_model.compile(
              loss="mse",
              optimizer="Adam"
            )

          transformer_model.fit(
              x=x_train_tensor,
              y=y_train_tensor,
              validation_split=0.2,
              epochs=200,
              shuffle=True,
              callbacks=callbacks,
              verbose=0
            )
          transformer_model.save(f"./models/{serial}.keras")
    
          transformer_fit = transformer_model.predict(x=x_train_tensor,
                                                      verbose=0).flatten()

          transformer_prediction = np.zeros(y_test_tensor.shape)
          buffer = y_train_np[-(q+1):].copy()
          for idx in range(len(y_test_tensor)):
            current_input,_ = tensor_memory_reshaper(buffer, None, q, 1)
            transformer_prediction[idx] = \
              transformer_model.predict(current_input, batch_size=1,
                                        verbose=0).flatten()[-1]

            buffer[:-1] = buffer[1:]
            buffer[-1] = transformer_prediction[idx]

          transformer_rmse = np.sqrt(
              np.mean((transformer_prediction-y_test_tensor)**2)
            )
          transforme_mae = np.abs(
              transformer_prediction-y_test_tensor
            ).max()

          model_dict[serial] = {
            "serial": serial,
            "head_size": head_size,
            "num_heads": num_heads,
            "ff_dim": ff_dim,
            "mlp_units": mlp_units,
            "seq_memory": q,
            "RMSE": transformer_rmse,
            "MAE": transforme_mae
          }
          print(f"Model: {model_dict[serial]}")

          plt.figure(figsize=(8,5))
          plt.plot(solar_energy_df["solar_mw"],
                  label="Measured")
          plt.plot(y_train_df.index, transformer_fit, label="Fitted")
          plt.plot(y_test_df.index, transformer_prediction, label="Estimated")
          plt.autoscale(True, "x", tight=True)
          plt.title(f"Classic Transformer - {serial}")
          plt.legend()
          plt.savefig(f"./report/figures/{serial}_fit.svg")
          plt.close("all")
finally:
  model_df = pd.DataFrame.from_dict(list(model_dict.values()))
  model_df.to_excel(f"./results_{datetime.now().strftime(date_format)}.xlsx")

2023-11-23 16:15:10.622494: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-11-23 16:15:11.279535: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8800
2023-11-23 16:15:11.465451: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-11-23 16:15:11.721608: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f1bb27e4b80 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-11-23 16:15:11.721649: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA RTX A500 Laptop GPU, Compute Capability 8.6
2023-11-23 16:15:11.730164: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-11-23 16:15:11.823594: I ./tensorflow/compiler/jit/device_c

Model: {'serial': 'model_20231123161509', 'head_size': 2, 'num_heads': 5, 'ff_dim': 10, 'mlp_units': 32, 'seq_memory': 48, 'RMSE': 0.9521070786350811, 'MAE': 2.4080189779009054}
Model: {'serial': 'model_20231123161600', 'head_size': 2, 'num_heads': 5, 'ff_dim': 10, 'mlp_units': 64, 'seq_memory': 48, 'RMSE': 0.9430197308543782, 'MAE': 2.3594687297548482}
Model: {'serial': 'model_20231123161643', 'head_size': 2, 'num_heads': 5, 'ff_dim': 10, 'mlp_units': 128, 'seq_memory': 48, 'RMSE': 0.9594363457021841, 'MAE': 2.3880907907690236}
Model: {'serial': 'model_20231123161719', 'head_size': 2, 'num_heads': 5, 'ff_dim': 20, 'mlp_units': 32, 'seq_memory': 48, 'RMSE': 1.063124590954646, 'MAE': 2.642023058196468}
Model: {'serial': 'model_20231123161805', 'head_size': 2, 'num_heads': 5, 'ff_dim': 20, 'mlp_units': 64, 'seq_memory': 48, 'RMSE': 192164.8558499995, 'MAE': 1145306.4930827303}
Model: {'serial': 'model_20231123161852', 'head_size': 2, 'num_heads': 5, 'ff_dim': 20, 'mlp_units': 128, 'seq_m