In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utils import processing
from utils import utils
import datetime as dt

from tensorflow import keras
from tensorflow.keras.losses import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from tensorflow.keras.preprocessing import timeseries_dataset_from_array
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [2]:
# Processing Parameters
split = 0.80 # split percentage for training data
steps = 1 # timesteps: 1 hour

lookback = 24 * 31  # the number of hours in the past the model is shown
horizon = 1 # the target hour in the future we want to predict 1 hour ahead

# Training parameters
batch_size = 224

In [3]:
plt.style.use('fivethirtyeight')
plt.rcParams["figure.figsize"] = (18,10)

In [4]:
kld = pd.read_csv('../data/processed/kolding_features.csv', index_col='Datetime', parse_dates=['Datetime'])
stb = pd.read_csv('../data/processed/strib_features.csv', index_col='Datetime', parse_dates=['Datetime'])

In [5]:
kld = kld[['Value']]
stb = stb[['Value']]

### Year Before

In [8]:
kld2021 = kld[kld.index.year == 2021] # Year to predict
kld2019 = kld[(kld.index.year == 2019) & (kld.index <= dt.datetime(2019, 5, 4, 23))]
kld2020 = kld[(kld.index.year == 2020) & (kld.index <= dt.datetime(2020, 5, 4, 23))]
kld2020 = kld2020[kld2020.index.date != dt.date(2020, 2, 29)]


stb2021 = stb[stb.index.year == 2021] # Year to predict
stb2019 = stb[(stb.index.year == 2019) & (stb.index <= dt.datetime(2019, 5, 4, 23))]
stb2020 = stb[(stb.index.year == 2020) & (stb.index <= dt.datetime(2020, 5, 4, 23))]
stb2020 = stb2020[stb2020.index.date != dt.date(2020, 2, 29)]

In [9]:
# use 2019 and 2020 data to predict 2021 for naive forecast
kld_true = kld2021['Value'].values
kld_pred19 = kld2019['Value'].values
kld_pred20 = kld2020['Value'].values

stb_true = stb2021['Value'].values
stb_pred19 = stb2019['Value'].values
stb_pred20 = stb2020['Value'].values

In [10]:
def metrics(true, pred):
    print(f'MSE: {np.mean(mean_squared_error(true, pred))}')
    print(f'RMSE: {np.sqrt(np.mean(mean_squared_error(true, pred)))}')
    print(f'MAE: {np.mean(mean_absolute_error(true, pred))}')
    print(f'MAPE: {np.mean(mean_absolute_percentage_error(true, pred))}')

In [11]:
# Kolding: `19 data to predict `21
metrics(kld_true, kld_pred19)

MSE: 523.1050031502017
RMSE: 22.871488870429964
MAE: 17.678650201612903
MAPE: 17.053641775857486


In [12]:
# Kolding: `20 data to predict `21
metrics(kld_true, kld_pred20)

MSE: 439.92791935114263
RMSE: 20.974458737978022
MAE: 15.944457325268818
MAPE: 15.446316452750416


In [13]:
# Strib: `19 data to predict `21
metrics(stb_true, stb_pred19)

MSE: 301.1630905937499
RMSE: 17.354051129167217
MAE: 14.523117943548392
MAPE: 21.414846990578045


In [14]:
# Strib: `20 data to predict `21
metrics(stb_true, stb_pred20)

MSE: 193.45500703124998
RMSE: 13.90881041035681
MAE: 11.10651579301075
MAPE: 15.85413071013743


In [15]:
# Average of both years
kld_pred_avg = (kld_pred19 + kld_pred20) / 2
stb_pred_avg = (stb_pred19 + stb_pred20) / 2

In [16]:
# Kolding: Using `19 and `20 average to predict `21
metrics(kld_true, kld_pred_avg)

MSE: 402.1595851905243
RMSE: 20.05391695381539
MAE: 15.070034610215057
MAPE: 14.348025451734136


In [17]:
# Strib: Using `19 and `20 average to predict `21
metrics(stb_true, stb_pred_avg)

MSE: 220.8466792137096
RMSE: 14.860911116540251
MAE: 12.193578293010749
MAPE: 17.63887845030034


 
### Common Sense Approach

In [6]:
# Create Train, Val, and Test Sets
kld_train, kld_val, kld_test = processing.ts_offset_split(kld, steps=steps, lookback=lookback,
                                                          horizon=horizon, batch_size=batch_size, scaler=None)
stb_train, stb_val, stb_test = processing.ts_offset_split(stb, steps=steps, lookback=lookback,
                                                          horizon=horizon, batch_size=batch_size, scaler=None)


Data has not been scaled.
Data has not been scaled.


In [9]:
val_steps = (2231 - lookback) #length of test set

def eval_naive(val):
    batch_maes = []
    batch_rmse = []
    batch_mape = []
    for step in range(val_steps):
        for batch in val.take(1):
            samples, targets = batch
            pred = samples[:, -1]
            # MAE
            mae = np.mean(mean_absolute_error(targets, pred))
            batch_maes.append(mae)
            # MAPE
            mape = np.mean(mean_absolute_percentage_error(targets, pred))
            batch_mape.append(mape)
            # RMSE
            rmse = np.sqrt(np.mean(mean_squared_error(targets, pred)))
            batch_rmse.append(rmse)
            
    print(f'MAE: {np.mean(batch_maes)}')
    print(f'MAPE: {np.mean(batch_mape)}')
    print(f'RMSE: {np.mean(batch_rmse)}')

#### Kolding Common Sense Forecast

In [10]:
eval_naive(kld_test)

MAE: 24.592071428571426
MAPE: 18.408773831241177
RMSE: 33.1373976481687


#### Strib Common Sense Forecast

In [11]:
eval_naive(stb_test)

MAE: 13.075562499999997
MAPE: 15.967196220315058
RMSE: 16.881341883179356
