# Very Short-Term Forecasting

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm.auto import tqdm
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from nets import SimpleMLPRegressor, CNNRegressor
from lightgbm import LGBMRegressor
from utils import create_time_series_splits, read_df, mape

In [2]:
# Columns handed to create_time_series_splits(exclude_columns=...) below.
excluded_columns = [
    # raw load column
    'Total Load',
    # forecast and P10/P90 columns
    'Most recent forecast',
    'Most recent P10',
    'Most recent P90',
    'Day-ahead 6PM forecast',
    'Day-ahead 6PM P10',
    'Day-ahead 6PM P90',
    'Week-ahead forecast',
    # calendar / context columns
    'Year',
    'Month',
    'Day',
    'Hour',
    'Minute',
    'Season',
    'Lockdown',
]

In [4]:
# Build the train/test splits that the evaluation loop iterates over.
num_splits = 5
splits = create_time_series_splits(
    # what to read and which columns play which role
    data=read_df('./data/preprocessed_data_18032025.csv'),
    target_column='Total Load Interpolated',
    exclude_columns=excluded_columns,
    elia_column_to_return='Day-ahead 6PM forecast',
    # split sizes
    num_splits=num_splits,
    train_size_days=24 * 28,  # 672 days, i.e. the same value as 7*4*24
    test_size_days=28,        # the same value as 7*4
    # windowing (the saved Y_train output shows four target columns, t+1..t+4)
    window_size_steps=4,
    prediction_horizon_steps=4,
    shifting_steps=1,
    alignment_times=['18:00'],
)

In [13]:
# Display the "X_train" entry of the first split (the evaluation loop passes it
# to model.fit as the feature matrix). Requires the cell that builds `splits`
# to have been executed first.
splits[0]["X_train"]

NameError: name 'splits' is not defined

In [11]:
# Display the "Y_train" entry of the first split (the evaluation loop passes it
# to model.fit as the target matrix).
splits[0]["Y_train"]

Unnamed: 0,Total Load Interpolated_t+1,Total Load Interpolated_t+2,Total Load Interpolated_t+3,Total Load Interpolated_t+4
2014-12-31 23:45:00+00:00,9755.00,9575.73,9494.09,9382.50
2015-01-01 00:00:00+00:00,9575.73,9494.09,9382.50,9349.16
2015-01-01 00:15:00+00:00,9494.09,9382.50,9349.16,9213.16
2015-01-01 00:30:00+00:00,9382.50,9349.16,9213.16,9069.42
2015-01-01 00:45:00+00:00,9349.16,9213.16,9069.42,8976.35
...,...,...,...,...
2016-11-02 22:30:00+00:00,9710.90,9618.17,9448.26,9346.88
2016-11-02 22:45:00+00:00,9618.17,9448.26,9346.88,9029.48
2016-11-02 23:00:00+00:00,9448.26,9346.88,9029.48,9016.92
2016-11-02 23:15:00+00:00,9346.88,9029.48,9016.92,8837.40


In [12]:
# Candidate estimator classes; each one is instantiated per split in the
# evaluation loop as m(**params[m.__name__]).
models = [
    LinearRegression,
    DecisionTreeRegressor,
    MultiOutputRegressor,
    SimpleMLPRegressor,
    CNNRegressor,
]

# Constructor keyword arguments, keyed by the class __name__.
params = {
    "LinearRegression": {},
    "DecisionTreeRegressor": {},
    # MultiOutputRegressor wraps a LightGBM regressor as its base estimator.
    "MultiOutputRegressor": {
        'estimator': LGBMRegressor(verbose=-1),
    },
    "SimpleMLPRegressor": {
        'device': 'mps',
        'epochs': 150,
        'verbose': True,
    },
    "CNNRegressor": {
        'device': 'mps',
        'epochs': 150,
        'verbose': True,
    },
}

In [13]:
# Evaluate every candidate model on every split and collect the test MAPE per
# split in `errors[<model class name>]`. The ELIA forecast is scored on the same
# test windows and stored under `errors["ELIA"]` as a baseline.
errors = {}
for m in models:
    model_name = m.__name__
    errors[model_name] = []
    for split_idx in tqdm(range(num_splits)):
        split = splits[split_idx]
        X_train = split['X_train'].to_numpy()
        Y_train = split['Y_train'].to_numpy()
        X_test = split['X_test'].to_numpy()
        Y_test = split['Y_test'].to_numpy()

        # Min-max scaling with statistics taken from the training arrays only.
        # np.min/np.max without an axis give ONE scalar for the whole array,
        # so all feature columns share the same scale.
        min_X_train, max_X_train = np.min(X_train), np.max(X_train)
        min_Y_train, max_Y_train = np.min(Y_train), np.max(Y_train)
        X_train = (X_train - min_X_train) / (max_X_train - min_X_train)
        Y_train = (Y_train - min_Y_train) / (max_Y_train - min_Y_train)
        X_test = (X_test - min_X_train) / (max_X_train - min_X_train)

        # A fresh estimator per split so nothing carries over between splits.
        model = m(**params[model_name])
        model.fit(X_train, Y_train)

        # Undo the target scaling so the error is measured in original units.
        Y_pred = (model.predict(X_test) * (max_Y_train - min_Y_train)) + min_Y_train
        # NOTE(review): argument order is (prediction, truth) — confirm this
        # matches the signature of utils.mape.
        errors[model_name].append(mape(Y_pred, Y_test))
    print(model_name, np.mean(errors[model_name]))

# The baseline does not depend on the fitted model, so it is computed once here
# instead of once per model; this also keeps it defined when `models` is empty.
# It is added last so "ELIA" stays the final key/column.
mapes_ELIA = [
    mape(splits[split_idx]['ELIA_test'].to_numpy(),
         splits[split_idx]['Y_test'].to_numpy())
    for split_idx in range(num_splits)
]
errors["ELIA"] = mapes_ELIA

  0%|          | 0/5 [00:00<?, ?it/s]

LinearRegression 1.5068709228264525


  0%|          | 0/5 [00:00<?, ?it/s]

DecisionTreeRegressor 2.1587809850751216


  0%|          | 0/5 [00:00<?, ?it/s]

MultiOutputRegressor 1.5896719019946393


  0%|          | 0/5 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

SimpleMLPRegressor 1.6176830964369213


  0%|          | 0/5 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

CNNRegressor 1.6412692586732391


In [14]:
# One column per model (plus the ELIA baseline), one row per split.
results = pd.DataFrame(errors)
# Mean and sample standard deviation of the per-split MAPE for each column.
results.agg(["mean", "std"])

Unnamed: 0,LinearRegression,DecisionTreeRegressor,MultiOutputRegressor,SimpleMLPRegressor,CNNRegressor,ELIA
mean,1.506871,2.158781,1.589672,1.617683,1.641269,2.102381
std,0.592206,0.403514,0.5457,0.456083,0.609207,0.317704


In [12]:
# Scratch check of the "<name>_t-<lag>" naming order: the lag is the outer loop,
# so all names for lag 1 come first, then lag 2, and so on.
a = [1, 2, 3]
b = ["a", "b", "c"]

[
    f"{name}_t-{lag}"
    for lag in a
    for name in b
]

['a_t-1',
 'b_t-1',
 'c_t-1',
 'a_t-2',
 'b_t-2',
 'c_t-2',
 'a_t-3',
 'b_t-3',
 'c_t-3']