# Short-Term Forecasting

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm.auto import tqdm
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from nets import SimpleMLPRegressor, CNNRegressor
from lightgbm import LGBMRegressor
from utils import create_time_series_splits, read_df, mape

In [3]:
# Column names handed to create_time_series_splits via `exclude_columns`.
# NOTE(review): the grouping below is inferred from the column names only.
excluded_columns = [
    # raw load column
    "Total Load",
    # forecast / quantile columns
    "Most recent forecast",
    "Most recent P10",
    "Most recent P90",
    "Day-ahead 6PM forecast",
    "Day-ahead 6PM P10",
    "Day-ahead 6PM P90",
    "Week-ahead forecast",
    # calendar columns
    "Year",
    "Month",
    "Day",
    "Hour",
    "Minute",
    "Season",
    # remaining columns
    "Lockdown",
    "Previous_24h",
    "Previous_7days",
]

In [None]:
# Build the train/test splits consumed by the evaluation loop.
num_splits = 5

# Keyword settings forwarded unchanged to create_time_series_splits.
# NOTE(review): `train_size_days` evaluates to 672 and `test_size_days` to 28;
# whether 7*4*24 is really meant as a number of days should be confirmed
# against the helper in utils.
split_settings = dict(
    train_size_days=7 * 4 * 24,
    test_size_days=7 * 4,
    num_splits=num_splits,
    window_size_steps=4 * 30,         # 120 input steps
    exclude_columns=excluded_columns,
    target_column="Total Load Interpolated",
    prediction_horizon_steps=4 * 30,  # 120 predicted steps
    shifting_steps=1,
    elia_column_to_return="Day-ahead 6PM forecast",
    alignment_times=["18:00"],
)

splits = create_time_series_splits(
    data=read_df("data/preprocessed_data_multimodality_18032025.csv"),
    **split_settings,
)

In [10]:
# Model classes to benchmark, in the order they are evaluated.
models = [
    LinearRegression,
    DecisionTreeRegressor,
    MultiOutputRegressor,
    SimpleMLPRegressor,
    CNNRegressor,
]

# Constructor keyword arguments, looked up by each class's __name__.
# NOTE(review): 'mps' is presumably a torch device string consumed by the
# `nets` wrappers; it will not be available on every machine -- confirm.
params = {
    "LinearRegression": dict(),
    "DecisionTreeRegressor": dict(),
    "MultiOutputRegressor": dict(estimator=LGBMRegressor(verbose=-1)),
    "SimpleMLPRegressor": dict(device="mps", epochs=150, verbose=True),
    "CNNRegressor": dict(device="mps", epochs=150, verbose=True),
}

In [12]:
# Benchmark every model class on every time-series split.
#
# For each (model, split) pair the inputs and targets are min-max scaled with
# statistics taken from the training part only, the model is fitted, its
# predictions are mapped back to the original target scale, and the MAPE
# against the unscaled test targets is stored in `errors[<class name>]`
# (one value per split).  The ELIA reference forecast is scored once per
# split and stored under `errors["ELIA"]`.
errors = {}
for model_cls in models:
    model_name = model_cls.__name__
    errors[model_name] = []
    for split_idx in tqdm(range(num_splits)):
        split = splits[split_idx]
        X_train = split['X_train'].to_numpy()
        Y_train = split['Y_train'].to_numpy()
        X_test = split['X_test'].to_numpy()
        Y_test = split['Y_test'].to_numpy()

        # np.min / np.max without `axis` reduce over the whole array, so a
        # single scalar min/max is shared by every column of X (and of Y).
        # NOTE(review): per-feature scaling may be preferable if the input
        # columns live on different scales -- left unchanged to keep results.
        min_X_train, max_X_train = np.min(X_train), np.max(X_train)
        min_Y_train, max_Y_train = np.min(Y_train), np.max(Y_train)

        # A zero range (constant data) would turn the division below into
        # NaN/inf; fall back to 1.0 so constant data maps to 0 instead.
        range_X_train = (max_X_train - min_X_train) or 1.0
        range_Y_train = (max_Y_train - min_Y_train) or 1.0

        X_train = (X_train - min_X_train) / range_X_train
        Y_train = (Y_train - min_Y_train) / range_Y_train
        # Test inputs reuse the training statistics to avoid leaking test data.
        X_test = (X_test - min_X_train) / range_X_train

        model = model_cls(**params[model_name])
        model.fit(X_train, Y_train)

        # Undo the target scaling before scoring against the raw Y_test.
        Y_pred = (model.predict(X_test) * range_Y_train) + min_Y_train
        errors[model_name].append(mape(Y_pred, Y_test))
    print(model_name, np.mean(errors[model_name]))

# The ELIA baseline does not depend on the fitted model, so score it once per
# split here instead of once per model inside the loop above.  It is added
# last so it stays the final key (and therefore the last results column).
mapes_ELIA = [
    mape(splits[split_idx]['ELIA_test'].to_numpy(),
         splits[split_idx]['Y_test'].to_numpy())
    for split_idx in range(num_splits)
]
errors["ELIA"] = mapes_ELIA

  0%|          | 0/5 [00:00<?, ?it/s]

LinearRegression 4.660998189941622


  0%|          | 0/5 [00:00<?, ?it/s]

DecisionTreeRegressor 5.810599359422913


  0%|          | 0/5 [00:00<?, ?it/s]

MultiOutputRegressor 3.7743560460944026


  0%|          | 0/5 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

SimpleMLPRegressor 4.142328416873934


  0%|          | 0/5 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

Training epochs:   0%|          | 0/150 [00:00<?, ?it/s]

CNNRegressor 4.194132938069397


In [13]:
# Tabulate the collected errors: one column per key of `errors`,
# one row per entry of each list.
results = pd.DataFrame(errors)

# Keep only the mean and standard deviation rows of the describe() summary;
# the bare expression on the last line is what the cell displays.
summary_stats = results.describe()
summary_stats.loc[["mean", "std"]]

Unnamed: 0,LinearRegression,DecisionTreeRegressor,MultiOutputRegressor,SimpleMLPRegressor,CNNRegressor,ELIA
mean,4.660998,5.810599,3.774356,4.142328,4.194133,2.354688
std,0.298252,1.52068,0.319587,0.570675,0.995691,0.536661


In [2]:
# Scratch check of zip(): iterate two equal-length lists in lockstep and
# print each pair on its own line.
firsts = [1, 2, 3]
seconds = [4, 5, 6]
for a, b in zip(firsts, seconds):
    print(a, b)

1 4
2 5
3 6
