# Direct multi-step 24-hour forecast 

In [1]:
from pathlib import Path
import warnings
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import HistGradientBoostingRegressor, RandomForestRegressor
from sklearn.model_selection import cross_validate
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import OrdinalEncoder
from sklearn.inspection import permutation_importance

import xgboost as xgb

In [2]:
# Current versions of XGBoost and Seaborn have a FutureWarning bug; suppress with this
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
# Show up to 50 columns when a dataframe is displayed (pandas default is 20)
pd.set_option('display.max_columns', 50)

In [4]:
# Data directories live next to (not inside) the current working directory
cwd_path = Path.cwd()
data_path = cwd_path.parent / 'data'
data_push_path = cwd_path.parent / 'data_to_push'

## Read in data (and wrangle a bit)

In [5]:
# NOTE: unpickling can execute arbitrary code; only load pickle files produced by this project
df = pd.read_pickle(data_push_path / 'df_main_smard_era5_final_reordered.pkl')

# Split off SMARD forecasted and actuals into separate dataframe and drop SMARD forecasted
df_smard_preds_vs_actuals = df.loc[:, ['forecasted_generation_smard_mwh', 'actual_generated_smard_mwh']].copy()
df = df.drop(columns=['forecasted_generation_smard_mwh'])

# Add column of pure random noise which will be used for comparison in feature importance analysis later
# Random values in [0, 1); the fixed seed keeps the column identical across kernel restarts
df['noise'] = np.random.default_rng(seed=42).random(df.shape[0])

# Back fill the 6_550 missing values in day-ahead price feature; random forest regressor doesn't handle NaNs!
# Assign the result back to the column: fillna(..., inplace=True) on a column selection is
# chained assignment and does not reliably update `df`; `method=` is also deprecated
df['day_ahead_price_eur_mwh'] = df['day_ahead_price_eur_mwh'].bfill()

df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 48168 entries, 2018-01-01 00:00:00+00:00 to 2023-06-30 23:00:00+00:00
Data columns (total 35 columns):
 #   Column                                          Non-Null Count  Dtype   
---  ------                                          --------------  -----   
 0   hour                                            48168 non-null  int32   
 1   day_of_week                                     48168 non-null  int32   
 2   day_of_month                                    48168 non-null  int32   
 3   day_of_year                                     48168 non-null  int32   
 4   week_of_year                                    48168 non-null  int32   
 5   month_number                                    48168 non-null  int32   
 6   year                                            48168 non-null  int32   
 7   meteorological_season                           48168 non-null  category
 8   public_holiday                                  48168 non-n

In [6]:
# Column names of the two ERA5 / ERA5-derived weather parameter groups, kept
# as lists so that either group can be selected or excluded in one go later
unweighted_wx_features = [
    'temp_2m_celsius',
    'mean_sea_level_pressure_mb',
    'wind_gusts_10m',
    'mean_wind_speed_10m',
    'mean_wind_speed_100m',
    'wind_direction_angle_10m',
    'wind_direction_angle_100m',
    'wind_direction_intercardinal_10m',
    'wind_direction_intercardinal_100m',
]

weighted_wx_features = [
    'weighted_temp_2m_celsius',
    'weighted_mean_sea_level_pressure_mb',
    'weighted_wind_gusts_10m',
    'weighted_mean_wind_speed_10m',
    'weighted_mean_wind_speed_100m',
    'weighted_mean_wind_direction_angle_10m',
    'weighted_mean_wind_direction_angle_100m',
    'weighted_wind_direction_intercardinal_10m',
    'weighted_wind_direction_intercardinal_100m',
]

# One independent dataframe per group: each keeps every column of df except
# the columns belonging to the other group
df_weighted = df.loc[:, [col not in unweighted_wx_features for col in df.columns]].copy()
df_unweighted = df.loc[:, [col not in weighted_wx_features for col in df.columns]].copy()

## Shifting the dataset and creating lagged features

In [7]:
# Work on a copy so that adding the lag columns below leaves df_unweighted untouched
df_shifted = df_unweighted.copy()

In [8]:
# Add 24 lag columns: column `actual_generated_lag_k` holds the actual
# generation from k rows earlier (k = 1..24); the first k rows become NaN
generation = df_shifted['actual_generated_smard_mwh']
for lag in range(1, 25):
    df_shifted[f'actual_generated_lag_{lag}'] = generation.shift(lag)

In [9]:
# Inspect the result: the leading rows hold NaN in the lag columns because
# there are no earlier observations to shift into them
df_shifted

Unnamed: 0_level_0,hour,day_of_week,day_of_month,day_of_year,week_of_year,month_number,year,meteorological_season,public_holiday,turbines_in_operation,total_nominal_capacity_operational_turbines_mw,total_nominal_capacity_smard_mw,day_ahead_price_eur_mwh,total_net_load_smard_mwh,residual_load_smard_mwh,temp_2m_celsius,mean_sea_level_pressure_mb,wind_gusts_10m,mean_wind_speed_10m,mean_wind_speed_100m,wind_direction_angle_10m,wind_direction_angle_100m,wind_direction_intercardinal_10m,wind_direction_intercardinal_100m,actual_generated_smard_mwh,noise,actual_generated_lag_1,actual_generated_lag_2,actual_generated_lag_3,actual_generated_lag_4,actual_generated_lag_5,actual_generated_lag_6,actual_generated_lag_7,actual_generated_lag_8,actual_generated_lag_9,actual_generated_lag_10,actual_generated_lag_11,actual_generated_lag_12,actual_generated_lag_13,actual_generated_lag_14,actual_generated_lag_15,actual_generated_lag_16,actual_generated_lag_17,actual_generated_lag_18,actual_generated_lag_19,actual_generated_lag_20,actual_generated_lag_21,actual_generated_lag_22,actual_generated_lag_23,actual_generated_lag_24
datetime_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
2018-01-01 00:00:00+00:00,0,0,1,1,1,1,2018,winter,1,27464,49732.097897,51633.0,59.53,44270.00,11634.25,8.510492,1000.349453,12.419700,5.978772,10.395984,220.756622,225.207733,SW,SW,29638.00,0.530071,,,,,,,,,,,,,,,,,,,,,,,,
2018-01-01 01:00:00+00:00,1,0,1,1,1,1,2018,winter,0,27464,49732.097897,51633.0,59.53,43195.75,9928.75,8.281854,1000.427969,13.007113,6.309438,10.805341,224.697327,228.564713,SW,SW,30173.75,0.999789,29638.00,,,,,,,,,,,,,,,,,,,,,,,
2018-01-01 02:00:00+00:00,2,0,1,1,1,1,2018,winter,0,27464,49732.097897,51633.0,59.53,42527.50,8371.50,8.135431,1000.821328,13.622252,6.685641,11.288467,228.800705,232.298874,SW,SW,31021.50,0.006770,30173.75,29638.00,,,,,,,,,,,,,,,,,,,,,,
2018-01-01 03:00:00+00:00,3,0,1,1,1,1,2018,winter,0,27464,49732.097897,51633.0,59.53,42515.75,8291.25,7.977838,1001.264219,14.090265,6.894922,11.552816,231.736252,235.041382,SW,SW,31015.00,0.175530,31021.50,30173.75,29638.00,,,,,,,,,,,,,,,,,,,,,
2018-01-01 04:00:00+00:00,4,0,1,1,1,1,2018,winter,0,27464,49732.097897,51633.0,59.53,42278.00,7465.50,7.647913,1001.436406,14.186666,7.060956,11.751130,234.274536,236.804169,SW,WSW,31534.00,0.020875,31015.00,31021.50,30173.75,29638.00,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-30 19:00:00+00:00,19,4,30,181,26,6,2023,summer,0,29412,59315.041337,57590.0,130.74,48794.25,44133.75,17.934381,1010.896875,3.549851,1.842328,3.449438,277.569061,278.582458,W,W,1698.75,0.435843,1664.50,2492.50,3215.75,3687.25,4078.25,4519.75,4432.50,4083.25,3437.50,2981.25,2689.00,2718.00,2431.75,2769.50,3071.50,3254.50,3126.25,2994.25,2831.00,2773.00,3009.75,3026.50,2643.75,2696.75
2023-06-30 20:00:00+00:00,20,4,30,181,26,6,2023,summer,0,29412,59315.041337,57590.0,122.39,46764.00,41337.00,16.834222,1011.285156,3.295196,1.792745,3.391158,265.232117,275.095490,W,W,2334.25,0.420313,1698.75,1664.50,2492.50,3215.75,3687.25,4078.25,4519.75,4432.50,4083.25,3437.50,2981.25,2689.00,2718.00,2431.75,2769.50,3071.50,3254.50,3126.25,2994.25,2831.00,2773.00,3009.75,3026.50,2643.75
2023-06-30 21:00:00+00:00,21,4,30,181,26,6,2023,summer,0,29412,59315.041337,57590.0,109.47,43702.00,36306.00,16.122461,1011.060078,3.497805,1.922537,3.609585,253.096542,263.328888,WSW,W,3734.00,0.896673,2334.25,1698.75,1664.50,2492.50,3215.75,3687.25,4078.25,4519.75,4432.50,4083.25,3437.50,2981.25,2689.00,2718.00,2431.75,2769.50,3071.50,3254.50,3126.25,2994.25,2831.00,2773.00,3009.75,3026.50
2023-06-30 22:00:00+00:00,22,4,30,181,26,6,2023,summer,0,29412,59315.041337,57590.0,109.74,42181.25,33264.50,15.426996,1010.850312,3.864601,2.141451,4.113689,244.405441,256.425568,WSW,WSW,5207.75,0.785468,3734.00,2334.25,1698.75,1664.50,2492.50,3215.75,3687.25,4078.25,4519.75,4432.50,4083.25,3437.50,2981.25,2689.00,2718.00,2431.75,2769.50,3071.50,3254.50,3126.25,2994.25,2831.00,2773.00,3009.75
