In [39]:
import numpy as np
import pandas as pd

from energy_consumption.help_functions import get_forecast_timestamps
from energy_consumption.feature_selection.collect import dummy_mapping
from energy_consumption.feature_selection.collect import weather_sunhours


def get_energy_and_forecast_quantreg(energydata, n_train=1000):
    """Build the training frame and forecast design matrix for quantile regression.

    The observed data is extended by the rows returned from
    ``get_forecast_timestamps.forecast_timestamps`` (target set to NaN), passed
    through the project's day/hour mapping and sun-hours merge helpers, and
    enriched with two lags of ``energy_consumption`` shifted by 168 and 8760
    rows (one week / one year, assuming hourly rows - TODO confirm). A leading
    ``constant`` column of ones is inserted as intercept.

    Parameters
    ----------
    energydata : pandas.DataFrame
        Observations with an ``energy_consumption`` column and an index that
        ``pd.to_datetime`` can parse. The input is copied, not modified.
    n_train : int, default 1000
        Number of rows directly preceding the forecast rows that are kept for
        fitting. The default reproduces the former hard-coded window.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(energydf, energyforecast)``: the training rows (including
        ``energy_consumption``) and the forecast rows (without it).
    """
    energydf = energydata.copy()
    energyforecast = get_forecast_timestamps.forecast_timestamps(
        energydf.index[-1])
    energyforecast['energy_consumption'] = np.nan
    # Derive the horizon from the helper's output instead of assuming 100 rows.
    horizon = len(energyforecast)
    merged = pd.concat([energydf, energyforecast])

    merged.index = pd.to_datetime(merged.index)
    merged = dummy_mapping.get_day_mapping(merged)
    merged = dummy_mapping.get_hour_mapping(merged)
    merged = weather_sunhours.ec_sun_hours_merge(merged)

    # Lags are computed on the full history before it is truncated, so the
    # retained window can look back further than its own first row.
    merged['weekly_lag'] = merged['energy_consumption'].shift(168)
    merged['yearly_lag'] = merged['energy_consumption'].shift(8760)

    # Explicit copy: the window is modified in place by insert() below.
    window = merged.iloc[-(n_train + horizon):].copy()
    window.insert(loc=0, column='constant', value=1)

    # Split by position counted from the front so a zero horizon is handled too.
    split = len(window) - horizon
    energydf = window.iloc[:split]
    energyforecast = window.iloc[split:].drop(columns=['energy_consumption'])

    return energydf, energyforecast

In [40]:
import pandas as pd
import numpy as np

import statsmodels.api as sm

from energy_consumption.feature_selection.extract import extract_energy_data, extract_all_features
from energy_consumption.help_functions import get_forecast_timestamps, create_submission_frame


def get_QuantRegExShort_forecasts(energydf=np.nan, indexes=[47, 51, 55, 71, 75, 79], quantiles=[0.025, 0.25, 0.5, 0.75, 0.975], abs_eval=False):
    """Fit one quantile regression per quantile and return selected forecasts.

    Parameters
    ----------
    energydf : pandas.DataFrame, optional
        Energy data handed to ``get_energy_and_forecast_quantreg``. When it is
        omitted (``np.nan`` default), ``None`` or any float, two years of data
        are loaded via ``extract_energy_data.get_data``.
    indexes : sequence of int
        Positions within the forecast timestamps that are returned.
    quantiles : sequence of float
        Quantile levels; each produces a column named ``f'q{level}'``.
    abs_eval : bool, default False
        If falsy, the selection is passed through
        ``create_submission_frame.get_frame``; otherwise the raw quantile
        columns are returned.

    Returns
    -------
    The selected quantile forecasts, or whatever
    ``create_submission_frame.get_frame`` builds from them.
    """
    # isinstance also catches numpy floats, which `type(x) == float` missed;
    # None is accepted as a more conventional "no data" marker.
    if energydf is None or isinstance(energydf, float):
        energydf = extract_energy_data.get_data(num_years=2)

    energydata, X_pred = get_energy_and_forecast_quantreg(energydf.copy())

    X = energydata.drop(columns=['energy_consumption'])
    y = energydata['energy_consumption']

    # model
    model_qr = sm.QuantReg(y, X)

    # create dataframe to store forecast quantiles
    energyforecast = get_forecast_timestamps.forecast_timestamps(
        energydata.index[-1])

    quantile_columns = []
    for q in quantiles:
        column = f'q{q}'
        # Assignment aligns the predictions on the forecast index.
        energyforecast[column] = model_qr.fit(q=q).predict(X_pred)
        if column not in quantile_columns:
            quantile_columns.append(column)

    # list(...) lets callers pass tuples or arrays as positions.
    selected_forecasts = energyforecast.loc[
        energyforecast.index[list(indexes)], quantile_columns]

    if not abs_eval:
        selected_forecasts = create_submission_frame.get_frame(
            selected_forecasts)

    return selected_forecasts

In [8]:
# Load two years of energy data once and keep a separate working copy, so
# later cells can rebind `energydata` without touching the loaded frame.
energydf = extract_energy_data.get_data(num_years=2)
energydata = energydf.copy()


100%|██████████| 104/104 [01:13<00:00,  1.42it/s]


In [42]:
# Run the quantile-regression forecast on the already loaded frame (avoids
# the data download inside the function) and display the result.
forecasts = get_QuantRegExShort_forecasts(energydf)
forecasts

2022-02-14 00:00:00
2024-02-12 16:00:00


Unnamed: 0,date_time,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,2024-02-09 12:00:00,2024-02-07,energy,36 hour,66.291027,69.492307,70.399522,71.245384,73.650108
1,2024-02-09 16:00:00,2024-02-07,energy,40 hour,62.713689,64.850149,66.280086,66.714746,69.052795
2,2024-02-09 20:00:00,2024-02-07,energy,44 hour,62.289852,64.169611,64.998062,65.33034,68.439399
3,2024-02-10 12:00:00,2024-02-07,energy,60 hour,56.725301,61.068296,62.336159,63.638844,68.085547
4,2024-02-10 16:00:00,2024-02-07,energy,64 hour,53.182459,56.670767,58.474441,59.303377,63.522849
5,2024-02-10 20:00:00,2024-02-07,energy,68 hour,51.578502,55.287674,56.708026,57.211346,61.80895


In [26]:
# Inspect the index types of both frames.
# NOTE: a cell only displays its last expression, so the result of the
# first line is computed and discarded.
type(energydata.index)
type(energydf.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [15]:
# NOTE(review): get_energy_and_forecast_quantreg returns a 2-tuple
# (training frame, forecast features), so `energydata` is bound to that tuple
# here, not to a DataFrame.
energydata = get_energy_and_forecast_quantreg(energydf)

# Disabled: these lines need a DataFrame and would fail on the tuple above.
#X = energydata.drop(columns=['energy_consumption'])
#y = energydata['energy_consumption']


Unnamed: 0_level_0,constant,period1,period2,period3,period4,period5,period6,saturday,working_day,holiday,sun_hours
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-11-15 00:00:00,1,0,0,0,0,0,0,0,1,0,9.070278
2023-11-15 01:00:00,1,0,1,0,0,0,0,0,1,0,9.070278
2023-11-15 02:00:00,1,0,1,0,0,0,0,0,1,0,9.070278
2023-11-15 03:00:00,1,0,1,0,0,0,0,0,1,0,9.070278
2023-11-15 04:00:00,1,0,1,0,0,0,0,0,1,0,9.070278
...,...,...,...,...,...,...,...,...,...,...,...
2024-02-07 08:00:00,1,0,0,1,0,0,0,0,1,0,9.640278
2024-02-07 09:00:00,1,0,0,0,0,0,0,0,1,0,9.640278
2024-02-07 10:00:00,1,0,0,0,0,0,1,0,1,0,9.640278
2024-02-07 11:00:00,1,0,0,0,0,0,1,0,1,0,9.640278


In [16]:
# Display the current `energydata`.
# NOTE(review): several cells rebind this name, so what is shown depends on
# the order in which the cells were executed.
energydata

Unnamed: 0,date_time,energy_consumption
0,2022-02-14 00:00:00,52.70975
1,2022-02-14 01:00:00,51.26975
2,2022-02-14 02:00:00,50.72800
3,2022-02-14 03:00:00,51.08425
4,2022-02-14 04:00:00,52.70100
...,...,...
17460,2024-02-11 12:00:00,
17461,2024-02-11 13:00:00,
17462,2024-02-11 14:00:00,
17463,2024-02-11 15:00:00,


In [3]:
# Rebind `energydata` to the output of the project's feature extraction.
# NOTE(review): the cell feeds its own result back in when re-run, so it is
# not idempotent.
energydata = extract_all_features.get_energy_and_features(energydata,
                                                          feature_selection=True)

2023-11-13 00:00:00
2024-02-08 12:00:00


In [6]:
import pandas as pd
import numpy as np

from sklearn.ensemble import GradientBoostingRegressor

from energy_consumption.feature_selection.extract import extract_energy_data
from energy_consumption.help_functions import create_submission_frame
from energy_consumption.models.XGBoost.functions import get_energy_and_forecast, get_opt_parameters


def get_XGBoost_forecasts(energydf=np.nan, indexes=[47, 51, 55, 71, 75, 79], quantiles=[0.025, 0.25, 0.5, 0.75, 0.975], periods=100, abs_eval=False):
    """Return the training feature matrix prepared for the boosting model.

    NOTE: work in progress. Despite its name the function does not forecast
    yet; it stops after feature preparation and returns the feature matrix.
    ``indexes``, ``quantiles``, ``periods`` and ``abs_eval`` are accepted for
    interface parity with the other forecast functions but are currently
    unused.

    Parameters
    ----------
    energydf : pandas.DataFrame, optional
        Energy data handed to ``get_energy_and_forecast``. When it is omitted
        (``np.nan`` default), ``None`` or any float, two years of data are
        loaded via ``extract_energy_data.get_data``.

    Returns
    -------
    pandas.DataFrame
        The prepared training frame without the ``energy_consumption`` column.
    """
    # isinstance also catches numpy floats, which `type(x) == float` missed;
    # None is accepted as a more conventional "no data" marker.
    if energydf is None or isinstance(energydf, float):
        energydf = extract_energy_data.get_data(num_years=2)

    # The forecast features are not consumed yet, hence the underscore name.
    energydata, _X_pred = get_energy_and_forecast(energydf.copy())

    return energydata.drop(columns=['energy_consumption'])

In [7]:
# Called without arguments, so the function loads the energy data itself
# before preparing the features.
get_XGBoost_forecasts()

100%|██████████| 104/104 [01:08<00:00,  1.52it/s]


Unnamed: 0_level_0,hour,day_of_week,weekly_lag,yearly_lag,index
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-11-09 13:00:00,13,3,66.10125,64.33525,1
2023-11-09 14:00:00,14,3,64.85050,63.10050,2
2023-11-09 15:00:00,15,3,63.62850,62.53325,3
2023-11-09 16:00:00,16,3,63.89550,63.21525,4
2023-11-09 17:00:00,17,3,65.77275,65.81500,5
...,...,...,...,...,...
2024-02-07 08:00:00,8,2,68.53025,66.27050,2156
2024-02-07 09:00:00,9,2,69.11000,66.10575,2157
2024-02-07 10:00:00,10,2,70.08825,66.57900,2158
2024-02-07 11:00:00,11,2,71.09600,67.21500,2159


In [5]:
# Design matrix: strip the target and the features that are not used ...
X = energydata.drop(
    [
        'energy_consumption',
        'population',
        'spring_autumn',
        'abs_log_ret_weekly',
        'index',
        'winter',
        'tavg',
        'wspd',
    ],
    axis=1,
)
# ... and put an intercept column of ones in front.
X.insert(0, 'constant', 1)
X

Unnamed: 0_level_0,constant,period1,period2,period3,period4,period5,period6,saturday,working_day,holiday,sun_hours
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-11-15 00:00:00,1,0,0,0,0,0,0,0,1,0,9.070278
2023-11-15 01:00:00,1,0,1,0,0,0,0,0,1,0,9.070278
2023-11-15 02:00:00,1,0,1,0,0,0,0,0,1,0,9.070278
2023-11-15 03:00:00,1,0,1,0,0,0,0,0,1,0,9.070278
2023-11-15 04:00:00,1,0,1,0,0,0,0,0,1,0,9.070278
...,...,...,...,...,...,...,...,...,...,...,...
2024-02-07 08:00:00,1,0,0,1,0,0,0,0,1,0,9.640278
2024-02-07 09:00:00,1,0,0,0,0,0,0,0,1,0,9.640278
2024-02-07 10:00:00,1,0,0,0,0,0,1,0,1,0,9.640278
2024-02-07 11:00:00,1,0,0,0,0,0,1,0,1,0,9.640278


In [None]:
# Scratch copy of a forecast-function body, dedented so that it is valid at
# cell level. Expects `energydf` to exist in the kernel already.
if isinstance(energydf, float):
    # use derived optimum for number of years (see notebook)
    energydf = extract_energy_data.get_data(num_years=0.25)

energydata = energydf.copy()
# get features; inputs longer than 1000 rows are cut to the last 1000 rows
if len(energydata) > 1000:
    energydata = extract_all_features.get_energy_and_features(energydata,
                                                              feature_selection=True)[-1000:]
else:
    energydata = extract_all_features.get_energy_and_features(energydata,
                                                              feature_selection=True)

# new: drop index and winter, since they are not important for monthly forecasts
X = energydata.drop(columns=[
                    'energy_consumption', 'population', 'spring_autumn', 'abs_log_ret_weekly', 'index', 'winter', 'tavg', 'wspd'])
X.insert(loc=0, column='constant', value=1)
y = energydata['energy_consumption']

# create dataframe to store forecast quantiles
energyforecast = get_forecast_timestamps.forecast_timestamps(
    energydata.index[-1])

X_pred = extract_all_features.get_energy_and_features(energyforecast,
                                                      feature_selection=True)

# errors='ignore' skips columns that are absent from X_pred, which is what
# the former per-column membership check did.
X_pred = X_pred.drop(columns=['population', 'spring_autumn', 'abs_log_ret_weekly',
                              'index', 'winter', 'tavg', 'wspd'],
                     errors='ignore')
X_pred.insert(loc=0, column='constant', value=1)