In [143]:
import numpy as np
import pandas as pd

from treen import load_example
import enflow as ef
import gymnasium as gym

### Step 1) Define the energy system 

In [144]:
# Load the bundled GEFCom2014 wind example frame.
df = load_example('gefcom2014-wind')

# One unit-capacity wind farm per entry in the top level of the column index.
windfarms = [ef.WindFarm(capacity=1, name=site) for site in df.columns.levels[0]]

# Group all farms into a single portfolio.
portfolio = ef.Portfolio(assets=windfarms)

In [145]:
# Preview the first rows of the loaded example frame.
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Site1,Site1,Site1,Site1,Site1,Site2,Site2,Site2,Site2,Site2,...,Site9,Site9,Site9,Site9,Site9,Site10,Site10,Site10,Site10,Site10
Unnamed: 0_level_1,Unnamed: 1_level_1,Power,U10,U100,V10,V100,Power,U10,U100,V10,V100,...,Power,U10,U100,V10,V100,Power,U10,U100,V10,V100
ref_datetime,valid_datetime,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
2012-01-01 01:00:00,2012-01-01 01:00:00,0.0,2.1246,2.86428,-2.681966,-3.666076,0.596273,-0.171642,-0.112594,-5.466031,-7.101347,...,0.0,1.903254,2.516723,-1.495002,-2.078063,0.594361,1.65579,2.024257,-4.649593,-5.992232
2012-01-01 01:00:00,2012-01-01 02:00:00,0.054879,2.521695,3.344859,-1.79696,-2.464761,0.41118,-0.088859,-0.0141,-4.643205,-5.896778,...,0.0,2.173296,2.837109,-0.643641,-1.006914,0.569679,2.178609,2.623141,-4.184944,-5.318086
2012-01-01 01:00:00,2012-01-01 03:00:00,0.110234,2.67221,3.508448,-0.822516,-1.214093,0.167243,-0.246014,-0.225238,-3.613395,-4.489369,...,0.0,2.312744,3.034096,0.136595,-0.01549,0.330539,2.228633,2.645217,-3.242442,-4.052232
2012-01-01 01:00:00,2012-01-01 04:00:00,0.165116,2.457504,3.215233,-0.143642,-0.355546,0.037326,-0.680738,-0.772046,-2.919615,-3.598824,...,0.0,2.217565,2.941829,0.797156,0.823531,0.211308,2.077232,2.432947,-2.345085,-2.913831
2012-01-01 01:00:00,2012-01-01 05:00:00,0.15694,2.245898,2.957678,0.389576,0.332701,0.051206,-1.261106,-1.487537,-2.619786,-3.244667,...,0.0,1.900598,2.559177,1.285656,1.471102,0.17214,1.814716,2.097844,-1.577625,-1.9761


In [146]:
# Bundle the loaded frame with the portfolio it describes. The name and
# description identify the GEFCom2014 wind example that `df` was loaded from.
dataset = ef.Dataset(name="gefcom2014",
                     description="Wind power forecasting data from the Global Energy Forecasting Competition 2014 (GEFCom2014).",
                     energy_system=portfolio,
                     data={"data_gefcom2014_wind": df})

### Step 2) Define state, exogenous and action spaces

In [147]:
# State: one 'Power' column per asset, bounded to [0, 1].
state_space = ef.DataFrameSpace({
    farm.name: {'Power': gym.spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32)}
    for farm in portfolio.assets
})

# Exogenous inputs: four unbounded wind columns per asset.
# A separate Box is built for every column so that no space object is shared.
exogeneous_space = ef.DataFrameSpace({
    farm.name: {
        column: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float32)
        for column in ('U10', 'V10', 'U100', 'V100')
    }
    for farm in portfolio.assets
})

# Actions: 99 forecast columns (Quantile_forecast_1 .. Quantile_forecast_99)
# per asset, each bounded to [0, 1].
action_space = ef.DataFrameSpace({
    farm.name: {
        f"Quantile_forecast_{number}": gym.spaces.Box(low=0, high=1, shape=(1,))
        for number in range(1, 100)
    }
    for farm in portfolio.assets
})


In [148]:
# Draw three sample rows from the action space to inspect its column layout.
action_space.sample(n_rows=3)

Unnamed: 0_level_0,Site1,Site1,Site1,Site1,Site1,Site1,Site1,Site1,Site1,Site1,...,Site9,Site9,Site9,Site9,Site9,Site9,Site9,Site9,Site9,Site9
Unnamed: 0_level_1,Quantile_forecast_1,Quantile_forecast_10,Quantile_forecast_11,Quantile_forecast_12,Quantile_forecast_13,Quantile_forecast_14,Quantile_forecast_15,Quantile_forecast_16,Quantile_forecast_17,Quantile_forecast_18,...,Quantile_forecast_90,Quantile_forecast_91,Quantile_forecast_92,Quantile_forecast_93,Quantile_forecast_94,Quantile_forecast_95,Quantile_forecast_96,Quantile_forecast_97,Quantile_forecast_98,Quantile_forecast_99
0,0.837348,0.360967,0.894261,0.045181,0.924268,0.207248,0.039155,0.592308,0.69328,0.067031,...,0.579525,0.407737,0.636381,0.99748,0.911098,0.40029,0.149113,0.82628,0.198423,0.001391
1,0.682897,0.40832,0.821909,0.505007,0.43475,0.653865,0.947773,0.924092,0.576941,0.096989,...,0.004608,0.561656,0.799356,0.214474,0.010241,0.507337,0.554264,0.283537,0.76009,0.423908
2,0.348161,0.74832,0.04788,0.846367,0.670771,0.775054,0.563875,0.066391,0.871596,0.14702,...,0.875101,0.937921,0.96795,0.835396,0.533905,0.786277,0.144454,0.90971,0.477578,0.858335


In [149]:
# Draw three sample rows from the exogenous space to inspect its column layout.
exogeneous_space.sample(n_rows=3)

Unnamed: 0_level_0,Site1,Site1,Site1,Site1,Site10,Site10,Site10,Site10,Site2,Site2,...,Site7,Site7,Site8,Site8,Site8,Site8,Site9,Site9,Site9,Site9
Unnamed: 0_level_1,U10,U100,V10,V100,U10,U100,V10,V100,U10,U100,...,V10,V100,U10,U100,V10,V100,U10,U100,V10,V100
0,-0.735895,-1.001889,-0.040306,0.337505,1.245916,2.007525,-0.54237,-0.262513,0.105425,0.032516,...,-0.384823,0.396762,0.401403,-0.772699,-1.194904,0.8704,0.65873,-0.039183,1.126278,0.836198
1,0.226214,-0.417589,0.445117,-0.477045,1.454729,-0.056192,-0.995153,-1.287023,0.551928,-0.478411,...,0.485367,0.022178,-0.387284,-0.360958,0.274118,0.090201,0.84547,-0.582151,-0.305689,0.193123
2,-0.920413,0.692832,-0.535392,-0.627698,0.070358,0.835419,-1.110435,0.709685,-1.470986,1.205148,...,0.652099,0.06217,-0.777496,-1.365857,-0.787745,-1.003496,0.885643,1.205912,-0.824563,0.702019


In [150]:
# Sample the action space again (same call as the earlier sampling cell).
# NOTE(review): this cell looks redundant — confirm whether it is still needed.
action_space.sample(n_rows=3)

Unnamed: 0_level_0,Site1,Site1,Site1,Site1,Site1,Site1,Site1,Site1,Site1,Site1,...,Site9,Site9,Site9,Site9,Site9,Site9,Site9,Site9,Site9,Site9
Unnamed: 0_level_1,Quantile_forecast_1,Quantile_forecast_10,Quantile_forecast_11,Quantile_forecast_12,Quantile_forecast_13,Quantile_forecast_14,Quantile_forecast_15,Quantile_forecast_16,Quantile_forecast_17,Quantile_forecast_18,...,Quantile_forecast_90,Quantile_forecast_91,Quantile_forecast_92,Quantile_forecast_93,Quantile_forecast_94,Quantile_forecast_95,Quantile_forecast_96,Quantile_forecast_97,Quantile_forecast_98,Quantile_forecast_99
0,0.923784,0.562542,0.837868,0.459868,0.384162,0.456399,0.08197,0.121961,0.214223,0.104984,...,0.3413,0.415192,0.872151,0.92447,0.970467,0.533539,0.072975,0.948833,0.829753,0.727098
1,0.304727,0.704436,0.080276,0.218127,0.984153,0.859901,0.666226,0.726099,0.960146,0.08859,...,0.461526,0.035954,0.708767,0.324796,0.548651,0.339458,0.68628,0.412606,0.944068,0.009562
2,0.224871,0.788183,0.271202,0.622689,0.776736,0.752829,0.07705,0.464065,0.082363,0.319574,...,0.448183,0.053635,0.032816,0.106432,0.902714,0.4545,0.242988,0.432172,0.341226,0.663856


### Step 3) Create environment

In [159]:
class GEFCom2014Wind(gym.Env):
    """Sequential forecasting environment over the GEFCom2014 wind example data.

    The data frame is cut into consecutive windows on the ``valid_datetime``
    index level. ``self.train[i]`` and ``self.test[i]`` are ``[start, end]``
    timestamp pairs; both bounds are inclusive.

    ``reset`` returns every column of the first training window. Each call to
    ``step`` then returns the 'Power' columns of the next training window and
    the wind columns of the next test window.

    NOTE: ``reset`` and ``step`` return plain data frames / a 3-tuple, which
    differs from the standard gymnasium ``reset``/``step`` return signatures.

    Args:
        dataset: Dataset whose ``data["data_gefcom2014_wind"]`` entry is a
            frame with a 'valid_datetime' index level and (site, variable)
            column levels.
    """

    def __init__(self, dataset: ef.Dataset): 
        self.dataset = dataset
        self.data = dataset.data["data_gefcom2014_wind"]
        # The spaces are the module-level objects defined earlier in the notebook.
        self.state_space = state_space
        self.exogeneous_space = exogeneous_space
        self.action_space = action_space
        # Index of the window pair most recently handed out.
        self.idx_counter = 0

        self.train = [["2012-01-01 01:00:00", "2012-10-01 00:00:00"],
                      ["2012-10-01 01:00:00", "2012-11-01 00:00:00"],
                      ["2012-11-01 01:00:00", "2012-12-01 00:00:00"],
                      ["2012-12-01 01:00:00", "2013-01-01 00:00:00"],
                      ["2013-01-01 01:00:00", "2013-02-01 00:00:00"],
                      ["2013-02-01 01:00:00", "2013-03-01 00:00:00"],
                      ["2013-03-01 01:00:00", "2013-04-01 00:00:00"],
                      ["2013-04-01 01:00:00", "2013-05-01 00:00:00"],
                      ["2013-05-01 01:00:00", "2013-06-01 00:00:00"],
                      ["2013-06-01 01:00:00", "2013-07-01 00:00:00"],
                      ["2013-07-01 01:00:00", "2013-08-01 00:00:00"],
                      ["2013-08-01 01:00:00", "2013-09-01 00:00:00"],
                      ["2013-09-01 01:00:00", "2013-10-01 00:00:00"],
                      ["2013-10-01 01:00:00", "2013-11-01 00:00:00"],
                      ["2013-11-01 01:00:00", "2013-12-01 00:00:00"]]
        
        # Each test window is one period later than the training window at
        # the same position.
        self.test = [["2012-10-01 01:00:00", "2012-11-01 00:00:00"],
                     ["2012-11-01 01:00:00", "2012-12-01 00:00:00"],
                     ["2012-12-01 01:00:00", "2013-01-01 00:00:00"],
                     ["2013-01-01 01:00:00", "2013-02-01 00:00:00"],
                     ["2013-02-01 01:00:00", "2013-03-01 00:00:00"],
                     ["2013-03-01 01:00:00", "2013-04-01 00:00:00"],
                     ["2013-04-01 01:00:00", "2013-05-01 00:00:00"],
                     ["2013-05-01 01:00:00", "2013-06-01 00:00:00"],
                     ["2013-06-01 01:00:00", "2013-07-01 00:00:00"],
                     ["2013-07-01 01:00:00", "2013-08-01 00:00:00"],
                     ["2013-08-01 01:00:00", "2013-09-01 00:00:00"],
                     ["2013-09-01 01:00:00", "2013-10-01 00:00:00"],
                     ["2013-10-01 01:00:00", "2013-11-01 00:00:00"],
                     ["2013-11-01 01:00:00", "2013-12-01 00:00:00"],
                     ["2013-12-01 01:00:00", "2014-01-01 00:00:00"]]

    def _window(self, period, columns=None):
        """Return the rows of ``self.data`` whose valid_datetime lies in ``period``.

        Args:
            period: ``[start, end]`` pair; both bounds are inclusive.
            columns: Optional column selector passed to ``.loc``; all columns
                are returned when omitted.
        """
        start, end = period
        valid_time = self.data.index.get_level_values('valid_datetime')
        in_period = (valid_time >= start) & (valid_time <= end)
        if columns is None:
            return self.data.loc[in_period]
        return self.data.loc[in_period, columns]

    def reset(self, return_dataframe=False):
        """Restart the episode and return the first training window.

        Args:
            return_dataframe: Accepted for interface compatibility; currently
                unused.

        Returns:
            pd.DataFrame: All columns of the rows falling in ``self.train[0]``.
        """
        self.idx_counter = 0
        # Read from the dataset passed to the constructor, not a notebook global.
        return self._window(self.train[0])

    def step(self, action=None):
        """Advance one window and return the newly revealed data.

        Args:
            action: Accepted for interface compatibility; currently unused.

        Returns:
            tuple: ``(next_state, next_exogenous, done)`` where ``next_state``
            holds the 'Power' columns of the next training window,
            ``next_exogenous`` holds the U10/V10/U100/V100 columns of the next
            test window, and ``done`` is True once the last training window
            has been returned.

        Raises:
            IndexError: If called again after the last window was returned.
        """
        upcoming = self.idx_counter + 1
        if upcoming >= len(self.train):
            raise IndexError("No windows left: the episode is finished, call reset() first.")

        next_state = self._window(self.train[upcoming], pd.IndexSlice[:, 'Power'])
        next_exogenous = self._window(self.test[upcoming],
                                      pd.IndexSlice[:, ['U10', 'V10', 'U100', 'V100']])

        # Done when the window just returned is the last training window.
        done = upcoming + 1 == len(self.train)
        self.idx_counter = upcoming

        return next_state, next_exogenous, done

### Step 4) Define the objective


In [161]:
from enflow.problems.objective import PinballLoss

### Step 5) Create the model


In [262]:
import lightgbm as lgb
import pandas as pd

class CatboostGEFCom2014(ef.Predictor):
    """Per-site, per-quantile gradient-boosting predictor.

    Despite the class name, the models are LightGBM regressors
    (``lgb.LGBMRegressor`` with the quantile objective). One model is fitted
    for every (site, quantile) pair and stored in ``self.models``.
    """

    def __init__(self, quantiles=None):
        """
        Initialize the Predictor class.
        
        Args:
            quantiles (list): List of quantiles for which to create separate models.
                              Example: [0.1, 0.5, 0.9]. Defaults to
                              [0.1, 0.5, 0.9] when omitted.
        """
        self.models = {}  # Dictionary to hold models for each site and quantile
        # Honour the caller's quantiles; copy so later mutation of the
        # argument does not affect this instance.
        self.quantiles = [0.1, 0.5, 0.9] if quantiles is None else list(quantiles)
        
    def train(self, features: pd.DataFrame, target: pd.DataFrame, **kwargs):
        """
        Train separate LightGBM models for each site and quantile.
        
        Args:
            features (pd.DataFrame): Dataframe whose top column level corresponds to sites.
            target (pd.DataFrame): The target dataframe (y), with the same top column level.
            kwargs: Additional parameters to pass to the LGBMRegressor model.
                    They override the defaults set here.
        """
        # Get the list of unique sites from the top column level
        sites = features.columns.get_level_values(0).unique()

        # Loop over each site
        for site in sites:
            # Extract the features and target for the current site
            site_features = features.xs(site, axis=1, level=0)
            site_target = target.xs(site, axis=1, level=0)

            # Loop over each quantile
            for quantile in self.quantiles:
                # Initialize a LightGBM model for this quantile
                params = {'objective': 'quantile', 'alpha': quantile, "verbose": -1}
                params.update(kwargs)  # Add any additional LightGBM parameters
                
                model = lgb.LGBMRegressor(**params)
                
                # Train the model on the site's data
                model.fit(site_features, site_target)
                
                # Store the trained model with a key (site, quantile)
                self.models[(site, quantile)] = model

    def predict(self, features: pd.DataFrame):
        """
        Make predictions for all sites and quantiles using the trained models.
        
        Args:
            features (pd.DataFrame): The feature dataframe (X), whose top column
                                     level corresponds to sites.
        
        Returns:
            pd.DataFrame: Predictions indexed like ``features``, with one column
                          per (site, "quantile_<percent>") pair.

        Raises:
            ValueError: If no model was trained for a required site/quantile pair.
        """
        predictions = {}

        sites = features.columns.get_level_values(0).unique()

        # Loop over each site and quantile
        for site in sites:
            # Extract the features for the current site
            site_features = features.xs(site, axis=1, level=0)

            for quantile in self.quantiles:
                # Check if the model for the given site and quantile exists
                if (site, quantile) not in self.models:
                    raise ValueError(f"No trained model for site '{site}' and quantile '{quantile}'.")

                # Make predictions using the stored model
                model = self.models[(site, quantile)]
                site_predictions = model.predict(site_features)

                # Key by (site, label); the label is the quantile as a rounded percentage
                predictions[(site, f"quantile_{round(100*quantile)}")] = site_predictions

        # Tuple keys become two-level columns; rows are aligned with the input
        predictions = pd.DataFrame.from_dict(predictions)
        predictions.index = features.index
        
        return predictions


In [263]:
# Fit the predictor on the U10/V10 columns of every site, with Power as target.
predictor = CatboostGEFCom2014()
predictor.train(
    features=df.loc[:, pd.IndexSlice[:, ["U10", "V10"]]],
    target=df.loc[:, pd.IndexSlice[:, "Power"]],
)

In [264]:
# Predict on the same U10/V10 columns that were passed to train() above.
predictor.predict(features=df.loc[:,(slice(None), ["U10", "V10"])])

Unnamed: 0_level_0,Unnamed: 1_level_0,Site1,Site1,Site1,Site2,Site2,Site2,Site3,Site3,Site3,Site4,...,Site7,Site8,Site8,Site8,Site9,Site9,Site9,Site10,Site10,Site10
Unnamed: 0_level_1,Unnamed: 1_level_1,quantile_10,quantile_50,quantile_90,quantile_10,quantile_50,quantile_90,quantile_10,quantile_50,quantile_90,quantile_10,...,quantile_90,quantile_10,quantile_50,quantile_90,quantile_10,quantile_50,quantile_90,quantile_10,quantile_50,quantile_90
ref_datetime,valid_datetime,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
2012-01-01 01:00:00,2012-01-01 01:00:00,0.019041,0.182683,0.450023,0.321280,0.698802,0.895029,0.316881,0.602427,0.857842,0.071743,...,0.083236,0.000000e+00,-0.000519,0.079825,0.000000e+00,0.080557,0.219355,3.756120e-01,0.702652,0.952726
2012-01-01 01:00:00,2012-01-01 02:00:00,0.018177,0.183932,0.478619,0.256163,0.648614,0.843780,0.164322,0.364693,0.723257,0.005650,...,0.099903,0.000000e+00,-0.000496,0.113907,1.903292e-04,0.100236,0.303851,2.072917e-01,0.596181,0.957036
2012-01-01 01:00:00,2012-01-01 03:00:00,0.038226,0.236469,0.520034,0.075746,0.452458,0.770182,0.082339,0.288610,0.609236,0.002001,...,0.131530,-9.675213e-07,0.050910,0.159496,5.598080e-04,0.104716,0.291445,1.104306e-01,0.328957,0.891124
2012-01-01 01:00:00,2012-01-01 04:00:00,0.015072,0.156671,0.380125,0.033320,0.327028,0.641561,0.049286,0.258867,0.484978,0.001030,...,0.246097,1.167793e-02,0.105531,0.249238,-5.457183e-09,0.092971,0.306339,1.000524e-02,0.238517,0.819778
2012-01-01 01:00:00,2012-01-01 05:00:00,0.011781,0.143335,0.423033,0.040495,0.239598,0.560727,0.016513,0.253836,0.481383,0.001397,...,0.358210,2.858782e-02,0.143353,0.386800,0.000000e+00,0.009984,0.193305,1.279349e-07,0.061517,0.465009
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2013-12-31 01:00:00,2013-12-31 20:00:00,0.129919,0.555189,0.862168,0.201635,0.756356,0.938464,0.089024,0.424835,0.756011,0.578162,...,0.798065,7.937297e-02,0.654637,0.858155,1.955738e-01,0.670117,0.941839,5.128547e-01,0.873424,0.965744
2013-12-31 01:00:00,2013-12-31 21:00:00,0.148537,0.595106,0.879154,0.406811,0.740943,0.941219,0.088832,0.521981,0.867599,0.578162,...,0.831177,9.394141e-02,0.738395,0.912082,1.955738e-01,0.680668,0.916091,5.128547e-01,0.884382,0.979357
2013-12-31 01:00:00,2013-12-31 22:00:00,0.261004,0.588266,0.939648,0.379019,0.630665,0.860346,0.171682,0.628052,0.894931,0.578162,...,0.679034,1.322866e-01,0.455612,0.776017,1.955738e-01,0.692873,0.891373,1.680033e-01,0.785204,0.980317
2013-12-31 01:00:00,2013-12-31 23:00:00,0.040923,0.188008,0.422063,0.224493,0.556199,0.871211,0.360569,0.704741,0.940998,0.686719,...,0.574843,6.433630e-02,0.345886,0.667089,1.126636e-01,0.353066,0.668032,1.261548e-01,0.558438,0.956885


In [261]:
# Next steps: 
# 1) Run initial training and evaluate the scores
# 2) Implement the step function and calculate scores in a loop
# 3) Finally compare with the performance of the participants in the competition


### Step 6) Run the sequential decision loop and evaluate model performance

In [208]:
# Start an episode and fit the predictor on the first window returned by reset().
env = GEFCom2014Wind(dataset=dataset)
initial_df = env.reset()

# Split that window into U10/V10 features and the Power target.
initial_features = initial_df.loc[:, pd.IndexSlice[:, ["U10", "V10"]]]
initial_target = initial_df.loc[:, pd.IndexSlice[:, "Power"]]

predictor.train(features=initial_features, target=initial_target)

In [210]:
# Echo the predictor object as the cell output.
# NOTE(review): the recorded output for this cell is a "not callable" TypeError,
# which a bare name cannot raise — the output looks stale; re-run to confirm.
predictor

TypeError: 'CatboostGEFCom2014' object is not callable

In [137]:
# Advance the environment by one window; no action is passed (step defaults to action=None).
state, exogenous, done = env.step()