In [1]:
import os, sys
import plotly.express as px
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import darts
from darts.dataprocessing.transformers.boxcox import BoxCox
from darts.models import LightGBMModel, XGBModel, LinearRegressionModel, TFTModel, NHiTSModel, RNNModel, TFTModel
from darts.metrics import smape, mape, mase, mse, rmse, r2_score, mae
from darts.dataprocessing.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, RobustScaler   
from darts.dataprocessing.transformers.scaler import Scaler
from darts.utils.missing_values import extract_subseries

from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from torch.optim.lr_scheduler import ReduceLROnPlateau
from pytorch_lightning.callbacks import ModelCheckpoint
import torch
from wandb.xgboost import WandbCallback

import wandb
wandb.login()


import warnings
warnings.filterwarnings('ignore')

# Seed NumPy's legacy global RNG so NumPy-based randomness is repeatable across runs.
np.random.seed(42)

# Make the git repo root the working directory. The notebook is expected to be
# started one level below the root, so we step up once -- but only when we are
# not already at the root. That keeps this cell idempotent: re-running it no
# longer climbs one directory further each time and trips the assertion.
repo_name = 'net-load-forecasting'
if not os.getcwd().endswith(repo_name):
    os.chdir(r"..")
print("Current working directory: " + os.getcwd())
assert os.getcwd().endswith(repo_name), "Working directory is not the git repo root directory"


from utils.utils import *

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnikolaushouben[0m ([33mwattcast[0m). Use [1m`wandb login --relogin`[0m to force relogin


Current working directory: c:\Users\nik\Desktop\Berkeley_Projects\net-load-forecasting


In [2]:
# Directory holding the cleaned datasets, resolved against the current working directory.
clean_data_path = os.path.join(
    os.getcwd(),
    'data',
    'clean_data',
)

In [3]:
# Run parameters: everything a reader may want to tune lives in this cell.

# Which slice of the dataset to work on.
config_dataset = {
    'spatial_scale': '1_county',
    'METER': '2',
    'META': '1',
}

# Sampling interval in minutes: METER '1' maps to 1, every other METER value to 15.
config_dataset['temp_resolution'] = 1 if config_dataset['METER'] == '1' else 15

# Model-design switches.
config_modeldesign = {
    'boxcox': False,
    'horizon_in_hours': 24,  # in hours
    'lookback_in_hours': 36,  # in hours
    # NOTE(review): key is misspelled ("likelihood"); kept unchanged because code
    # outside this cell may look it up under this exact name -- confirm and rename.
    'liklihood': None,
    'holiday': True,
    'pv_data': True,  # true if pv data is used, false if irradiance data is used
    'datetime_encodings': False,
}

# Datetime attributes used for the cyclic encoding, depending on the sampling interval.
if config_dataset['temp_resolution'] == 1:
    timestep_encoding = ["hour", "minute"]
elif config_dataset['temp_resolution'] == 15:
    # NOTE(review): presumably meant as "quarter of the hour"; as a pandas datetime
    # attribute `quarter` is the calendar quarter -- confirm the intended encoding.
    timestep_encoding = ['quarter']

# Encoder specification; every key appears exactly once (the literal previously
# listed 'position' twice with the same value, the second silently overriding the first).
config_encoders = {
    "cyclic": {"past": timestep_encoding},
    "position": {"past": ["relative"]},
    "datetime_attribute": {"past": ["dayofweek", "week"]},
}



In [4]:
# Derived parameters
# Encoders are handed to the model only when datetime encodings are switched on.
datetime_encoders = config_encoders if config_modeldesign['datetime_encodings'] else None
timesteps_per_hour = int(60 / config_dataset['temp_resolution'])
n_lags = config_modeldesign['lookback_in_hours'] * timesteps_per_hour  # lookback window in time steps
n_ahead = config_modeldesign['horizon_in_hours'] * timesteps_per_hour  # forecast horizon in time steps
list_metrics = [smape, mape, rmse, r2_score, mae]  # evaluation metrics
# Evaluation stride in time steps: floor(sqrt(n_ahead)), i.e. much shorter than one full horizon.
eval_stride = int(np.sqrt(n_ahead))

# Load data: every table is read from the same HDF5 store, so build its path once.
h5_path = os.path.join(clean_data_path, "data_net_load_forecasting.h5")
resolution_group = f"{config_dataset['temp_resolution']}min"  # key prefix of the resolution-specific tables

df = pd.read_hdf(h5_path, key=f"{resolution_group}/netload")

df_pv = pd.read_hdf(h5_path, key=f"{resolution_group}/community_pv")

df_meta = pd.read_hdf(h5_path, key='pv_metadata')

# Rename on the freshly read frame instead of mutating in place, so re-running the cell is harmless.
df_irr = pd.read_hdf(h5_path, key=f"{resolution_group}/weather").rename({'temperature': 'temp_air'}, axis=1)





In [275]:
# Pick the covariate table that accompanies the net-load target.
# NOTE(review): `pv_generation_` is not defined in the part of the notebook visible
# here (execution counts jump from 4 to 275), so on a fresh kernel the PV branch
# presumably raises NameError -- confirm where it is built and restore that cell.
if config_modeldesign['pv_data']:
    # Entry of the configured META site, aligned to the index of the target frame.
    df_cov = pv_generation_['META-' + config_dataset['META']].reindex(df.index)
else:
    # Fall back to the weather table.
    df_cov = df_irr

In [276]:
train_end = '2015-10-01'
val_end = '2016-04-01'


def _split_chronologically(frame, first_cut, second_cut):
    """Split ``frame`` into three consecutive, non-overlapping time windows.

    The windows are half-open: rows before ``first_cut``, rows from ``first_cut``
    up to (but excluding) ``second_cut``, and rows from ``second_cut`` onwards.
    Label slicing with day-level strings (``frame.loc[:cut]`` followed by
    ``frame.loc[cut:]``) is inclusive on both sides, which put the whole
    boundary day into two neighbouring splits and leaked data between them.

    Args:
        frame: pandas object whose index can be compared against the cut values.
        first_cut: boundary between the first and the second window.
        second_cut: boundary between the second and the third window.

    Returns:
        Tuple ``(first, second, third)`` of the three slices of ``frame``.
    """
    timestamps = frame.index
    first_mask = timestamps < first_cut
    second_mask = (timestamps >= first_cut) & (timestamps < second_cut)
    third_mask = timestamps >= second_cut
    return frame.loc[first_mask], frame.loc[second_mask], frame.loc[third_mask]


# Target and covariates are cut at the same boundaries.
df_train, df_val, df_test = _split_chronologically(df, train_end, val_end)
df_cov_train, df_cov_val, df_cov_test = _split_chronologically(df_cov, train_end, val_end)



In [277]:
# Convert the pandas splits into darts TimeSeries objects.
sampling_freq = f"{config_dataset['temp_resolution']}min"

# Targets: build one TimeSeries per split, then hand it to extract_subseries.
ts_train = extract_subseries(darts.TimeSeries.from_dataframe(df_train, freq=sampling_freq))
ts_val = extract_subseries(darts.TimeSeries.from_dataframe(df_val, freq=sampling_freq))
ts_test = extract_subseries(darts.TimeSeries.from_dataframe(df_test, freq=sampling_freq))

# Covariates: one TimeSeries per split.
ts_cov_train = darts.TimeSeries.from_dataframe(df_cov_train, freq=sampling_freq)
ts_cov_val = darts.TimeSeries.from_dataframe(df_cov_val, freq=sampling_freq)
ts_cov_test = darts.TimeSeries.from_dataframe(df_cov_test, freq=sampling_freq)

# Review the subseries against the length one sample needs (lookback plus horizon);
# review_subseries returns the reviewed targets together with their covariates.
min_sample_length = n_lags + n_ahead
ts_train, ts_cov_train = review_subseries(ts_train, min_sample_length, ts_cov_train)
ts_val, ts_cov_val = review_subseries(ts_val, min_sample_length, ts_cov_val)
ts_test, ts_cov_test = review_subseries(ts_test, min_sample_length, ts_cov_test)

# Remember which subseries is the longest one; it is used for evaluation later.
longest_ts_val_idx = get_longest_subseries_idx(ts_val)
longest_ts_test_idx = get_longest_subseries_idx(ts_test)

In [278]:
# Target pipeline (missing values have been filled in 'data_prep.ipynb').
# NOTE(review): config_modeldesign['boxcox'] is not applied here -- the BoxCox step
# was commented out, so only min-max scaling runs regardless of that setting.
pipeline = Pipeline([Scaler(MinMaxScaler())])

# Fit on the training targets only; validation and test reuse the fitted pipeline.
ts_train_piped = pipeline.fit_transform(ts_train)
ts_val_piped = pipeline.transform(ts_val)
ts_test_piped = pipeline.transform(ts_test)

# Future covariate pipeline, likewise fitted on the training covariates only.
pipeline_weather = Pipeline([Scaler(MinMaxScaler())])
ts_cov_train_piped = pipeline_weather.fit_transform(ts_cov_train)
ts_cov_val_piped = pipeline_weather.transform(ts_cov_val)
ts_cov_test_piped = pipeline_weather.transform(ts_cov_test)
# The name used to be misspelled; keep the old spelling as an alias for any code still using it.
ts_cov_test_pipied = ts_cov_test_piped


In [279]:
# Visual sanity check: element 0 of the scaled training target next to element 0
# of the scaled training covariates, side by side in one line chart.
scaled_target_df = ts_train_piped[0].pd_dataframe()
scaled_cov_df = ts_cov_train_piped[0].pd_dataframe()
px.line(pd.concat([scaled_target_df, scaled_cov_df], axis=1))

In [280]:
# Undo the scaling on the targets: evaluation needs the values on their original scale.
trg_train_inversed = pipeline.inverse_transform(ts_train_piped, partial=True)

# For validation and test only the longest subseries is kept.
_val_inversed_all = pipeline.inverse_transform(ts_val_piped, partial=True)
trg_val_inversed = _val_inversed_all[longest_ts_val_idx]

_test_inversed_all = pipeline.inverse_transform(ts_test_piped, partial=True)
trg_test_inversed = _test_inversed_all[longest_ts_test_idx]

### Linear Regression

In [284]:
# Build and fit the regression model. The variable is called `xgb_model`, but the
# estimator constructed here is a LinearRegressionModel.
# NOTE(review): `lags=48` is hard-coded while the configured lookback is `n_lags` -- confirm which is intended.
# NOTE(review): the model is fitted on the unscaled covariates (`ts_cov_train`), not on
# `ts_cov_train_piped`; the evaluation cell has to use the same scaling -- confirm.
linreg_kwargs = dict(
    lags=48,
    lags_future_covariates=[0],  # only the covariate value at the predicted step itself
    add_encoders=datetime_encoders,
    output_chunk_length=n_ahead,
    likelihood=None,
    random_state=42,
)
xgb_model = LinearRegressionModel(**linreg_kwargs)

print("Training model...")
xgb_model.fit(ts_train_piped, future_covariates=ts_cov_train)


Training model...


<darts.models.forecasting.linear_regression_model.LinearRegressionModel at 0x17db45a38e0>

In [285]:

print("Evaluating model...")
# Evaluate on the first 1500 steps of the longest test subseries and the matching covariates.
eval_window = slice(None, 1500)
eval_target = ts_test_piped[longest_ts_test_idx][eval_window]
eval_covariates = ts_cov_test[longest_ts_test_idx][eval_window]
predictions, score = predict_testset(
    xgb_model,
    eval_target,
    eval_covariates,
    n_lags,
    n_ahead,
    eval_stride,
    pipeline,
)


print("Plotting predictions...")
# Put ground truth and forecast side by side; rows missing either value are dropped.
actual_df = trg_test_inversed.pd_dataframe()
df_compare = pd.concat([actual_df, predictions], axis=1).dropna()
df_compare.columns = ['target', 'prediction']
fig = px.line(df_compare, title='Predictions vs. Test Set')

Evaluating model...
Plotting predictions...


In [286]:
# Render the "Predictions vs. Test Set" figure created in the evaluation cell.
fig.show()