In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler, PowerTransformer

from tot.datasets.dataset import Dataset
from tot.models.models_darts import RandomForestModel, LinearRegressionModel
from tot.models.models_neuralprophet import NeuralProphetModel
from tot.benchmark import SimpleBenchmark
from tot.plotting import plot_plotly

In [2]:
# Base URL of the dataset folder in the neuralprophet-data GitHub repository.
data_location = "https://raw.githubusercontent.com/ourownstory/neuralprophet-data/main/datasets/"
# Full URL of the ERCOT regional load CSV, read directly over HTTP.
ercot_csv_url = f"{data_location}multivariate/load_ercot_regions.csv"
df_ercot = pd.read_csv(ercot_csv_url)

In [3]:
# Every column label after the first one is treated as a region name
# (assumes the first column is the "ds" timestamp column -- TODO confirm).
regions = df_ercot.columns[1:].tolist()

In [4]:
# Reshape the wide frame (one column per region) into a long frame with the
# columns "ds", "y" and "ID", where "ID" holds the region's column name.
# Only the first MAX_ROWS_PER_REGION rows of each region are kept.
MAX_ROWS_PER_REGION = 26301

# Build all per-region frames first and concatenate once: growing a DataFrame
# with pd.concat inside the loop re-copies the accumulated data on every
# iteration, and concatenating onto an empty DataFrame is deprecated in
# recent pandas versions.
region_frames = [
    df_ercot[["ds", col]]
    .iloc[:MAX_ROWS_PER_REGION]
    .rename(columns={col: "y"})
    .assign(ID=col)
    for col in regions
]

# pd.concat raises on an empty list, so keep the original "empty frame"
# result when there are no regions. The per-region row index is preserved
# (not reset), exactly as before.
df_global = pd.concat(region_frames) if region_frames else pd.DataFrame()

In [5]:
# Wrap the long-format frame in a single benchmark Dataset; "H" is the
# frequency string handed to the Dataset.
ercot_dataset = Dataset(df=df_global, name="df_global", freq="H")
dataset_list = [ercot_dataset]

In [6]:
# Shared hyper-parameters for the model configurations defined below.
# N_FORECASTS is passed as "n_forecasts", LR as "learning_rate" and
# EPOCHS as "epochs" in every NeuralProphetModel parameter dict.
N_FORECASTS = 1
LR = 0.01
EPOCHS = 20

In [7]:
# Settings that are identical across every NeuralProphet configuration.
np_shared_params = {
    "n_lags": 24,
    "n_forecasts": N_FORECASTS,
    "learning_rate": LR,
    "epochs": EPOCHS,
    "normalize": "off",
}

# Each entry pairs a model class with the parameter dict used to build it.
# Only NeuralProphetModel variants are listed; they differ in the external
# scaler, the scaling level and whether "weighted_loss" is enabled.
# Every variant gets its own StandardScaler instance.
model_classes_and_params = [
    # Per-time-series scaling with weighted loss.
    (
        NeuralProphetModel,
        {
            "scaler": StandardScaler(),
            "scaling_level": "per_time_series",
            **np_shared_params,
            "weighted_loss": True,
        },
    ),
    # One scaling for the whole dataset.
    (
        NeuralProphetModel,
        {
            "scaler": StandardScaler(),
            "scaling_level": "per_dataset",
            **np_shared_params,
        },
    ),
    # Per-time-series scaling without weighted loss.
    (
        NeuralProphetModel,
        {
            "scaler": StandardScaler(),
            "scaling_level": "per_time_series",
            **np_shared_params,
        },
    ),
    # No external scaler at all.
    (NeuralProphetModel, dict(np_shared_params)),
]

In [8]:
# Assemble the benchmark from the datasets and model configurations defined
# above; "MSE" and "MASE" are the requested metrics and 0.25 is passed as the
# test percentage.
benchmark = SimpleBenchmark(
    datasets=dataset_list,
    model_classes_and_params=model_classes_and_params,
    test_percentage=0.25,
    metrics=["MSE", "MASE"],
)

In [None]:
# Execute the benchmark with verbose logging and keep the two results it
# returns (train first, then test).
results_train, results_test = benchmark.run(verbose=True)

3 missing values in column y were detected in total. 
3 missing values in column y were detected in total. 
3 missing values in column y were detected in total. 
3 missing values in column y were detected in total. 
3 missing values in column y were detected in total. 
3 missing values in column y were detected in total. 
3 missing values in column y were detected in total. 
3 missing values in column y were detected in total. 
INFO - (NP.df_utils._infer_frequency) - Major frequency H corresponds to 99.995% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H
INFO - (NP.df_utils._infer_frequency) - Major frequency H corresponds to 99.995% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H
INFO - (NP.df_utils._infer_frequency) - Major frequency H corresponds to 99.995% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H
INFO - (NP.df_ut

In [None]:
# Display the training results returned by benchmark.run.
results_train

In [None]:
# Display the test results returned by benchmark.run.
results_test

In [None]:
def plot_forecast(df, regions=regions):
    """Show predicted and actual values for the given regions in one figure.

    For each region two traces are added: a "<region> predicted" trace of
    the "yhat1" column and a markers-only "<region> actual" trace of the
    "y" column, both plotted against "ds".

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns "ID", "ds", "yhat1" and "y".
    regions : iterable of str
        Values of the "ID" column to plot. The default is the notebook-level
        ``regions`` list as it was when this cell was executed (the default
        is bound at definition time).

    Returns
    -------
    None
        The figure is rendered via ``fig.show()``.
    """
    fig = go.Figure()

    for region in regions:
        # Filter once per region instead of once per plotted column.
        region_df = df[df["ID"] == region]
        fig.add_trace(
            go.Scatter(
                name=region + ' predicted',
                x=region_df["ds"],
                y=region_df["yhat1"],
            )
        )
        fig.add_trace(
            go.Scatter(
                name=region + ' actual',
                mode="markers",
                x=region_df["ds"],
                y=region_df["y"],
            )
        )

    # Label the axes with the plotted column names so the figure stands alone.
    fig.update_layout(xaxis_title="ds", yaxis_title="y")
    fig.show()

In [None]:
# Plot the first entry of benchmark.fcst_test for two regions
# (presumably the first model configuration -- TODO confirm the ordering).
plot_forecast(benchmark.fcst_test[0], regions=["NORTH_C", "WEST"])

In [None]:
# Plot the second entry of benchmark.fcst_test for the same two regions
# (presumably the second model configuration -- TODO confirm the ordering).
plot_forecast(benchmark.fcst_test[1], regions=["NORTH_C", "WEST"])