In [None]:
import pandas as pd
from gluonts.dataset.pandas import PandasDataset
from gluonts.torch import DeepAREstimator
import numpy as np
from fusiontimeseries.finetuning.preprocessing.utils import get_valid_flux_traces

In [None]:
from datetime import datetime


flux_data: dict[int, np.ndarray] = get_valid_flux_traces()

# One long-format row per (trace, time step). The list has its own name so that
# re-running this cell can never overwrite the benchmark `id_records` that a
# later cell builds and the plotting cells read.
train_records = [
    {"item_id": item_id, "time_idx": t, "target": flux_trace[t]}
    for item_id, flux_trace in flux_data.items()
    for t in range(flux_trace.shape[0])
]

# The integer step index is mapped onto a synthetic hourly grid starting at
# 2000-01-01 to provide the timestamp column.
df = (
    pd.DataFrame(train_records)
    .sort_values(["item_id", "time_idx"])
    .assign(
        timestamp=lambda frame: pd.to_datetime(datetime(2000, 1, 1))
        + pd.to_timedelta(frame["time_idx"], unit="h")
    )
)
df.head()

In [None]:
# Column roles of the long-format frame; "h" is the frequency of the generated
# time index.
long_frame_spec = dict(
    item_id="item_id",
    timestamp="timestamp",
    target="target",
    freq="h",
)
dataset = PandasDataset.from_long_dataframe(df, **long_frame_spec)

In [None]:
# create training dataset
from dataclasses import dataclass
from datetime import datetime
from gluonts.dataset.split import (
    AbstractBaseSplitter,
    TrainingDataset,
    DataEntry,
    slice_data_entry,
    FieldName,
)


@dataclass
class DummySplitter(AbstractBaseSplitter):
    """
    A pass-through splitter that does not actually split the training data but
    is required to create a TrainingDataset (and a TestTemplate).
    """

    def training_entry(self, entry: DataEntry) -> DataEntry:
        """Return ``entry`` unchanged so the full series is used for training."""
        return entry

    def test_pair(
        self, entry: DataEntry, prediction_length: int, offset: int = 0
    ) -> tuple[DataEntry, DataEntry]:
        """
        Cut an (input, label) pair from the tail of ``entry``.

        Args:
            entry: Data entry whose target is split.
            prediction_length: Number of steps in the label window.
            offset: Number of trailing steps excluded after the label window.

        Returns:
            ``(input_entry, label_entry)``: the input covers everything before
            the label window; the label covers the ``prediction_length`` steps
            that end ``offset`` steps before the end of the target.

        Raises:
            ValueError: If the target holds fewer than
                ``offset + prediction_length`` steps. Without this check the
                negative slice bounds would wrap around and silently yield
                wrong windows.
        """
        target_length = len(entry[FieldName.TARGET])
        label_end = target_length - offset
        label_start = label_end - prediction_length
        if label_start < 0:
            raise ValueError(
                f"target of length {target_length} is too short for "
                f"prediction_length={prediction_length} with offset={offset}"
            )
        return (
            slice_data_entry(
                entry, slice(0, label_start), prediction_length=prediction_length
            ),
            slice_data_entry(
                entry,
                slice(label_start, label_end),
                prediction_length=prediction_length,
            ),
        )


training_data = TrainingDataset(dataset=dataset, splitter=DummySplitter())

In [None]:
# tail of 80 time steps is used in benchmark as well
PREDICTION_LENGTH = 266 - 80

# Configure the DeepAR estimator first, then fit it on the training traces.
deepar_estimator = DeepAREstimator(
    prediction_length=PREDICTION_LENGTH,
    freq="h",
    trainer_kwargs=dict(max_epochs=20),
)
model = deepar_estimator.train(training_data)

In [None]:
from typing import Any
from torch import Tensor
from gluonts.dataset.split import TestTemplate

from fusiontimeseries.benchmarking.benchmark_utils import (
    BenchmarkDataProvider,
    IN_DISTRIBUTION_ITERATIONS,
    OUT_OF_DISTRIBUTION_ITERATIONS,
)

benchmark_data = BenchmarkDataProvider()


def _benchmark_records(iterations, fetch_trace) -> list[dict[str, Any]]:
    """
    Flatten benchmark traces into long-format records.

    Args:
        iterations: Iterable of iteration identifiers; the position of each
            identifier becomes its ``item_id``.
        fetch_trace: Callable mapping an iteration identifier to its trace
            (annotated as a ``Tensor`` in this notebook; only ``shape[0]`` and
            per-element ``.item()`` are used).

    Returns:
        One dict per time step with ``timestamp`` (synthetic hourly grid starting
        at 2000-01-01), ``item_id`` and ``target``.
    """
    series_start = pd.to_datetime(datetime(2000, 1, 1))
    records: list[dict[str, Any]] = []
    for item_id, iteration in enumerate(iterations):
        trace: Tensor = fetch_trace(iteration)
        for t in range(trace.shape[0]):
            records.append(
                {
                    "timestamp": series_start + pd.to_timedelta(t, unit="h"),
                    "item_id": item_id,
                    "target": trace[t].item(),
                }
            )
    return records


def _benchmark_test_set(records: list[dict[str, Any]]):
    """
    Wrap long-format records as a dataset and derive its single test window.

    Returns:
        ``(dataset, test_set)`` where ``test_set`` holds one window of
        ``PREDICTION_LENGTH`` steps per series, produced with ``DummySplitter``.
    """
    benchmark_dataset = PandasDataset.from_long_dataframe(
        pd.DataFrame(records),
        item_id="item_id",
        timestamp="timestamp",
        target="target",
        freq="h",  # frequency of the generated time index
    )
    test_set = TestTemplate(benchmark_dataset, DummySplitter()).generate_instances(
        prediction_length=PREDICTION_LENGTH, windows=1
    )
    return benchmark_dataset, test_set


# In-distribution benchmark
id_records: list[dict[str, Any]] = _benchmark_records(
    IN_DISTRIBUTION_ITERATIONS, benchmark_data.get_id
)
id_benchmark_dataset, id_benchmark_set = _benchmark_test_set(id_records)

# Out-of-distribution benchmark
ood_records: list[dict[str, Any]] = _benchmark_records(
    OUT_OF_DISTRIBUTION_ITERATIONS, benchmark_data.get_ood
)
ood_benchmark_dataset, ood_benchmark_set = _benchmark_test_set(ood_records)

In [None]:
# Materialize the forecasts for the in-distribution benchmark inputs.
id_forecasts = [*model.predict(id_benchmark_set.input)]

In [None]:
# Shape of the sample array of the first in-distribution forecast.
first_id_forecast = id_forecasts[0]
first_id_forecast.samples.shape

In [None]:
from matplotlib import pyplot as plt

# Benchmark series indexed by timestamp so they can be drawn over the forecasts.
id_df = pd.DataFrame(id_records).set_index("timestamp")

for forecast in id_forecasts:
    forecast.plot()
    # Overlay the complete series belonging to the forecast's item.
    is_same_item = id_df["item_id"] == int(forecast.item_id)
    id_df.loc[is_same_item, "target"].plot()
    plt.show()

In [None]:
# Materialize the forecasts for the out-of-distribution benchmark inputs.
ood_forecasts = [*model.predict(ood_benchmark_set.input)]

In [None]:
# Benchmark series indexed by timestamp so they can be drawn over the forecasts.
ood_df = pd.DataFrame(ood_records).set_index("timestamp")

for forecast in ood_forecasts:
    forecast.plot()
    # Overlay the complete series belonging to the forecast's item.
    is_same_item = ood_df["item_id"] == int(forecast.item_id)
    ood_df.loc[is_same_item, "target"].plot()
    plt.show()

# Autoregressive Forecasting

Train with a smaller prediction horizon (44 steps in the cell below) and then forecast autoregressively, feeding each predicted chunk back in as context, until the full 186-step benchmark horizon is covered.

In [None]:
# Horizons: the model is fitted on a short horizon and later rolled forward
# until the benchmark horizon is reached.
TRAIN_PREDICTION_LENGTH = 44  # short horizon used for training
BENCHMARK_PREDICTION_LENGTH = 266 - 80  # full benchmark horizon (186 steps)

# Build the short-horizon estimator, then fit it on the training traces.
autoregressive_estimator = DeepAREstimator(
    prediction_length=TRAIN_PREDICTION_LENGTH,
    freq="h",
    hidden_size=64,
    num_layers=4,
    trainer_kwargs=dict(max_epochs=32),
)
autoregressive_model = autoregressive_estimator.train(training_data)

In [None]:
from gluonts.model.forecast import SampleForecast


def autoregressive_forecast(model, input_data, total_steps, step_size):
    """
    Perform autoregressive forecasting by iteratively predicting and appending results.

    Each iteration forecasts up to ``step_size`` steps, stores the sampled paths,
    and appends the per-step sample mean to the target so that the next iteration
    is conditioned on it. Samples of later chunks are therefore conditioned on
    the mean of the earlier chunks, not on individual sample paths.

    The start timestamp of the entry is deliberately left unchanged: the extended
    target still begins at the original first observation, so shifting the start
    would misalign the time axis the model sees.

    Args:
        model: Trained GluonTS model exposing ``predict(iterable_of_entries)``
        input_data: Initial input data entry (not modified)
        total_steps: Total number of steps to forecast (must be positive)
        step_size: Number of steps to predict in each iteration (model's prediction_length,
            must be positive)

    Returns:
        Array of shape (num_samples, total_steps) with forecasted values

    Raises:
        ValueError: If ``total_steps`` or ``step_size`` is not positive.
    """
    if total_steps <= 0:
        raise ValueError(f"total_steps must be positive, got {total_steps}")
    if step_size <= 0:
        # A non-positive step would never reduce the remaining horizon.
        raise ValueError(f"step_size must be positive, got {step_size}")

    # Shallow copy: only the target key is rebound below, so the caller's entry
    # keeps its original target and start.
    current_data = dict(input_data)
    sample_chunks = []
    remaining_steps = total_steps

    while remaining_steps > 0:
        steps_to_predict = min(step_size, remaining_steps)

        forecast = next(iter(model.predict([current_data])))

        # samples: (num_samples, prediction_length); keep only the steps that
        # are still needed.
        samples = forecast.samples[:, :steps_to_predict]
        sample_chunks.append(samples)

        remaining_steps -= steps_to_predict
        if remaining_steps > 0:
            # Feed the mean prediction back as context for the next chunk.
            current_data[FieldName.TARGET] = np.concatenate(
                [current_data[FieldName.TARGET], samples.mean(axis=0)]
            )

    # Concatenate all chunks along the time dimension.
    return np.concatenate(sample_chunks, axis=1)

In [None]:
# Roll the short-horizon model forward over every in-distribution benchmark series.
id_autoregressive_forecasts = []

for context_entry, label_entry in id_benchmark_set:
    rolled_samples = autoregressive_forecast(
        autoregressive_model,
        context_entry,
        total_steps=BENCHMARK_PREDICTION_LENGTH,
        step_size=TRAIN_PREDICTION_LENGTH,
    )

    # Wrap the raw samples in a SampleForecast so the existing plotting code
    # can be reused; the forecast starts where the label window starts.
    id_autoregressive_forecasts.append(
        SampleForecast(
            samples=rolled_samples,
            start_date=label_entry[FieldName.START],
            item_id=context_entry.get("item_id"),
        )
    )

print(f"Generated {len(id_autoregressive_forecasts)} autoregressive forecasts")
print(f"First forecast shape: {id_autoregressive_forecasts[0].samples.shape}")

In [None]:
# Plot every in-distribution autoregressive forecast against its full series.
for forecast in id_autoregressive_forecasts:
    forecast.plot()
    actual_series = id_df.loc[id_df["item_id"] == int(forecast.item_id), "target"]
    actual_series.plot()
    plt.title(f"Autoregressive Forecast - ID Item {forecast.item_id}")
    plt.legend(["Forecast (median)", "Prediction intervals", "Actual"])
    plt.show()

In [None]:
# Roll the short-horizon model forward over every out-of-distribution benchmark series.
ood_autoregressive_forecasts = []

for context_entry, label_entry in ood_benchmark_set:
    rolled_samples = autoregressive_forecast(
        autoregressive_model,
        context_entry,
        total_steps=BENCHMARK_PREDICTION_LENGTH,
        step_size=TRAIN_PREDICTION_LENGTH,
    )

    # The forecast starts where the label window starts.
    ood_autoregressive_forecasts.append(
        SampleForecast(
            samples=rolled_samples,
            start_date=label_entry[FieldName.START],
            item_id=context_entry.get("item_id"),
        )
    )

print(f"Generated {len(ood_autoregressive_forecasts)} OOD autoregressive forecasts")
print(f"First forecast shape: {ood_autoregressive_forecasts[0].samples.shape}")

In [None]:
# Plot every out-of-distribution autoregressive forecast against its full series.
for forecast in ood_autoregressive_forecasts:
    forecast.plot()
    actual_series = ood_df.loc[ood_df["item_id"] == int(forecast.item_id), "target"]
    actual_series.plot()
    plt.title(f"Autoregressive Forecast - OOD Item {forecast.item_id}")
    plt.legend(["Forecast (median)", "Prediction intervals", "Actual"])
    plt.show()