In [None]:
# always run
# Model configuration handed to TimeSeriesPredictor.fit further down: the top-level key
# selects the model and the nested dict holds that model's hyperparameters.

chronos2_hyperparameters = {
    # https://auto.gluon.ai/stable/tutorials/timeseries/forecasting-model-zoo.html#pretrained-models
    "Chronos2": {
        # Fine-tune the pretrained model on the training data (presumably instead of
        # using it zero-shot - see the model zoo link above).
        "fine_tune": True,
        # Presumably low-rank adapter (LoRA) fine-tuning rather than updating all
        # weights - TODO confirm against the AutoGluon docs.
        "fine_tune_mode": "lora",
        "fine_tune_lr": 5e-5,  # learning rate used during fine-tuning
        "fine_tune_steps": 3000,  # number of fine-tuning steps
        "fine_tune_batch_size": 64,  # batch size used during fine-tuning
        # NOTE(review): presumably disables joint (cross-series) modelling so each
        # series is handled independently - confirm against the AutoGluon docs.
        "cross_learning": False,
    }
}

In [None]:
from datetime import datetime
from functools import cache
import numpy as np
import pandas as pd
from fusiontimeseries.finetuning.preprocessing.utils import get_valid_flux_traces


@cache
def create_training_flux_dataframe() -> pd.DataFrame:
    """Build a long-format training frame from the valid flux traces.

    Every trace returned by ``get_valid_flux_traces`` contributes one row per
    sample. Samples receive synthetic timestamps that start at 2000-01-01 and
    advance by one millisecond per step, so each series has a regular time index.

    Returns:
        A DataFrame with the columns ``item_id`` (key of the trace),
        ``timestamp`` (synthetic time of the sample) and ``target`` (the sample
        value).

    Note:
        The result is memoised with ``functools.cache``; every call returns the
        same DataFrame object, so callers should not modify it in place.
    """
    training_flux_traces: dict[int, np.ndarray] = get_valid_flux_traces()

    # The series origin is identical for every row, so build it once instead of
    # once per sample.
    series_start = pd.to_datetime(datetime(2000, 1, 1))

    records = [
        {
            "item_id": item_id,
            "timestamp": series_start + pd.to_timedelta(step, unit="ms"),
            "target": value,
        }
        for item_id, flux_trace in training_flux_traces.items()
        for step, value in enumerate(flux_trace)
    ]
    return pd.DataFrame(records)


# Materialise the (cached) long-format training frame and preview its first rows.
training_flux_df = create_training_flux_dataframe()
training_flux_df.head()

In [None]:
from autogluon.timeseries import TimeSeriesDataFrame

# Wrap the long-format frame in AutoGluon's TimeSeriesDataFrame, naming the columns
# that identify each series and its time axis.
training_data = TimeSeriesDataFrame.from_data_frame(
    training_flux_df,
    id_column="item_id",
    timestamp_column="timestamp",
)
# NOTE(review): `.size` is presumably the pandas-style element count (rows x columns),
# not the number of series; `training_data.num_items` is what the final evaluation
# cell reports as the training data size - confirm which one is wanted here.
training_data.size

The training data consists of 250 time series, each a 1-dimensional NumPy array with 266 timesteps. The goal is to forecast autoregressively from timestep 80 to timestep 266 on a held-out test set of 6 in-distribution and 5 out-of-distribution time series of the same length. For information only: the reported metric is the RMSE, with the standard error of the mean, computed on the prediction tails (last 80 timesteps) of both the in-distribution and the out-of-distribution series.

- Chronos-2's underlying training infrastructure automatically creates sliding windows from the training data.
- Chronos models are transformers, so they are trained to make use of long contexts.

In [None]:
from autogluon.timeseries import TimeSeriesPredictor

# Configure the forecaster for the "target" column, scored with RMSE.
predictor = TimeSeriesPredictor(
    # Number of steps produced per forecast call. NOTE(review): this is NOT the full
    # evaluation horizon, which is 266 - 80 = 186 steps; the remaining steps are
    # presumably covered by rolling this 64-step window forward autoregressively in
    # the evaluation helpers - confirm.
    prediction_length=64,
    target="target",
    eval_metric="RMSE",
)
# Fit only the Chronos2 configuration defined at the top of the notebook, without
# building an ensemble on top of it.
predictor.fit(
    train_data=training_data,  # TimeSeriesDataFrame built from the training flux traces
    hyperparameters={**chronos2_hyperparameters},  # shallow copy of the config dict
    time_limit=3600,  # training budget (seconds, per AutoGluon's fit API)
    enable_ensemble=False,
)

In [None]:
from fusiontimeseries.finetuning.evaluation_utils import (
    create_benchmark_dfs_from_flux_traces,
)


# Load the held-out benchmark traces. The result is unpacked as (out-of-distribution,
# in-distribution) - order taken from the variable names; verify against the helper.
ood_benchmark_flux_df, id_benchmark_flux_df = create_benchmark_dfs_from_flux_traces()
# No id/timestamp column names are passed here (unlike for the training data), so the
# benchmark frames presumably already use AutoGluon's default column names - confirm.
ood_benchmark_data = TimeSeriesDataFrame.from_data_frame(ood_benchmark_flux_df)
id_benchmark_data = TimeSeriesDataFrame.from_data_frame(id_benchmark_flux_df)
ood_benchmark_data.size, id_benchmark_data.size

In [None]:
# Score the fitted model on the out-of-distribution benchmark with additional metrics.
# NOTE(review): "RMSE" is already the predictor's eval_metric, so listing it again in
# extra_metrics is presumably redundant - confirm.
predictor.leaderboard(
    data=ood_benchmark_data, extra_metrics=["MAE", "MASE", "MAPE", "SMAPE", "RMSE"]
)

In [None]:
from fusiontimeseries.finetuning.evaluation_utils import autoregressive_forecast


# Forecast the out-of-distribution benchmark series with the fine-tuned predictor.
# NOTE(review): the starting context and roll-forward step are decided inside
# autoregressive_forecast and are not visible in this notebook.
forecasts: pd.DataFrame = autoregressive_forecast(
    benchmark_data_df=ood_benchmark_flux_df,
    predictor=predictor,
)
forecasts.head()

In [None]:
from fusiontimeseries.finetuning.evaluation_utils import plot_forecast_vs_true

# Plot the out-of-distribution forecasts against the true benchmark traces.
plot_forecast_vs_true(
    benchmark_data_df=ood_benchmark_flux_df,
    forecasts=forecasts,
)

In [None]:
from fusiontimeseries.finetuning.evaluation_utils import evaluate_forecasts


# Evaluate the out-of-distribution forecasts; the helper's two return values are
# unpacked as the RMSE and its standard error (per the variable names).
# NOTE(review): `rsme` looks like a typo for `rmse`; the name is left unchanged because
# notebook globals may be referenced by other cells.
rsme, se_rmse = evaluate_forecasts(
    benchmark_data_df=ood_benchmark_flux_df,
    forecasts=forecasts,
)
rsme, se_rmse

In [None]:
# Same forecasting step as for the out-of-distribution data, now on the
# in-distribution benchmark series.
id_forecasts: pd.DataFrame = autoregressive_forecast(
    benchmark_data_df=id_benchmark_flux_df,
    predictor=predictor,
)
id_forecasts.head()

In [None]:
# Plot the in-distribution forecasts against the true benchmark traces.
plot_forecast_vs_true(
    benchmark_data_df=id_benchmark_flux_df,
    forecasts=id_forecasts,
)

In [None]:
# Evaluate the in-distribution forecasts; the two return values are unpacked as the
# RMSE and its standard error (per the variable names).
# NOTE(review): `id_rsme` looks like a typo for `id_rmse`; the name is left unchanged
# because notebook globals may be referenced by other cells.
id_rsme, id_se_rmse = evaluate_forecasts(
    benchmark_data_df=id_benchmark_flux_df,
    forecasts=id_forecasts,
)
id_rsme, id_se_rmse

## 📊 Complete Evaluation & Results Storage

This section runs a comprehensive evaluation that:
- Generates autoregressive forecasts for both ID and OOD data
- Computes RMSE with standard error on the prediction tails
- Creates and saves plots for each time series
- Saves all metadata and metrics to a JSON file in the `data/` folder

The results will be saved in a format consistent with the benchmarking experiments for easy comparison.


In [None]:
# Complete evaluation with automatic saving of all results
from fusiontimeseries.finetuning.evaluation_utils import (
    FinetuningConfig,
    run_complete_evaluation,
)

# Create configuration object describing this fine-tuning run.
# NOTE(review): target, eval_metric and time_limit repeat the literals given to the
# predictor / predictor.fit in the training cell and must be kept in sync by hand.
finetuning_config = FinetuningConfig(
    model_name="Chronos2",
    prediction_length=predictor.prediction_length,
    target="target",
    eval_metric="RMSE",
    hyperparameters=chronos2_hyperparameters,
    time_limit=3600,
    # The two 80s match the task description in the markdown above: forecasting
    # starts at timestep 80 and the metric is computed on the last 80 timesteps.
    start_context_length=80,
    relevant_tail_length=80,
)

# Run complete evaluation (forecasts, evaluates, plots, and saves everything)
results, json_path, plots_dir = run_complete_evaluation(
    predictor=predictor,
    config=finetuning_config,
    training_data_size=training_data.num_items,
    # Falls back to None when the predictor object exposes no `path` attribute.
    predictor_path=getattr(predictor, "path", None),
)

# The status icons were stored as mis-decoded UTF-8 (mojibake); restored here.
print("\n✅ All results saved!")
print(f"📊 Plots directory: {plots_dir}")
print(f"📄 JSON results: {json_path}")