In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
def _as_1d_float(values, name):
    """Return *values* as a 1-D float array, flattening column/row vectors."""
    arr = np.asarray(values, dtype=float)
    if arr.ndim != 1:
        # Model outputs are often shaped (n, 1); those are safe to flatten.
        # Anything with more than one non-singleton axis is a real matrix and
        # cannot be stored in a single column.
        if sum(dim != 1 for dim in arr.shape) > 1:
            raise ValueError(
                f"{name} must be one-dimensional, got shape {arr.shape}."
            )
        arr = arr.reshape(-1)
    return arr


def save_forecast_parquet(
    dates,
    y_pred,
    y_true=None,
    model_name=None,
    split_name=None,
    out_dir="../data/models",
    filename="forecast.parquet"
):
    """
    Save forecast results (date, y_pred, y_actual) to a parquet file.

    Designed to work for both:
      - evaluation (val/test): y_true provided
      - future forecasting: y_true=None (the ``y_actual`` column is all NaN)

    Parameters
    ----------
    dates : array-like
        Sequence of timestamps corresponding to predictions. Converted with
        ``pd.to_datetime``.

    y_pred : array-like
        Model predictions. Converted to a float array; arrays with at most
        one non-singleton axis (e.g. shape ``(n, 1)``) are flattened.

    y_true : array-like or None, optional
        Ground-truth values, converted the same way as ``y_pred``.
        Use None for future forecasts.

    model_name : str, optional
        Name of the model used. When not None, stored in a ``model`` column.

    split_name : str, optional
        Split identifier ("val", "test", "future", etc.). When not None,
        stored in a ``split`` column.

    out_dir : str or Path, default="../data/models"
        Directory where the parquet file is saved. Created (including
        parents) if it does not exist.

    filename : str or None, default="forecast.parquet"
        Name of the output file inside ``out_dir``. Pass None explicitly to
        auto-generate ``forecast[_<model_name>][_<split_name>].parquet``.
        Note that calls relying on the default all target the same path.

    Returns
    -------
    Path
        Path to the saved parquet file.

    Raises
    ------
    ValueError
        If ``y_pred`` or ``y_true`` is not one-dimensional (after flattening
        single-column/row arrays), or if ``dates``, ``y_pred`` and ``y_true``
        do not all have the same length. Validation happens before anything
        is written to disk.

    Examples
    --------
    Evaluation split, with an auto-generated filename::

        best = load_best_models(
            "neural_models_results.csv",
            k=1,
            group_by="Model",
            split="test"
        )

        best_model_name = best.iloc[0]["Model"]

        save_forecast_parquet(
            dates=test_nf["ds"],
            y_pred=test_forecasts[best_model_name].values,
            y_true=test_nf["y"].values,
            model_name=best_model_name,
            split_name="test",
            filename=None  # -> forecast_<model>_test.parquet
        )

    Future forecasting (no ground truth yet), where ``future_forecasts`` has
    columns ["unique_id", "ds", best_model_name]::

        save_forecast_parquet(
            dates=future_forecasts["ds"],
            y_pred=future_forecasts[best_model_name].values,
            y_true=None,  # <-- future, unknown
            model_name=best_model_name,
            split_name="future",
            filename="best_model_future_forecast.parquet"
        )

    """

    dates = pd.to_datetime(dates)
    y_pred = _as_1d_float(y_pred, "y_pred")

    if y_true is not None:
        y_true = _as_1d_float(y_true, "y_true")
        if len(y_true) != len(y_pred):
            raise ValueError("y_true and y_pred must have the same length.")
    else:
        # Keep the schema stable for future forecasts: actuals are unknown.
        y_true = np.full(len(y_pred), np.nan)

    if len(dates) != len(y_pred):
        raise ValueError("dates and y_pred must have the same length.")

    df = pd.DataFrame({
        "date": dates,
        "y_pred": y_pred,
        "y_actual": y_true,
    })

    # Optional metadata columns, broadcast to every row.
    if model_name is not None:
        df["model"] = model_name

    if split_name is not None:
        df["split"] = split_name

    # Only touch the filesystem once all inputs have been validated.
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    if filename is None:
        parts = ["forecast"]
        if model_name:
            parts.append(model_name)
        if split_name:
            parts.append(split_name)
        filename = "_".join(parts) + ".parquet"

    out_path = out_dir / filename
    df.to_parquet(out_path, index=False)

    return out_path
