### Use the AG example in the docs (Forecasting with Chronos) and then convert to training container and inf container

In [61]:
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor
import numpy as np
import boto3
import uuid
from io import BytesIO
import os
import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker.session import TrainingInput

In [28]:
data = TimeSeriesDataFrame.from_path(
    "https://autogluon.s3.amazonaws.com/datasets/timeseries/australian_electricity_subset/test.csv"
)
data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,target
item_id,timestamp,Unnamed: 2_level_1
T000000,2013-03-10 00:00:00,5207.959961
T000000,2013-03-10 00:30:00,5002.275879
T000000,2013-03-10 01:00:00,4747.569824
T000000,2013-03-10 01:30:00,4544.880859
T000000,2013-03-10 02:00:00,4425.952148


### Add features (not in ex)

In [30]:
random_values = np.random.uniform(0, 500, size=len(data['target']))
data['random_feature'] = data['target'].values + random_values
data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,target,random_feature
item_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1
T000000,2013-03-10 00:00:00,5207.959961,5558.00664
T000000,2013-03-10 00:30:00,5002.275879,5390.543877
T000000,2013-03-10 01:00:00,4747.569824,4798.287882
T000000,2013-03-10 01:30:00,4544.880859,4545.22837
T000000,2013-03-10 02:00:00,4425.952148,4706.047467


In [117]:
prediction_length = 48
train_data, test_data = data.train_test_split(prediction_length)
train_data = train_data.reset_index()
test_data = test_data.reset_index()

predictor = TimeSeriesPredictor(prediction_length=prediction_length
                               ).fit(train_data)

Sorting the dataframe index before generating the train/test split.
Beginning AutoGluon training...
AutoGluon will save models to '/home/sagemaker-user/customer-demos/AutoGluonPyTorch/AutogluonModels/ag-20250904_071459'
AutoGluon Version:  1.2
Python Version:     3.12.9
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Aug 7 19:21:39 UTC 2025
CPU Count:          48
GPU Count:          4
Memory Avail:       176.83 GB / 186.60 GB (94.8%)
Disk Space Avail:   83.73 GB / 99.99 GB (83.7%)

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': WQL,
 'hyperparameters': 'default',
 'known_covariates_names': [],
 'num_val_windows': 1,
 'prediction_length': 48,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'target',
 'verbosity': 2}



AttributeError: 'RangeIndex' object has no attribute 'levels'

### Write files to parquet (not in ex.) to test train and inf job

In [46]:
bucket = 'ag-example-timeseries'

# Create S3 client
s3 = boto3.client("s3")

In [118]:
train_data

Unnamed: 0,item_id,timestamp,target,random_feature
0,T000000,2013-03-10 00:00:00,5207.959961,5558.006640
1,T000000,2013-03-10 00:30:00,5002.275879,5390.543877
2,T000000,2013-03-10 01:00:00,4747.569824,4798.287882
3,T000000,2013-03-10 01:30:00,4544.880859,4545.228370
4,T000000,2013-03-10 02:00:00,4425.952148,4706.047467
...,...,...,...,...
172795,T000004,2015-02-27 21:30:00,368.948792,452.005475
172796,T000004,2015-02-27 22:00:00,346.332764,598.047604
172797,T000004,2015-02-27 22:30:00,327.962677,499.156166
172798,T000004,2015-02-27 23:00:00,307.481934,498.390110


In [47]:
# Number of parquet files to create
num_files = 100

dfs_to_write = {'train': train_data, 'test': test_data}

for key in dfs_to_write.keys():
    print(key)

train
test


In [119]:
# Clear train & test buckets [can change this]
! aws s3 rm s3://{bucket}/train/ --recursive 
! aws s3 rm s3://{bucket}/test/ --recursive 

delete: s3://ag-example-timeseries/train/dummy_0200e716.parquet
delete: s3://ag-example-timeseries/train/dummy_06031b87.parquet
delete: s3://ag-example-timeseries/train/dummy_0239602b.parquet
delete: s3://ag-example-timeseries/train/dummy_0a8314ae.parquet
delete: s3://ag-example-timeseries/train/dummy_15a7f40a.parquet
delete: s3://ag-example-timeseries/train/dummy_175c1289.parquet
delete: s3://ag-example-timeseries/train/dummy_1b96077d.parquet
delete: s3://ag-example-timeseries/train/dummy_29959034.parquet
delete: s3://ag-example-timeseries/train/dummy_1f582372.parquet
delete: s3://ag-example-timeseries/train/dummy_0e66365e.parquet
delete: s3://ag-example-timeseries/train/dummy_0ea345a1.parquet
delete: s3://ag-example-timeseries/train/dummy_2c548bed.parquet
delete: s3://ag-example-timeseries/train/dummy_12e32cb2.parquet
delete: s3://ag-example-timeseries/train/dummy_118bf4d5.parquet
delete: s3://ag-example-timeseries/train/dummy_29bbdb66.parquet
delete: s3://ag-example-timeseries/train

In [120]:
for split_name, df in dfs_to_write.items():
    # Shuffle and split into chunks
    shuffled = df.sample(frac=1, random_state=42).reset_index(drop=True)
    chunks = np.array_split(shuffled, num_files)

    for i, chunk in enumerate(chunks, 1):
        filename = f"dummy_{uuid.uuid4().hex[:8]}.parquet"
        s3_key = f"{split_name}/{filename}"   # <-- include folder + filename

        buffer = BytesIO()
        chunk_reset = chunk.reset_index()
        chunk_reset.to_parquet(buffer, index=False)
        buffer.seek(0)

        s3.upload_fileobj(buffer, bucket, s3_key)
        print(f"Uploaded {len(chunk)} rows to s3://{bucket}/{s3_key}")


  return bound(*args, **kwds)


Uploaded 1728 rows to s3://ag-example-timeseries/train/dummy_5bbb3399.parquet
Uploaded 1728 rows to s3://ag-example-timeseries/train/dummy_c3eb8da8.parquet
Uploaded 1728 rows to s3://ag-example-timeseries/train/dummy_64764251.parquet
Uploaded 1728 rows to s3://ag-example-timeseries/train/dummy_5ddcd6f5.parquet
Uploaded 1728 rows to s3://ag-example-timeseries/train/dummy_5ef7709c.parquet
Uploaded 1728 rows to s3://ag-example-timeseries/train/dummy_67347828.parquet
Uploaded 1728 rows to s3://ag-example-timeseries/train/dummy_acb07ec2.parquet
Uploaded 1728 rows to s3://ag-example-timeseries/train/dummy_bf0b0f1d.parquet
Uploaded 1728 rows to s3://ag-example-timeseries/train/dummy_e9a3d22b.parquet
Uploaded 1728 rows to s3://ag-example-timeseries/train/dummy_762a5997.parquet
Uploaded 1728 rows to s3://ag-example-timeseries/train/dummy_2e8d7323.parquet
Uploaded 1728 rows to s3://ag-example-timeseries/train/dummy_9c9f21c0.parquet
Uploaded 1728 rows to s3://ag-example-timeseries/train/dummy_e29

  return bound(*args, **kwds)


Uploaded 1731 rows to s3://ag-example-timeseries/test/dummy_a8ee0419.parquet
Uploaded 1731 rows to s3://ag-example-timeseries/test/dummy_f3d25011.parquet
Uploaded 1731 rows to s3://ag-example-timeseries/test/dummy_cbf7a4cf.parquet
Uploaded 1731 rows to s3://ag-example-timeseries/test/dummy_023e9270.parquet
Uploaded 1731 rows to s3://ag-example-timeseries/test/dummy_4cd44d04.parquet
Uploaded 1731 rows to s3://ag-example-timeseries/test/dummy_b816400d.parquet
Uploaded 1731 rows to s3://ag-example-timeseries/test/dummy_7f6ae6de.parquet
Uploaded 1731 rows to s3://ag-example-timeseries/test/dummy_13adb48d.parquet
Uploaded 1731 rows to s3://ag-example-timeseries/test/dummy_07a3bc28.parquet
Uploaded 1731 rows to s3://ag-example-timeseries/test/dummy_4a94b012.parquet
Uploaded 1731 rows to s3://ag-example-timeseries/test/dummy_048d6dd1.parquet
Uploaded 1731 rows to s3://ag-example-timeseries/test/dummy_605d34d7.parquet
Uploaded 1731 rows to s3://ag-example-timeseries/test/dummy_16a66882.parquet

In [124]:
train_data.columns

Index(['item_id', 'timestamp', 'target', 'random_feature'], dtype='object')

In [32]:
train_data
predictions = predictor.predict(train_data)
# predictor.plot(
#     data=data,
#     predictions=predictions,
#     item_ids=data.item_ids[:2],
#     max_history_length=200,
# );

Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble


In [None]:
# predictions = predictor.predict(train_data)
# predictor.plot(
#     data=data,
#     predictions=predictions,
#     item_ids=data.item_ids[:2],
#     max_history_length=200,
# );

Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble


### Create train container

In [153]:
%%writefile train.py
import os, sys, time, glob, argparse
from functools import wraps

import pandas as pd
import pyarrow.parquet as pq
import mlflow

from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor
from helper_functions import AGTimeSeriesWrapper  # keep this file in source_dir

# ----------------------------
# Retry helper
# ----------------------------
def retry_decorator(max_attempts=3, delay_seconds=60, backoff_factor=2):
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            attempts, delay = 0, delay_seconds
            while attempts < max_attempts:
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    attempts += 1
                    if attempts >= max_attempts:
                        raise
                    print(f"[retry] {e} | attempt {attempts}/{max_attempts} | sleeping {delay}s")
                    time.sleep(delay)
                    delay *= backoff_factor
        return wrapper
    return decorator

# ----------------------------
# Args
# ----------------------------
def parse_args():
    p = argparse.ArgumentParser()
    p.add_argument('--output_dir', type=str, default='/opt/ml/model')

    # MLflow (managed tracking server ARN, like your tabular script)
    p.add_argument('--mlflow_arn', type=str, required=True)
    p.add_argument('--mlflow_experiment', type=str, required=True)

    # Data roots (default to SageMaker channels)
    p.add_argument('--train-dir', type=str, default=os.environ.get('SM_CHANNEL_TRAINING', '/opt/ml/input/data/training'))
    p.add_argument('--test-dir',  type=str, default=os.environ.get('SM_CHANNEL_TEST', '/opt/ml/input/data/test'))  # empty means "no test"

    # Optional filename filters (empty -> load all)
    p.add_argument('--train-keyword', type=str, default=None)
    p.add_argument('--test-keyword',  type=str, default=None)

    # Schema
    p.add_argument('--id-col', type=str, default='item_id')
    p.add_argument('--time-col', type=str, default='timestamp')
    p.add_argument('--target-col', type=str, default='target')

    # Model
    p.add_argument('--prediction-length', type=int, default=24)
    p.add_argument('--eval-metric', type=str, default='MAPE')
    p.add_argument('--presets', type=str, default='best_quality')
    p.add_argument('--time-limit', type=int, default=900)
    p.add_argument('--num-gpus', type=int, default=int(os.environ.get('SM_NUM_GPUS', '0')))
    return p.parse_args()

# ----------------------------
# File discovery (recursive)
# ----------------------------
PARQUET_EXTS = {".parquet", ".PARQUET", ".pq", ".PQ"}

def _find_parquet_files(root: str, keyword: str | None):
    if not root or not os.path.isdir(root):
        raise FileNotFoundError(f"Data directory not found: {root}")
    files = []
    for r, _, fns in os.walk(root):
        for fn in fns:
            _, ext = os.path.splitext(fn)
            if ext in PARQUET_EXTS:
                if keyword:
                    if keyword in fn:
                        files.append(os.path.join(r, fn))
                else:
                    files.append(os.path.join(r, fn))
    print(f"[finder] root={root} keyword={keyword!r} found={len(files)}")
    if not files:
        raise FileNotFoundError(f"No parquet files in {root} (keyword={keyword!r})")
    return sorted(files)

# ----------------------------
# Loader -> TSF objects (target + optional covariates)
# ----------------------------
@retry_decorator(max_attempts=3, delay_seconds=30, backoff_factor=2)
def load_timeseries_parquet(
    data_root: str,
    keyword: str | None,
    id_col: str,
    time_col: str,
    target_col: str,
    covariate_cols: list[str] | None = None,  # e.g., ["random_feature"]
):
    files = _find_parquet_files(data_root, keyword)

    def resolve(cols: list[str], desired: str, aliases: list[str]) -> str | None:
        norm = {c.strip().lower(): c for c in cols}
        for cand in [desired] + aliases:
            k = cand.strip().lower()
            if k in norm:
                return norm[k]
        return None

    frames = []
    for fp in files:
        t = pq.read_table(fp)
        df = t.to_pandas()
        frames.append(df)

    all_df = pd.concat(frames, ignore_index=True).sort_values(["item_id", "timestamp"]).reset_index(drop=True)

    target_tsf = TimeSeriesDataFrame.from_data_frame(
        all_df[["item_id", "timestamp", "target"]],
        id_column="item_id",
        timestamp_column="timestamp",
    )

    cov_tsf = None
    if covariate_cols:
        present = [c for c in covariate_cols if c in all_df.columns]  # (only if you merged covs into all_df)
        if present:
            cov_df = all_df[["item_id", "timestamp"] + present]
            cov_tsf = TimeSeriesDataFrame.from_data_frame(cov_df, id_column="item_id", timestamp_column="timestamp")

    return target_tsf, cov_tsf

# ----------------------------
# Main
# ----------------------------
def main():
    args = parse_args()

    # MLflow (managed server). Requires sagemaker-mlflow installed in the container.
    mlflow.set_tracking_uri(args.mlflow_arn)
    mlflow.set_experiment(args.mlflow_experiment)

    # TRAIN
    print(f"[load] train_dir={args.train_dir} keyword={args.train_keyword!r}")
    train_tsf, train_cov_tsf = load_timeseries_parquet(
        args.train_dir, args.train_keyword, args.id_col, args.time_col, args.target_col,
        covariate_cols=["random_feature"],
    )

    # TEST (optional)
    test_tsf = test_cov_tsf = None
    if args.test_dir and os.path.isdir(args.test_dir):
        print(f"[load] test_dir={args.test_dir} keyword={args.test_keyword!r}")
        try:
            test_tsf, test_cov_tsf = load_timeseries_parquet(
                args.test_dir, args.test_keyword, args.id_col, args.time_col, args.target_col,
                covariate_cols=["random_feature"],
            )
        except Exception as e:
            print(f"[load] test skipped: {e}")

    with mlflow.start_run():
        mlflow.log_params({
            "prediction_length": args.prediction_length,
            "eval_metric": args.eval_metric,
            "presets": args.presets,
            "time_limit": args.time_limit,
            "train_dir": args.train_dir,
            "test_dir": args.test_dir,
            "train_keyword": args.train_keyword,
            "test_keyword": args.test_keyword,
        })

        predictor = TimeSeriesPredictor(
            prediction_length=args.prediction_length,
            eval_metric=args.eval_metric,
            path=args.output_dir,
        )

        predictor.fit(
            train_data=train_tsf,
            past_covariates=train_cov_tsf,    # random_feature as past covariate (change if you want 'known' or 'static')
            presets=args.presets,
            time_limit=args.time_limit,
            num_gpus=args.num_gpus,
        )
        predictor.save()

        if test_tsf is not None:
            scores = predictor.evaluate(test_tsf, past_covariates=test_cov_tsf)
            for k, v in scores.items():
                mlflow.log_metric(f"test_{k}", float(v))

        # Deployable PyFunc
        conda_env = {
            "name": "agts-env",
            "channels": ["conda-forge"],
            "dependencies": [
                "python=3.10",
                {"pip": [
                    "autogluon.timeseries[all]==1.1.1",
                    "pandas>=2.0.0",
                    "pyarrow>=13.0.0",
                    "mlflow>=2.9.0",
                    "sagemaker-mlflow>=0.1.0",
                ]},
            ],
        }
        mlflow.pyfunc.log_model(
            artifact_path="model",
            python_model=AGTimeSeriesWrapper(),
            artifacts={"predictor": args.output_dir},
            conda_env=conda_env,
        )
        print("[done] training complete and model logged to MLflow.")

if __name__ == "__main__":
    main()


Overwriting train.py


### Run the training container

In [154]:
# Config
region      = sagemaker.Session().boto_region_name
session     = sagemaker.Session()
role        = sagemaker.get_execution_role()  # or set your role arn string

instance_type   = "ml.g5.xlarge"             # CPU example; use a GPU like "ml.g5.2xlarge" if needed
instance_count  = 1
use_spot        = True                        # optional cost saver
max_wait        = 3600 + 600                  # seconds (must be > max_run if use_spot)
max_run         = 3600                        # seconds

# Hyperparameters for train.py (match argparse names)
hyperparameters = {
    "id-col": "item_id",
    "time-col": "timestamp",
    "target-col": "target",
    # "train-keyword": None,                 # your parquet file name filter
    # "test-keyword": None,                   # set None/"" if no test set
    "prediction-length": 24,
    "eval-metric": "MAPE",
    "presets": "best_quality",
    "time-limit": 900,                        # seconds
    "mlflow_arn": 'arn:aws:sagemaker:us-east-1:543531862107:mlflow-tracking-server/ag-ex-timeseries',
    "mlflow_experiment": "autogluon-timeseries",
}

In [155]:
train_s3_uri

's3://ag-example-timeseries/train'

In [None]:
# Inputs
train_s3_uri = f"s3://{bucket}/train"
test_s3_uri = f"s3://{bucket}/test"

inputs = {
    "training": TrainingInput(
        s3_data=train_s3_uri,
        s3_data_type="S3Prefix",
        content_type="application/x-parquet",
        input_mode="File"
    ),
    "test": TrainingInput(
        s3_data=test_s3_uri,
        s3_data_type="S3Prefix",
        content_type="application/x-parquet",
        input_mode="File"
    )
}
# (Optional) if you kept train/test together under the same prefix and rely purely on keyword,
# you only need one channel. If you prefer a separate channel for test, you can add another:
# inputs["test"] = TrainingInput(s3_data=f"s3://{bucket}/{test_prefix}/", ...)

# -----------------------------------
# Estimator: Managed PyTorch DLC
# -----------------------------------
estimator = PyTorch(
    entry_point="train.py",          # your training script
    source_dir=".",                  # folder containing train.py (and any utils/requirements.txt)
    role=role,
    framework_version="2.1.0",       # pick a supported version
    py_version="py310",
    instance_type=instance_type,
    instance_count=instance_count,
    dependencies = ['requirements.txt', 'helper_functions.py'],
    hyperparameters=hyperparameters,
    sagemaker_session=session,
    disable_profiler=True,
    debugger_hook_config=False,
    # max_run=max_run,
    # use_spot_instances=use_spot,
    # max_wait=max_wait if use_spot else None,
    # Optional: checkpointing (recommended if using spot)
    # checkpoint_s3_uri=f"s3://{bucket}/checkpoints/autogluon-ts/",
)

# (Optional) if you have a requirements.txt in the source_dir, PyTorch Estimator will install it.
# Example requirements.txt lines that work for your script:
# autogluon.timeseries[all]==1.1.1
# pyarrow>=13.0.0
# mlflow>=2.9.0
# pandas>=2.0.0

# -----------------------------------
# Launch training
# -----------------------------------
job_name = sagemaker.utils.unique_name_from_base("ag-ts-train")
print("Starting training job:", job_name)
estimator.fit(inputs, job_name=job_name, wait=True, logs=True)

# After completion:
print("Model artifacts:", estimator.model_data)

INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.


Starting training job: ag-ts-train-1757015024-d26d


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: ag-ts-train-1757015024-d26d


2025-09-04 19:44:21 Starting - Starting the training job
2025-09-04 19:44:21 Pending - Training job waiting for capacity...............
2025-09-04 19:46:33 Pending - Preparing the instances for training......
2025-09-04 19:47:52 Downloading - Downloading the training image........................
2025-09-04 19:51:34 Training - Training image download completed. Training in progress.[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
  "cipher": algorithms.TripleDES,[0m
  "class": algorithms.TripleDES,[0m
[34m2025-09-04 19:51:42,753 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2025-09-04 19:51:42,772 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2025-09-04 19:51:42,783 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2025-09-04 19:51:42,788 sagemaker_pyt

### Inference 

In [None]:
%%writefile ag_mlflow_pyfunc.py
import json
import pandas as pd
import mlflow
import mlflow.pyfunc
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

class AGTimeSeriesWrapper(mlflow.pyfunc.PythonModel):
    def load_context(self, context):
        # Load the saved AutoGluon predictor (artifact named "predictor")
        self.predictor = TimeSeriesPredictor.load(context.artifacts["predictor"])

        # Load exogenous feature config if present
        try:
            with open(f'{context.artifacts["predictor"]}/exog_config.json', "r") as f:
                self.cfg = json.load(f)
        except Exception:
            self.cfg = {"feature_name": None, "feature_role": "none", "feature_fill": "ffill"}

    def _extend_known(self, cov_tsf: TimeSeriesDataFrame, horizon: int, fill: str):
        pdf = cov_tsf.to_pandas().sort_values(["item_id", "timestamp"])
        vname = pdf.columns.difference(["item_id", "timestamp"])[0]
        out = []
        for item, g in pdf.groupby("item_id"):
            if len(g) < 2:
                raise ValueError(f"Cannot infer frequency for item {item}")
            freq = g["timestamp"].iloc[1] - g["timestamp"].iloc[0]
            fut_idx = pd.date_range(g["timestamp"].max() + freq, periods=horizon, freq=freq)
            if fill == "ffill":
                vals = [g[vname].iloc[-1]] * horizon
            elif fill == "zero":
                vals = [0.0] * horizon
            elif fill == "mean":
                vals = [float(g[vname].mean())] * horizon
            else:
                vals = [g[vname].iloc[-1]] * horizon
            fut = pd.DataFrame({"item_id": item, "timestamp": fut_idx, vname: vals})
            out.append(pd.concat([g, fut], ignore_index=True))
        cov_ext = pd.concat(out, ignore_index=True)
        return TimeSeriesDataFrame.from_data_frame(cov_ext, id_column="item_id", timestamp_column="timestamp")

    def predict(self, context, model_input: pd.DataFrame) -> pd.DataFrame:
        # Expect columns: item_id, timestamp, optional target, optional random_feature
        df = model_input.copy()
        df["timestamp"] = pd.to_datetime(df["timestamp"])

        # Base (target optional at inference)
        cols = [c for c in ["item_id", "timestamp", "target"] if c in df.columns]
        tsf = TimeSeriesDataFrame.from_data_frame(df[cols], id_column="item_id", timestamp_column="timestamp")

        # Exogenous handling
        feature = self.cfg.get("feature_name")
        role    = self.cfg.get("feature_role", "none")
        fill    = self.cfg.get("feature_fill", "ffill")

        known = past = None
        static = None
        if feature and feature in df.columns:
            cov = TimeSeriesDataFrame.from_data_frame(
                df[["item_id", "timestamp", feature]], id_column="item_id", timestamp_column="timestamp"
            )
            if role == "past":
                past = cov
            elif role == "known":
                known = self._extend_known(cov, self.predictor.prediction_length, fill)
            elif role == "static":
                static = df.groupby("item_id")[feature].last().to_frame(name=feature)
                static.index.name = "item_id"

        forecast = self.predictor.predict(
            tsf, known_covariates=known, past_covariates=past, static_features=static
        )
        # Return flat frame: item_id, timestamp, mean, (quantiles if available)
        return forecast.to_pandas().reset_index()


def log_ag_pyfunc(predictor_local_dir: str,
                  experiment: str = "autogluon-timeseries",
                  pip_deps = None) -> str:
    """
    Logs an MLflow PyFunc model that wraps an AutoGluon predictor dir.
    Returns model URI (runs:/<run_id>/model)
    """
    if pip_deps is None:
        pip_deps = [
            "autogluon.timeseries[all]==1.1.1",
            "pandas>=2.0.0",
            "pyarrow>=13.0.0",
            "mlflow>=2.9.0",
        ]

    mlflow.set_experiment(experiment)
    with mlflow.start_run() as run:
        mlflow.pyfunc.log_model(
            artifact_path="model",
            python_model=AGTimeSeriesWrapper(),
            artifacts={"predictor": predictor_local_dir},
            conda_env={
                "name": "agts-env",
                "channels": ["conda-forge"],
                "dependencies": [
                    "python=3.10",
                    {"pip": pip_deps}
                ],
            },
        )
        return f"runs:/{run.info.run_id}/model"
