### Deploy registered AG model in MLFlow for RT Inference

In [6]:
! pip install fastavro

Collecting fastavro
  Downloading fastavro-1.12.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (5.7 kB)
Downloading fastavro-1.12.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (3.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.5/3.5 MB[0m [31m74.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fastavro
Successfully installed fastavro-1.12.0


In [1]:
import boto3
import fastavro
import matplotlib
import mlflow
import mlflow.sagemaker as mfs
from mlflow import MlflowClient
from sagemaker.serve import SchemaBuilder, ModelBuilder, Mode
import sagemaker

import json
import pandas as pd
import mlflow.pyfunc
import tempfile
import os 
import io
import boto3
import time
from fastavro import reader as avro_reader
from sagemaker.serve import ModelBuilder, Mode, SchemaBuilder


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [2]:
bucket = 'ag-example-timeseries'
avro_prefix = 'avro-inf-stream'

# Create S3 client
s3 = boto3.client("s3")


mlflow_uri = "arn:aws:sagemaker:us-east-1:543531862107:mlflow-tracking-server/ag-ex-timeseries"  # with sagemaker-mlflow plugin
mlflow_experiment   = "autogluon-timeseries"
region      = sagemaker.Session().boto_region_name
session     = sagemaker.Session()
role        = sagemaker.get_execution_role() 

In [3]:
from helper_functions import package_mlflow_model

mlflow_run_id = "3ac5a4113be24a6fb64936ae5562ee54" 
prefix = "packed-mlflow-models"

s3_model_data_uri = package_mlflow_model(mlflow_run_id, bucket, prefix, 
                                         mlflow_tracking_arn=mlflow_uri, artifact_path='models')
print(f"Packaged model uploaded to: {s3_model_data_uri}")

Downloading artifacts:   0%|          | 0/11 [00:00<?, ?it/s]

Packaged model uploaded to: s3://ag-example-timeseries/packed-mlflow-models/model.tar.gz


### Generate dummy data

In [3]:
import io, json, time, uuid, threading, datetime as dt
import numpy as np
import pandas as pd
import boto3
from fastavro import writer, parse_schema

In [4]:
ITEM_IDS        = ["A", "B"]                        # a couple of series to demo
FREQ_SECS       = 5                                 # new file cadence
HORIZON         = 24  

In [5]:
AVRO_SCHEMA = {
    "type": "record",
    "name": "TimePoint",
    "fields": [
        {"name": "item_id",        "type": "string"},
        {"name": "timestamp",      "type": "string"},  # ISO 8601
        {"name": "target",         "type": ["null", "double"], "default": None},
        {"name": "random_feature", "type": ["null", "double"], "default": None},
    ],
}
PARSED_SCHEMA = parse_schema(AVRO_SCHEMA)

_stop_stream = {"flag": False}

def _sine(i, base=10.0, noise=0.3):
    return base + 2.0*np.sin(i/6.0) + np.random.randn()*noise

def write_dummy_avro_loop():
    i = 0
    while not _stop_stream["flag"]:
        now = pd.Timestamp.utcnow().floor("s")
        rows = []
        for item in ITEM_IDS:
            rows.append({
                "item_id": item,
                "timestamp": (now).isoformat(),
                "target": float(_sine(i + hash(item)%7)),      # last observed target (optional at inference)
                "random_feature": float(np.random.randn()),    # example past/known covariate
            })
        buf = io.BytesIO()
        writer(buf, PARSED_SCHEMA, rows)
        buf.seek(0)
        key = f"{avro_prefix}{now.strftime('%Y/%m/%d/%H%M%S')}_{uuid.uuid4().hex[:8]}.avro"
        s3.upload_fileobj(buf, bucket, key)
        print(f"[gen] wrote {len(rows)} rows to s3://{bucket}/{key}")
        i += 1
        time.sleep(FREQ_SECS)

# start writer thread
t = threading.Thread(target=write_dummy_avro_loop, daemon=True)
t.start()



[gen] wrote 2 rows to s3://ag-example-timeseries/avro-inf-stream2025/09/05/224812_68a11bd5.avro
[gen] wrote 2 rows to s3://ag-example-timeseries/avro-inf-stream2025/09/05/224817_8b662117.avro
[gen] wrote 2 rows to s3://ag-example-timeseries/avro-inf-stream2025/09/05/224822_dae3c05d.avro


In [6]:
# Stop it later with:
_stop_stream["flag"] = True
t.join()

### Inference 

In [90]:
# import sagemaker
# import pandas as pd

# # Helper wrappers referred earlier
# from ag_model import (
#     AutoGluonSagemakerEstimator,
#     AutoGluonNonRepackInferenceModel,
#     AutoGluonSagemakerInferenceModel,
#     AutoGluonRealtimePredictor,
#     AutoGluonBatchPredictor,
# )
# from sagemaker import utils
# from sagemaker.serializers import CSVSerializer
# import os
# import boto3

# role = sagemaker.get_execution_role()
# sagemaker_session = sagemaker.session.Session()
# region = sagemaker_session._region_name

# bucket = sagemaker_session.default_bucket()
# s3_prefix = f"autogluon_sm/{utils.sagemaker_timestamp()}"
# output_path = f"s3://{bucket}/{s3_prefix}/output/"

In [4]:
%%writefile ag_ts_inference.py

import os
import json
import pandas as pd
import fastavro
import io
import cloudpickle

from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

# Required by SageMaker for loading the model
def model_fn(model_dir):
    """
    Loads the trained TimeSeriesPredictor from the model directory.
    
    Args:
        model_dir (str): The directory where the model artifact is located.
        
    Returns:
        TimeSeriesPredictor: The loaded predictor.
    """
    return TimeSeriesPredictor.load(model_dir)

# Required by SageMaker for inference
def transform_fn(predictor, data, content_type, accept_type):
    """
    Handles data deserialization, prediction, and serialization.
    
    Args:
        predictor (TimeSeriesPredictor): The loaded Autogluon predictor.
        data (str or bytes): The input data.
        content_type (str): The Content-Type header of the input data.
        accept_type (str): The Accept header requested by the client.
        
    Returns:
        bytes: The serialized prediction result.
        str: The Content-Type of the prediction result.
    """
    # 1. Deserialize input
    if content_type == "application/json":
        # Handle pandas-split format from JSON
        df = pd.read_json(io.StringIO(data), orient="split")
    elif content_type == "application/x-avro-bytes":
        # Handle Avro bytes from a custom content type
        input_stream = io.BytesIO(data)
        reader = fastavro.reader(input_stream)
        records = [r for r in reader]
        df = pd.DataFrame.from_records(records)
    else:
        raise ValueError(f"Unsupported content type: {content_type}")
    
    # 2. Perform prediction
    ts_dataframe = TimeSeriesDataFrame(df)
    predictions = predictor.predict(ts_dataframe)

    # 3. Serialize output
    if accept_type == "application/json":
        return predictions.to_json(orient="split"), accept_type
    elif accept_type == "text/csv":
        return predictions.to_csv(), accept_type
    else:
        raise ValueError(f"Unsupported accept type: {accept_type}")


Overwriting ag_ts_inference.py


In [5]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.pytorch.model import PyTorchModel
from sagemaker import image_uris

# --- Re-use existing setup ---
role = get_execution_role()
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name

# Retrieve the URI for a PyTorch GPU inference image
gpu_image_uri = image_uris.retrieve(
    framework="pytorch",
    region=region,
    version="2.2.0",
    py_version="py310",
    instance_type="ml.g4dn.2xlarge",
    image_scope="inference",
)

# --- Create a SageMaker PyTorch Model instance ---
pytorch_model = PyTorchModel(
    model_data=s3_model_data_uri,
    role=role,
    entry_point="ag_ts_inference.py", # Path to your inference script
    source_dir=".", # Directory containing the entry point script
    image_uri=gpu_image_uri,
    framework_version="2.2.0", # Must match the framework version in the image URI
    py_version="py310", # Must match the Python version in the image URI
    sagemaker_session=sagemaker_session
)

# --- Deploy the model ---
predictor = pytorch_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g4dn.2xlarge",
)


INFO:sagemaker:Repacking model artifact (s3://ag-example-timeseries/packed-mlflow-models/model.tar.gz), script artifact (.), and dependencies ([]) into single tar.gz file located at s3://sagemaker-us-east-1-543531862107/pytorch-inference-2025-09-05-22-56-59-709/model.tar.gz. This may take some time depending on model size...
INFO:sagemaker:Creating model with name: pytorch-inference-2025-09-05-22-57-36-683
INFO:sagemaker:Creating endpoint-config with name pytorch-inference-2025-09-05-22-57-37-354
INFO:sagemaker:Creating endpoint with name pytorch-inference-2025-09-05-22-57-37-354


-------------!