# Bring Your Own Time Series Model for Demand Forecasting

In this notebook, we first test the Chronos-Bolt model for demand forecasting, then log it to the Model Registry, and finally create a container service to run inference. Chronos-Bolt is a specialized time series forecasting model from Amazon that can perform zero-shot forecasting on a variety of time series data.



## Step 1: Test Chronos-Bolt model in this notebook, without creating a service


In [None]:
# Install exact working versions (validated - FIXED torch compatibility)
%pip install chronos-forecasting==1.5.3 --quiet
%pip install "torch>=2.5.0,<2.7.0" --quiet
%pip install transformers>=4.53.2 --quiet
%pip install numpy>=1.26.4 --quiet
%pip install pandas>=2.2.2 --quiet
%pip install matplotlib --quiet
%pip install accelerate>=1.9.0 --quiet

print("✅ Packages installed successfully!")


In [None]:
# Standard Chronos flow using BaseChronosPipeline
from chronos import BaseChronosPipeline
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Hugging Face Hub id of the Chronos-Bolt checkpoint to load.
model_name = "amazon/chronos-bolt-base"

# Loading options: place the model on the CPU and use float32 weights.
pipeline_load_options = {"device_map": "cpu", "torch_dtype": torch.float32}
pipeline = BaseChronosPipeline.from_pretrained(model_name, **pipeline_load_options)


In [None]:
# Show the torch version actually in use (the install cell constrains it to >=2.5.0,<2.7.0).
print(torch.__version__)

In [None]:
# Build a reproducible synthetic daily demand series:
# constant level + linear trend + weekly cycle + Gaussian noise.
np.random.seed(42)  # fixed seed so the noise draw below is the same on every run
days = 150
base_demand = 100
day_index = np.arange(days)

trend = np.linspace(0, 20, days)                      # climbs from 0 to 20 over the series
seasonality = 10 * np.sin(2 * np.pi * day_index / 7)  # Weekly seasonality
noise = np.random.normal(0, 5, days)                  # zero-mean noise, standard deviation 5

# Add the components together, then floor at zero so demand is never negative.
demand_history = np.clip(base_demand + trend + seasonality + noise, 0, None)

print(f"Generated demand history: {len(demand_history)} days")
print(f"Recent values: {demand_history[-10:].round(2)}")

# Create forecast
context_length = 120  # Use last 120 days for context
prediction_length = 30  # Forecast next 30 days

# Quantile levels to report: lower bound, median, upper bound (an 80% prediction interval).
quantile_levels = [0.1, 0.5, 0.9]

# Convert to torch tensor and add batch dimension
context = torch.tensor(demand_history[-context_length:], dtype=torch.float32).unsqueeze(0)

# Chronos-Bolt produces quantile forecasts (see the chronos-forecasting documentation),
# so the median and the interval bounds are requested by level via predict_quantiles().
# Picking rows of predict()'s output by position (index 1 / -2) does not correspond to a
# stated interval and would be arbitrary draws for a sampling-based Chronos model.
quantile_forecast, mean_forecast = pipeline.predict_quantiles(
    context=context,
    prediction_length=prediction_length,
    quantile_levels=quantile_levels,
)

# quantile_forecast is indexed as [batch, forecast step, quantile level]
print(f"\nForecast results:")
print(f"Quantile forecast shape: {tuple(quantile_forecast.shape)}")
print(f"Quantile levels: {quantile_levels}")

# Select each series by its level's position in `quantile_levels`, not by a hard-coded index.
forecast_lower = quantile_forecast[0, :, quantile_levels.index(0.1)].numpy()
forecast_median = quantile_forecast[0, :, quantile_levels.index(0.5)].numpy()
forecast_upper = quantile_forecast[0, :, quantile_levels.index(0.9)].numpy()

print(f"Median forecast (first 10 days): {forecast_median[:10].round(2)}")
print(f"Lower bound (first 5 days): {forecast_lower[:5].round(2)}")
print(f"Upper bound (first 5 days): {forecast_upper[:5].round(2)}")


In [None]:
# Draw the history followed by the forecast and its band on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))

history_len = len(demand_history)
historical_days = range(history_len)
forecast_days = range(history_len, history_len + prediction_length)

# Observed demand, then the median forecast with the shaded lower/upper band.
ax.plot(historical_days, demand_history, label='Historical Demand', color='blue', alpha=0.7)
ax.plot(forecast_days, forecast_median, label='Forecast (Median)', color='red', linewidth=2)
ax.fill_between(forecast_days, forecast_lower, forecast_upper, alpha=0.3, color='red', label='Confidence Interval')

# Dashed marker on the last observed day, where history ends and the forecast begins.
ax.axvline(x=history_len - 1, color='gray', linestyle='--', alpha=0.5)

ax.set_title('Demand Forecasting with Chronos-Bolt')
ax.set_xlabel('Days')
ax.set_ylabel('Demand')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

print("✅ Chronos-Bolt model working successfully!")


## Step 2: Log Chronos-Bolt model to Model Registry

Model Registry Documentation: https://docs.snowflake.com/en/developer-guide/snowflake-ml/model-registry/overview

* Standard HuggingFace Pipelines: https://docs.snowflake.com/en/developer-guide/snowflake-ml/model-registry/built-in-models/hugging-face - the example below constructs a custom model
* Custom Model Pipeline: https://docs.snowflake.com/en/developer-guide/snowflake-ml/model-registry/bring-your-own-model-types


In [None]:
# Build the custom model class
import os
import torch
import pandas as pd
from chronos import BaseChronosPipeline
from snowflake.ml.registry import Registry
from snowflake.ml.model import custom_model, model_signature
from snowflake.snowpark.context import get_active_session

# Active Snowpark session; later cells use it for the Registry and for session.sql() calls.
session = get_active_session()

# Create a custom model class for the instantiation and inference of this model
class ChronosBoltModel(custom_model.CustomModel):
    """Custom Model Registry wrapper that produces per-product Chronos-Bolt forecasts.

    The checkpoint is loaded from the ``model_path`` artifact of the supplied
    ``ModelContext``. ``predict`` handles one input row per product and always
    returns the same set of output columns, including for rows that fail.
    """

    # Quantile levels reported as lower bound, median and upper bound (80% interval).
    QUANTILE_LEVELS = (0.1, 0.5, 0.9)
    # Horizon used when the input has no `prediction_length` column.
    DEFAULT_PREDICTION_LENGTH = 30
    # Fixed output schema, so the frame looks the same whether rows succeed, fail, or are absent.
    OUTPUT_COLUMNS = (
        'product_id',
        'forecast_median',
        'forecast_lower',
        'forecast_upper',
        'prediction_length',
        'model_used',
        'error',
    )

    def __init__(self, context: custom_model.ModelContext) -> None:
        """Load the Chronos pipeline on CPU from the ``model_path`` artifact."""
        super().__init__(context)

        # For `chronos` set the environment variables to use local files only
        # We will download them to a local dir using huggingface_hub
        os.environ['HF_HUB_OFFLINE'] = '1'
        os.environ['TRANSFORMERS_OFFLINE'] = '1'

        self.pipeline = BaseChronosPipeline.from_pretrained(
            context.path("model_path"),
            device_map="cpu",
            torch_dtype=torch.float32,
        )

    @staticmethod
    def _parse_history(demand_history) -> list:
        """Turn one `demand_history` cell into a non-empty list of values.

        Accepts a sequence of numbers or a comma-separated string; the string may
        be wrapped in square brackets (JSON-style list text).

        Raises:
            ValueError: if the history is empty or a string token is not a number.
        """
        if isinstance(demand_history, str):
            text = demand_history.strip().strip('[]').strip()
            if not text:
                raise ValueError("demand_history is empty")
            return [float(token) for token in text.split(',')]

        values = list(demand_history)
        if not values:
            raise ValueError("demand_history is empty")
        return values

    # Inference function with a dataframe as input
    @custom_model.inference_api
    def predict(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """Forecast demand for every row of ``input_df``.

        Args:
            input_df: one row per product with columns ``product_id``,
                ``demand_history`` (sequence or comma-separated string) and,
                optionally, ``prediction_length`` (defaults to 30).

        Returns:
            A DataFrame with the columns in ``OUTPUT_COLUMNS``. Successful rows
            carry the forecast lists and an empty ``error`` string. Failed rows
            carry empty lists, ``prediction_length`` 0, ``model_used`` 'error'
            and the exception text in ``error``.
        """
        levels = list(self.QUANTILE_LEVELS)
        results = []

        for idx, row in input_df.iterrows():
            # Best effort per row: one bad product must not abort the whole batch.
            try:
                product_id = row['product_id']
                prediction_length = int(row.get('prediction_length', self.DEFAULT_PREDICTION_LENGTH))
                if prediction_length <= 0:
                    raise ValueError(f"prediction_length must be positive, got {prediction_length}")

                history_values = self._parse_history(row['demand_history'])

                # Convert to tensor
                context = torch.tensor(history_values, dtype=torch.float32)

                # Request the quantiles by level instead of picking rows of predict()'s
                # output by position; the result is indexed [batch, step, quantile level].
                with torch.no_grad():
                    quantile_forecast, _mean_forecast = self.pipeline.predict_quantiles(
                        context=context.unsqueeze(0),
                        prediction_length=prediction_length,
                        quantile_levels=levels,
                    )

                forecast_lower = quantile_forecast[0, :, 0].numpy().tolist()
                forecast_median = quantile_forecast[0, :, 1].numpy().tolist()
                forecast_upper = quantile_forecast[0, :, 2].numpy().tolist()

                results.append({
                    'product_id': product_id,
                    'forecast_median': forecast_median,
                    'forecast_lower': forecast_lower,
                    'forecast_upper': forecast_upper,
                    'prediction_length': prediction_length,
                    'model_used': 'chronos-bolt-base',
                    'error': ''
                })

            except Exception as e:
                results.append({
                    'product_id': row.get('product_id', f'error_{idx}'),
                    'forecast_median': [],
                    'forecast_lower': [],
                    'forecast_upper': [],
                    'prediction_length': 0,
                    'model_used': 'error',
                    'error': str(e)
                })

        # Passing the column list keeps the schema stable even for an empty input frame.
        return pd.DataFrame(results, columns=list(self.OUTPUT_COLUMNS))


In [None]:
# Fetch the checkpoint files from the Hugging Face Hub into a fresh temporary directory.
# TO USE YOUR OWN MODEL, skip the download and set `local_model_location` to your model
# directory path instead, then instantiate the CustomModel class as below.
import tempfile
from huggingface_hub import snapshot_download

tmpdir = tempfile.mkdtemp()
local_model_location = snapshot_download(repo_id=model_name, local_dir=tmpdir)

# "model_path" is the artifact name that ChronosBoltModel looks up through context.path().
path_list = {"model_path": local_model_location}
model_context = custom_model.ModelContext(artifacts=path_list)
chronos_model = ChronosBoltModel(context=model_context)



In [None]:
# Smoke-test the wrapper locally: two demo products built from the last 60 days of history
# (the second scaled by 1.2), each asking for a 14-day forecast.
recent_history = demand_history[-60:]
test_input = pd.DataFrame({
    'product_id': ['DEMO_PRODUCT_001', 'DEMO_PRODUCT_002'],
    'demand_history': [recent_history.tolist(), (recent_history * 1.2).tolist()],
    'prediction_length': [14, 14],
})

response = chronos_model.predict(test_input)
print("✅ Model prediction results:")
print(response)
print(f"\n📊 Forecast summary:")
for product, median_path in zip(response['product_id'], response['forecast_median']):
    print(f"Product {product}: {len(median_path)} day forecast")
    # An empty list marks a row whose forecast failed, so there is nothing to preview.
    if median_path:
        print(f"  First 5 days: {[round(x, 2) for x in median_path[:5]]}")


In [None]:
# Infer the model signature from the input and the response above.
# Documentation: https://docs.snowflake.com/en/developer-guide/snowflake-ml/model-registry/model-signature
# The result is handed to log_model() as the signature of the "predict" method.
signature = model_signature.infer_signature(test_input, response)


In [None]:
# Register the model in the Snowflake Model Registry.
# Packages to install with pip in the environment the logged model runs in.
model_pip_requirements = [
    'chronos-forecasting==1.5.3',
    'torch>=2.5.0,<2.7.0',
    'transformers>=4.53.2',
    'pandas>=2.2.2',
    'numpy>=1.26.4',
    'accelerate>=1.9.0',
    'huggingface_hub',
    'snowflake-ml-python',
]

reg = Registry(session)
mv = reg.log_model(
    chronos_model,
    model_name='CHRONOS_BOLT_DEMAND_FORECASTING',
    version_name='V6',  # Can remove this parameter to auto-create version names
    pip_requirements=model_pip_requirements,
    signatures={"predict": signature},
    options={"enable_remote_image_build": True},
)

In [None]:
# This step is EXPECTED to fail.
# The default for models is to predict using a warehouse, however, these models will need
# container services for proper resource allocation. The error is caught and printed so
# that "Run All" is not halted here and the container-service cells below still execute.
try:
    warehouse_result = mv.run(test_input)
except Exception as exc:
    print(f"Expected failure when running on a warehouse: {type(exc).__name__}: {exc}")
else:
    print("Warehouse inference unexpectedly succeeded:")
    print(warehouse_result)


## Step 3: Create a Container Service for Model Serving

Read more here: https://docs.snowflake.com/en/developer-guide/snowflake-ml/model-registry/container

*>>> Important Note: This is a long-running service, so once you are done, suspend it to stop incurring costs. To do this, run `ALTER SERVICE CHRONOS_SERVICE SUSPEND;` in a Notebook or SQL worksheet.*


In [None]:
# Provision the image repository and a CPU compute pool for the inference service.

# Compute Pool definition
DATABASE_NAME = 'DEMODB'
SCHEMA_NAME = 'WATER_FORECAST'
IMAGE_REPO_NAME = "CHRONOS_SERVICE_REPO"
COMPUTE_POOL_NAME = "CHRONOS_SERVICE_POOL_M"
COMPUTE_POOL_NODES = 1
COMPUTE_POOL_INSTANCE_TYPE = 'CPU_X64_M'

# The pool is created suspended, with auto-resume on and a 300-second auto-suspend.
create_pool_sql = (
    f"create compute pool if not exists {COMPUTE_POOL_NAME} min_nodes={COMPUTE_POOL_NODES} "
    f"max_nodes={COMPUTE_POOL_NODES} instance_family={COMPUTE_POOL_INSTANCE_TYPE} "
    f"initially_suspended=True auto_resume=True auto_suspend_secs=300"
)

# Executed strictly in this order: any existing pool is stopped and dropped before re-creation.
setup_statements = [
    f"use database {DATABASE_NAME};",
    f"use schema {SCHEMA_NAME};",
    f"create image repository if not exists {IMAGE_REPO_NAME}",
    f"alter compute pool if exists {COMPUTE_POOL_NAME} stop all",
    f"drop compute pool if exists {COMPUTE_POOL_NAME}",
    create_pool_sql,
]
for statement in setup_statements:
    session.sql(statement).collect()


In [None]:
import logging

# Set the root logger's level to INFO so INFO-level log records are not filtered out.
logging.getLogger().setLevel(logging.INFO)

In [None]:
# Deploy the logged model version as a Service object that can be called easily
SERVICE_NAME = 'CHRONOS_SERVICE'  # Name of the Service for powering inference

# Settings for the service: it runs on the compute pool created above, pulls its image
# from the image repository, and is capped at the pool's node count.
service_settings = dict(
    service_name=SERVICE_NAME,
    service_compute_pool=COMPUTE_POOL_NAME,
    image_repo=IMAGE_REPO_NAME,
    ingress_enabled=True,
    max_instances=int(COMPUTE_POOL_NODES),
    build_external_access_integration='ALLOW_ALL_INTEGRATION',
)

# **This step may take >15 mins** - it is building a full container runtime.
mv.create_service(**service_settings)


## Step 4: Serve the model from the Registry and use it for inference
This code can be used elsewhere, such as in a Streamlit app or a SQL worksheet, to call the Chronos-Bolt model.

Documentation link: https://docs.snowflake.com/en/developer-guide/snowflake-ml/model-registry/container#using-a-model-deployed-to-spcs


In [None]:
# PYTHON CALL - useful for Streamlit app
# Pull Model from Registry for Inference
from snowflake.ml.registry import Registry
from snowflake.snowpark.context import get_active_session

# Modify these based on your details.
DATABASE_NAME = 'DEMODB'
SCHEMA_NAME = 'WATER_FORECAST'
SELECTED_MODEL = 'CHRONOS_BOLT_DEMAND_FORECASTING'
# Must be the version the service was created from: this notebook logs and deploys 'V6'.
# (The previous value 'V5' is never logged by this notebook.)
MODEL_VERSION = 'V6'

session = get_active_session()
reg = Registry(session=session, database_name=DATABASE_NAME, schema_name=SCHEMA_NAME)
chronos_from_registry = reg.get_model(SELECTED_MODEL).version(MODEL_VERSION)

# Passing service_name sends the request to the deployed container service.
chronos_from_registry.run(test_input, service_name=SERVICE_NAME)


In [None]:
-- Switch to the database and schema used throughout this notebook, then list the services there.
USE DATABASE DEMODB;
USE SCHEMA WATER_FORECAST; 

SHOW SERVICES;

In [None]:
-- Example of running the SQL:
-- Calls the PREDICT method of CHRONOS_SERVICE with positional arguments.
-- NOTE(review): presumably these map to product_id, demand_history and prediction_length,
-- in the column order of the logged signature - confirm against the model signature.
USE DATABASE DEMODB;
USE SCHEMA WATER_FORECAST; 

SELECT CHRONOS_SERVICE!PREDICT(
    'TEST_PRODUCT_001',
    ARRAY_CONSTRUCT(100, 105, 98, 110, 95, 102, 108, 97, 115, 92, 120),
    7
) as forecast_result;

## Step 5: Cost Management - Suspend Services

*>>> IMPORTANT: Remember to suspend the service when done to avoid ongoing costs!*


In [None]:
-- SUSPEND SERVICE TO STOP COSTS
-- The service is suspended first, then the compute pool it runs on.


ALTER SERVICE CHRONOS_SERVICE SUSPEND;

-- SUSPEND COMPUTE POOL
ALTER COMPUTE POOL CHRONOS_SERVICE_POOL_M SUSPEND;

-- Check status of both objects after the suspend statements
SHOW SERVICES LIKE 'CHRONOS_SERVICE';
SHOW COMPUTE POOLS LIKE 'CHRONOS_SERVICE_POOL_M';


In [None]:
-- RESUME SERVICES (when needed for inference)
-- ALTER COMPUTE POOL CHRONOS_SERVICE_POOL_M RESUME;
-- ALTER SERVICE CHRONOS_SERVICE RESUME;
