---
title: "Snowflake Storage Cortex Forecast Example"
date: "08/25/2024"
format:
    html:
        code-fold: False
        output: False
---

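# Overview

This notebook walks through forecasting Snowflake storage usage with `SnowflakeMLForecast`: it connects to Snowflake, builds a `storage_usage_train` table from `snowflake.account_usage.storage_usage`, creates and runs a forecast model, and then generates the forecast visualization together with feature importances and model metrics.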

In [1]:
%load_ext autoreload
%autoreload 2

In [6]:
#| hide
from nbdev.showdoc import *

# Example Usage

In [17]:
#| skip
from snowflake.snowpark.version import VERSION
from cortex_forecast.forecast import SnowflakeMLForecast
import os

In [18]:
#| skip
# Build the forecasting helper. User, password and account are read from
# environment variables so no secret is stored in the notebook; database,
# warehouse, schema and role are fixed values for this example.
forecast_model = SnowflakeMLForecast(
    config_file='./cortex_forecast/files/yaml/storage_forecast_config.yaml',
    connection_config={
        'user': os.getenv('SNOWFLAKE_USER'),
        'password': os.getenv('SNOWFLAKE_PASSWORD'),
        'account': os.getenv('SNOWFLAKE_ACCOUNT'),
        'database': 'CORTEX',
        'warehouse': 'CORTEX_WH',
        'schema': 'DEV',
        'role': 'CORTEX_USER_ROLE'  # Use the desired role
    }
)

# One round-trip to the server doubles as a connectivity check and supplies
# the user / server-version fields of the banner below.
snowflake_environment = forecast_model.session.sql('SELECT current_user(), current_version()').collect()
snowpark_version = VERSION

# Emit the whole banner with a single print; sep='\n' puts each field on its
# own line, exactly as separate print calls would.
print(
    '\nConnection Established with the following parameters:',
    'User                        : {}'.format(snowflake_environment[0][0]),
    'Role                        : {}'.format(forecast_model.session.get_current_role()),
    'Database                    : {}'.format(forecast_model.session.get_current_database()),
    'Schema                      : {}'.format(forecast_model.session.get_current_schema()),
    'Warehouse                   : {}'.format(forecast_model.session.get_current_warehouse()),
    'Snowflake version           : {}'.format(snowflake_environment[0][1]),
    # Only the first three components (major, minor, patch) are shown.
    'Snowpark for Python version : {}.{}.{}'.format(*snowpark_version[:3]),
    sep='\n',
)



Connection Established with the following parameters:
User                        : JD_SERVICE_ACCOUNT_ADMIN
Role                        : "CORTEX_USER_ROLE"
Database                    : "CORTEX"
Schema                      : "DEV"
Warehouse                   : "CORTEX_WH"
Snowflake version           : 8.31.1
Snowpark for Python version : 1.19.0


In [13]:
#| skip
# Create Training Data
# Builds the `storage_usage_train` table from the account-level storage usage
# view, converting `storage_bytes` to GiB (bytes / 1024^3).
training_days = 365  # look-back window, in days, interpolated into the SQL below
predicted_days = 30  # not referenced in this cell

# Keep the most recent `training_days` days of history. The filter has to be
# `>=`: with `<` the table would hold only rows OLDER than the look-back
# window, which contradicts the recent (2024) rows shown in the recorded
# preview and leaves nothing recent for the model to train on.
forecast_model.session.sql(f'''CREATE OR REPLACE TABLE storage_usage_train AS
    SELECT 
        TO_TIMESTAMP_NTZ(usage_date) AS usage_date,
        storage_bytes / POWER(1024, 3) AS storage_gb
    FROM snowflake.account_usage.storage_usage
    WHERE TO_TIMESTAMP_NTZ(usage_date) >= DATEADD(day, -{training_days}, CURRENT_DATE())
''').collect()
# Quick sanity preview. There is no ORDER BY, so which 10 rows are shown (and
# their order) is not deterministic.
forecast_model.session.sql('SELECT * FROM storage_usage_train LIMIT 10').show()

--------------------------------------------
|"USAGE_DATE"         |"STORAGE_GB"        |
--------------------------------------------
|2024-01-31 00:00:00  |4.904916015453637   |
|2024-05-12 00:00:00  |210.28793087136     |
|2024-07-28 00:00:00  |263.7296593543142   |
|2024-07-03 00:00:00  |260.80696067772806  |
|2024-06-09 00:00:00  |256.271279534325    |
|2024-07-13 00:00:00  |263.72963202372193  |
|2024-02-17 00:00:00  |7.94266801699996    |
|2024-06-24 00:00:00  |256.9257180793211   |
|2024-08-06 00:00:00  |263.74474628735334  |
|2024-07-01 00:00:00  |260.81033707316965  |
--------------------------------------------



In [14]:
#| skip
# Run Forecast
# `#| skip` matches the other Snowflake-dependent cells: `forecast_model` is
# created only in a cell that is itself flagged `#| skip`, so this cell cannot
# run when those cells are skipped.
# Calls `create_and_run_forecast()` and previews the first rows of the result.
forecast_data = forecast_model.create_and_run_forecast()
forecast_data.head()

Attempting to create tag: environment
Tag 'environment' already exists.
Attempting to create tag: team
Tag 'team' already exists.
Step 1/4: Creating training table...
Generated SQL:

        CREATE OR REPLACE TEMPORARY TABLE my_forecast_model_20240821_dfayt_train AS
        SELECT TO_TIMESTAMP_NTZ(usage_date) AS usage_date, storage_gb AS storage_gb, * EXCLUDE (usage_date, storage_gb)
        FROM storage_usage_train
        WHERE TO_TIMESTAMP_NTZ(usage_date) >= DATEADD(day, -365, (SELECT MAX(usage_date) FROM storage_usage_train));
Step 2/4: Creating forecast model...
Generated SQL:

        CREATE OR REPLACE SNOWFLAKE.ML.FORECAST my_forecast_model_20240821_dfayt(
            INPUT_DATA => SYSTEM$REFERENCE('table', 'my_forecast_model_20240821_dfayt_train'),
            TIMESTAMP_COLNAME => 'usage_date',
            TARGET_COLNAME => 'storage_gb',
        CONFIG_OBJECT => {'on_error': 'skip', 'evaluate': TRUE, 'evaluation_config': {'n_splits': 2, 'gap': 0, 'prediction_interval': 0.95}}) 

Unnamed: 0,USAGE_DATE,FORECAST,LOWER_BOUND,UPPER_BOUND
0,2024-08-22,135.66905,75.064851,196.273249
1,2024-08-23,178.077473,92.370193,263.784753
2,2024-08-24,200.659917,95.690366,305.629469
3,2024-08-25,200.681777,79.473379,321.890174
4,2024-08-26,215.561086,80.045978,351.076194


In [15]:
#| skip
# Generate Forecast and Visualization
# `#| skip` matches the other Snowflake-dependent cells: `forecast_model` is
# created only in a cell that is itself flagged `#| skip`.
forecasting_period = 30     # presumably the forecast horizon in days — TODO confirm against the library
confidence_interval = 0.95  # presumably the prediction-interval level — TODO confirm against the library
forecast_model.generate_forecast_and_visualization(forecasting_period, confidence_interval)

Getting historical max date


            TS      Type   Value Type      Volume
0   2024-08-22  Forecast     FORECAST  135.669050
1   2024-08-23  Forecast     FORECAST  178.077473
2   2024-08-24  Forecast     FORECAST  200.659917
3   2024-08-25  Forecast     FORECAST  200.681777
4   2024-08-26  Forecast     FORECAST  215.561086
..         ...       ...          ...         ...
513 2024-09-16  Forecast  UPPER_BOUND  451.766374
514 2024-09-17  Forecast  UPPER_BOUND  452.910810
515 2024-09-18  Forecast  UPPER_BOUND  462.853923
516 2024-09-19  Forecast  UPPER_BOUND  466.044861
517 2024-09-20  Forecast  UPPER_BOUND  460.832551

[304 rows x 4 columns]
Top 10 Feature Importances


   RANK                          FEATURE_NAME  SCORE             FEATURE_TYPE
0     1                                  lag7   0.09  derived_from_endogenous
1     2                                 lag91   0.08  derived_from_endogenous
2     3  aggregated_endogenous_trend_features   0.07  derived_from_endogenous
3     4                                 lag21   0.07  derived_from_endogenous
4     5                                 lag35   0.07  derived_from_endogenous
5     6                                 lag14   0.06  derived_from_endogenous
6     7                                 lag42   0.06  derived_from_endogenous
7     8                                 lag49   0.06  derived_from_endogenous
8     9                                 lag56   0.06  derived_from_endogenous
9    10                                 lag84   0.05  derived_from_endogenous
Underlying Model Metrics
             ERROR_METRIC  METRIC_VALUE  STANDARD_DEVIATION  LOGS
0                     MAE        18.681            

In [16]:
#| skip
# Repeat of the forecast/visualization call with a forecasting period of 30.
# `#| skip` matches the other Snowflake-dependent cells: `forecast_model` is
# created only in a cell that is itself flagged `#| skip`.
# NOTE(review): this cell was originally titled "Example of 30 days of training
# data", but nothing here changes the training window — the arguments are the
# same values used for the previous forecast call. TODO: either configure the
# intended 30-day training window before this call or remove the duplicate cell.
forecasting_period = 30
confidence_interval = 0.95
forecast_model.generate_forecast_and_visualization(forecasting_period, confidence_interval)

Getting historical max date


            TS      Type   Value Type      Volume
0   2024-08-22  Forecast     FORECAST  135.669050
1   2024-08-23  Forecast     FORECAST  178.077473
2   2024-08-24  Forecast     FORECAST  200.659917
3   2024-08-25  Forecast     FORECAST  200.681777
4   2024-08-26  Forecast     FORECAST  215.561086
..         ...       ...          ...         ...
513 2024-09-16  Forecast  UPPER_BOUND  451.766374
514 2024-09-17  Forecast  UPPER_BOUND  452.910810
515 2024-09-18  Forecast  UPPER_BOUND  462.853923
516 2024-09-19  Forecast  UPPER_BOUND  466.044861
517 2024-09-20  Forecast  UPPER_BOUND  460.832551

[304 rows x 4 columns]
Top 10 Feature Importances


   RANK                          FEATURE_NAME  SCORE             FEATURE_TYPE
0     1                                  lag7   0.09  derived_from_endogenous
1     2                                 lag91   0.08  derived_from_endogenous
2     3  aggregated_endogenous_trend_features   0.07  derived_from_endogenous
3     4                                 lag21   0.07  derived_from_endogenous
4     5                                 lag35   0.07  derived_from_endogenous
5     6                                 lag14   0.06  derived_from_endogenous
6     7                                 lag42   0.06  derived_from_endogenous
7     8                                 lag49   0.06  derived_from_endogenous
8     9                                 lag56   0.06  derived_from_endogenous
9    10                                 lag84   0.05  derived_from_endogenous
Underlying Model Metrics
             ERROR_METRIC  METRIC_VALUE  STANDARD_DEVIATION  LOGS
0                     MAE        18.681            