In [None]:
# Notebook-wide imports. `F` is the Snowpark column-function namespace used by
# the Python cells further down.
import streamlit as st
import snowflake.snowpark.functions as F
from snowflake.snowpark.context import get_active_session

# Pick up the session this notebook is already running in; without the import
# above this call raises NameError on a fresh kernel.
session = get_active_session()

Let us keep the Vancouver sales data handy for reference.

In [None]:
-- Full contents of the Vancouver sales table; later Python cells read this
-- cell's result, so no row limit is applied here.
SELECT
    *
FROM
    vancouver_sales;

Create a view of all Canadian holidays (nationwide or specific to British Columbia) that fall within our sales forecast date range, i.e. after `2022-01-01`.

In [None]:
-- Holidays from the public holiday calendar for Canada (ISO_ALPHA2 = 'CA'),
-- dated after 2022-01-01, keeping rows that have no subdivision or whose
-- subdivision is 'BC'.
-- No ORDER BY: the views in this notebook that read it only join on `date`,
-- so sorting inside the view would just add cost to every query against it.
CREATE OR REPLACE VIEW canadian_holidays AS (
    SELECT
        date,
        holiday_name,
        is_financial
    FROM
        frostbyte_cs_public.cybersyn.public_holiday_calendar
    WHERE
        -- Exact match: LIKE without wildcards was only an equality test.
        ISO_ALPHA2 = 'CA'
        AND date > '2022-01-01'
        AND (
            subdivision IS NULL
            OR subdivision = 'BC'
        )
);

Create a dataset for multiple items, namely `Mothers Favorite`, `Bottled Soda` and `Ice Tea`, along with the Canadian holidays. Including the holidays in the dataset lets us check whether holidays have an impact on sales and on the subsequent forecast.

In [None]:
-- Training set: sales rows for the three selected menu items, each tagged with
-- the holiday name (NULL when the row's timestamp matches no holiday date).
CREATE OR REPLACE VIEW allitems_vancouver AS (
    SELECT
        sales.timestamp,
        sales.menu_item_name,
        sales.total_sold,
        hol.holiday_name
    FROM vancouver_sales AS sales
    LEFT JOIN canadian_holidays AS hol
        ON sales.timestamp = hol.date
    WHERE sales.menu_item_name IN ('Mothers Favorite', 'Bottled Soda', 'Ice Tea')
);

Build the multi-series forecast model.

In [None]:
-- Train a multi-series forecast model on allitems_vancouver: one series per
-- MENU_ITEM_NAME, indexed by TIMESTAMP, predicting TOTAL_SOLD.
-- The class name is fully qualified so the statement does not depend on the
-- session/account search path including SNOWFLAKE.ML.
CREATE OR REPLACE SNOWFLAKE.ML.FORECAST vancouver_forecast (
    INPUT_DATA => TABLE(allitems_vancouver),
    SERIES_COLNAME => 'MENU_ITEM_NAME',
    TIMESTAMP_COLNAME => 'TIMESTAMP',
    TARGET_COLNAME => 'TOTAL_SOLD'
);

In [None]:
-- List the forecast model instances visible to the session; fully qualified so
-- it works without SNOWFLAKE.ML on the search path.
SHOW SNOWFLAKE.ML.FORECAST;

In [None]:
# Latest timestamp present in the result of the `vancouver_sales` cell.
(
    vancouver_sales
    .to_df()
    .select(F.max(F.col("timestamp")))
)

Build the inference dataset on which we will do the predictions.

In [None]:
-- Inference input for the forecast: every selected menu item crossed with ten
-- future dates, tagged with the holiday name for that date (NULL if none).
-- The intermediate and final ORDER BY clauses were dropped: the CTE sort was
-- discarded by the following join, and sorting inside a view only adds cost.
CREATE OR REPLACE VIEW vancouver_forecast_data AS (
    WITH future_dates AS (
        -- row_number() runs 1..10, so this yields 2023-05-29 through 2023-06-07.
        -- NOTE(review): the literal presumably mirrors the latest sales
        -- timestamp shown earlier in the notebook; confirm if the data changes.
        SELECT
            '2023-05-28' ::DATE + row_number() OVER (
                ORDER BY
                    0
            ) AS timestamp
        FROM
            TABLE(generator(rowcount => 10))
    ),
    food_items AS (
        SELECT
            DISTINCT menu_item_name
        FROM
            allitems_vancouver
    ),
    joined_menu_items AS (
        SELECT
            *
        FROM
            food_items
            CROSS JOIN future_dates
    )
    SELECT
        jmi.menu_item_name,
        -- Cast the generated DATE to TIMESTAMP_NTZ for the model input.
        to_timestamp_ntz(jmi.timestamp) AS timestamp,
        ch.holiday_name
    FROM
        joined_menu_items AS jmi
        LEFT JOIN canadian_holidays ch ON jmi.timestamp = ch.date
);

Run the predictions i.e. do the multi-series forecasting.

In [None]:
-- Score the inference view with the trained model, one series per menu item.
CALL vancouver_forecast!FORECAST(
    INPUT_DATA        => TABLE(vancouver_forecast_data),
    SERIES_COLNAME    => 'menu_item_name',
    TIMESTAMP_COLNAME => 'timestamp'
);

Save the predictions to the table `vancouver_predictions`.

In [None]:
# Persist the result of the `vancouver_predictions` cell as a table of the same
# name, replacing any existing table.
(
    vancouver_predictions
    .to_df()
    .write
    .mode("overwrite")
    .save_as_table("vancouver_predictions")
)

Analyse the importance of features.

In [None]:

-- Feature importance scores reported by the trained forecast model.
CALL vancouver_forecast!EXPLAIN_FEATURE_IMPORTANCE();

Explore vital metrics from the model.

In [None]:
-- Evaluation metrics reported by the trained forecast model.
CALL vancouver_forecast!SHOW_EVALUATION_METRICS();

Pull out all features (variables) that had no impact on the prediction, i.e. the ones with `SCORE == 0`.

In [None]:
# Names of the features whose importance score is exactly 0.
# Filter BEFORE projecting: once only FEATURE_NAME is selected, SCORE is no
# longer part of the DataFrame and a later filter on it cannot be resolved.
(
    feature_importance
    .to_df()
    .filter(F.col("SCORE") == 0)
    .select(F.col("FEATURE_NAME"))
)