# Getting Started with Snowflake Dynamic Tables

This notebook demonstrates how to build declarative data pipelines using Snowflake Dynamic Tables for continuous data transformations, data validation, and alerting. Dynamic Tables automatically refresh based on defined queries and target freshness, simplifying data pipeline management without manual scheduling.

**Original Quickstart:** https://quickstarts.snowflake.com/guide/getting_started_with_dynamic_tables/index.html

## Prerequisites
- **Packages Required:** The Python UDTFs in this demo use the `Faker` package, which is available in Snowflake's Anaconda channel. No additional package installation is needed.
- **Permissions:** Ensure you have privileges to create databases, schemas, warehouses, tables, and dynamic tables.
- **Change Tracking:** Will be automatically enabled on base tables as needed.


In [None]:
-- Report the database, schema, warehouse, user, role and region of the current session
SELECT
    CURRENT_DATABASE()  AS current_database,
    CURRENT_SCHEMA()    AS current_schema,
    CURRENT_WAREHOUSE() AS current_warehouse,
    CURRENT_USER()      AS current_user,
    CURRENT_ROLE()      AS current_role,
    CURRENT_REGION()    AS current_region;


In [None]:
-- Provision the demo database and schema (only if missing) and make the schema current
CREATE DATABASE IF NOT EXISTS DEMO;
CREATE SCHEMA IF NOT EXISTS DEMO.DT_DEMO;
USE SCHEMA DEMO.DT_DEMO;

-- Extra-small standard warehouse used by the dynamic tables below;
-- it suspends after 300 idle seconds and resumes automatically on demand
CREATE WAREHOUSE IF NOT EXISTS XSMALL_WH
    WAREHOUSE_SIZE = XSMALL
    WAREHOUSE_TYPE = STANDARD
    AUTO_RESUME    = TRUE
    AUTO_SUSPEND   = 300;


## Sample Data Generation

We'll create three source tables using Python UDTFs to generate realistic sample data:
1. **CUST_INFO** - Customer information with spending limits
2. **PROD_STOCK_INV** - Product inventory with stock levels
3. **SALESDATA** - Raw sales transactions (customer, product, quantity, amount, and purchase date)


In [None]:
-- Python UDTF that fabricates customer rows:
-- a sequential customer id, a fake name, and a random spending limit
CREATE OR REPLACE FUNCTION gen_cust_info(num_records NUMBER)
RETURNS TABLE (custid NUMBER(10), cname VARCHAR(100), spendlimit NUMBER(10,2))
LANGUAGE PYTHON
RUNTIME_VERSION = 3.10
HANDLER = 'CustTab'
PACKAGES = ('Faker')
AS $$
import random

from faker import Faker

fake = Faker()

# The first generated customer receives this id; later ids are consecutive.
FIRST_CUSTOMER_ID = 1001


class CustTab:
    """Table-function handler that emits one synthetic customer per requested record."""

    def process(self, num_records):
        # Yields (custid, cname, spendlimit) tuples, num_records of them in total.
        for custid in range(FIRST_CUSTOMER_ID, FIRST_CUSTOMER_ID + num_records):
            cname = fake.name()
            # Spending limit is a random amount between 1000 and 10000, kept to 2 decimals.
            spendlimit = round(random.uniform(1000, 10000), 2)
            yield (custid, cname, spendlimit)
$$;




In [None]:
-- Create Python UDTF to generate product inventory data.
-- Takes no arguments and returns 100 rows (product ids 101-200), each with a fake
-- product name, a random stock level, and a stock date within the last 90 days.
CREATE OR REPLACE FUNCTION gen_prod_inv()
RETURNS TABLE (product_id number(10), pname varchar(100), stock number(10,2), stockdate date)
LANGUAGE PYTHON
RUNTIME_VERSION=3.10
HANDLER='ProdTab'
PACKAGES = ('Faker')
AS $$
from faker import Faker
import random
from datetime import datetime, timedelta
fake = Faker()

class ProdTab:
    # Generate multiple product records
    def process(self):
        # The stock-date window is identical for every row, so compute it once:
        # it spans from 90 days ago up to now.
        window_end = datetime.now()
        window_start = window_end - timedelta(days=90)

        for product_id in range(101, 201):
            pname = fake.catch_phrase()
            # Whole-number stock level between 500 and 1000
            stock = round(random.uniform(500, 1000), 0)
            # Random date inside the 90-day window
            stockdate = fake.date_between_dates(window_start, window_end)

            yield (product_id, pname, stock, stockdate)
$$;




In [None]:
-- Python UDTF that fabricates sales transactions:
-- 28 consecutive days beginning at start_date, 50 purchases on each day
CREATE OR REPLACE FUNCTION gen_cust_purchase(start_date DATE)
RETURNS TABLE (custid NUMBER(10), product_id NUMBER(10), quantity NUMBER(5), purchase_amount NUMBER(10,2), purchase_date DATE)
LANGUAGE PYTHON
RUNTIME_VERSION = 3.10
HANDLER = 'genCustPurchase'
PACKAGES = ('Faker')
AS $$
import random
from datetime import timedelta

from faker import Faker

fake = Faker()

DAYS_TO_GENERATE = 28
PURCHASES_PER_DAY = 50


class genCustPurchase:
    """Table-function handler yielding (custid, product_id, quantity, purchase_amount, purchase_date)."""

    def process(self, start_date):
        for offset in range(DAYS_TO_GENERATE):
            purchase_date = start_date + timedelta(days=offset)

            for _ in range(PURCHASES_PER_DAY):
                # Customer ids come from a 10-wide band selected by the purchase month
                # (for example 1010-1019 for January, 1020-1029 for February).
                month_band = 10 * purchase_date.month
                c_id = 1000 + month_band + random.randint(0, 9)
                product_id = fake.random_int(min=101, max=199)
                quantity = fake.random_int(min=1, max=5)
                purchase_amount = round(random.uniform(10, 1000), 2)
                yield (c_id, product_id, quantity, purchase_amount, purchase_date)
$$;

In [None]:
-- Materialize 1000 generated customers, ordered by customer id
CREATE OR REPLACE TABLE cust_info AS
SELECT
    custid,
    cname,
    spendlimit
FROM TABLE(gen_cust_info(1000))
ORDER BY custid;

In [None]:
-- Materialize the generated product inventory, ordered by product id
CREATE OR REPLACE TABLE prod_stock_inv AS
SELECT
    product_id,
    pname,
    stock,
    stockdate
FROM TABLE(gen_prod_inv())
ORDER BY product_id;

In [None]:
-- Build the sales table from generated purchases starting on 2025-01-01
CREATE OR REPLACE TABLE salesdata AS
SELECT
    custid,
    product_id,
    quantity,
    purchase_amount,
    purchase_date
FROM TABLE(gen_cust_purchase(CAST('2025-01-01' AS DATE)));

## Verify the Generation of Data

In [None]:
-- Row counts for the three generated source tables:
--   CUST_INFO      - customers and their spending limits
--   PROD_STOCK_INV - products and their stock levels
--   SALESDATA      - purchases made by customers
SELECT
    'CUST_INFO' AS table_name,
    COUNT(*)    AS record_count
FROM cust_info
UNION ALL
SELECT
    'PROD_STOCK_INV' AS table_name,
    COUNT(*)         AS record_count
FROM prod_stock_inv
UNION ALL
SELECT
    'SALESDATA' AS table_name,
    COUNT(*)    AS record_count
FROM salesdata;


In [None]:
-- Peek at five customer rows
SELECT
    custid,
    cname,
    spendlimit
FROM cust_info
LIMIT 5;


In [None]:
-- Peek at five product inventory rows
SELECT
    product_id,
    pname,
    stock,
    stockdate
FROM prod_stock_inv
LIMIT 5;


In [None]:
-- Peek at five sales transaction rows
SELECT
    custid,
    product_id,
    quantity,
    purchase_amount,
    purchase_date
FROM salesdata
LIMIT 5;


## Query Types Showcase: Basic Incremental Operations

Dynamic Tables support different types of queries that are optimized for incremental refresh mode. These operations can efficiently process only the changed data rather than requiring a full table refresh. Let's demonstrate the four basic incremental operation types:

1. **Simple Aggregations** - Basic SUM, COUNT, AVG operations on streaming data
2. **Filter Operations** - WHERE clauses filtering on timestamp or ID columns  
3. **Simple Joins** - INNER and LEFT JOINs between tables with proper join keys
4. **UNION Operations** - Combining similar datasets with UNION ALL

Each example will create a Dynamic Table that showcases these incremental-friendly patterns.

DOCUMENTATION: https://docs.snowflake.com/en/user-guide/dynamic-tables-supported-queries


In [None]:
-- Example 1: Simple Aggregations Dynamic Table
-- Demonstrates basic SUM, COUNT, AVG operations optimal for incremental refresh.
-- Produces one row per purchase date with transaction, revenue, unit and customer totals.
CREATE OR REPLACE DYNAMIC TABLE daily_sales_summary
    TARGET_LAG = '1 MINUTE'
    WAREHOUSE = XSMALL_WH
AS
SELECT
    sd.purchase_date            AS sales_date,
    COUNT(*)                    AS total_transactions,
    SUM(sd.purchase_amount)     AS total_revenue,
    AVG(sd.purchase_amount)     AS avg_transaction_amount,
    SUM(sd.quantity)            AS total_units_sold,
    COUNT(DISTINCT sd.custid)   AS unique_customers
FROM salesdata AS sd
GROUP BY sd.purchase_date;


In [None]:
-- Example 2: Filter Operations Dynamic Table
-- Demonstrates WHERE clauses filtering on date/ID columns for incremental processing.
-- NOTE: the 365-day window is relative to CURRENT_DATE(), so the sample 2025 rows
-- stop qualifying once they are more than 365 days old.
CREATE OR REPLACE DYNAMIC TABLE high_value_sales_lastyr
    TARGET_LAG = '1 MINUTE'
    WAREHOUSE = XSMALL_WH
AS
SELECT
    sd.custid           AS customer_id,
    sd.product_id,
    sd.purchase_amount  AS saleprice,
    sd.quantity,
    sd.purchase_date
FROM salesdata AS sd
WHERE
    sd.purchase_amount > 500  -- High-value transactions only
    AND sd.purchase_date >= DATEADD('day', -365, CURRENT_DATE())  -- Last 365 days only
    AND sd.custid % 10 < 3;  -- Customer segment: ids ending in 0, 1 or 2


In [None]:
-- Example 3: Simple Joins Dynamic Table
-- Demonstrates INNER JOINs with proper join keys for incremental processing.
-- Enriches each sale with the customer name and product name; sales whose
-- customer or product has no match are dropped by the inner joins.
CREATE OR REPLACE DYNAMIC TABLE sales_report
    TARGET_LAG = '1 MINUTE'
    WAREHOUSE = XSMALL_WH
AS
    SELECT
        sd.purchase_date,
        sd.custid           AS customer_id,
        c.cname             AS customer_name,
        sd.product_id,
        p.pname             AS product_name,
        sd.purchase_amount  AS saleprice,
        sd.quantity,
        -- NULLIF guards the division: a zero quantity yields NULL instead of
        -- raising a division-by-zero error that would fail the refresh
        (sd.purchase_amount / NULLIF(sd.quantity, 0)) AS unitsalesprice
    FROM salesdata AS sd
    INNER JOIN prod_stock_inv AS p
        ON sd.product_id = p.product_id
    INNER JOIN cust_info AS c
        ON sd.custid = c.custid;


In [None]:
-- Example 4: UNION Operations Dynamic Table
-- Demonstrates combining similar datasets with UNION ALL for incremental processing.
-- Each branch totals sales per customer per purchase date and keeps one value band.
-- The bands do not overlap and each branch carries a different category literal,
-- so UNION ALL returns the same rows as UNION without the de-duplication step.
CREATE OR REPLACE DYNAMIC TABLE unified_transaction_log
    TARGET_LAG = '1 MINUTE'
    WAREHOUSE = XSMALL_WH
AS
-- High value: daily customer total >= 750
SELECT
    'HIGH_VALUE' AS transaction_category,
    customer_id,
    customer_name,
    purchase_date,
    SUM(saleprice) AS total_sales
FROM sales_report
GROUP BY customer_id, customer_name, purchase_date
HAVING SUM(saleprice) >= 750

UNION ALL

-- Medium value: daily customer total >= 250 and < 750
SELECT
    'MEDIUM_VALUE' AS transaction_category,
    customer_id,
    customer_name,
    purchase_date,
    SUM(saleprice) AS total_sales
FROM sales_report
GROUP BY customer_id, customer_name, purchase_date
HAVING SUM(saleprice) >= 250 AND SUM(saleprice) < 750

UNION ALL

-- Low value: daily customer total < 250
SELECT
    'LOW_VALUE' AS transaction_category,
    customer_id,
    customer_name,
    purchase_date,
    SUM(saleprice) AS total_sales
FROM sales_report
GROUP BY customer_id, customer_name, purchase_date
HAVING SUM(saleprice) < 250;


## Verify the Dynamic Tables Group 1 Pipelines

In [None]:
-- Peek at five rows of the daily aggregation dynamic table
SELECT
    sales_date,
    total_transactions,
    total_revenue,
    avg_transaction_amount,
    total_units_sold,
    unique_customers
FROM daily_sales_summary
LIMIT 5;

In [None]:
-- Peek at five rows of the filtered high-value sales dynamic table
SELECT
    customer_id,
    product_id,
    saleprice,
    quantity,
    purchase_date
FROM high_value_sales_lastyr
LIMIT 5;

In [None]:
-- Peek at five rows of the joined sales report dynamic table
SELECT
    purchase_date,
    customer_id,
    customer_name,
    product_id,
    product_name,
    saleprice,
    quantity,
    unitsalesprice
FROM sales_report
LIMIT 5;

In [None]:
-- Peek at five rows of the categorized transaction log dynamic table
SELECT
    transaction_category,
    customer_id,
    customer_name,
    purchase_date,
    total_sales
FROM unified_transaction_log
LIMIT 5;

### Testing Incremental Refresh Behavior

Now let's test the incremental refresh behavior of our Dynamic Tables by:
1. Adding new sales data to trigger automatic refreshes
2. Monitoring the refresh history to verify incremental processing
3. Comparing before/after record counts to confirm updates


In [None]:
-- Baseline row counts for the four dynamic tables (taken before new data is added)
SELECT
    '01.DAILY_SALES_SUMMARY' AS table_name,
    COUNT(*)                 AS record_count
FROM daily_sales_summary
UNION ALL
SELECT
    '02.HIGH_VALUE_SALES_LASTYR' AS table_name,
    COUNT(*)                     AS record_count
FROM high_value_sales_lastyr
UNION ALL
SELECT
    '03.SALES_REPORT' AS table_name,
    COUNT(*)          AS record_count
FROM sales_report
UNION ALL
SELECT
    '04.UNIFIED_TRANSACTION_LOG' AS table_name,
    COUNT(*)                     AS record_count
FROM unified_transaction_log;


In [None]:
-- Feed the pipeline: append generated purchases starting on 2025-02-01
INSERT INTO salesdata (custid, product_id, quantity, purchase_amount, purchase_date)
SELECT
    custid,
    product_id,
    quantity,
    purchase_amount,
    purchase_date
FROM TABLE(gen_cust_purchase(CAST('2025-02-01' AS DATE)));

### WAIT! It can take about a minute for the automatic refresh to run

In [None]:
-- Re-check row counts for the four dynamic tables after the new sales rows were inserted
SELECT
    '01.DAILY_SALES_SUMMARY' AS table_name,
    COUNT(*)                 AS record_count
FROM daily_sales_summary
UNION ALL
SELECT
    '02.HIGH_VALUE_SALES_LASTYR' AS table_name,
    COUNT(*)                     AS record_count
FROM high_value_sales_lastyr
UNION ALL
SELECT
    '03.SALES_REPORT' AS table_name,
    COUNT(*)          AS record_count
FROM sales_report
UNION ALL
SELECT
    '04.UNIFIED_TRANSACTION_LOG' AS table_name,
    COUNT(*)                     AS record_count
FROM unified_transaction_log;


In [None]:
-- Inspect refreshes of the four demo dynamic tables that started in the last 10 minutes.
-- REFRESH_ACTION shows how each refresh ran (for example 'INCREMENTAL' or 'FULL');
-- rows whose REFRESH_ACTION is 'NO_DATA' are excluded.
SELECT
    NAME AS dynamic_table_name,
    REFRESH_ACTION,
    REFRESH_TRIGGER,
    DATA_TIMESTAMP,
    REFRESH_START_TIME,
    REFRESH_END_TIME,
    STATISTICS
FROM TABLE(INFORMATION_SCHEMA.DYNAMIC_TABLE_REFRESH_HISTORY())
WHERE REFRESH_ACTION != 'NO_DATA'
    AND REFRESH_START_TIME >= DATEADD('minute', -10, CURRENT_TIMESTAMP())
    AND NAME IN (
        'DAILY_SALES_SUMMARY',
        'HIGH_VALUE_SALES_LASTYR',
        'SALES_REPORT',
        'UNIFIED_TRANSACTION_LOG'
    )
ORDER BY
    NAME,
    DATA_TIMESTAMP DESC,
    REFRESH_END_TIME DESC;


Oh No!  Two of our DTs are doing FULL refresh?  What's up?

ISSUE: This happens because the default REFRESH_MODE is AUTO, which chooses FULL refresh when the query is unlikely to perform well incrementally (e.g., multiple joins, or unions combined with aggregations).

FIX: It's fine to explicitly use REFRESH_MODE=INCREMENTAL if your DT should work well incrementally.

Let's try it!

In [None]:
-- Example 3 (revisited): Simple Joins Dynamic Table with an explicit refresh mode
-- Same query as before, but REFRESH_MODE = INCREMENTAL overrides the AUTO default.
-- Enriches each sale with the customer name and product name; sales whose
-- customer or product has no match are dropped by the inner joins.
CREATE OR REPLACE DYNAMIC TABLE sales_report
    TARGET_LAG = '1 MINUTE'
    WAREHOUSE = XSMALL_WH
    REFRESH_MODE = INCREMENTAL
AS
    SELECT
        sd.purchase_date,
        sd.custid           AS customer_id,
        c.cname             AS customer_name,
        sd.product_id,
        p.pname             AS product_name,
        sd.purchase_amount  AS saleprice,
        sd.quantity,
        -- NULLIF guards the division: a zero quantity yields NULL instead of
        -- raising a division-by-zero error that would fail the refresh
        (sd.purchase_amount / NULLIF(sd.quantity, 0)) AS unitsalesprice
    FROM salesdata AS sd
    INNER JOIN prod_stock_inv AS p
        ON sd.product_id = p.product_id
    INNER JOIN cust_info AS c
        ON sd.custid = c.custid;


In [None]:
-- Example 4 (revisited): UNION Operations Dynamic Table with an explicit refresh mode
-- Same query shape as before, but REFRESH_MODE = INCREMENTAL overrides the AUTO default.
-- Each branch totals sales per customer per purchase date and keeps one value band.
-- The bands do not overlap and each branch carries a different category literal,
-- so UNION ALL returns the same rows as UNION without the de-duplication step.
CREATE OR REPLACE DYNAMIC TABLE unified_transaction_log
    TARGET_LAG = '1 MINUTE'
    WAREHOUSE = XSMALL_WH
    REFRESH_MODE = INCREMENTAL
AS
-- High value: daily customer total >= 750
SELECT
    'HIGH_VALUE' AS transaction_category,
    customer_id,
    customer_name,
    purchase_date,
    SUM(saleprice) AS total_sales
FROM sales_report
GROUP BY customer_id, customer_name, purchase_date
HAVING SUM(saleprice) >= 750

UNION ALL

-- Medium value: daily customer total >= 250 and < 750
SELECT
    'MEDIUM_VALUE' AS transaction_category,
    customer_id,
    customer_name,
    purchase_date,
    SUM(saleprice) AS total_sales
FROM sales_report
GROUP BY customer_id, customer_name, purchase_date
HAVING SUM(saleprice) >= 250 AND SUM(saleprice) < 750

UNION ALL

-- Low value: daily customer total < 250
SELECT
    'LOW_VALUE' AS transaction_category,
    customer_id,
    customer_name,
    purchase_date,
    SUM(saleprice) AS total_sales
FROM sales_report
GROUP BY customer_id, customer_name, purchase_date
HAVING SUM(saleprice) < 250;


In [None]:
-- Feed the pipeline once more: append another batch of generated purchases starting on 2025-02-01
INSERT INTO salesdata (custid, product_id, quantity, purchase_amount, purchase_date)
SELECT
    custid,
    product_id,
    quantity,
    purchase_amount,
    purchase_date
FROM TABLE(gen_cust_purchase(CAST('2025-02-01' AS DATE)));

In [None]:
-- Inspect refreshes of the four demo dynamic tables that started in the last 10 minutes.
-- REFRESH_ACTION shows how each refresh ran (for example 'INCREMENTAL' or 'FULL');
-- rows whose REFRESH_ACTION is 'NO_DATA' are excluded.
SELECT
    NAME AS dynamic_table_name,
    REFRESH_ACTION,
    REFRESH_TRIGGER,
    DATA_TIMESTAMP,
    REFRESH_START_TIME,
    REFRESH_END_TIME,
    STATISTICS
FROM TABLE(INFORMATION_SCHEMA.DYNAMIC_TABLE_REFRESH_HISTORY())
WHERE REFRESH_ACTION != 'NO_DATA'
    AND REFRESH_START_TIME >= DATEADD('minute', -10, CURRENT_TIMESTAMP())
    AND NAME IN (
        'DAILY_SALES_SUMMARY',
        'HIGH_VALUE_SALES_LASTYR',
        'SALES_REPORT',
        'UNIFIED_TRANSACTION_LOG'
    )
ORDER BY
    NAME,
    DATA_TIMESTAMP DESC,
    REFRESH_END_TIME DESC;


## Hurray!
Everything is running on **Incremental** refreshes!


In [None]:
 -- Clean up: remove the demo schema and every object created inside it.
 -- The DEMO database and the XSMALL_WH warehouse were created with IF NOT EXISTS,
 -- so they may have existed before this notebook ran. They are left in place to
 -- avoid destroying unrelated objects; drop them manually if they were created
 -- only for this demo.
 DROP SCHEMA IF EXISTS DEMO.DT_DEMO CASCADE;
