# 🔄 Snowflake Dynamic Tables - Data Engineering Demo

This notebook demonstrates end-to-end data engineering with:
- **Bronze Layer**: Raw data ingestion
- **Silver Layer**: Cleaned & transformed data (Dynamic Tables)
- **Gold Layer**: Business aggregations (Dynamic Tables)

## Prerequisites
Run the SQL setup script first: `sql/01_setup_dynamic_tables.sql`

## To Import into Snowflake Notebooks:
1. Go to Snowflake → Notebooks
2. Click "Import" or drag this .ipynb file
3. Select your warehouse and run!


## 1️⃣ Setup & Configuration


In [None]:
# Imports and Snowflake session
import json
import random
import uuid
from datetime import datetime, timedelta

# Get Snowflake session (works in Snowflake Notebooks)
from snowflake.snowpark.context import get_active_session
# Reuse the session supplied by the notebook runtime; no credentials are
# handled in this notebook.
session = get_active_session()

# Configuration
DATABASE = "DATA_ENGINEERING_DEMO"
WAREHOUSE = "WH_DATA_ENG"

# Point the session at the demo database and warehouse so the
# schema-qualified names used below (e.g. BRONZE.<table>) resolve.
session.sql(f"USE DATABASE {DATABASE}").collect()
session.sql(f"USE WAREHOUSE {WAREHOUSE}").collect()
print(f"✅ Connected to {DATABASE}")


## 2️⃣ Data Generators


In [None]:
# Sensor Data Generator
class SensorDataGenerator:
    """Generate synthetic IoT sensor readings.

    Each reading is a dict with the keys ``DEVICE_ID``, ``SENSOR_TYPE``,
    ``READING_VALUE``, ``READING_UNIT``, ``READING_TIMESTAMP``,
    ``RAW_PAYLOAD`` and ``SOURCE_FILE``.
    """

    SENSOR_TYPES = ["TEMPERATURE", "PRESSURE", "HUMIDITY", "VIBRATION"]
    DEVICES = [f"DEVICE_{str(i).zfill(3)}" for i in range(1, 51)]

    # Normal (min, max) reading range for each sensor type.
    VALUE_RANGES = {
        "TEMPERATURE": (15, 85),
        "PRESSURE": (100, 500),
        "HUMIDITY": (20, 80),
        "VIBRATION": (0, 100),
    }
    # Unit label reported alongside each sensor type's reading.
    READING_UNITS = {
        "TEMPERATURE": "CELSIUS",
        "PRESSURE": "PSI",
        "HUMIDITY": "PERCENT",
        "VIBRATION": "MM/S",
    }
    # Probability that a reading is generated above the normal range.
    ANOMALY_RATE = 0.05

    @classmethod
    def generate_batch(cls, size: int) -> list:
        """Return ``size`` randomly generated sensor reading dicts.

        With probability ``ANOMALY_RATE`` a reading is drawn from 1.5x-2x the
        sensor's normal maximum; otherwise it is drawn from the normal range.
        Timestamps fall within the hour preceding the call.

        Args:
            size: Number of readings to generate; 0 yields an empty list.

        Returns:
            A list of reading dicts, one per generated reading.
        """
        # Read the clock once per batch so every record shares the same
        # reference time and SOURCE_FILE date, even across midnight.
        now = datetime.now()
        source_file = f"iot_{now.strftime('%Y%m%d')}.json"

        data = []
        for _ in range(size):
            sensor_type = random.choice(cls.SENSOR_TYPES)
            device_id = random.choice(cls.DEVICES)
            min_v, max_v = cls.VALUE_RANGES[sensor_type]
            if random.random() < cls.ANOMALY_RATE:
                # Out-of-range spike, well above the normal maximum.
                value = random.uniform(max_v * 1.5, max_v * 2)
            else:
                value = random.uniform(min_v, max_v)

            data.append({
                "DEVICE_ID": device_id,
                "SENSOR_TYPE": sensor_type,
                "READING_VALUE": round(value, 2),
                "READING_UNIT": cls.READING_UNITS[sensor_type],
                "READING_TIMESTAMP": now - timedelta(seconds=random.randint(0, 3600)),
                "RAW_PAYLOAD": json.dumps({"raw": True, "device": device_id}),
                "SOURCE_FILE": source_file,
            })
        return data


In [None]:
# Transaction Data Generator
class TransactionDataGenerator:
    """Factory for synthetic point-of-sale transaction records."""

    # SKU_0001 .. SKU_0200
    PRODUCTS = [f"SKU_{number:04d}" for number in range(1, 201)]

    @classmethod
    def generate_batch(cls, size: int) -> list:
        """Build ``size`` random transaction dicts.

        Each dict carries a unique ``TRANSACTION_ID``, a customer id in
        10001-10500, a SKU from ``PRODUCTS``, a quantity of 1-10, a unit
        price of 10-500 rounded to cents, a transaction type, a timestamp
        up to 72 hours before the call, and a JSON ``RAW_DATA`` payload.
        """
        # "SALE" appears three times so it is drawn three times as often
        # as "RETURN" or "EXCHANGE".
        type_pool = ["SALE", "SALE", "SALE", "RETURN", "EXCHANGE"]

        def build_record() -> dict:
            # Fields are computed in the same order as the record's keys.
            txn_id = f"TXN_{uuid.uuid4().hex[:16].upper()}"
            customer = random.randint(10001, 10500)
            sku = random.choice(cls.PRODUCTS)
            quantity = random.randint(1, 10)
            price = round(random.uniform(10, 500), 2)
            txn_type = random.choice(type_pool)
            occurred_at = datetime.now() - timedelta(hours=random.randint(0, 72))
            payload = json.dumps({"source": "POS", "store_id": random.randint(1, 50)})
            return {
                "TRANSACTION_ID": txn_id,
                "CUSTOMER_ID": customer,
                "PRODUCT_SKU": sku,
                "QUANTITY": quantity,
                "UNIT_PRICE": price,
                "TRANSACTION_TYPE": txn_type,
                "TRANSACTION_TIME": occurred_at,
                "RAW_DATA": payload,
            }

        return [build_record() for _ in range(size)]


In [None]:
# Customer Event Generator
class CustomerEventGenerator:
    """Generate synthetic customer clickstream events grouped into sessions."""

    EVENT_TYPES = ["PAGE_VIEW", "CLICK", "ADD_TO_CART", "PURCHASE", "SEARCH"]

    @classmethod
    def generate_batch(cls, num_sessions: int) -> list:
        """Return a flat list of event dicts for ``num_sessions`` sessions.

        Every session gets 3-15 events that share one ``SESSION_ID``,
        ``CUSTOMER_ID`` and device. The first event of a session is always a
        ``PAGE_VIEW`` at the session start time; each later event occurs
        5-60 seconds after the one before it, so events within a session are
        in chronological order.

        Args:
            num_sessions: Number of sessions to simulate; 0 yields ``[]``.

        Returns:
            A list of event dicts (between ``3 * num_sessions`` and
            ``15 * num_sessions`` entries), in generation order.
        """
        data = []
        for _ in range(num_sessions):
            session_id = f"SESS_{uuid.uuid4().hex[:24]}"
            customer_id = random.randint(10001, 10500)
            device = random.choice(["mobile", "desktop", "tablet"])
            # Session start: up to 168 hours (one week) before now.
            base_time = datetime.now() - timedelta(hours=random.randint(0, 168))

            event_time = base_time
            for i in range(random.randint(3, 15)):
                if i:
                    # Accumulate gaps instead of scaling a fresh random step
                    # by the index, which produced out-of-order timestamps.
                    event_time += timedelta(seconds=random.randint(5, 60))
                data.append({
                    "EVENT_ID": f"EVT_{uuid.uuid4().hex[:24].upper()}",
                    "SESSION_ID": session_id,
                    "CUSTOMER_ID": customer_id,
                    "EVENT_TYPE": "PAGE_VIEW" if i == 0 else random.choice(cls.EVENT_TYPES),
                    "EVENT_PROPERTIES": json.dumps({"device": device}),
                    "PAGE_URL": f"/page/{random.randint(1, 100)}",
                    "USER_AGENT": f"Mozilla/5.0 ({device})",
                    "IP_ADDRESS": f"{random.randint(1,255)}.{random.randint(1,255)}.{random.randint(1,255)}.{random.randint(1,255)}",
                    "EVENT_TIMESTAMP": event_time,
                })
        return data


## 3️⃣ Ingest Data into Bronze Layer


In [None]:
# Ingest Sensor Data
import pandas as pd

# Generate a batch of synthetic readings and append it to the Bronze table.
data = SensorDataGenerator.generate_batch(500)
df = session.create_dataframe(pd.DataFrame(data))
df.write.mode("append").save_as_table("BRONZE.RAW_SENSOR_READINGS")
# Report the real batch size so the message cannot drift from the call above.
print(f"✅ Inserted {len(data)} sensor readings")


In [None]:
# Ingest Transactions
# Generate a batch of synthetic transactions and append it to the Bronze table.
data = TransactionDataGenerator.generate_batch(200)
df = session.create_dataframe(pd.DataFrame(data))
df.write.mode("append").save_as_table("BRONZE.RAW_TRANSACTIONS")
# Report the real batch size so the message cannot drift from the call above.
print(f"✅ Inserted {len(data)} transactions")


In [None]:
# Ingest Customer Events
# Name the session count once so the generator call and the summary agree.
num_sessions = 50
data = CustomerEventGenerator.generate_batch(num_sessions)
df = session.create_dataframe(pd.DataFrame(data))
df.write.mode("append").save_as_table("BRONZE.RAW_CUSTOMER_EVENTS")
# len(data) is the event count; each session contributes several events.
print(f"✅ Inserted {len(data)} customer events from {num_sessions} sessions")


In [None]:
# Check Bronze layer row counts
print("📦 Bronze Layer Row Counts:")
for table in ["RAW_SENSOR_READINGS", "RAW_TRANSACTIONS", "RAW_CUSTOMER_EVENTS"]:
    # The query returns a single row; its count is read from the CNT column.
    count = session.sql(f"SELECT COUNT(*) as cnt FROM BRONZE.{table}").collect()[0]['CNT']
    print(f"   {table}: {count:,} rows")


In [None]:
# Check Dynamic Tables status
print("🔄 Dynamic Tables:")
# Use the DATABASE constant from the setup cell rather than repeating the
# name. The trailing expression is left unassigned so the notebook renders
# the resulting DataFrame as the cell output.
session.sql(f"SHOW DYNAMIC TABLES IN DATABASE {DATABASE}").to_pandas()
