In [None]:
%pip install -r requirements.txt
%restart_python

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from delta.tables import DeltaTable
from datetime import datetime, timedelta
import time

In [None]:
%load_ext autoreload
%autoreload 2

from lakebase_utils import LakebaseConnection

In [None]:
def _text_widget(name, default):
    """Declare a text widget called `name` with `default`, then return its current value."""
    dbutils.widgets.text(name, default)
    return dbutils.widgets.get(name)


# Notebook parameters: each one is exposed as a text widget and read back
# immediately so later cells can use plain Python variables.
username = _text_widget("user", "lars.liahagen@databricks.com")
lakebase_instance_name = _text_widget("lakebase_instance_name", "smart-stock-db")
catalog = _text_widget("catalog", "smart_stock")
schema_silver = _text_widget("schema_silver", "smart_stock_silver")
schema_gold = _text_widget("schema_gold", "smart_stock_gold")


In [0]:
# =============================================
# CONFIGURATION
# =============================================

# Build the PostgreSQL (Lakebase) connection object that later cells query
# through `conn`. Both arguments come from the notebook widgets, so the widget
# cell must have been run before this one.
conn = LakebaseConnection(username, lakebase_instance_name)

# Printed only when the constructor above returned without raising.
print("✅ Setup complete")

✅ Setup complete


In [0]:
def update_dim_products():
    """
    Rebuild the product dimension table with a full refresh.

    Intended to run once daily at 6 AM. Every row of the PostgreSQL
    ``products`` table is read through the notebook-level ``conn``, enriched
    with two derived reporting columns, and written over
    ``{catalog}.{schema_silver}.dim_products``. A full overwrite (including
    the schema) is used because the products table is small.

    Derived columns:
        price_tier: "High Value" when price >= 500, "Medium Value" when
            price >= 100, otherwise "Low Value".
        component_type: "Core Component", "Major Component" or "Accessory",
            decided by ``category``.

    Returns:
        int: Number of products written, or 0 when the source query returns
        no rows (in that case nothing is written and the existing table is
        left as it was).
    """
    # Inclusive lower price bounds for the price_tier column.
    high_value_min_price = 500
    medium_value_min_price = 100
    # Categories that map to each component_type; anything else is an accessory.
    core_categories = ("Motors", "Batteries", "Frames")
    major_categories = ("Wheels", "Brakes", "Drivetrain")

    print(f"\n📦 Updating dim_products at {datetime.now()}")

    # Read all products from PostgreSQL. The result is kept under its own name
    # (pandas-style frame: it exposes `.empty`) instead of being rebound to the
    # Spark DataFrame created from it.
    products_pdf = conn.execute_query("SELECT * FROM products")
    if products_pdf.empty:
        print("  ℹ️ No products found")
        return 0

    # The row count is already known on the driver. The select() below neither
    # adds nor drops rows, so there is no need to run a Spark count() action
    # after the write just to report this number.
    record_count = len(products_pdf)
    products_df = spark.createDataFrame(products_pdf)

    # Derived columns for easier reporting
    price_tier = (
        when(col("price") >= high_value_min_price, "High Value")
        .when(col("price") >= medium_value_min_price, "Medium Value")
        .otherwise("Low Value")
        .alias("price_tier")
    )
    component_type = (
        when(col("category").isin(*core_categories), "Core Component")
        .when(col("category").isin(*major_categories), "Major Component")
        .otherwise("Accessory")
        .alias("component_type")
    )

    # Create dimension with business logic
    dim_products = products_df.select(
        col("product_id"),
        col("name").alias("product_name"),
        col("category"),
        col("sku"),
        col("price"),
        col("unit"),
        col("reorder_level"),
        price_tier,
        component_type,
        current_timestamp().alias("last_updated"),
    )

    # Overwrite dimension table (products don't change often)
    (
        dim_products.write
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable(f"`{catalog}`.`{schema_silver}`.dim_products")
    )

    print(f"✅ Updated dim_products with {record_count} products")

    return record_count


In [0]:
# Run the refresh now; the cell's value is the number of products written
# (0 when the source table returned no rows).
update_dim_products()


📦 Updating dim_products at 2025-09-16 08:00:44.364715
✅ Updated dim_products with 41 products


41