In [0]:
# ---------------------------------------------------------------------------
# Unity Catalog namespace: <catalog>.<schema>.<object>
# ---------------------------------------------------------------------------
CATALOG_NAME = "zenith_online"

# Schemas that live inside the catalog
SYSTEM_SCHEMA, LANDING_SCHEMA = "_system", "00_landing"
BRONZE_SCHEMA, SILVER_SCHEMA, GOLD_SCHEMA = "01_bronze", "02_silver", "03_gold"

# Shared prefixes, so each path / table name below only spells out its own suffix
_LANDING_VOLUME_ROOT = f"/Volumes/{CATALOG_NAME}/{LANDING_SCHEMA}"
_SYSTEM_VOLUME_ROOT = f"/Volumes/{CATALOG_NAME}/{SYSTEM_SCHEMA}"
_BRONZE_NAMESPACE = f"{CATALOG_NAME}.{BRONZE_SCHEMA}"
_GOLD_NAMESPACE = f"{CATALOG_NAME}.{GOLD_SCHEMA}"

# UC Volume locations the data generator writes its raw files to
RAW_STREAMING_PATH = f"{_LANDING_VOLUME_ROOT}/streaming/user_events"
RAW_BATCH_CUSTOMERS_PATH = f"{_LANDING_VOLUME_ROOT}/batch/customers"
RAW_BATCH_PRODUCTS_PATH = f"{_LANDING_VOLUME_ROOT}/batch/products"

# UC Volume locations for streaming checkpoints and schema metadata
CHECKPOINT_BASE_PATH = f"{_SYSTEM_VOLUME_ROOT}/checkpoints"
SCHEMA_BASE_PATH = f"{_SYSTEM_VOLUME_ROOT}/schemas"

# Fully qualified (catalog.schema.table) table names
BRONZE_EVENTS_TABLE = f"{_BRONZE_NAMESPACE}.bronze_user_events"
BRONZE_CUSTOMERS_TABLE = f"{_BRONZE_NAMESPACE}.bronze_customer_profiles"
BRONZE_PRODUCTS_TABLE = f"{_BRONZE_NAMESPACE}.bronze_product_details"
SILVER_TABLE = f"{CATALOG_NAME}.{SILVER_SCHEMA}.silver_sessionized_activity"
GOLD_DAILY_PRODUCT_TABLE = f"{_GOLD_NAMESPACE}.gold_daily_product_performance"
# NOTE(review): unlike the other tables this name has no layer prefix ("gold_") — confirm intended
GOLD_CUSTOMER_SUMMARY_TABLE = f"{_GOLD_NAMESPACE}.customer_purchase_summary"

In [0]:
# Make sure the catalog exists, then select it as the current catalog so that
# unqualified schema names in later statements resolve inside it.
for catalog_statement in (
    f"CREATE CATALOG IF NOT EXISTS {CATALOG_NAME}",
    f"USE CATALOG {CATALOG_NAME}",
):
    spark.sql(catalog_statement)

In [0]:
# CREATE SCHEMAS
# Names are unqualified, so each schema is created in the current catalog.
for schema_name in (
    SYSTEM_SCHEMA,
    LANDING_SCHEMA,
    BRONZE_SCHEMA,
    SILVER_SCHEMA,
    GOLD_SCHEMA,
):
    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")

In [0]:
# CREATE VOLUMES FOR RAW DATA GENERATOR
# One volume per ingestion mode, both inside the landing schema.
for volume_name in ("batch", "streaming"):
    spark.sql(
        f"CREATE VOLUME IF NOT EXISTS {CATALOG_NAME}.{LANDING_SCHEMA}.{volume_name}"
    )

In [0]:
# CREATE DIRECTORIES FOR THE RAW DATA GENERATOR
def ensure_dir_exists(path):
    """Create the directory at ``path`` unless it is already present.

    ``dbutils.fs.ls`` raises for a path that does not exist, so it cannot be
    used as a truthiness-based existence probe. ``dbutils.fs.mkdirs`` creates
    the directory (and any missing parents) only when needed, so it is called
    unconditionally and the function is safe to re-run.

    Args:
        path: Directory path to create, e.g. a ``/Volumes/...`` location.

    Raises:
        Exception: Whatever ``dbutils.fs.mkdirs`` raises (for example on
            missing permissions); failures are not swallowed.
    """
    dbutils.fs.mkdirs(path)


# Landing directories the data generator writes into. These reuse the path
# constants from the configuration cell instead of rebuilding the same strings;
# the lower-case names are kept in case other cells read them.
streaming_events_path = RAW_STREAMING_PATH
batch_customers_path = RAW_BATCH_CUSTOMERS_PATH
batch_products_path = RAW_BATCH_PRODUCTS_PATH

for path in [streaming_events_path, batch_customers_path, batch_products_path]:
    ensure_dir_exists(path)

In [0]:
# CREATE VOLUMES FOR CHECKPOINTS AND SCHEMA METADATA
# Both volumes live in the system schema.
for volume_name in ("checkpoints", "schemas"):
    spark.sql(
        f"CREATE VOLUME IF NOT EXISTS {CATALOG_NAME}.{SYSTEM_SCHEMA}.{volume_name}"
    )

In [0]:
# Report that setup finished for the configured catalog
ready_message = f"Unity Catalog environment '{CATALOG_NAME}' is ready."
print(ready_message)