In [None]:
# Unity Catalog PoC – Rapid Test Suite
# Catalog: `unity_demo`, Schema: `governance_lab`

# Setup: Create catalog and schema
# The statements run strictly in this order: the catalog is created and made
# current first, so the unqualified schema name below resolves inside it.
bootstrap_statements = (
    "CREATE CATALOG IF NOT EXISTS unity_demo",
    "USE CATALOG unity_demo",
    "CREATE SCHEMA IF NOT EXISTS governance_lab",
)
for bootstrap_statement in bootstrap_statements:
    spark.sql(bootstrap_statement)

# Left as the cell's final expression so its result is still what the cell displays.
spark.sql("USE SCHEMA governance_lab")


In [None]:
# Table 1: employee_records
from pyspark.sql import Row

# Two sample employee rows; the table schema is inferred from the Row fields.
employees = [
    Row(id=1, name="Alice", role="Data Engineer"),
    Row(id=2, name="Bob", role="Data Analyst"),
]

# mode("overwrite") replaces any existing table contents, so re-running this
# cell does not append duplicate rows.
employee_records_df = spark.createDataFrame(employees)
employee_records_writer = employee_records_df.write.mode("overwrite")
employee_records_writer.saveAsTable("unity_demo.governance_lab.employee_records")

# Governance metadata stored as table properties: controller, processor,
# retention policy, owner, agreement URL and permitted use.
# NOTE(review): 'owner' is documented as a reserved table property key in
# Spark 3.x / Databricks — confirm this statement is accepted on the target
# runtime before relying on it.
employee_records_properties_sql = """
ALTER TABLE unity_demo.governance_lab.employee_records SET TBLPROPERTIES (
  'data_controller' = 'HR Dept',
  'data_processor' = 'People Analytics',
  'retention_policy' = '3y',
  'owner' = 'hr@datacorp.com',
  'data_agreement_url' = 's3://agreements/hr_policy.pdf',
  'permitted_use' = 'internal use only'
)
"""
spark.sql(employee_records_properties_sql)


In [None]:
# Table 2: customer_orders
# `Row` is not imported in this cell; it must already be in scope.
# Two sample order rows; the table schema is inferred from the Row fields.
orders = [
    Row(order_id=1001, customer_id=501, amount=250.75),
    Row(order_id=1002, customer_id=502, amount=125.00),
]

# mode("overwrite") replaces any existing table contents, so re-running this
# cell does not append duplicate rows.
customer_orders_df = spark.createDataFrame(orders)
customer_orders_writer = customer_orders_df.write.mode("overwrite")
customer_orders_writer.saveAsTable("unity_demo.governance_lab.customer_orders")

# Governance metadata stored as table properties: controller, processor,
# retention policy, owner, agreement URL and permitted use.
# NOTE(review): 'owner' is documented as a reserved table property key in
# Spark 3.x / Databricks — confirm this statement is accepted on the target
# runtime before relying on it.
customer_orders_properties_sql = """
ALTER TABLE unity_demo.governance_lab.customer_orders SET TBLPROPERTIES (
  'data_controller' = 'Sales Dept',
  'data_processor' = 'E-Commerce Platform',
  'retention_policy' = '2y',
  'owner' = 'sales@datacorp.com',
  'data_agreement_url' = 's3://agreements/sales_dpa.pdf',
  'permitted_use' = 'analytics only'
)
"""
spark.sql(customer_orders_properties_sql)


In [None]:
# Table 3: financial_transactions
# `Row` is not imported in this cell; it must already be in scope.
# Two sample transaction rows; the table schema is inferred from the Row fields.
transactions = [
    Row(txn_id=9001, account="A123", value=990.00),
    Row(txn_id=9002, account="A456", value=1345.50),
]

# mode("overwrite") replaces any existing table contents, so re-running this
# cell does not append duplicate rows.
financial_transactions_df = spark.createDataFrame(transactions)
financial_transactions_writer = financial_transactions_df.write.mode("overwrite")
financial_transactions_writer.saveAsTable("unity_demo.governance_lab.financial_transactions")

# Governance metadata stored as table properties: controller, processor,
# retention policy, owner, agreement URL and permitted use.
# NOTE(review): 'owner' is documented as a reserved table property key in
# Spark 3.x / Databricks — confirm this statement is accepted on the target
# runtime before relying on it.
financial_transactions_properties_sql = """
ALTER TABLE unity_demo.governance_lab.financial_transactions SET TBLPROPERTIES (
  'data_controller' = 'Finance Dept',
  'data_processor' = 'Accounting Software',
  'retention_policy' = '6y',
  'owner' = 'finance@datacorp.com',
  'data_agreement_url' = 's3://agreements/finance_compliance.pdf',
  'permitted_use' = 'regulatory + audit only'
)
"""
spark.sql(financial_transactions_properties_sql)


In [None]:
# REQ-2: View Controller/Processor info for each table
# For every demo table, list its table properties and show only the two rows
# that name the data controller and the data processor.
for table in ["employee_records", "customer_orders", "financial_transactions"]:
    print(f"\n🔍 Metadata for: {table}")
    table_properties = spark.sql(f"SHOW TBLPROPERTIES unity_demo.governance_lab.{table}")
    controller_processor_rows = table_properties.filter(
        "key IN ('data_controller', 'data_processor')"
    )
    controller_processor_rows.show()


In [None]:
# REQ-3: Simulate retention enforcement
# For each demo table, read its table properties and report whether every
# required governance key is present. When all are present, the table's
# retention policy value is printed. This only checks for presence of the
# keys; it does not delete or expire any data.
required_fields = ['data_controller', 'data_processor', 'retention_policy', 'owner']
for table in ["employee_records", "customer_orders", "financial_transactions"]:
    print(f"\nChecking retention policy for {table}")
    # Build the key -> value mapping with DataFrame.collect() rather than
    # .rdd.map(...).collectAsMap(): RDD APIs are not available on Unity Catalog
    # standard (shared) access mode compute or over Spark Connect, and the
    # property listing is small enough to collect to the driver either way.
    property_rows = spark.sql(
        f"SHOW TBLPROPERTIES unity_demo.governance_lab.{table}"
    ).collect()
    props = {row["key"]: row["value"] for row in property_rows}
    missing = [k for k in required_fields if k not in props]
    if missing:
        print("❌ Missing required metadata fields:", missing)
    else:
        print("✅ All required metadata fields are present.")
        # Safe to index directly: 'retention_policy' is one of required_fields.
        print("📅 Retention policy:", props['retention_policy'])
