# 🏗️ REQ-1: Register Datasets with Metadata at Ingestion

This notebook demonstrates REQ-1 of our Unity Catalog governance requirements:

> ✅ "As a platform engineer, I need to register datasets with metadata at ingestion. Metadata fields include data controller, processor, retention policy, and owner."

Catalog: `unity_demo`  
Schema: `governance_lab`  


In [None]:
# Make sure the demo catalog and schema exist, then select them as the
# session defaults. The statements run strictly in the order listed.
setup_statements = (
    "CREATE CATALOG IF NOT EXISTS unity_demo",
    "USE CATALOG unity_demo",
    "CREATE SCHEMA IF NOT EXISTS governance_lab",
    "USE SCHEMA governance_lab",
)
for statement in setup_statements:
    spark.sql(statement)


In [None]:
# Create tables with metadata
from pyspark.sql import Row

GOVERNANCE_SCHEMA = "unity_demo.governance_lab"


def register_dataset(table_name, rows, metadata):
    """Create (or overwrite) a table from ``rows`` and attach ``metadata`` to it.

    Args:
        table_name: Unqualified table name; the table is created inside
            ``GOVERNANCE_SCHEMA``.
        rows: List of ``Row`` objects handed to ``spark.createDataFrame``.
        metadata: Mapping of table-property key to value. Keys and values are
            interpolated verbatim into single-quoted SQL literals, so they
            must not contain single quotes.
    """
    full_name = f"{GOVERNANCE_SCHEMA}.{table_name}"
    spark.createDataFrame(rows).write.mode("overwrite").saveAsTable(full_name)

    # An empty property list is not valid SQL, so skip the ALTER entirely.
    if not metadata:
        return

    properties = ",\n  ".join(f"'{key}' = '{value}'" for key, value in metadata.items())
    spark.sql(f"ALTER TABLE {full_name} SET TBLPROPERTIES (\n  {properties}\n)")


# NOTE: 'owner' is a reserved table-property key on Spark 3+ / Databricks
# (table ownership is changed with ALTER TABLE ... OWNER TO), and using it in
# SET TBLPROPERTIES is rejected. The governance owner contact is therefore
# stored under the non-reserved key 'data_owner'.

# Employee table
employees = [
    Row(id=1, name="Alice", role="Engineer"),
    Row(id=2, name="Bob", role="Analyst"),
]
register_dataset(
    "employee_records",
    employees,
    {
        "data_controller": "HR Department",
        "data_processor": "People Analytics Team",
        "retention_policy": "3y",
        "data_owner": "hr@datacorp.com",
        "data_agreement_url": "s3://agreements/hr_policy.pdf",
        "permitted_use": "internal only",
    },
)

# Orders table
orders = [
    Row(order_id=1001, customer_id=501, amount=250.75),
    Row(order_id=1002, customer_id=502, amount=125.00),
]
register_dataset(
    "customer_orders",
    orders,
    {
        "data_controller": "Sales Department",
        "data_processor": "E-Commerce Platform",
        "retention_policy": "2y",
        "data_owner": "sales@datacorp.com",
        "data_agreement_url": "s3://agreements/sales_dpa.pdf",
        "permitted_use": "analytics only",
    },
)

In [None]:
# Confirm key metadata fields
# 'owner' is a reserved table-property key on Spark 3+ / Databricks, so a
# governance owner may instead be recorded under 'data_owner'; match either.
METADATA_KEYS = ("data_controller", "data_processor", "retention_policy", "owner", "data_owner")
metadata_key_filter = "key in ({})".format(", ".join(f"'{key}'" for key in METADATA_KEYS))

for tbl in ["employee_records", "customer_orders"]:
    print(f"\n▶️ Metadata for: {tbl}")
    # truncate=False: show() clips strings longer than 20 characters by
    # default, which would cut off values such as 'People Analytics Team'.
    spark.sql(f"SHOW TBLPROPERTIES unity_demo.governance_lab.{tbl}") \
        .filter(metadata_key_filter) \
        .show(truncate=False)
