### Databricks notebook source 
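# Publish Approved Views (Spark-based)
This notebook:
1. Reads the candidate queries table for rows with `status='APPROVED'`
2. Creates or replaces the corresponding UC views using Spark SQL
3. Updates the candidate status to `PUBLISHED`
4. Seeds a `PENDING` row in the report candidates table for each published view, unless one already exists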

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

# Obtain the active Spark session, creating one if none exists yet.
spark = SparkSession.builder.getOrCreate()

# Catalog and schema that hold the tracking tables and the published views.
CATALOG = "finance"
SCHEMA = "kyc_gold"

# Fully qualified (catalog.schema.table) names of the two tracking tables.
CAND = ".".join((CATALOG, SCHEMA, "ai_sql_candidates"))
REPORT_CAND = ".".join((CATALOG, SCHEMA, "report_candidates"))

# 1) Create the report_candidates tracking table if it is missing
#    (IF NOT EXISTS makes this a no-op when the table is already there).
_report_cand_ddl = f"""
CREATE TABLE IF NOT EXISTS {REPORT_CAND} (
  id STRING,
  report_name STRING,
  dataset_view STRING,
  chart_type STRING,
  filters STRING,
  owner STRING,
  status STRING,            -- PENDING, APPROVED, REJECTED, PUBLISHED
  dashboard_id STRING,
  report_url STRING,
  submitted_at TIMESTAMP,
  decision_at TIMESTAMP,
  published_at TIMESTAMP
)
"""
spark.sql(_report_cand_ddl)

# 2) Collect every AI SQL candidate whose status is APPROVED.
#    `rows` is consumed by the publish loop further down.
_approved_query = f"SELECT id, report_name, generated_sql FROM {CAND} WHERE status ='APPROVED'"
rows = spark.sql(_approved_query).collect()
print(f"Approved items: {len(rows)}")

# 3) For each approved candidate: create/replace the view, mark the candidate
#    PUBLISHED, and seed a report candidate (if not already present).
#    NOTE: no GRANT is issued here; see the TODO inside the loop.

# Base names of the Spark SQL numeric types as they appear in DataFrame.dtypes.
# Decimals show up as "decimal(p,s)", so only the part before "(" is compared.
_NUMERIC_TYPES = frozenset(
    ("tinyint", "smallint", "int", "bigint", "float", "double", "decimal")
)
# Types that are usable as a category / axis column for a bar chart.
_CATEGORY_TYPES = frozenset(("string", "date", "timestamp"))


def _quote_ident(name):
    """Return *name* as a backtick-quoted Spark SQL identifier.

    Embedded backticks are doubled, so the name is always parsed as a single
    identifier part and never as additional SQL.
    """
    return "`" + str(name).replace("`", "``") + "`"


def _sql_literal(value):
    """Return *value* as a single-quoted Spark SQL string literal.

    Backslashes and single quotes are backslash-escaped (Spark's default
    string-literal escaping) so the value cannot terminate the literal early.
    """
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


def _infer_default_chart(dtypes):
    """Return "bar" when *dtypes* has a numeric and a category column, else "table".

    *dtypes* is an iterable of (column_name, type_string) pairs, as produced by
    ``DataFrame.dtypes``.
    """
    base_types = [type_name.lower().split("(", 1)[0] for _, type_name in dtypes]
    has_numeric = any(t in _NUMERIC_TYPES for t in base_types)
    has_category = any(t in _CATEGORY_TYPES for t in base_types)
    return "bar" if has_numeric and has_category else "table"


current_user = spark.sql("SELECT current_user() AS u").collect()[0]["u"]
owner_literal = _sql_literal(current_user)

for r in rows:
    view_name = r["report_name"]
    sql_text = r["generated_sql"]

    # Quote the view name once and reuse it for every statement touching the view.
    view_fqn = f"{CATALOG}.{SCHEMA}.{_quote_ident(view_name)}"
    view_literal = _sql_literal(view_name)

    # Create/replace the view.
    # NOTE(review): generated_sql is executed verbatim as the view body; the
    # APPROVED status is the only safeguard visible here -- confirm that the
    # approval workflow reviews the SQL text itself.
    spark.sql(f"CREATE OR REPLACE VIEW {view_fqn} AS {sql_text}")
    spark.sql(f"ALTER VIEW {view_fqn} SET TBLPROPERTIES ('rf.generated'='true')")
    # TODO: GRANT SELECT ON VIEW ... is not issued; no grant group is defined
    # in the visible part of this notebook.
    spark.sql(
        f"UPDATE {CAND} SET status='PUBLISHED', published_at=current_timestamp() "
        f"WHERE id={_sql_literal(r['id'])}"
    )
    print(f"Published view: {view_name}")

    # ---- Heuristics to propose a default report row ----
    # Chart type: >=1 numeric and >=1 string/date/timestamp column -> bar; else table.
    df = spark.table(view_fqn)
    default_chart = _infer_default_chart(df.dtypes)

    # Avoid duplicates: only insert if no PENDING/APPROVED/PUBLISHED row exists
    # for this dataset_view (a REJECTED row does not block re-seeding).
    existing = spark.sql(f"""
      SELECT COUNT(*) AS c FROM {REPORT_CAND}
      WHERE dataset_view = {view_literal}
        AND status IN ('PENDING','APPROVED','PUBLISHED')
    """).collect()[0]["c"]

    if existing == 0:
        # The report name defaults to the view name; Spark SQL uuid() supplies the id.
        spark.sql(f"""
          INSERT INTO {REPORT_CAND}
          SELECT
             uuid() AS id,
             {view_literal} AS report_name,
             {view_literal} AS dataset_view,
             '{default_chart}' AS chart_type,
             '' AS filters,
             {owner_literal} AS owner,
             'PENDING' AS status,
             '' AS dashboard_id,
             '' AS report_url,
             current_timestamp() AS submitted_at,
             NULL AS decision_at,
             NULL AS published_at
        """)
        print(f"Seeded report candidate for view: {view_name} (chart={default_chart})")
    else:
        print(f"Report candidate already exists for view: {view_name}")

print("Publish + report candidate seeding complete.")