parameters:
- action_name
- connection_name
- gateway_pipeline_id
- ingestion_pipeline_id
- job_id
- stage_catalog
- stage_schema

In [None]:
# set default widget / parameters values
# dbutils.widgets.text() only registers a text widget and returns None, so its
# result is deliberately not assigned to the parameter variables (that would
# bind every one of them to None). The values are read with
# dbutils.widgets.get() in the cell that follows.
for _widget_name in (
    # action to perform and the objects it applies to
    "action_name",
    "connection_name",
    "gateway_pipeline_name",
    "gateway_pipeline_id",
    "ingestion_pipeline_name",
    "ingestion_pipeline_id",
    "job_name",
    "job_id",
    # catalog / schema locations
    "elog_catalog",
    "elog_schema",
    "stage_catalog",
    "stage_schema",
    "target_catalog",
    "target_schema",
    # flags consulted by the delete cells before removing shared objects
    "connection_created",
    "stage_created",
    "target_created",
):
    dbutils.widgets.text(_widget_name, "")

In [0]:
from databricks.sdk import WorkspaceClient
# Workspace API client used by the stop / delete cells.
w = WorkspaceClient()

# Read every notebook parameter from its widget (all values are strings).
_read_widget = dbutils.widgets.get

# -- action and the objects it applies to
action_name = _read_widget("action_name")
connection_name = _read_widget("connection_name")
gateway_pipeline_name = _read_widget('gateway_pipeline_name')
gateway_pipeline_id = _read_widget('gateway_pipeline_id')
ingestion_pipeline_name = _read_widget('ingestion_pipeline_name')
ingestion_pipeline_id = _read_widget('ingestion_pipeline_id')
job_name = _read_widget('job_name')
job_id = _read_widget('job_id')

# -- catalog / schema locations
elog_catalog = _read_widget('elog_catalog')
elog_schema = _read_widget('elog_schema')
stage_catalog = _read_widget('stage_catalog')
stage_schema = _read_widget('stage_schema')
target_catalog = _read_widget('target_catalog')
target_schema = _read_widget('target_schema')

# -- flags checked before deleting the connection / schemas
connection_created = _read_widget('connection_created')
stage_created = _read_widget('stage_created')
target_created = _read_widget('target_created')

# Tables that receive a copy of each pipeline's event log. Dashes in the
# pipeline ids are replaced with underscores in the table names.
_elog_prefix = f"{elog_catalog}.{elog_schema}"
gateway_elog_table_name = f"{_elog_prefix}.gateway_elog_{gateway_pipeline_id.replace('-', '_')}"
ingestion_elog_table_name = f"{_elog_prefix}.ingestion_elog_{ingestion_pipeline_id.replace('-', '_')}"

In [0]:
# Stop the gateway pipeline for both "stop" and "delete" actions.
# Best effort: a failure is printed and the notebook carries on.
if action_name in ("stop", "delete"):
    try:
        w.pipelines.stop(pipeline_id=gateway_pipeline_id)
        print(f"pipelines stop {gateway_pipeline_id=}")
    except Exception as err:
        print(err)

In [0]:
# Stop the ingestion pipeline for both "stop" and "delete" actions.
# Best effort: a failure is printed and the notebook carries on.
if action_name in ("stop", "delete"):
    try:
        w.pipelines.stop(pipeline_id=ingestion_pipeline_id)
        print(f"pipelines stop {ingestion_pipeline_id=}")
    except Exception as err:
        print(err)

In [0]:
# Copy the gateway pipeline's event log into its elog table.
# Best effort: a failure is printed and the notebook carries on.
if action_name in ("stop", "delete"):
    try:
        # Create an empty table with the event log's columns if it is missing.
        _create_stmt = f"""
        create table if not exists identifier('{gateway_elog_table_name}') select * from event_log('{gateway_pipeline_id}') limit 0;
        """
        spark.sql(_create_stmt)

        # Append the full event log to the table.
        _insert_stmt = f"""
        insert into identifier('{gateway_elog_table_name}') select * from event_log('{gateway_pipeline_id}');
        """
        spark.sql(_insert_stmt)

        print(f"insert into identifier('{gateway_elog_table_name=}') select * from event_log('{gateway_pipeline_id=}')")
    except Exception as err:
        print(err)

In [0]:
# Copy the ingestion pipeline's event log into its elog table.
# Best effort: a failure is printed and the notebook carries on.
if action_name in ("stop", "delete"):
    try:
        # Create an empty table with the event log's columns if it is missing.
        _create_stmt = f"""
        create table if not exists identifier('{ingestion_elog_table_name}') select * from event_log('{ingestion_pipeline_id}') limit 0;
        """
        spark.sql(_create_stmt)

        # Append the full event log to the table.
        _insert_stmt = f"""
        insert into identifier('{ingestion_elog_table_name}') select * from event_log('{ingestion_pipeline_id}');
        """
        spark.sql(_insert_stmt)

        print(f"insert into identifier('{ingestion_elog_table_name=}') select * from event_log('{ingestion_pipeline_id=}')")
    except Exception as err:
        print(err)

In [0]:
from pyspark.sql import SparkSession
import logging
import inspect
import time

def loadGatewayMetricsTable(
  spark:SparkSession,
  staging_catalog_name:str,
  staging_schema_name:str,
  gateway_pipeline_id:str="",
  elog_catalog_name:str="",
  elog_schema_name:str="",
  timeout=30,
  max_wait=10,
  ) -> str:
  """Load the gateway's telemetry JSON files into a metrics table.

  Starts an Auto Loader (``cloudFiles``) stream with an ``availableNow``
  trigger that reads the ``telemetry`` directory under the gateway's staging
  volume and writes it to
  ``{elog_catalog_name}.{elog_schema_name}.gateway_metrics_table_{gateway_pipeline_id}``
  (dashes replaced by underscores). The stream is not awaited; the function
  only polls until the output table exists.

  Args:
    spark: Active Spark session.
    staging_catalog_name: Catalog holding the gateway staging volume.
    staging_schema_name: Schema holding the gateway staging volume.
    gateway_pipeline_id: Id of the gateway pipeline; part of the volume path
      and of the output table name.
    elog_catalog_name: Catalog for the output table; falls back to
      ``staging_catalog_name`` when empty.
    elog_schema_name: Schema for the output table; falls back to
      ``staging_schema_name`` when empty.
    timeout: Seconds to sleep between two checks for the output table
      (a poll interval, despite the name).
    max_wait: Maximum number of polls before giving up.

  Returns:
    The unqualified output table name when the table exists, or ``""`` when
    starting the stream failed or the table did not appear within
    ``max_wait`` polls.
  """

  if not elog_catalog_name: elog_catalog_name=staging_catalog_name
  if not elog_schema_name: elog_schema_name=staging_schema_name

  # NOTE: rootPath ends with "/", so the derived paths contain "//". The
  # strings are left exactly as they were so that the checkpoint and schema
  # locations already in use are not changed.
  rootPath = f"/Volumes/{staging_catalog_name}/{staging_schema_name}/__databricks_ingestion_gateway_staging_data-{gateway_pipeline_id}/{gateway_pipeline_id}/"
  loadPath = f"{rootPath}/telemetry/"
  checkpointPath = f"{rootPath}/monitoring/checkpoint/{gateway_pipeline_id}/"
  schemaLocation = f"{rootPath}/monitoring/schema/{gateway_pipeline_id}/"
  table_name = f"gateway_metrics_table_{gateway_pipeline_id}".replace("-","_")
  outputTableName = f"{elog_catalog_name}.{elog_schema_name}.{table_name}"
  # Dashes are also replaced in the catalog and schema parts of the name.
  outputTableName = outputTableName.replace("-", "_")

  outputTableName_exists = spark.catalog.tableExists(outputTableName)
  if not outputTableName_exists:
    # Show the name of the table that is about to be created.
    print(f"{outputTableName}")

  try:
    autoloaderStream = (
        spark.readStream.format("cloudFiles")
        .option("cloudFiles.includeExistingFiles", True)
        .option("cloudFiles.format", "json")
        .option("cloudFiles.inferColumnTypes", True)
        .option("cloudFiles.schemaLocation", schemaLocation)
        .option("cloudFiles.schemaHints", """
            catalog_name string, 
            dlt_event_type string, 
            event_subtype string, 
            event_timestamp_seconds long, 
            event_type string, 
            flow_name string, 
            flow_status string, 
            is_failed boolean, 
            phase string, 
            pipeline_id string, 
            replication_id string, 
            schema_name string, 
            source string, 
            statistics struct<deleteCount: long, duration_ms: long, insertCount: long, last_operation_timestamp: long, outputCount: long, updateCount: long, upsertCount: long, rowRangeStart: string, rowRangeEnd: string, startTimestamp: string, ddl map<string, string>, endTimestamp: string >, 
            table struct<catalog_name: string, schema_name: string, table_name: string>, 
            table_name string, 
            update_state string, 
            resources struct<name: string, type: string, committed_memory: long, max_memory: long, used_memory: long, collection_count: long, total_memory: long, free_memory: long, collection_time: long>,
            error_detail string,
            exit_code string,
            stacktrace string,
            exception_name string,
            exception string
        """)
        .option("cloudFiles.schemaEvolutionMode", "addNewColumns")
        .option("mergeSchema", True)
        .load(f"{loadPath}")
        .writeStream
        .option("mergeSchema", True)
        .option("checkpointLocation", checkpointPath)
        .trigger(availableNow=True)
        .toTable(outputTableName)
      )

    # wait for this to finish
    # not needed in jobs https://docs.databricks.com/en/structured-streaming/production.html
    # autoloaderStream.awaitTermination()
    # NOTE(review): because the stream is not awaited, it may still be running
    # when this function returns - confirm that callers do not remove the
    # staging volume before it finishes.
  except Exception as ex:
    # Best effort: report the failure and signal it with an empty result.
    print(ex)
    return("")

  # Poll until the streaming write has created the table, but give up after
  # max_wait polls. Without the bound the loop would never end when the
  # stream fails asynchronously or has nothing to write.
  waited = 0
  while not outputTableName_exists and waited < max_wait:
    print(f"{inspect.stack()[1][3]}: wait table creation: {waited}/{max_wait}. sleeping {timeout}")
    waited += 1
    time.sleep(timeout)
    outputTableName_exists = spark.catalog.tableExists(outputTableName)

  if outputTableName_exists:
    return(table_name)
  else:
    return("")




In [0]:
# Always-on switch: load the gateway metrics table and print the name that
# loadGatewayMetricsTable returns. Any exception is printed, not raised.
if True:
  try:
    _metrics_table = loadGatewayMetricsTable(
      spark=spark,
      staging_catalog_name=stage_catalog,
      staging_schema_name=stage_schema,
      gateway_pipeline_id=gateway_pipeline_id,
      elog_catalog_name=elog_catalog,
      elog_schema_name=elog_schema,
    )
    print(_metrics_table)
  except Exception as err:
    print(err)

In [0]:
# Delete the job when the action is "delete".
# Best effort: a failure is printed and the notebook carries on.
if action_name=="delete":
    try:
        # Widget values are strings, while the SDK's jobs.delete() is typed
        # for an integer job id. Convert explicitly; a blank or non-numeric
        # value raises ValueError, which is reported like any other failure.
        w.jobs.delete(job_id=int(job_id))
        print(f"jobs delete {job_id=}")
    except Exception as e:
        print(e)

In [0]:
# Delete the ingestion pipeline when the action is "delete".
# Best effort: a failure is printed and the notebook carries on.
if "delete" == action_name:
    try:
        w.pipelines.delete(pipeline_id=ingestion_pipeline_id)
        print(f"pipelines delete {ingestion_pipeline_id=}")
    except Exception as err:
        print(err)

In [0]:
# Delete the gateway pipeline when the action is "delete".
# Best effort: a failure is printed and the notebook carries on.
if "delete" == action_name:
    try:
        w.pipelines.delete(pipeline_id=gateway_pipeline_id)
        print(f"pipelines delete {gateway_pipeline_id=}")
    except Exception as err:
        print(err)

In [0]:
# Delete the connection, but only when the caller flags it as created.
# The flag comes from a text widget, so it is a string: a plain truthiness
# test would treat "False", "false" or "0" as True and delete the connection
# anyway. Blank and explicit "no" spellings count as not created; any other
# value keeps counting as created.
_connection_was_created = str(connection_created).strip().lower() not in ("", "false", "0", "no", "none")
if action_name=="delete" and _connection_was_created:
    try:
        w.connections.delete(name=connection_name)
        print(f"connections delete {connection_name=}")
    except Exception as e:
        # Best effort: report the failure and carry on.
        print(e)

In [None]:
# Drop the staging schema (CASCADE), but only when the caller flags it as
# created. The flag comes from a text widget, so it is a string: a plain
# truthiness test would treat "False", "false" or "0" as True and drop the
# schema anyway. Blank and explicit "no" spellings count as not created; any
# other value keeps counting as created.
_stage_was_created = str(stage_created).strip().lower() not in ("", "false", "0", "no", "none")
if action_name=="delete" and _stage_was_created:
    try:
        spark.sql(f"DROP SCHEMA {stage_catalog}.{stage_schema} CASCADE")
        print(f"DROP SCHEMA {stage_catalog=}.{stage_schema=} CASCADE")
    except Exception as e:
        # Best effort: report the failure and carry on.
        print(e)

In [None]:
# Drop the target schema (CASCADE), but only when the caller flags it as
# created. The flag comes from a text widget, so it is a string: a plain
# truthiness test would treat "False", "false" or "0" as True and drop the
# schema anyway. Blank and explicit "no" spellings count as not created; any
# other value keeps counting as created.
_target_was_created = str(target_created).strip().lower() not in ("", "false", "0", "no", "none")
if action_name=="delete" and _target_was_created:
    try:
        spark.sql(f"DROP SCHEMA {target_catalog}.{target_schema} CASCADE")
        print(f"DROP SCHEMA {target_catalog}.{target_schema} CASCADE")
    except Exception as e:
        # Best effort: report the failure and carry on.
        print(e)