In [0]:
from datetime import datetime

import uuid

from pyspark.sql import SparkSession
 
def log_pipeline_activity(pipeline_name, activity_name, activity_type, execution_status, start_time, end_time, message, run_id=None):
    """Append one activity record to the ``pipeline_activity_log`` Delta table.

    Relies on the notebook-level ``spark`` session being in scope.

    Args:
        pipeline_name: Name of the pipeline the activity belongs to.
        activity_name: Name of the activity being logged.
        activity_type: Kind of activity (free text).
        execution_status: Outcome of the activity (free text).
        start_time: ``datetime`` the activity started, or ``None`` if unknown.
        end_time: ``datetime`` the activity ended, or ``None`` if unknown.
        message: Free-text detail stored with the record.
        run_id: Optional identifier to store in ``RunId``. When omitted (or
            empty) a random UUID4 string is generated.

    Returns:
        None. The only effect is the appended row.
    """
    # Keep a caller-supplied ID so rows can be correlated with an external run;
    # otherwise fall back to a fresh unique ID.
    run_id = str(run_id) if run_id else str(uuid.uuid4())

    log_data = [(pipeline_name, activity_name, activity_type, execution_status, start_time, end_time, message, run_id)]

    # Spell the column types out instead of letting Spark infer them from this
    # single row: inference cannot determine a type for a column whose only
    # value is None (e.g. a missing start/end time), and an explicit schema keeps
    # every appended row type-compatible with the table.
    log_schema = (
        "PipelineName string, ActivityName string, ActivityType string, "
        "ExecutionStatus string, StartTime timestamp, EndTime timestamp, "
        "Message string, RunId string"
    )
    log_df = spark.createDataFrame(log_data, schema=log_schema)

    # Append-only write: existing log rows are never rewritten.
    log_df.write.format("delta").mode("append").saveAsTable("pipeline_activity_log")
 
# Demo call from a Databricks notebook: logs one record whose start and end
# times are both taken from the current clock.
log_pipeline_activity(
    pipeline_name="DataFactory_Activity",
    activity_name="Notebook Execution",
    activity_type="Databricks Notebook",
    execution_status="Success",
    start_time=datetime.now(),
    end_time=datetime.now(),
    message="Pipeline ran successfully.",
)

 

In [0]:
# ---------------------------------------------------------

# STEP 1: Read parameters from ADF widgets

# ---------------------------------------------------------

# Declare each text widget with an empty-string default, in display order.
for _widget_name in (
    "pipeline_name",
    "activity_name",
    "activity_type",
    "status",
    "start_time",
    "end_time",
    "message",
    "database_name",
    "schema_name",
    "table_name",
    "run_id",
):
    dbutils.widgets.text(_widget_name, "")

# Read the current widget values. The two timestamps stay as raw strings here
# (hence the *_str names) and are parsed further down.
pipeline_name = dbutils.widgets.get("pipeline_name")
activity_name = dbutils.widgets.get("activity_name")
activity_type = dbutils.widgets.get("activity_type")
status = dbutils.widgets.get("status")
start_time_str = dbutils.widgets.get("start_time")
end_time_str = dbutils.widgets.get("end_time")
message = dbutils.widgets.get("message")
database_name = dbutils.widgets.get("database_name")
schema_name = dbutils.widgets.get("schema_name")
table_name = dbutils.widgets.get("table_name")
run_id = dbutils.widgets.get("run_id")
 
# ---------------------------------------------------------

# STEP 2: Safely parse timestamps

# ---------------------------------------------------------

from datetime import datetime

from pyspark.sql.types import StructType, StructField, StringType, TimestampType
 
def safe_parse(dt_str):
    """Parse an ISO-8601 timestamp string into a ``datetime``, best-effort.

    Accepts a trailing ``Z`` for UTC and any number of fractional-second
    digits (extra digits beyond microseconds are truncated), e.g. the
    7-digit ``2024-01-02T03:04:05.1234567Z`` shape. Surrounding whitespace
    is ignored.

    Args:
        dt_str: The timestamp text, or an empty/``None`` value.

    Returns:
        The parsed ``datetime`` (timezone-aware when the text carries an
        offset or ``Z``), or ``None`` when ``dt_str`` is empty or cannot be
        parsed.
    """
    if not dt_str:
        return None

    try:
        # fromisoformat() before Python 3.11 rejects a "Z" suffix, so spell
        # UTC as an explicit offset.
        text = dt_str.strip().replace("Z", "+00:00")

        # Before Python 3.11 fromisoformat() accepts only 3 or 6 fractional
        # digits; normalise whatever precision arrived to exactly 6 so that
        # 7-digit (100 ns) timestamps are not silently dropped.
        head, dot, tail = text.partition(".")
        if dot:
            n_digits = len(tail) - len(tail.lstrip("0123456789"))
            if n_digits:
                micros = tail[:n_digits][:6].ljust(6, "0")
                text = head + dot + micros + tail[n_digits:]

        return datetime.fromisoformat(text)
    except (AttributeError, TypeError, ValueError):
        # Deliberately best-effort: a malformed or non-string timestamp is
        # logged as NULL rather than failing the whole logging step.
        return None
 
# Convert both raw widget strings in one pass, start first and then end.
start_time, end_time = (
    safe_parse(raw_value) for raw_value in (start_time_str, end_time_str)
)
 
# ---------------------------------------------------------

# STEP 3: Define schema explicitly

# ---------------------------------------------------------

# Layout of one log row. Every field is nullable; the two time fields are
# timestamps and all remaining fields are strings.
schema = StructType([
    StructField(column_name, column_type(), True)
    for column_name, column_type in (
        ("PipelineName", StringType),
        ("ActivityName", StringType),
        ("ActivityType", StringType),
        ("ExecutionStatus", StringType),
        ("StartTime", TimestampType),
        ("EndTime", TimestampType),
        ("Message", StringType),
        ("DatabaseName", StringType),
        ("SchemaName", StringType),
        ("TableName", StringType),
        ("RunId", StringType),
    )
])
 
# ---------------------------------------------------------

# STEP 4: Create DataFrame with defined schema

# ---------------------------------------------------------

# Single-row payload; the tuple order must line up with the field order in `schema`.
data = [
    (
        pipeline_name,
        activity_name,
        activity_type,
        status,
        start_time,
        end_time,
        message,
        database_name,
        schema_name,
        table_name,
        run_id,
    )
]

log_df = spark.createDataFrame(data, schema=schema)
 
# ---------------------------------------------------------

# STEP 5: Write to your Delta table

# ---------------------------------------------------------

# Append the row to the log table with schema merging enabled (an earlier
# variant of this write was identical but without the mergeSchema option).
# NOTE(review): with mergeSchema on, any writer whose column names differ adds
# new columns to the table instead of failing; the query output in this
# notebook shows both PascalCase and snake_case variants of the same fields.
# Confirm this is intended.
(
    log_df.write
    .format("delta")
    .option("mergeSchema", "true")
    .mode("append")
    .saveAsTable("podlakehouse.demo_schema.PipelineActivityLog")
)

 

In [0]:
%sql
-- Show every row and column currently stored in the activity log table.
SELECT *
FROM podlakehouse.demo_schema.PipelineActivityLog;

PipelineName,ActivityName,ActivityType,ExecutionStatus,StartTime,EndTime,Message,DatabaseName,SchemaName,TableName,RunId,pipeline_name,activity_name,execution_status,start_time,end_time
MyADF_Pipeline,Databricks_Activity,Databricks Notebook,,,,Pipeline executed,,,,,,,,,
,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,
MyADF_Pipeline,Databricks_Activity,Databricks Notebook,,,,Pipeline executed,,,,,,,,,
MyADF_Pipeline,Databricks_Activity,Databricks Notebook,,,,Pipeline executed,,,,,,,,,
