In [0]:
# ---------------------------------------------------------
# STEP 1: Install required library (only once per cluster)
# ---------------------------------------------------------
#%pip install python-dateutil

# ---------------------------------------------------------
# STEP 2: Import required libraries
# ---------------------------------------------------------
from datetime import datetime
from dateutil import parser
from pyspark.sql.types import StructType, StructField, StringType, TimestampType
from pyspark.sql import SparkSession

# ---------------------------------------------------------
# STEP 3: Define widgets to receive input from ADF
# ---------------------------------------------------------
# Register one text widget per expected input, each defaulting to an empty string.
for widget_name in (
    "pipeline_name",
    "activity_name",
    "status",
    "start_time",
    "end_time",
    "message",
):
    dbutils.widgets.text(widget_name, "")

# ---------------------------------------------------------
# STEP 4: Get widget values
# ---------------------------------------------------------
# Read the widgets in a fixed order and bind each raw string to its own variable.
# The two timestamp values stay as strings here; they are parsed further below.
(
    pipeline_name,
    activity_name,
    status,
    start_time_str,
    end_time_str,
    message,
) = [
    dbutils.widgets.get(key)
    for key in (
        "pipeline_name",
        "activity_name",
        "status",
        "start_time",
        "end_time",
        "message",
    )
]

# ---------------------------------------------------------
# STEP 5: Define a safe parser for ISO timestamps
# ---------------------------------------------------------
def safe_parse(dt_str):
    """Parse an ISO-8601 timestamp string, returning None instead of raising.

    String inputs are cleaned before parsing: surrounding whitespace and any
    wrapping single/double quotes are removed. Rows in the log table show
    values arriving wrapped in literal quotes (e.g. ``"Success"``), and a
    quoted timestamp would otherwise be rejected by the parser and stored as
    NULL.

    Args:
        dt_str: The raw timestamp text, or a falsy value (``None`` / ``""``)
            when no timestamp was supplied. Non-string inputs are handed to
            the parser unchanged.

    Returns:
        The ``datetime`` produced by ``dateutil.parser.isoparse``, or ``None``
        when the input is missing, blank after cleaning, or cannot be parsed.
    """
    if not dt_str:
        return None

    candidate = dt_str
    if isinstance(candidate, str):
        # Trim whitespace, then wrapping quotes, then any whitespace that was
        # inside the quotes.
        candidate = candidate.strip().strip("\"'").strip()
        if not candidate:
            # Only whitespace/quotes were supplied: treat as "no value"
            # rather than reporting a parse failure.
            return None

    try:
        return parser.isoparse(candidate)
    except Exception as e:
        # Deliberately best-effort: a bad timestamp must not stop the log row
        # from being written, so report the original input and carry on.
        print(f"Failed to parse datetime: {dt_str} -> {e}")
        return None

# Convert both raw widget strings (start first, then end) with safe_parse.
start_time, end_time = map(safe_parse, (start_time_str, end_time_str))

# Debug check (optional): echo what each timestamp was parsed to.
for label, parsed_value in (("start_time", start_time), ("end_time", end_time)):
    print(f"Parsed {label}:", parsed_value)

# ---------------------------------------------------------
# STEP 6: Define schema and create DataFrame
# ---------------------------------------------------------
# Column name -> Spark type for the log record; every column is nullable.
column_types = [
    ("pipeline_name", StringType),
    ("activity_name", StringType),
    ("status", StringType),
    ("start_time", TimestampType),
    ("end_time", TimestampType),
    ("message", StringType),
]
schema = StructType(
    [StructField(column, make_type(), True) for column, make_type in column_types]
)

# A single row, with values in the same order as the schema columns.
log_record = (pipeline_name, activity_name, status, start_time, end_time, message)
data = [log_record]

log_df = spark.createDataFrame(data, schema=schema)

# ---------------------------------------------------------
# STEP 7: Write to Delta log table
# ---------------------------------------------------------
# Append the single-row DataFrame to the Delta log table.
(
    log_df.write
    .format("delta")
    .mode("append")
    .saveAsTable("podlakehouse.demo_schema.pipeline_log")
)

# ---------------------------------------------------------
# STEP 8: (Optional) View latest logs
# ---------------------------------------------------------
# Query the whole log table, newest start_time first, and print it untruncated.
latest_logs = spark.sql(
    "SELECT * FROM podlakehouse.demo_schema.pipeline_log ORDER BY start_time DESC"
)
latest_logs.show(truncate=False)


Parsed start_time: None
Parsed end_time: None
+---------------+-------------+---------+--------------------------+--------+-----------------------------------+
|pipeline_name  |activity_name|status   |start_time                |end_time|message                            |
+---------------+-------------+---------+--------------------------+--------+-----------------------------------+
|with time stamp|"Timestamp"  |"Success"|2025-06-11 14:15:54.419838|NULL    |""Timestamp completed successfully"|
|with time stamp|"Timestamp"  |"Started"|NULL                      |NULL    |"Pipeline started"                 |
|               |             |         |NULL                      |NULL    |                                   |
|               |             |         |NULL                      |NULL    |                                   |
|               |             |         |NULL                      |NULL    |                                   |
+---------------+-------------+---------+-

In [0]:
%sql
-- Removes every row from incremental_db.pipeline_log.
-- NOTE(review): the Python cell in this notebook appends to
-- podlakehouse.demo_schema.pipeline_log; confirm this is the intended table.
truncate table incremental_db.pipeline_log