In [2]:
from pyspark.sql.types import StructType, StructField, StringType, TimestampType
from pyspark.sql import functions as F

StatementMeta(, d8f74002-59e8-4816-9ef8-b4e3bbc6d3c5, 4, Finished, Available, Finished)

## Run this once to create the error log table (`pipeline_failure_log`)

In [8]:
# NOTE: Everything in this cell is wrapped in a bare triple-quoted string, so
# none of the code below executes when the cell runs (presumably to keep the
# one-time setup from being repeated on "Run All" -- confirm with the author).
# To (re)create the table, remove the surrounding quotes and run the cell once.
# Be aware that the write uses mode("overwrite") against "pipeline_failure_log".
"""
# One-time execution to create tables

# Define schema
schema = StructType([
    StructField("event_time_utc", TimestampType(), False),
    StructField("pipeline_name",  StringType(),   False),
    StructField("run_id",         StringType(),   False),
    StructField("activity_name",  StringType(),   True),
    StructField("job_stage",      StringType(),   True),
    StructField("status",         StringType(),   False),
    StructField("error_code",     StringType(),   True),
    StructField("error_message",  StringType(),   True),
    StructField("source_object",  StringType(),   True),
])

# Empty DF
empty_df = spark.createDataFrame([], schema)

# Write schema into a managed Delta table in the Lakehouse (default schema)
(empty_df
 .withColumn("event_time_utc", F.current_timestamp())
 .limit(0)  # write no rows
 .write
 .format("delta")
 .mode("overwrite")
 .saveAsTable("pipeline_failure_log"))

print("✅ Table pipeline_failure_log created in your Lakehouse (default schema).")
"""


StatementMeta(, 46b50cff-5784-4e4b-b921-8fb01a6b2542, 10, Finished, Available, Finished)

✅ Table pipeline_failure_log created in your Lakehouse (default schema).


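##### Attach this notebook to a Notebook activity in the pipeline and pass these parameters: `pipeline_name`, `run_id`, `activity_name`, `job_stage`, `error_code`, `error_message`, `source_object` (any parameter that is missing or blank is logged as "N/A")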

In [None]:
# ------------------------------
# Parametrized Error Logging
# ------------------------------

# 1) Imports (must exist in THIS notebook)
from pyspark.sql import functions as F
from pyspark.sql.types import (
    StructType, StructField,
    StringType, TimestampType
)

# 2) Safe parameter reader
def get_param_or_default(param_name: str, default_value: str = "N/A") -> str:
    """
    Return the value of a Fabric notebook parameter if it exists; otherwise a default.

    The parameter is looked up by name among this notebook's global variables,
    so running the notebook manually (without the parameter defined) yields the
    default instead of a NameError.

    Args:
        param_name: Name of the global variable holding the parameter value.
        default_value: Value returned when the parameter is undefined, None,
            or blank (empty / whitespace-only once converted to a string).

    Returns:
        The parameter value converted with str(), or default_value.
    """
    # Plain dictionary lookup instead of eval(): eval would execute arbitrary
    # expressions, and would also resolve this function's own locals
    # ("param_name", "default_value") and builtins ("len", "id", ...) as if
    # they were notebook parameters.
    value = globals().get(param_name)
    if value is None or str(value).strip() == "":
        return default_value
    return str(value)

# 3) Collect inputs: resolve every expected pipeline parameter in one pass.
#    Each lookup goes through get_param_or_default with its default fallback,
#    in the same order as the target variables on the left-hand side.
(
    pipeline_name_value,
    run_id_value,
    activity_name_value,
    job_stage_value,
    error_code_value,
    error_message_value,
    source_object_value,
) = (
    get_param_or_default(parameter)
    for parameter in (
        "pipeline_name",
        "run_id",
        "activity_name",
        "job_stage",
        "error_code",
        "error_message",
        "source_object",
    )
)

# 4) Guard: stop before building/writing anything if the log table is not
#    visible through the Spark catalog.
log_table_found = spark.catalog.tableExists("pipeline_failure_log")
if not log_table_found:
    missing_table_message = (
        "The table 'pipeline_failure_log' was not found in the attached Lakehouse. "
        "Attach the correct Lakehouse to this notebook activity or create the table first."
    )
    raise RuntimeError(missing_table_message)

# 5) Assemble the one-row failure record from Spark literals only (no None
#    anywhere), which avoids the 'Argument obj can not be None' error for
#    NOT NULL columns. The mapping order below is the column order of the row.
string_literals = {
    "pipeline_name": pipeline_name_value,
    "run_id": run_id_value,
    "activity_name": activity_name_value,
    "job_stage": job_stage_value,
    "status": "Failed",  # status is a fixed literal, not a pipeline parameter
    "error_code": error_code_value,
    "error_message": error_message_value,
    "source_object": source_object_value,
}

# The timestamp column comes first, followed by one literal column per entry.
record_columns = [F.current_timestamp().alias("event_time_utc")]
record_columns.extend(
    F.lit(literal).alias(column_name)
    for column_name, literal in string_literals.items()
)
error_df = spark.range(1).select(*record_columns)

# 6) Defensive back-fill for any null that might still appear in a STRING
#    column (event_time_utc is excluded: it is not a string column).
#    na.fill only replaces nulls; 'status' falls back to 'Failed', the rest to 'N/A'.
null_replacements = dict.fromkeys(
    (
        "pipeline_name",
        "run_id",
        "status",
        "activity_name",
        "job_stage",
        "error_code",
        "error_message",
        "source_object",
    ),
    "N/A",
)
null_replacements["status"] = "Failed"
error_df = error_df.na.fill(null_replacements)

# 7) Append the record to the Delta table. Columns are selected explicitly so
#    the written frame always follows this fixed column order.
table_column_order = [
    "event_time_utc",
    "pipeline_name",
    "run_id",
    "activity_name",
    "job_stage",
    "status",
    "error_code",
    "error_message",
    "source_object",
]
ordered_error_df = error_df.select(*table_column_order)

# Configure the writer first, then perform the append as a separate step.
delta_appender = ordered_error_df.write.format("delta").mode("append")
delta_appender.saveAsTable("pipeline_failure_log")

print(f"✅ Logged failure for pipeline '{pipeline_name_value}' (run '{run_id_value}').")
