In [0]:
# In development mode:
# -- the data will be processed from end to end
# -- the to-be-processed data will be displayed
# -- the processed data will be displayed
# -- the processed version will not be saved

# === Parameters ===
# Every input is read from a notebook widget. Two of them get a fallback
# value when the widget yields something falsy (e.g. an empty string).
job_name = dbutils.widgets.get("job_name")
if not job_name:
    job_name = "job_123"

job_start_timestamp_str = dbutils.widgets.get("job_start_timestamp")
task_name = dbutils.widgets.get("task_name")
task_metadata = dbutils.widgets.get("task_metadata")

development_mode_str = dbutils.widgets.get("development_mode")
if not development_mode_str:
    development_mode_str = "false"

%load_ext autoreload
%autoreload 2
import datetime
import logging
from mdd.logger import Logger
from mdd.metadata import Metadata
from mdd.utils import NotebookUtil, MDDUtil
from mdd.onboarder import Onboarder

# === Parse timestamp ===
# The widget value is expected to be an ISO-8601 string. Anything that cannot
# be parsed makes the script fall back to the current time instead of failing.
try:
    job_start_timestamp = datetime.datetime.fromisoformat(
        job_start_timestamp_str
    )
except Exception:
    job_start_timestamp = datetime.datetime.now()

# === Parse development_mode flag ===
# Case-insensitive match against the accepted "on" spellings; every other
# value leaves development mode off.
development_mode = development_mode_str.lower() in {"true", "1", "yes"}

# When the task_name widget is empty, ask NotebookUtil for the notebook's name
# and use that as the task name instead.
if not task_name:
    task_name = NotebookUtil.get_notebook_name(spark)
    
# Start time of this task; the same value is passed again to the final status
# update at the end of the script.
task_start_timestamp = datetime.datetime.now()

# Wrap the raw task metadata and read the "dataflow_type" entry, which decides
# which dataflow class is instantiated in the execution step below.
metadata = Metadata(task_metadata)
dataflow_type = metadata.get("dataflow_type")

# log task start
# The end timestamp and error message are not known yet, so they are sent as
# None together with the "started" status. task_status and error_message are
# overwritten later by the execution step and re-sent when the task finishes.
task_end_timestamp = None
task_status = "started"
error_message = None
MDDUtil.etl_job_tasks_update(
    job_name,
    job_start_timestamp,
    task_name,
    task_start_timestamp,
    task_end_timestamp,
    task_status,
    error_message
)

# === Execution ===
# Build the dataflow selected by `dataflow_type`, run it, and always report the
# final task status ("succeeded" or "failed") through
# MDDUtil.etl_job_tasks_update, whether or not the run raised.
#
# NOTE(review): `Transformer` is not among the imports visible in this
# notebook. Confirm it is brought into scope elsewhere; otherwise the
# "transform" branch fails with NameError and the task is recorded as failed.
try:
    Logger.init(job_name, task_name, job_start_timestamp, False)
    if dataflow_type == "transform":
        dataflow = Transformer(spark, task_metadata, development_mode)
    elif dataflow_type == "onboard":
        dataflow = Onboarder(spark, task_metadata)
    else:
        # ValueError is still an Exception, so it is handled (logged,
        # recorded, re-raised) by the handler below like any other failure.
        raise ValueError(f"Unknown dataflow type: {dataflow_type}")

    dataflow.run()
    task_status = "succeeded"
except Exception as e:
    # Lazy %-formatting so the actual job name is logged; the previous message
    # lacked the f-prefix and always logged the literal text "{job_name}".
    logging.exception("Job failed: %s", job_name)
    task_status = "failed"
    error_message = str(e)
    # Bare raise re-raises the active exception with its traceback unchanged.
    raise
finally:
    try:
        # log task end
        task_end_timestamp = datetime.datetime.now()
        MDDUtil.etl_job_tasks_update(
            job_name,
            job_start_timestamp,
            task_name,
            task_start_timestamp,
            task_end_timestamp,
            task_status,
            error_message
        )
    finally:
        # Flush and close log handlers even if the status update itself fails.
        logging.shutdown()
