In [0]:
%run ../utils/common

In [0]:
# Load the per-environment settings. The context manager closes the file
# handle as soon as the JSON has been parsed instead of leaking it.
with open("../configs/config.json") as config:
    settings = json.load(config)

In [0]:
# Declare the two notebook parameters as text widgets with an empty default.
dbutils.widgets.text("environment", "")
dbutils.widgets.text("job_name", "")

# Strip both values: stray whitespace in a widget would otherwise break the
# config lookup keyed by `environment` or the filter on `job_name`.
environment = dbutils.widgets.get("environment").strip()
job_name = dbutils.widgets.get("job_name").strip()

print(environment)
print(job_name)

In [0]:
# Resolve the catalog for the selected environment. Fail with an actionable
# message (same KeyError type as a plain lookup) when the widget value is
# empty or is not a key of the loaded config.
if environment not in settings:
    raise KeyError(
        f"Unknown environment {environment!r}; expected one of {sorted(settings)}"
    )
catalog = settings[environment]["catalog_name"]
catalog

In [0]:
# Qualified name of the job-run metadata table: <catalog>.default.meta_job_runs
TBL_META_JOB_RUNS = "{}.default.meta_job_runs".format(catalog)
TBL_META_JOB_RUNS

In [0]:
# Job-run status constants used when reading/writing the `status` column.
IN_PROGRESS, COMPLETED, FAILED = "IN_PROGRESS", "COMPLETED", "FAILED"

In [0]:
# Pick the most recently updated IN_PROGRESS run for this job. The stored
# job_name is trimmed before comparison so padded values still match the
# widget-supplied name.
df = (
    spark
    .table(TBL_META_JOB_RUNS)
    .filter((F.trim("job_name") == job_name) & (col("status") == IN_PROGRESS))
    .orderBy(F.desc("updated_at"))
    .limit(1)
)

# show() prints the rows itself and returns None, so it must not be wrapped
# in print() (that only appended a stray "None" to the cell output).
df.show()

# Nothing to complete if no run is currently in progress for this job.
if df.count() == 0:
    raise ValueError(f"No IN_PROGRESS record found for job_name: {job_name}")

In [0]:
# Timestamp for the update, taken as the current time in Asia/Ho_Chi_Minh.
# NOTE(review): pytz_timezone and datetime are not imported in this notebook;
# presumably they are provided by `%run ../utils/common` — confirm.
vn_tz = pytz_timezone("Asia/Ho_Chi_Minh")
updated_at = current_local_time = datetime.now(vn_tz)
print(updated_at)

In [0]:
# Materialise the (at most one-row) query result on the driver and keep the
# first row; an empty result raises IndexError here.
matching_rows = df.collect()
row = matching_rows[0]
row

In [0]:
# Stage the new values for the selected run as a one-row temporary view.
completed_record = {
    "job_name": row["job_name"],
    "proc_time": row["proc_time"],
    "updated_at": updated_at,
    "status": COMPLETED
}
update_df = spark.createDataFrame([completed_record])
update_df.createOrReplaceTempView("update_view")

# MERGE the staged row into the metadata table: only target rows with the same
# job_name and proc_time that are still IN_PROGRESS receive the new status and
# updated_at values.
merge_statement = f"""
MERGE INTO {TBL_META_JOB_RUNS} target
USING update_view source
ON target.job_name = source.job_name
   AND target.proc_time = source.proc_time
   AND target.status = '{IN_PROGRESS}'
WHEN MATCHED THEN UPDATE SET
  target.updated_at = source.updated_at,
  target.status = source.status
"""
spark.sql(merge_statement)

print(f"Job '{job_name}' marked as COMPLETE with updated_at: {updated_at} and process_date: {row['proc_time']}")