In [11]:
#---------------------------------------------------------------------
# Name: ops_begin_batch.ipynb
#---------------------------------------------------------------------
# Purpose:  Manages the initialization of ETL batches in the batch meta table.
#---------------------------------------------------------------------
# ver.  | date     | author         | change
#---------------------------------------------------------------------
# v1    | 10/28/25 | K. Hardis      | Initial Version.
#---------------------------------------------------------------------

# Standard library
import json
import sys
from datetime import datetime

# PySpark SQL functions
from pyspark.sql.functions import current_timestamp

# Make modules under ./builtin importable (presumably where shared_context and
# log_module_versions live -- confirm). Guarded so that re-running this cell
# does not keep appending duplicate entries to sys.path.
if "./builtin" not in sys.path:
    sys.path.append("./builtin")

# External Modules
import shared_context as sc

import importlib

# Force reload in case modules were cached
# (only shared_context is reloaded; log_module_versions is imported as-is)
importlib.reload(sc)

# Log external module versions
from log_module_versions import log_module_versions
log_module_versions(["shared_context"])

# Create spark shared context
# `spark` is not defined in this cell; it is expected to be the session object
# provided by the notebook runtime.
ctx = sc.SparkContextWrapper(spark)

# Parameters
# batch_name / batch_description / batch_type / batch_status / status_message
# are the string values written to the new meta_db.BATCH row further down.
batch_name = "daily_update"
batch_description = "daily_update"
batch_type = "ETL"
batch_status = "STARTED"
status_message = "ops_begin_batch.py"

# Validate required input
# Reject missing values (None, "") as before, and additionally reject names
# that consist only of whitespace, which would otherwise be stored as a
# blank batch name.
if not batch_name or not str(batch_name).strip():
    raise ValueError("batch_name argument is required")

# Print input summary
# Emits the opening part of a JSON-style object, one "key": "value", line per
# input; the closing brace is printed at the end of the cell.
print("{")
for summary_key, summary_value in (
    ("begin-batch_name", batch_name),
    ("begin-batch_description", batch_description),
    ("begin-batch_type", batch_type),
    ("begin-batch_status", batch_status),
    ("begin-status_message", status_message),
):
    # json.dumps supplies the surrounding double quotes and escapes embedded
    # quotes, backslashes and control characters, so the line stays valid
    # JSON for any value. ensure_ascii=False keeps non-ASCII text exactly as
    # a plain f-string would have printed it.
    print(f'"{summary_key}": {json.dumps(str(summary_value), ensure_ascii=False)},')

def _sql_str(value):
    """Return ``value`` rendered as a single-quoted Spark SQL string literal.

    Backslashes and single quotes are backslash-escaped so the text cannot
    end the literal early or change the surrounding statement. For values
    without those characters the result is simply ``'<value>'``.

    Assumes Spark's default literal parsing, where backslash is the escape
    character (spark.sql.parser.escapedStringLiterals=false) -- TODO confirm
    for this workspace.
    """
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


# Step 0: Get current max batch_id
# COALESCE turns the NULL returned for an empty table into 0, so the first
# batch receives id 1.
# NOTE(review): MAX + 1 is computed separately from the INSERT below, so two
# runs starting at the same moment could compute the same id -- confirm that
# batch starts are serialized upstream.
# NOTE(review): this read uses the global `spark` while the write uses
# `ctx.spark`; presumably the same session -- confirm.
max_id_row = spark.sql("SELECT COALESCE(MAX(batch_id), 0) AS max_id FROM meta_db.BATCH").first()
new_batch_id = max_id_row["max_id"] + 1

# Step 1: Prepare timestamp strings
# Local clock of the Python process, formatted to whole seconds.
now_ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Step 2: Insert using spark.sql
# The VALUES list is positional (no column list), so the order below must
# match the column order of meta_db.BATCH. Every string value goes through
# _sql_str so that quotes or backslashes in the inputs cannot break or alter
# the statement. The same timestamp is written to both timestamp positions.
current_user = 'system'
ctx.spark.sql(f"""
    INSERT INTO meta_db.BATCH
    VALUES (
        {new_batch_id},
        {_sql_str(batch_name)},
        {_sql_str(batch_description)},
        {_sql_str(batch_type)},
        NULL,
        {_sql_str(batch_status)},
        {_sql_str(status_message)},
        TIMESTAMP({_sql_str(now_ts)}),
        NULL,
        TIMESTAMP({_sql_str(now_ts)}),
        {_sql_str(current_user)},
        NULL,
        NULL
    )
""")


# Step 3: Report completion
# Prints the final key and closes the JSON-style object opened by the input
# summary. This only logs that the cell reached its end; it does not re-query
# meta_db.BATCH to verify the inserted row.
end_message = f"end ops_begin_batch.py for {batch_name}"
# json.dumps quotes the message and escapes any quote/backslash/control
# character in batch_name so the emitted line remains valid JSON.
print(f'"end-msg": {json.dumps(end_message, ensure_ascii=False)}')
print("}")

StatementMeta(, 6e5ff71b-fb21-4f52-bdb1-a1fe9d44013b, 16, Finished, Available, Finished)

✅ Module 'shared_context' loaded with version: sc_1.0
{
"begin-batch_name": "daily_update",
"begin-batch_description": "daily_update",
"begin-batch_type": "ETL",
"begin-batch_status": "STARTED",
"begin-status_message": "ops_begin_batch.py",
"end-msg": "end ops_begin_batch.py for daily_update"
}


In [None]:
%%sql
-- Show every row and column of the batch meta table (presumably the same
-- table the Python cell above inserts into -- confirm the default database).
SELECT *
FROM lk_cdsa_bronze.meta_db.batch;

-- Manual reset, intentionally left disabled: would remove every row from the batch table.
-- DELETE FROM lk_cdsa_bronze.meta_db.batch;
