In [0]:
from pyspark.sql.functions import*
from pyspark.sql.types import*
from pyspark.sql.window import*

In [0]:
%sql
-- Select the claims_leakage catalog and its silver schema as the session
-- defaults, so unqualified table names resolve against claims_leakage.silver.
use catalog claims_leakage;
use schema silver;

In [0]:
# Candidate datetime patterns that parse_date hands to try_to_timestamp.
# Order matters: parse_date coalesces the attempts, so the first pattern in
# this list that successfully parses a value is the one that wins.
DATE_FORMATS = [
    "yyyy-MM-dd",  # e.g. 2024-01-31
    "dd/MM/yy",  # e.g. 31/01/24
    "MM-dd-yyyy",  # e.g. 01-31-2024
    "MMM dd yyyy",  # e.g. Jan 31 2024
    "yyyy/MM/dd"  # e.g. 2024/01/31
]

def parse_date(col_name):
    """Build a column expression that parses ``col_name`` using DATE_FORMATS.

    One ``try_to_timestamp`` attempt is created per pattern in DATE_FORMATS
    and the attempts are coalesced in list order, so the result is the first
    attempt that is not NULL (NULL when no pattern matches).

    Args:
        col_name: Name of the column holding the raw date text.

    Returns:
        A Column expression; note that it yields a timestamp, not a date.
    """
    raw_value = col(col_name)
    attempts = []
    for pattern in DATE_FORMATS:
        attempts.append(try_to_timestamp(raw_value, lit(pattern)))
    return coalesce(*attempts)



In [0]:
def parse_amount(col_name):
    """Build a column expression that converts free-text amounts to a double.

    Handling, in order:
      * commas (thousands separators) are removed;
      * the text is trimmed and upper-cased so the suffix check is not
        defeated by case or surrounding whitespace;
      * every character that is not a digit is dropped, except a "." that
        sits directly between two digits (a decimal point);
      * a trailing "L" multiplies the value by 100000 (presumably lakh) and a
        trailing "K" multiplies it by 1000.

    Values with no parsable number produce NULL via ``try_cast``.

    Args:
        col_name: Name of the column holding the raw amount text.

    Returns:
        A Column expression of type double (nullable).
    """
    clean = upper(trim(regexp_replace(col(col_name), ",", "")))

    # Keep the decimal point: the previous pattern stripped ".", which turned
    # "1.5L" into 15 * 100000. A "." is kept only when it has a digit on both
    # sides, so punctuation such as the dot in "RS. 500" is still discarded
    # and that value continues to parse as 500.
    numeric_text = regexp_replace(
        clean,
        r"[^0-9.]|(?<![0-9])\.|\.(?![0-9])",
        "",
    )
    base_value = numeric_text.try_cast("double")

    return (
        when(clean.endswith("L"), base_value * 100000)
        .when(clean.endswith("K"), base_value * 1000)
        .otherwise(base_value)
    )

In [0]:
def normalize(col_name):
    """Build a column expression for ``col_name`` trimmed and upper-cased.

    Args:
        col_name: Name of the column to normalize.

    Returns:
        A Column expression with surrounding whitespace removed and the
        remaining text converted to upper case.
    """
    trimmed = trim(col(col_name))
    return upper(trimmed)


In [0]:
import time
import builtins

def write_batch(df, table_name, mode="overwrite"):
    """Write ``df`` to the Delta table ``table_name`` and log the outcome.

    Relies on the module-level ``logger`` and ``CONFIG`` objects being defined
    before the call. The "[END]" message is only emitted when
    ``CONFIG["logging"]["track_execution_time"]`` is truthy.

    Args:
        df: Object exposing a ``.write`` builder (presumably a Spark DataFrame).
        table_name: Table name passed to ``saveAsTable``.
        mode: Save mode passed to the writer; defaults to ``"overwrite"``.

    Raises:
        Exception: Any error raised while writing or logging is logged with
            its traceback and then re-raised unchanged.
    """
    # Imported here so the error handler below cannot itself fail with a
    # NameError (which would hide the real write failure) when the notebook
    # has not imported traceback elsewhere.
    import traceback

    try:
        # perf_counter is monotonic, so the duration is not affected by
        # wall-clock adjustments during the write.
        start_time = time.perf_counter()
        logger.info(f"[START] Batch write | table={table_name} | mode={mode}")

        (
            df.write
              .format("delta")
              .mode(mode)
              .option("overwriteSchema", "true")
              .saveAsTable(table_name)
        )

        end_time = time.perf_counter()

        if CONFIG["logging"]["track_execution_time"]:
            duration = end_time - start_time
            logger.info(
                f"[END] Batch write | table={table_name} | duration={duration:.2f} sec"
            )

    except Exception:
        logger.error(f"[ERROR] Batch write failed | table={table_name}")
        logger.error(traceback.format_exc())
        raise
