In [0]:
# 01_validate_bronze_claims.ipynb
# SOURCE: Bronze Claims table
# OUTPUT: Validation summary appended to kardia_validation.bronze_claims_summary (via validate_and_log)

from kflow.config import bronze_paths
from kflow.validation_utils import validate_and_log
from kflow.display_utils import banner

from pyspark.sql import functions as F

# Resolve the Bronze path/config object for the "claims" dataset.
# In the cells shown here only `P.table` is read, as the table name handed to spark.table.
P = bronze_paths("claims")

In [0]:
# 1. Read the Bronze Claims table by its registered name (P.table) and
#    replace ClaimAmount with a double-typed version of itself so the amount
#    metrics below operate on a numeric column.
# NOTE(review): depending on the cluster's ANSI setting, values that cannot be
# cast may become NULL instead of raising — confirm this is the intended handling.
df = (
    spark.table(P.table)
    .withColumn("ClaimAmount", F.col("ClaimAmount").cast("double"))
)

In [0]:
# 2. Extra aggregate metrics passed to validate_and_log.
#    Each pair is (metric name, aggregate expression). The name is used both as
#    the dict key and as the column alias, so the two always match.
extra = {
    metric: expression.alias(metric)
    for metric, expression in (
        # Rows whose PatientID is NULL (boolean flag cast to 0/1, then summed).
        ("null_patientid",        F.sum(F.col("PatientID").isNull().cast("int"))),
        # Rows with a strictly negative ClaimAmount.
        ("negative_amount_count", F.sum((F.col("ClaimAmount") < 0).cast("int"))),
        # Observed range of ClaimAmount.
        ("min_amount",            F.min("ClaimAmount")),
        ("max_amount",            F.max("ClaimAmount")),
    )
}

In [0]:
# 3. Validate the loaded frame and log the result.
#    The summary is appended to kardia_validation.bronze_claims_summary by the helper.
#    ClaimID is supplied as the primary-key column; the extra metrics from step 2
#    are passed through, and no assertions are supplied (assertions=None).
validate_and_log(
    df,
    table_name="bronze_claims",
    pk_col="ClaimID",
    extra_metrics=extra,
    assertions=None,
)

# Reached only if validate_and_log returned without raising.
banner("Bronze Claims validation complete", ok=True)