In [0]:
# 01_validate_bronze_feedback.ipynb
# SOURCE: Bronze Feedback table
# OUTPUT: N/A

from pyspark.sql import functions as F

# Location of the Bronze Feedback Delta data, plus the table-name label that
# is attached to the validation results further down.
BRONZE_PATH = "dbfs:/kardia/bronze/bronze_feedback"
BRONZE_FEEDBACK_TBL = "bronze_feedback"

# Read the Bronze table directly from its Delta path.
bronze_reader = spark.read.format("delta")
feedback_df = bronze_reader.load(BRONZE_PATH)

In [0]:
# 1. Compute basic stats
# One aggregation pass over the Bronze table, collected to the driver as a
# single Row with fields: row_count, distinct_ids, null_ids, null_scores.
#
# The null counters use count(when(cond, 1)) instead of
# sum(when(cond, 1).otherwise(0)): SUM over zero rows evaluates to NULL, so an
# empty table would report None for these counters, whereas COUNT always
# returns an integer (0 when nothing matches). For non-empty input both forms
# give the same value and the same bigint type.
stats_row = feedback_df.agg(
    F.count("*").alias("row_count"),
    # countDistinct skips NULLs: this is the number of distinct non-null ids.
    F.countDistinct("feedback_id").alias("distinct_ids"),
    # when() without otherwise() yields NULL for non-matching rows, and
    # count(expr) only counts non-NULL values, i.e. exactly the matching rows.
    F.count(F.when(F.col("feedback_id").isNull(), 1)).alias("null_ids"),
    F.count(F.when(F.col("satisfaction_score").isNull(), 1)).alias("null_scores"),
).first()

# Validation state consumed by the checks and the summary write that follow.
validation_passed = True
error_messages    = []

In [0]:
# 2. Assertions to catch critical data quality issues
# Rebuild the result state from scratch so that re-running this cell on its
# own does not pile duplicate messages onto the list from an earlier run.
error_messages = []

# Guard against a counter arriving as None (e.g. an aggregate evaluated over
# zero rows); treat it as 0 so the comparisons and arithmetic below are safe.
null_id_count    = stats_row.null_ids or 0
null_score_count = stats_row.null_scores or 0

if null_id_count != 0:
    error_messages.append(f"{null_id_count} null feedback_id(s)")

# distinct_ids does not include NULLs (countDistinct ignores them), so it has
# to be compared with the number of rows that actually carry an id. Comparing
# it with row_count would report duplicates for every table that contains a
# null id, even when all non-null ids are unique.
if stats_row.distinct_ids != stats_row.row_count - null_id_count:
    error_messages.append("Duplicate feedback_id(s) found")

if null_score_count != 0:
    error_messages.append(f"{null_score_count} null satisfaction_score(s)")

# The run passes only when no check recorded a problem.
validation_passed = not error_messages

print("Validation stats:", stats_row.asDict())
if validation_passed:
    print("Bronze Feedback validation passed")
else:
    print("Bronze Feedback validation failed:", "; ".join(error_messages))

In [0]:
# 3. Append validation summary
# Start from the collected stats as a one-row DataFrame, then tag it with the
# table name, the pass/fail outcome, the joined error text and a run timestamp.
stats_df = spark.createDataFrame([stats_row.asDict()])
errors_text = ", ".join(error_messages)

validation_summary_df = (
    stats_df
    .withColumn("table_name", F.lit(BRONZE_FEEDBACK_TBL))
    .withColumn("passed", F.lit(validation_passed))
    .withColumn("errors", F.lit(errors_text))
    .withColumn("_run_ts", F.current_timestamp())
)

# Append this run's row to the summary table; mergeSchema is enabled on the
# write so the append is not rejected purely because of added columns.
summary_writer = validation_summary_df.write.mode("append").option("mergeSchema", "true")
summary_writer.saveAsTable("kardia_validation.bronze_feedback_summary")

# Show latest summary entries
latest_runs_df = spark.sql(
    "SELECT * FROM kardia_validation.bronze_feedback_summary ORDER BY _run_ts DESC LIMIT 5"
)
display(latest_runs_df)