In [0]:
# silver_feedback_batch_append.ipynb
# SOURCE: kardia_bronze.bronze_feedback (batch JSONL ingest with audit metadata)
# OUTPUT: kardia_silver.silver_feedback (append-only deduplicated)
# PATTERN: Batch MERGE to prevent duplicate feedback_id entries
# NOTE: Feedback is append-only; users don’t update prior records, so SCD1 isn’t needed.

%pip install -q /dbfs/Shared/libs/kflow-latest.whl
from kflow.config import bronze_table, silver_paths

from delta.tables import DeltaTable
from pyspark.sql import functions as F
from pyspark.sql.types import MapType, StringType

# Resolve the Bronze source and Silver target for the feedback dataset
# from the shared kflow configuration helpers.
DATASET = "feedback"
S = silver_paths(DATASET)          # Silver config object; .db and .table are read below
SRC_TABLE = bronze_table(DATASET)  # Bronze table this notebook reads from
TGT_TABLE = S.table                # Silver table this notebook merges into

In [0]:
# 1. Ensure Silver DB and Feedback table exists
# Both statements use IF NOT EXISTS, so re-running this cell leaves an
# existing database or table as it is.
create_db_sql = f"CREATE DATABASE IF NOT EXISTS {S.db}"
spark.sql(create_db_sql)

# Silver schema: the business columns plus the audit columns
# (_ingest_ts, _source_file, _batch_id) selected from Bronze.
create_table_sql = f"""
    CREATE TABLE IF NOT EXISTS {TGT_TABLE} (
        feedback_id        STRING  NOT NULL,
        provider_id        STRING,
        timestamp          TIMESTAMP,
        visit_id           STRING,
        satisfaction_score INT,
        comments           STRING,
        source             STRING,
        tags               ARRAY<STRING>,
        metadata           MAP<STRING,STRING>,
        _ingest_ts         TIMESTAMP,
        _source_file       STRING,
        _batch_id          STRING
    ) USING DELTA
    """
spark.sql(create_table_sql)

In [0]:
# 2. Load Bronze feedback records and prepare for Silver
#
# Reads the Bronze feedback table, converts the raw fields to their Silver
# types, and reduces the batch to exactly one row per non-null feedback_id so
# the MERGE in the next step sees a unique, valid key on the source side.
bronze_df = spark.table(SRC_TABLE)

# Deterministic de-duplication. dropDuplicates() keeps an arbitrary copy of each
# key, so the surviving row could change from run to run. Instead, rank the
# copies of every feedback_id and keep the earliest-ingested one. That matches
# the insert-only MERGE below, where the first record seen for a key is the one
# that stays in Silver. Ties on _ingest_ts are broken by batch id and source file.
first_seen_rank = F.expr(
    "row_number() OVER ("
    "PARTITION BY feedback_id "
    "ORDER BY _ingest_ts ASC NULLS LAST, _batch_id ASC, _source_file ASC)"
)

silver_src = (
    bronze_df
        # A NULL key never satisfies the merge condition (NULL = NULL is not true),
        # so such rows would be inserted on every run and violate the NOT NULL
        # declaration on Silver's feedback_id, failing the whole batch.
        .filter(F.col("feedback_id").isNotNull())
        .withColumn("timestamp", F.to_timestamp("timestamp"))
        # metadata_json is parsed into a string-to-string map.
        .withColumn("metadata", F.from_json("metadata_json", MapType(StringType(), StringType())))
        .withColumn("_dedup_rank", first_seen_rank)
        .filter(F.col("_dedup_rank") == 1)
        # The explicit select fixes the Silver column order and drops both the
        # helper rank column and the raw metadata_json column.
        .select(
            "feedback_id",
            "provider_id",
            "timestamp",
            "visit_id",
            "satisfaction_score",
            "comments",
            "source",
            "tags",
            "metadata",
            "_ingest_ts",
            "_source_file",
            "_batch_id"
        )
)

In [0]:
# 3. Merge Bronze into Silver to append new feedback only
# Insert-only MERGE keyed on feedback_id: no whenMatched clause is defined,
# so rows whose key already exists in Silver are left untouched.
silver_table = DeltaTable.forName(spark, TGT_TABLE)
insert_only_merge = (
    silver_table.alias("t")
    .merge(silver_src.alias("s"), "t.feedback_id = s.feedback_id")
    .whenNotMatchedInsertAll()
)
insert_only_merge.execute()

In [0]:
# 4. Batch finished – Verify Silver Feedback table row count and preview records.
df = spark.table(TGT_TABLE)

# Total number of rows in Silver after the merge.
row_count = df.count()
print(f"Silver Feedback row count: {row_count:,}")

# Preview the five rows with the latest _ingest_ts.
latest_rows = df.orderBy(F.desc("_ingest_ts")).limit(5)
display(latest_rows)