In [None]:
%md
### Kardiaflow - Silver Encounters Enriched

**Source:** Stream from Silver encounters joined with static Silver patients

**Target:** `kardia_silver.silver_encounters_enriched`

**Pattern:** Stream–static LEFT JOIN; MERGE (upsert keyed on `encounter_id`) into the Silver enriched table

**Trigger:**
- **Batch mode:** one-time processing of available data
- **Stream mode:** continuous micro-batches (30s)

**Note:** The LEFT JOIN preserves every encounter even when no matching patient record exists; `gender` and `birth_year` are NULL for those rows.

In [None]:
from delta.tables import DeltaTable
from pyspark.sql import functions as F

from kflow.config import silver_paths
from kflow.notebook_utils import init

# 1. Run the project's notebook setup helper (behaviour is defined in
#    kflow.notebook_utils and is not visible from this notebook).
init()

# Resolve the path/name bundle for the "encounters_enriched" Silver dataset.
# Later cells read S.db, S.table, S.path and S.checkpoint from this object.
S = silver_paths("encounters_enriched")
TGT_TABLE = S.table

In [None]:
# 2. Retrieve runtime mode from job widget: "batch" (default) or "stream"
#    Widget registration is best-effort: the notebook must still run when
#    `dbutils` is not defined, in which case MODE falls back to "batch" below.
try:
    dbutils.widgets.dropdown("mode", "batch", ["batch", "stream"])
except NameError:
    # `dbutils` does not exist in this environment; nothing to register.
    pass
except Exception as exc:
    # Keep going (as before), but surface the problem instead of hiding it.
    # Unlike a bare `except:`, this no longer swallows KeyboardInterrupt/SystemExit.
    print(f"[warn] Could not register 'mode' widget: {exc!r}")

MODE       = dbutils.widgets.get("mode") if "dbutils" in globals() else "batch"
IS_BATCH   = (MODE == "batch")
# One checkpoint directory per mode so batch and stream runs never share state.
CHECKPOINT = f"{S.checkpoint}/{MODE}"

In [0]:
# 3. Make sure the Silver database and the enriched Encounters table exist.
#    Both statements are idempotent (IF NOT EXISTS), so re-running is safe.
create_db_ddl = f"CREATE DATABASE IF NOT EXISTS {S.db}"

# Target schema: encounter attributes, patient demographics (gender, birth_year)
# and the ingest lineage columns (_ingest_ts, _batch_id, _source_file).
create_table_ddl = f"""
    CREATE TABLE IF NOT EXISTS {TGT_TABLE} (
        encounter_id       STRING,
        patient_id         STRING,
        START_TS           TIMESTAMP,
        CODE               STRING,
        DESCRIPTION        STRING,
        REASONCODE         STRING,
        REASONDESCRIPTION  STRING,
        gender             STRING,
        birth_year         INT,
        _ingest_ts         TIMESTAMP,
        _batch_id          STRING,
        _source_file       STRING
    ) USING DELTA
    LOCATION '{S.path}'
    """

for ddl in (create_db_ddl, create_table_ddl):
    spark.sql(ddl)

In [0]:
# 4. Open the two inputs of the enrichment join
#  - enc_stream:  streaming read of the Silver Encounters table (fact/event side)
#  - patients_df: static snapshot read of the Silver Patients table (lookup side)
enc_stream = (
    spark.readStream
         .table("kardia_silver.silver_encounters")
)
patients_df = (
    spark.table("kardia_silver.silver_patients")
)

In [0]:
# 5. Enrich each encounter with patient demographics.
#    A left join keeps every encounter; patient columns are NULL when no
#    patient row matches e.patient_id = p.id.
_encounter_fields = [
    "encounter_id",
    "patient_id",
    "START_TS",
    "CODE",
    "DESCRIPTION",
    "REASONCODE",
    "REASONDESCRIPTION",
]
_patient_fields = ["gender", "birth_year"]
_lineage_fields = ["_ingest_ts", "_batch_id", "_source_file"]

# Output column order must match the target table definition.
_projection = (
    [F.col(f"e.{field}") for field in _encounter_fields]
    + [F.col(f"p.{field}").alias(field) for field in _patient_fields]
    + [F.col(f"e.{field}").alias(field) for field in _lineage_fields]
)

_encounters = enc_stream.alias("e")
_patients = patients_df.alias("p")
_match_on_patient = F.col("e.patient_id") == F.col("p.id")

joined = _encounters.join(_patients, _match_on_patient, "left").select(*_projection)

In [0]:
# 6. Define upsert logic to merge each micro-batch into Silver Encounters Enriched
def upsert_to_encounters(batch_df, batch_id):
    """Upsert one micro-batch into the enriched Encounters table.

    Intended as a `foreachBatch` sink: rows are matched to the target on
    `encounter_id`; matches are overwritten, new ids are inserted.

    Args:
        batch_df: DataFrame holding the rows of the current micro-batch. Must
            carry the target table's columns, including `encounter_id`,
            `_ingest_ts`, `_batch_id` and `_source_file`.
        batch_id: Micro-batch identifier supplied by `foreachBatch`. Unused,
            but required by the callback signature.
    """
    # Local import: Window is only needed here and is not imported at file level.
    from pyspark.sql import Window

    # Delta MERGE raises when several source rows match the same target row.
    # A micro-batch can contain more than one row per encounter_id (replayed
    # rows, or a non-unique patient id on the join side), so keep exactly one
    # row per key: the most recently ingested. Extra sort keys make the pick
    # as stable as the available columns allow.
    newest_first = Window.partitionBy("encounter_id").orderBy(
        F.col("_ingest_ts").desc(),
        F.col("_batch_id").desc(),
        F.col("_source_file").desc(),
    )
    latest_per_encounter = (
        batch_df.withColumn("__row_rank", F.row_number().over(newest_first))
                .filter(F.col("__row_rank") == 1)
                .drop("__row_rank")
    )

    (DeltaTable.forName(spark, TGT_TABLE)
               .alias("t")
               .merge(latest_per_encounter.alias("s"), "t.encounter_id = s.encounter_id")
               .whenMatchedUpdateAll()
               .whenNotMatchedInsertAll()
               .execute())

In [0]:
# 7. Build the (not yet started) stream writer: every micro-batch of the joined
#    stream is handed to upsert_to_encounters; progress is tracked under CHECKPOINT.
writer = joined.writeStream.option("checkpointLocation", CHECKPOINT).foreachBatch(upsert_to_encounters)

In [None]:
# 8. Start the query with the trigger selected by the "mode" job parameter
#    (IS_BATCH is derived from the widget value; default is batch).
if not IS_BATCH:
    # Stream mode: fire a micro-batch every 30 seconds and return immediately.
    # NOTE(review): the query is not awaited here, so this cell does not block
    # on the stream — confirm that is the intended behaviour when run as a job.
    live_writer = writer.trigger(processingTime="30 seconds")
    query = live_writer.start()
    print(f"[live] Continuous 30s enrich to {TGT_TABLE} (checkpoint={CHECKPOINT})")
else:
    # Batch mode: consume everything currently available, then stop.
    one_shot_writer = writer.trigger(availableNow=True)
    query = one_shot_writer.start()
    print(f"[batch] Joined encounters with patients and wrote to {TGT_TABLE} (checkpoint={CHECKPOINT}) …")
    # Block until the available-now run has drained its input.
    query.awaitTermination()