In [0]:
# 03_silver_patient_encounters_join.ipynb
# -------------------------------------------------------------
# Joins Silver encounters (stream) with Silver patients (static) to build an enriched fact table.

from pyspark.sql import SparkSession, functions as F

In [0]:
# Spark session
# getOrCreate() reuses an already-active session when one exists, otherwise it
# builds a new one with the application name below.
# NOTE(review): shuffle partitions are pinned to 1 — presumably sized for a
# small dataset; confirm before running against larger volumes.
spark = (
    SparkSession.builder
    .appName("silver_patient_encounters_join")
    .config("spark.sql.shuffle.partitions", "1")
    .getOrCreate()
)

In [0]:
# 1. Load stream and static dimension

# Static dimension: the patients table is read as a batch DataFrame.
patients_df = spark.read.table("kardia_silver.silver_patients")

# Streaming fact source: encounters are read as a stream and de-duplicated on
# EncounterID.
# NOTE(review): no watermark is applied before dropDuplicates, so the set of
# seen EncounterIDs is presumably retained in streaming state indefinitely —
# confirm this is acceptable for the expected encounter volume.
enc_stream = (
    spark.readStream
    .table("kardia_silver.silver_encounters")
    .dropDuplicates(["EncounterID"])
)

In [0]:
# 2. Join and project columns
# Stream-static left join: every encounter row is kept, and the patient
# attributes are NULL when no patient row satisfies e.PatientID == p.ID.
# The patient DataFrame carries a broadcast hint.
joined = (enc_stream.alias("e")
          .join(F.broadcast(patients_df).alias("p"),
                F.col("e.PatientID") == F.col("p.ID"),
                "left")
          .select(
              # Encounter columns are qualified with the "e" alias so the
              # projection cannot become ambiguous if the patient table ever
              # carries a column with the same name. Output names are unchanged.
              "e.EncounterID",
              "e.PatientID",
              "e.START",
              "e.STOP",
              "e.CODE",
              "e.BASE_ENCOUNTER_COST",
              "e.TOTAL_CLAIM_COST",
              # The left join already yields NULL for unmatched patients, so
              # coalescing with a NULL literal added nothing and is dropped.
              F.col("p.GENDER").alias("GENDER"),
              F.col("p.BIRTH_YEAR").alias("BIRTH_YEAR")
          ))

In [0]:
# 3. Write result to Delta
CHKPT = "dbfs:/kardia/_checkpoints/silver_patient_encounters"
TARGET = "kardia_silver.silver_patient_encounters"

# Append the joined rows to the target Delta table. The availableNow trigger
# processes the data available at start and then stops the query by itself.
query = (joined.writeStream
         .format("delta")
         .outputMode("append")
         .option("checkpointLocation", CHKPT)
         .trigger(availableNow=True)
         .toTable(TARGET))

# Block until the run has finished so that the cell only completes once the
# target table is written, and so that any streaming failure is raised here
# instead of being lost with a discarded query handle.
query.awaitTermination()