In [0]:
# 03_silver_encounters_demographics_join.ipynb
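# Joins Silver encounters (stream) with Silver patients (static) to build an enriched fact table.
# The result is an encounter-centric fact table that carries two demographic columns
# (GENDER and BIRTH_YEAR) from the patients table.
# NOTE: no separate flag column is produced here. Encounters without a matching patient simply
# have NULL demographics, which appears to be the null-vs-not-null signal the Gold layer uses
# to exclude bad rows and to count them in the QA table.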

from pyspark.sql import functions as F

# Fully qualified name of the Delta table this notebook writes its joined result to.
SILVER_ENCOUNTERS_DEMOGRAPHICS = "kardia_silver.silver_encounters_demographics"

In [0]:
# 1. Load the two Silver input tables
# Both are read with spark.table, i.e. as ordinary batch DataFrames. The notebook
# header labels encounters as the "stream" side and patients as the "static" side;
# that distinction is not visible in how they are read here.
SILVER_ENCOUNTERS = "kardia_silver.silver_encounters"
SILVER_PATIENTS = "kardia_silver.silver_patients"

enc_df = spark.table(SILVER_ENCOUNTERS)
pat_df = spark.table(SILVER_PATIENTS)

In [0]:
# 2. Left-join encounters to patients
#
# A left join keeps every encounter row, including encounters whose PatientID
# has no match in the patients table; for those rows GENDER and BIRTH_YEAR
# come back NULL. Per the original notes, the Gold layer tracks these
# unmatched rows for QA purposes.

encounters = enc_df.alias("e")
patients = pat_df.alias("p")

# Join key: the encounter's PatientID against the patient's ID.
patient_match = F.col("e.PatientID") == F.col("p.ID")

# Columns carried over from the encounter side, in output order.
encounter_columns = [
    "e.EncounterID",
    "e.PatientID",
    "e.START_TS",
    "e.START_DATE",
    "e.CODE",
    "e.DESCRIPTION",
    "e.REASONCODE",
    "e.REASONDESCRIPTION",
]

# Demographic columns appended from the patient side.
demographic_columns = [
    "p.GENDER",
    "p.BIRTH_YEAR",
]

joined = (
    encounters
        .join(patients, on=patient_match, how="left")
        .select(*encounter_columns, *demographic_columns)
)

In [0]:
# 3. Overwrite the target table
# Mode "overwrite" replaces the table contents on every run, so re-running the
# notebook does not append duplicates. overwriteSchema lets the stored schema
# be replaced along with the data if the selected columns change.
delta_writer = (
    joined.write
        .format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
)

# Writing is the only action in this notebook; the join is executed here.
delta_writer.saveAsTable(SILVER_ENCOUNTERS_DEMOGRAPHICS)

print("silver_encounters_demographics refreshed.")