In [0]:
# 02_silver_providers_dim.ipynb
# SOURCE:  kardia_bronze.bronze_providers
# OUTPUT:  kardia_silver.silver_providers_dim  (Type‑2 history)
# TRIGGER: Full‑snapshot merge; fine for demo‑scale data.

from delta.tables import DeltaTable
from pyspark.sql  import functions as F

# Table paths
BRONZE_TABLE = "kardia_bronze.bronze_providers"
DIM_TABLE    = "kardia_silver.silver_providers_dim"

In [0]:
# 1. Bootstrap: make sure the Silver database and the dimension table are present
spark.sql("CREATE DATABASE IF NOT EXISTS kardia_silver")

if not spark.catalog.tableExists(DIM_TABLE):
    # Write a zero-row frame (Bronze columns plus the three SCD-2 tracking
    # columns) so the Delta table is created with the expected schema.
    (
        spark.table(BRONZE_TABLE)
             .limit(0)
             .withColumn("eff_start_ts", F.current_timestamp())
             .withColumn("eff_end_ts",   F.lit(None).cast("timestamp"))
             .withColumn("is_current",   F.lit(True))
             .write.format("delta")
             .saveAsTable(DIM_TABLE)
    )

In [0]:
# 2. Snapshot Bronze and tag every row as an open, current SCD-2 version
scd_tracking_columns = {
    "eff_start_ts": F.current_timestamp(),           # version opens at run time
    "eff_end_ts":   F.lit(None).cast("timestamp"),   # open-ended until superseded
    "is_current":   F.lit(True),
}

src_df = spark.table(BRONZE_TABLE)
for tracking_name, tracking_expr in scd_tracking_columns.items():
    src_df = src_df.withColumn(tracking_name, tracking_expr)

# NOTE:  eff_start_ts is the job's run time (current timestamp), not the time the change occurred in the source.
# Re‑running the job minutes later with no changes does not add versions: the merge in step 3 only acts on
# providers whose ProviderSpecialty or ProviderLocation differ from the current row, or that have no current row.

In [0]:
# 3. SCD‑2 merge: close changed rows AND insert their new versions in one pass
#
# A MERGE applies at most one action per source row, so a changed provider
# needs two staged source rows:
#   * merge_key = ProviderID -> matches the current dim row and closes it
#   * merge_key = NULL       -> matches nothing and is inserted as the new version
# Without the NULL-keyed copy, a changed provider would be closed but left
# without a current row until the next run.
dim = DeltaTable.forName(spark, DIM_TABLE)

# Null-safe comparison (<=>) so a value changing to or from NULL counts as a
# change; plain <> evaluates to NULL in that case and the row would be skipped.
change_condition = (
    "NOT (t.ProviderSpecialty <=> s.ProviderSpecialty) OR "
    "NOT (t.ProviderLocation  <=> s.ProviderLocation)"
)

current_dim_df = dim.toDF().where(F.col("is_current"))

# Source rows whose tracked attributes differ from the provider's current version.
changed_df = (
    src_df.alias("s")
          .join(current_dim_df.alias("t"), F.col("s.ProviderID") == F.col("t.ProviderID"))
          .where(change_condition)
          .select("s.*")
)

# The NULL key is cast to ProviderID's type so the union keeps one column type.
provider_id_type = src_df.schema["ProviderID"].dataType
staged_df = (
    changed_df.withColumn("merge_key", F.lit(None).cast(provider_id_type))
              .unionByName(src_df.withColumn("merge_key", F.col("ProviderID")))
)

# Insert only the dimension columns; merge_key is a staging helper and must
# not reach the target table.
insert_values = {c: F.col(f"s.`{c}`") for c in src_df.columns}

(
    dim.alias("t")
       .merge(staged_df.alias("s"), "t.ProviderID = s.merge_key AND t.is_current")
       .whenMatchedUpdate(
           condition=change_condition,
           set={
               "eff_end_ts": F.current_timestamp(),
               "is_current": F.lit(False),
           },
       )
       # Fires for brand-new providers and for the NULL-keyed changed rows.
       .whenNotMatchedInsert(values=insert_values)
       .execute()
)

print("Providers dimension refreshed.")