In [0]:
# 03_gold_gender_breakdown
# Purpose : Patient count by gender
# Source  : kardia_silver.silver_patients
# Output  : kardia_gold.gold_gender_breakdown (table)
# Trigger : Single batch job that computes the latest gender counts
#           and MERGEs them into the Gold table (Type-1 overwrite).

# 0 ▸ Make sure the Gold database is present before the Gold table is
#     created in it. IF NOT EXISTS keeps this safe to run repeatedly.
spark.sql(
    "CREATE DATABASE IF NOT EXISTS kardia_gold"
)

In [0]:
# 1 ▸ Build or refresh Gold table with pure SQL

# Stage the current per-gender patient counts from Silver.
# Rows with a NULL GENDER are filtered out, so the MERGE key below is never
# NULL on the source side.
spark.sql(
    """
    CREATE OR REPLACE TEMP VIEW v_gender_counts AS
    SELECT GENDER,
           COUNT(*) AS patient_cnt
    FROM kardia_silver.silver_patients
    WHERE GENDER IS NOT NULL
    GROUP BY GENDER
    """
)

# Create the target Delta table on the first run only; later runs reuse it.
spark.sql(
    """
    CREATE TABLE IF NOT EXISTS kardia_gold.gold_gender_breakdown (
    GENDER       STRING,
    patient_cnt  BIGINT
    ) USING DELTA
    """
)

# Synchronise Gold with the staged counts (one row per GENDER):
#   - GENDER already in Gold         -> overwrite its row with the new count
#   - GENDER not yet in Gold         -> insert it
#   - GENDER no longer in the source -> delete it; without this branch a value
#     that disappears from Silver would keep its old, stale count in Gold.
# NOTE: WHEN NOT MATCHED BY SOURCE needs a Delta Lake / Databricks Runtime
#       version that supports it (Delta Lake 2.3+, DBR 12.2 LTS+ — confirm
#       against the cluster in use).
# NOTE: if the source view is empty, this branch removes every row in Gold.
spark.sql(
    """
    MERGE INTO kardia_gold.gold_gender_breakdown AS t
    USING v_gender_counts                        AS s
    ON    t.GENDER = s.GENDER
    WHEN MATCHED THEN UPDATE SET *
    WHEN NOT MATCHED THEN INSERT *
    WHEN NOT MATCHED BY SOURCE THEN DELETE
    """
)

In [0]:
# 2 ▸ Preview
# Read the Gold table back and show it, ordered from the highest patient
# count to the lowest. The query text is held in a variable so the display
# call stays on one line; the literal's whitespace is kept exactly as it was.
preview_query = (
        """
        SELECT GENDER, 
               patient_cnt
        FROM   kardia_gold.gold_gender_breakdown
        ORDER  BY patient_cnt DESC
        """
)
display(spark.sql(preview_query))