In [0]:
# 03_gold_view_gender_breakdown
# GOAL:   Patient count by gender
# SOURCE: kardia_silver.silver_patients
# OUTPUT: vw_gender_breakdown (view)

# NOTE
# In this demo, we use a view to calculate patient counts by gender.
# For a low-cardinality dimension like GENDER, a view is sufficient.
# In production, we might materialize this view or use a Streaming Table
# if the source volume grows significantly.

from pyspark.sql import SparkSession, functions as F

# Catalog object names
# Database that holds the Gold-layer objects created by this notebook.
GOLD_DB = "kardia_gold"
# Source table read by the view definition.
SILVER_PATIENTS = "kardia_silver.silver_patients"
# Fully qualified view name. Without the database prefix, the CREATE VIEW /
# REFRESH TABLE / SELECT statements below would resolve the name against the
# session's current database instead of the Gold database ensured above them.
VIEW_GENDER_BREAKDOWN = f"{GOLD_DB}.vw_gender_breakdown"

In [0]:
# Make sure the Gold database is present (no-op when it already exists)
spark.sql(f"CREATE DATABASE IF NOT EXISTS {GOLD_DB}")

# 1. (Re)define the view: one row per GENDER value with its patient count.
#    Rows whose GENDER is NULL are excluded before grouping.
create_view_sql = f"""
    CREATE OR REPLACE VIEW {VIEW_GENDER_BREAKDOWN} AS
    SELECT
        GENDER,
        COUNT(*) AS patient_cnt
    FROM {SILVER_PATIENTS}
    WHERE GENDER IS NOT NULL
    GROUP BY GENDER
    """
spark.sql(create_view_sql)

In [0]:
# 2. Refresh the view, then preview it with the largest patient counts first
spark.sql(f"REFRESH TABLE {VIEW_GENDER_BREAKDOWN}")

gender_counts = spark.sql(
    f"SELECT * FROM {VIEW_GENDER_BREAKDOWN} ORDER BY patient_cnt DESC"
)
display(gender_counts)