In [0]:
%sql
/* 03_gold_encounters_by_month.ipynb
GOAL 1: Monthly encounter volume (Gold fact table)
GOAL 2: QA view — encounters with missing demographics
GOAL 3: QA view — patients with no encounters

SOURCE: kardia_silver.silver_encounters_with_patients
OUTPUT: TABLE: kardia_gold.gold_encounters_by_month
        VIEW : kardia_gold.gold_enc_missing_demo_vw
        VIEW : kardia_gold.gold_patients_no_enc_vw

TRIGGER: Full snapshot overwrite each run — fast and simple for small datasets.
         Use foreachBatch and MERGE in production to avoid rewriting unchanged data.

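NOTE: The Gold Encounters by Month table is created with the Delta auto-optimize table properties
      (optimizeWrite and autoCompact) to reduce small files without a manual OPTIMIZE step.
      Liquid Clustering would additionally require a CLUSTER BY clause, which this notebook does not
      declare, and Predictive Optimization is not configured by this notebook. */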

In [0]:
%sql
-- 1. Make sure the Gold database is present before anything is written to it.
CREATE DATABASE IF NOT EXISTS kardia_gold;

-- 2. Monthly encounter counts.
--    CREATE OR REPLACE rebuilds the whole table on every run (full overwrite).
--    Only encounters with both gender and birth_year populated are counted;
--    rows missing either field are reported by kardia_gold.gold_enc_missing_demo_vw.
CREATE OR REPLACE TABLE kardia_gold.gold_encounters_by_month
TBLPROPERTIES (
  'delta.autoOptimize.optimizeWrite' = 'true',
  'delta.autoOptimize.autoCompact' = 'true'
)
AS
WITH complete_demo_encounters AS (
    -- One row per qualifying encounter, reduced to its 'yyyy-MM' month bucket.
    SELECT DATE_FORMAT(start_ts, 'yyyy-MM') AS month
    FROM kardia_silver.silver_encounters_with_patients
    WHERE gender IS NOT NULL
      AND birth_year IS NOT NULL
)
SELECT
    month,
    COUNT(*) AS encounters_n
FROM complete_demo_encounters
GROUP BY month;

In [0]:
%sql
-- 3. QA view – per-month count of encounters where gender or birth_year is NULL.
--    This is the complement of the filter used for kardia_gold.gold_encounters_by_month.
CREATE OR REPLACE VIEW kardia_gold.gold_enc_missing_demo_vw AS
WITH missing_demo_encounters AS (
    -- One row per affected encounter, reduced to its 'yyyy-MM' month bucket.
    SELECT DATE_FORMAT(start_ts, 'yyyy-MM') AS month
    FROM kardia_silver.silver_encounters_with_patients
    WHERE gender IS NULL
       OR birth_year IS NULL
)
SELECT
    month,
    COUNT(*) AS unmatched_encounters
FROM missing_demo_encounters
GROUP BY month;

In [0]:
%sql
-- 4. QA view – patients whose ID never appears as patient_id in the encounters Silver table.
--    as_of_date comes from current_date(), so it reflects the day the view is queried.
CREATE OR REPLACE VIEW kardia_gold.gold_patients_no_enc_vw AS
SELECT
    p.ID,
    p.GENDER,
    p.BIRTH_YEAR,
    current_date() AS as_of_date
FROM kardia_silver.silver_patients AS p
WHERE NOT EXISTS (
    -- Anti-semi-join: keep the patient only when no encounter row references it.
    SELECT 1
    FROM kardia_silver.silver_encounters_with_patients AS e
    WHERE e.patient_id = p.ID
);

In [0]:
%sql
-- 5. Preview: the 12 latest month buckets in the Gold fact table.
--    month is a 'yyyy-MM' string, so descending string order is newest-first.
SELECT
    month,
    encounters_n
FROM kardia_gold.gold_encounters_by_month
ORDER BY month DESC
LIMIT 12;

In [0]:
%sql
-- Preview: QA view – the 12 latest month buckets of encounters missing gender or birth year.
SELECT
    month,
    unmatched_encounters
FROM kardia_gold.gold_enc_missing_demo_vw
ORDER BY month DESC
LIMIT 12;

In [0]:
%sql
-- Preview: QA view – a sample of at most 10 patients that have no encounter records.
-- There is no ORDER BY, so which 10 rows come back is not specified.
SELECT
    ID,
    GENDER,
    BIRTH_YEAR,
    as_of_date
FROM kardia_gold.gold_patients_no_enc_vw
LIMIT 10;

/* NOTE:
In production, we could keep the Gold fact table incremental by switching
to foreachBatch + MERGE (instead of overwrite) or by storing only changed
months. For this demo dataset, a full overwrite is simple and fast. */