In [0]:
%sql
/* 03_gold_patient_lifecycle.ipynb
 GOAL 1: gold_patient_lifecycle — time between visits, patient lifetime span, new/returning classification, age‑band utilization

 SOURCE: kardia_silver.silver_encounters_enriched
 OUTPUT: TABLE: kardia_gold.gold_patient_lifecycle

 TRIGGER: Full snapshot overwrite each run — fast and simple for small datasets.
          In production, switch to foreachBatch + MERGE to only update changed patients.

 NOTE:
  - lifetime_days = days between first and last visit  
  - classification = 'new' if only one visit, else 'returning'  
  - age_band buckets based on YEAR(current_date()) - birth_year (calendar-year difference, i.e. an approximate age)  
  - avg_days_between_visits = lifetime_days ÷ (visit_count - 1); NULL when the patient has only one visit 
  - Current dataset has 2,565 distinct patient_id. */

In [0]:
%sql
-- 1. Make sure the Gold schema is present before anything is written to it
--    (no-op when it already exists).
CREATE SCHEMA IF NOT EXISTS kardia_gold;

-- 2. Patient lifecycle fact table (pure SQL overwrite; rebuilt in full on every run).
--    Grain: one row per patient_id.
--    Encounters with a NULL birth_year are filtered out before aggregation, so a
--    patient whose encounters all lack birth_year does not appear in the output.
CREATE OR REPLACE TABLE kardia_gold.gold_patient_lifecycle
TBLPROPERTIES (
  'delta.autoOptimize.optimizeWrite' = 'true',
  'delta.autoOptimize.autoCompact' = 'true'
) AS
WITH patient_visits AS (
  -- Collapse encounters to one row per patient.
  -- Grouping on patient_id alone (rather than patient_id + birth_year) keeps the
  -- grain intact even if a patient carries conflicting birth_year values;
  -- MAX() then picks a single value deterministically (the latest birth year).
  SELECT
    patient_id,
    MAX(birth_year) AS birth_year,
    MIN(start_ts)   AS first_visit_ts,
    MAX(start_ts)   AS last_visit_ts,
    COUNT(*)        AS visit_count
  FROM kardia_silver.silver_encounters_enriched
  WHERE birth_year IS NOT NULL
  GROUP BY patient_id
),
patient_spans AS (
  -- Derive the per-patient measures once so the final projection can reuse them.
  SELECT
    patient_id,
    first_visit_ts,
    last_visit_ts,
    visit_count,
    DATEDIFF(last_visit_ts, first_visit_ts) AS lifetime_days,  -- whole days, first to last visit
    YEAR(current_date()) - birth_year       AS approx_age      -- calendar-year difference only
  FROM patient_visits
)
SELECT
  patient_id,
  first_visit_ts,
  last_visit_ts,
  lifetime_days,
  visit_count,
  CASE WHEN visit_count = 1 THEN 'new' ELSE 'returning' END AS classification,
  -- Cascading upper bounds: each branch only sees ages not matched above,
  -- so the bands are contiguous with no gaps between them.
  CASE
    WHEN approx_age < 20 THEN '<20'
    WHEN approx_age < 40 THEN '20-39'
    WHEN approx_age < 60 THEN '40-59'
    ELSE '60+'
  END AS age_band,
  -- Average gap between consecutive visits; undefined (NULL) for a single visit.
  CASE
    WHEN visit_count > 1
    THEN ROUND(lifetime_days / (visit_count - 1), 2)
    ELSE NULL
  END AS avg_days_between_visits
FROM patient_spans;

In [0]:
%sql
-- 3. Preview: Top 10 patients by visit_count
--    Columns are listed explicitly so the preview stays stable if the table
--    gains columns; patient_id is a tie-breaker so rows with equal visit
--    counts come back in a stable order instead of an arbitrary one.
SELECT
  patient_id,
  first_visit_ts,
  last_visit_ts,
  lifetime_days,
  visit_count,
  classification,
  age_band,
  avg_days_between_visits
FROM kardia_gold.gold_patient_lifecycle
ORDER BY visit_count DESC, patient_id
LIMIT 10;