In [None]:
%sql
/* 03_gold_patient_lifecycle.ipynb
 GOAL 1: gold_patient_lifecycle — time between visits, patient lifetime span, new/returning classification, age‑band utilization

 SOURCE: kardia_silver.silver_encounters_enriched
 OUTPUT: TABLE: gold_patient_lifecycle

 TRIGGER: Full snapshot overwrite each run — fast and simple for small datasets.
          In production, switch to foreachBatch + MERGE to only update changed patients.

 NOTE:
  - lifetime_days = days between first and last visit  
  - classification = 'new' if only one visit, else 'returning'  
  - age_band buckets (<20, 20-39, 40-59, 60+) based on YEAR(current_date()) – birth_year
  - avg_days_between_visits = lifetime_days ÷ (visit_count–1); NULL when visit_count = 1
  - Current dataset has 2,565 distinct patient_id. */

-- Make kardia_gold the current schema; the statements that follow refer to
-- gold_patient_lifecycle without a schema qualifier.
USE kardia_gold;

In [None]:
%sql
-- 2. Patient lifecycle fact table
-- Rebuilds gold_patient_lifecycle from scratch with one row per patient_id.
-- Encounter rows whose birth_year is NULL are filtered out before aggregation,
-- so they contribute to none of the metrics below.
CREATE OR REPLACE TABLE gold_patient_lifecycle AS
WITH encounters AS (
  -- Only the columns the lifecycle metrics need.
  SELECT
    patient_id,
    start_ts,
    birth_year
  FROM kardia_silver.silver_encounters_enriched
  WHERE birth_year IS NOT NULL
),
patient_visits AS (
  -- Collapse encounters to a single row per patient.
  SELECT
    patient_id,
    MIN(start_ts)   AS first_visit_ts,
    MAX(start_ts)   AS last_visit_ts,
    COUNT(*)        AS visit_count,
    MAX(birth_year) AS max_birth_year
  FROM encounters
  GROUP BY patient_id
),
patient_metrics AS (
  -- Derive the span and age once so the final projection can reuse them.
  SELECT
    patient_id,
    first_visit_ts,
    last_visit_ts,
    visit_count,
    DATEDIFF(last_visit_ts, first_visit_ts) AS lifetime_days,
    -- Calendar-year difference as of the day the table is rebuilt.
    YEAR(CURRENT_DATE()) - max_birth_year   AS age_years
  FROM patient_visits
)
SELECT
  patient_id,
  first_visit_ts,
  last_visit_ts,
  lifetime_days,
  visit_count,
  CASE visit_count WHEN 1 THEN 'new' ELSE 'returning' END AS classification,
  CASE
    WHEN age_years < 20                      THEN '<20'
    WHEN age_years >= 20 AND age_years <= 39 THEN '20-39'
    WHEN age_years >= 40 AND age_years <= 59 THEN '40-59'
    ELSE '60+'
  END AS age_band,
  -- visit_count is at least 1 for every group, so the divisor is zero only for
  -- single-visit patients; NULLIF turns that case into NULL instead of dividing.
  ROUND(lifetime_days / NULLIF(visit_count - 1, 0), 2) AS avg_days_between_visits
FROM patient_metrics;

In [None]:
%sql
-- 3. Preview: Top 10 patients by visit_count
-- patient_id is the table's grouping key, so using it as a secondary sort key
-- gives ties on visit_count a stable order from one run to the next.
SELECT
  patient_id,
  first_visit_ts,
  last_visit_ts,
  lifetime_days,
  visit_count,
  classification,
  age_band,
  avg_days_between_visits
FROM gold_patient_lifecycle
ORDER BY visit_count DESC, patient_id
LIMIT 10;