In [None]:
%md
### Kardiaflow - Gold: Patient Lifecycle

**Source:** `kardia_silver.silver_encounters_enriched`

**Targets:**
`gold_patient_lifecycle`

**Trigger:** Full snapshot overwrite each run.

**Purpose:** Tracks each patient’s first and last visit, total visits, lifetime span in days, classification (new vs returning), average time between visits, and recency since last encounter.

In [None]:
# GOLD_DB is the database name used by the setup cell below; GOLD_DIR appears only
# by name in the LOCATION clause of the SQL cell.
# NOTE(review): confirm how GOLD_DIR reaches SQL, since the SQL cell cannot read
# Python variables directly.
from kflow.config import GOLD_DB, GOLD_DIR
# NOTE(review): show_history is imported but not called in any cell visible here.
from kflow.notebook_utils import init, show_history

# NOTE(review): init() comes from kflow.notebook_utils; presumably it prepares the
# notebook session, but confirm what it sets up before relying on it.
init()

In [0]:
# Create the Gold database when it is missing, then make it the session default
# so later unqualified table names resolve against it.
gold_setup_statements = (
    f"CREATE DATABASE IF NOT EXISTS {GOLD_DB}",
    f"USE {GOLD_DB}",
)
for statement in gold_setup_statements:
    spark.sql(statement)

In [0]:
%sql
-- Patient lifecycle snapshot: one row per patient_id, fully rebuilt on every run.
-- The target is left unqualified so it lands in the database selected by the
-- earlier USE statement, which is the same place the preview query reads from.
-- NOTE(review): the GOLD_DIR placeholder in LOCATION is not a Python variable here;
-- confirm it is supplied to SQL (widget or Spark conf) before this cell runs.
CREATE OR REPLACE TABLE gold_patient_lifecycle
USING DELTA
LOCATION '${GOLD_DIR}/gold_patient_lifecycle'
AS
WITH patient_bounds AS (
  -- Per-patient first/last encounter start and encounter row count.
  -- NOTE(review): COUNT(*) includes rows whose start_ts is NULL, whereas MIN/MAX
  -- skip them; confirm start_ts is always populated upstream.
  SELECT
    patient_id,
    MIN(start_ts) AS first_visit_ts,
    MAX(start_ts) AS last_visit_ts,
    COUNT(*)      AS visit_count
  FROM kardia_silver.silver_encounters_enriched
  GROUP BY patient_id
),

patient_metrics AS (
  -- Span between first and last visit, plus new/returning label.
  SELECT
    patient_id,
    first_visit_ts,
    last_visit_ts,
    DATEDIFF(last_visit_ts, first_visit_ts) AS lifetime_days,
    visit_count,
    -- Exactly one encounter row means the patient has not come back yet.
    CASE WHEN visit_count = 1 THEN 'new' ELSE 'returning' END AS classification
  FROM patient_bounds
)

SELECT
  patient_id,
  first_visit_ts,
  last_visit_ts,
  lifetime_days,
  visit_count,
  classification,
  -- Mean gap between consecutive visits: N visits have N - 1 gaps.
  -- Left NULL for single-visit patients, which also avoids dividing by zero.
  CASE
    WHEN visit_count > 1 THEN
      ROUND(lifetime_days / CAST(visit_count - 1 AS DOUBLE), 2)
  END AS avg_days_between_visits,
  -- Recency is measured against the run date, so it changes with every rebuild.
  DATEDIFF(CURRENT_DATE(), DATE(last_visit_ts)) AS days_since_last_visit
FROM patient_metrics;

In [0]:
%sql
-- Preview: the ten patients with the most visits
SELECT
  patient_id,
  first_visit_ts,
  last_visit_ts,
  lifetime_days,
  visit_count,
  classification,
  avg_days_between_visits,
  days_since_last_visit
FROM gold_patient_lifecycle
ORDER BY visit_count DESC
LIMIT 10;