In [None]:
# gold_provider_rolling_spend.ipynb
# GOAL 1: Daily provider spend
# GOAL 2: 7‑day rolling spend & avg per provider
# SOURCE: kardia_silver.silver_claims_enriched
# TARGET: TABLE - gold_provider_daily_spend
#         TABLE - gold_provider_7d_spend
# TRIGGER: Full snapshot overwrite each run — simple for small datasets.
# NOTE: Demonstrates window functions for rolling KPI calculations.

# Install kflow from local wheel for use during job execution.
# --no-index / --find-links restrict resolution to /dbfs/Shared/libs (no package index is
# contacted), and --no-deps skips dependency installation, so anything kflow needs must
# already be available in the environment.
%pip install -q --no-deps --no-index --find-links=/dbfs/Shared/libs kflow

from kflow.auth_adls import ensure_adls_oauth

# Configure Spark with ADLS OAuth credentials and return base ABFS path.
# abfss_base is interpolated into the Gold database LOCATION in the next cell.
# NOTE(review): ensure_adls_oauth is defined in kflow and not visible here; the
# description above is taken from its name and the original comment — confirm.
abfss_base = ensure_adls_oauth()

In [0]:
# Create the Gold database under the ADLS base path if it is missing, then make it
# the session default so the %sql cells below can use unqualified table names.
create_gold_db_sql = f"""
  CREATE DATABASE IF NOT EXISTS kardia_gold
  LOCATION '{abfss_base}/kardia/gold'
  """
spark.sql(create_gold_db_sql)

spark.sql("USE kardia_gold")

In [0]:
%sql
-- Daily provider spend: one row per (provider_id, spend_date) holding the summed
-- claim_amount for that day. The table is fully replaced on every run.
CREATE OR REPLACE TABLE gold_provider_daily_spend
USING DELTA AS
SELECT
  provider_id,
  spend_date,
  SUM(claim_amount) AS daily_spend
FROM (
  -- Normalize claim_date to DATE once so the grouping key is written a single time
  SELECT
    provider_id,
    CAST(claim_date AS DATE) AS spend_date,
    claim_amount
  FROM kardia_silver.silver_claims_enriched
) AS claims_by_day
GROUP BY provider_id, spend_date;

In [0]:
%sql
-- 7-day rolling spend & average per provider.
-- Output columns: provider_id, spend_date, daily_spend, rolling_7d_spend, rolling_7d_avg.
-- The table is fully replaced on every run.
CREATE OR REPLACE TABLE gold_provider_7d_spend
USING DELTA AS

WITH provider_day_totals AS (
  -- One row per provider per calendar day that has at least one claim
  SELECT
    provider_id,
    CAST(claim_date AS DATE) AS spend_date,
    SUM(claim_amount) AS daily_spend
  FROM kardia_silver.silver_claims_enriched
  GROUP BY provider_id, CAST(claim_date AS DATE)
)

SELECT
  provider_id,
  spend_date,
  daily_spend,
  -- Total spend over the current day plus the 6 preceding calendar days
  SUM(daily_spend) OVER trailing_7d AS rolling_7d_spend,
  -- Mean of daily_spend over the rows inside the window, i.e. over the days that
  -- actually have spend; days with no claims are absent, so this is not total / 7
  ROUND(AVG(daily_spend) OVER trailing_7d, 2) AS rolling_7d_avg
FROM provider_day_totals
WINDOW trailing_7d AS (
  PARTITION BY provider_id
  -- RANGE is value-based: the frame spans 6 days back by date value, regardless of
  -- how many rows (days with spend) fall inside it.
  -- NOTE(review): the TIMESTAMP cast is presumably there so the interval-based RANGE
  -- bound is accepted for this ordering key — confirm before simplifying.
  ORDER BY CAST(spend_date AS TIMESTAMP)
  RANGE BETWEEN INTERVAL 6 DAYS PRECEDING AND CURRENT ROW
);

In [0]:
%sql
-- Preview: the 10 most recent rows of daily spend.
-- Many providers share the same spend_date, so ordering by date alone leaves the
-- top 10 arbitrary between runs; provider_id is added as a tie-breaker so the
-- preview is stable ((provider_id, spend_date) is the table's grouping key).
SELECT *
FROM gold_provider_daily_spend
ORDER BY spend_date DESC, provider_id
LIMIT 10;

In [0]:
%sql
-- Preview: the 10 most recent rows of 7-day rolling spend.
-- Many providers share the same spend_date, so ordering by date alone leaves the
-- top 10 arbitrary between runs; provider_id is added as a tie-breaker so the
-- preview is stable (the table holds one row per provider per spend_date).
SELECT *
FROM gold_provider_7d_spend
ORDER BY spend_date DESC, provider_id
LIMIT 10;