In [None]:
%md
### Kardiaflow - Gold: Claim Metrics

**Source:** `kardia_silver.silver_claims_enriched`

**Targets:**
- `gold_claim_approval_by_specialty`
- `gold_claim_denial_breakdown`
- `gold_high_cost_procedures`

**Trigger:** Every run fully rebuilds all three tables with `CREATE OR REPLACE TABLE` (snapshot overwrite; simple and adequate for small datasets).

**Purpose:** Reports claim approval rates by provider specialty, the distribution of denied claims by specialty and diagnosis code, and the ten procedures with the highest average approved claim amount.

In [None]:
from kflow.config import GOLD_DB, GOLD_DIR
from kflow.notebook_utils import init

# Notebook bootstrap helper from kflow.notebook_utils; its effects are not visible
# in this notebook.
# NOTE(review): the %sql cells below reference ${GOLD_DIR}, while GOLD_DIR is only
# imported on the Python side and never used there — presumably init() exposes it
# for SQL variable substitution; confirm.
init()

In [0]:
# Create the Gold database when it is missing, then make it the session's current
# database so the unqualified gold_* table names in the SQL cells resolve to it.
create_gold_db_stmt = f"CREATE DATABASE IF NOT EXISTS {GOLD_DB}"
use_gold_db_stmt = f"USE {GOLD_DB}"

spark.sql(create_gold_db_stmt)
spark.sql(use_gold_db_stmt)

In [0]:
%sql
-- Gold: claim approval rate per provider specialty.
--   * Claims with no specialty are grouped under 'Unknown'.
--   * Any status other than 'Approved' (NULL included) counts toward the total
--     but not toward the approved count.
CREATE OR REPLACE TABLE gold_claim_approval_by_specialty
USING DELTA
LOCATION '${GOLD_DIR}/gold_claim_approval_by_specialty'
AS
WITH flagged_claims AS (
  -- One row per claim: normalised specialty plus a 0/1 approval flag.
  SELECT
    COALESCE(provider_specialty, 'Unknown') AS provider_specialty,
    IF(claim_status = 'Approved', 1, 0)     AS is_approved
  FROM kardia_silver.silver_claims_enriched
),

per_specialty AS (
  SELECT
    provider_specialty,
    COUNT(*)         AS total_claims,
    SUM(is_approved) AS approved_count
  FROM flagged_claims
  GROUP BY provider_specialty
)

SELECT
  provider_specialty,
  total_claims,
  approved_count,
  -- Share of approved claims, rounded to four decimal places.
  ROUND(approved_count / CAST(total_claims AS DOUBLE), 4) AS approval_rate
FROM per_specialty;

In [0]:
%sql
-- Gold: denied claims broken down by provider specialty and diagnosis code.
-- denial_pct is each diagnosis code's share of the denials within its specialty.
-- Missing specialty / diagnosis values are reported as 'Unknown'.
CREATE OR REPLACE TABLE gold_claim_denial_breakdown
USING DELTA
LOCATION '${GOLD_DIR}/gold_claim_denial_breakdown'
AS
WITH denied_claims AS (
  -- Denied claims only, with NULL dimensions normalised.
  SELECT
    COALESCE(provider_specialty, 'Unknown') AS provider_specialty,
    COALESCE(diagnosis_code, 'Unknown')     AS diagnosis_code
  FROM kardia_silver.silver_claims_enriched
  WHERE claim_status = 'Denied'
),

denials_by_diagnosis AS (
  SELECT
    provider_specialty,
    diagnosis_code,
    COUNT(*) AS denial_count
  FROM denied_claims
  GROUP BY provider_specialty, diagnosis_code
)

SELECT
  provider_specialty,
  diagnosis_code,
  denial_count,
  -- Denominator: all denials recorded for the same specialty.
  ROUND(
    denial_count
    / CAST(SUM(denial_count) OVER (PARTITION BY provider_specialty) AS DOUBLE),
    4
  ) AS denial_pct
FROM denials_by_diagnosis
-- Sorts the rows as they are written; readers that need an order must still
-- specify their own ORDER BY.
ORDER BY provider_specialty, denial_pct DESC, diagnosis_code;

In [0]:
%sql
-- Gold: top 10 procedures by average approved claim amount.
--   * Only claims with claim_status = 'Approved' are considered.
--   * A procedure needs at least 3 approved claims to qualify, so a single
--     expensive claim cannot put a procedure on the list.
--   * Ranking ties are broken by claim volume and then procedure code, so the
--     snapshot is reproducible for identical input (ORDER BY ... LIMIT without a
--     tie-breaker may keep an arbitrary row at the cut-off).
-- NOTE(review): rows with a NULL procedure_code form their own group, and rows
-- with a NULL claim_amount count toward procedure_count but not toward the
-- average — confirm whether the Silver table guarantees both are non-null.
CREATE OR REPLACE TABLE gold_high_cost_procedures
USING DELTA
LOCATION '${GOLD_DIR}/gold_high_cost_procedures'
AS
WITH approved_claims AS (
  SELECT
    procedure_code,
    claim_amount
  FROM kardia_silver.silver_claims_enriched
  WHERE claim_status = 'Approved'
),

procedure_stats AS (
  SELECT
    procedure_code,
    COUNT(*) AS procedure_count,
    ROUND(AVG(claim_amount), 2) AS avg_claim_amount
  FROM approved_claims
  GROUP BY procedure_code
  HAVING COUNT(*) >= 3  -- minimum frequency threshold
)

SELECT
  procedure_code,
  avg_claim_amount,
  procedure_count
FROM procedure_stats
ORDER BY
  avg_claim_amount DESC,
  procedure_count DESC,  -- tie-breaker 1: prefer the better-supported average
  procedure_code         -- tie-breaker 2: stable, arbitrary-but-fixed order
LIMIT 10;

In [0]:
%sql
-- Preview: the ten specialties with the highest approval rate
SELECT
  provider_specialty,
  total_claims,
  approved_count,
  approval_rate
FROM gold_claim_approval_by_specialty
ORDER BY approval_rate DESC
LIMIT 10;

In [0]:
%sql
-- Preview: total denials per specialty (ten largest), rolled up from the
-- per-diagnosis breakdown
SELECT
  provider_specialty,
  SUM(denial_count) AS total_denial_count
FROM gold_claim_denial_breakdown
GROUP BY provider_specialty
ORDER BY total_denial_count DESC
LIMIT 10;

In [0]:
%sql
-- Preview: high-cost procedures, most expensive average first
SELECT
  procedure_code,
  avg_claim_amount,
  procedure_count
FROM gold_high_cost_procedures
ORDER BY avg_claim_amount DESC
LIMIT 10;