In [0]:
# 03_gold_view_encounters_by_month
# ----------------------------------------------
# Gold KPI – monthly encounter volume & claim cost
# Sources: kardia_silver.silver_patient_encounters  (append-only)

from pyspark.sql import SparkSession, functions as F

In [0]:
# Spark session
# Settings applied to the builder: a single shuffle partition plus the
# Delta Lake SQL extension and catalog implementation.
session_conf = {
    "spark.sql.shuffle.partitions": "1",
    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
    "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
}

session_builder = SparkSession.builder.appName("gold_encounters_by_month")
for conf_key, conf_value in session_conf.items():
    session_builder = session_builder.config(conf_key, conf_value)

# getOrCreate() reuses an already-running session if one exists.
spark = session_builder.getOrCreate()

# Only log messages at ERROR level or above.
spark.sparkContext.setLogLevel("ERROR")

In [0]:
# 1️. Register (or refresh) the view
# TEMP view (session-scoped, nothing is persisted): one row per
# (month, GENDER, BIRTH_YEAR) carrying the encounter count and the summed
# TOTAL_CLAIM_COST, where month is START formatted as 'yyyy-MM'.
view_ddl = """
CREATE OR REPLACE TEMP VIEW vw_encounters_by_month AS
SELECT
  date_format(START,'yyyy-MM')  AS month,
  GENDER,
  BIRTH_YEAR,
  COUNT(*)          AS encounters_n,
  SUM(TOTAL_CLAIM_COST) AS claim_cost
FROM kardia_silver.silver_patient_encounters
GROUP BY month, GENDER, BIRTH_YEAR
ORDER BY month, GENDER, BIRTH_YEAR;
"""

# CREATE OR REPLACE makes re-running this cell safe: the view is redefined.
spark.sql(view_ddl)

In [0]:
# 2️. Quick preview (20 rows, most-recent months first)
# Sort explicitly before limiting: a bare limit() just takes the first rows it
# gets, which are not the latest months. 'yyyy-MM' strings sort
# chronologically, so a descending sort on month puts the newest data on top;
# GENDER and BIRTH_YEAR break ties so the preview is deterministic.
print("vw_encounters_by_month preview:")
display(
    spark.table("vw_encounters_by_month")
         .orderBy("month", "GENDER", "BIRTH_YEAR", ascending=[False, True, True])
         .limit(20)
)