In [0]:
from pyspark.sql import SparkSession, functions as f

# Load every CPT CSV file from the landing zone; the first row of each file is the header.
cptcodes_df = spark.read.csv("/mnt/landing/cptdata/*.csv", header=True)

# Normalise all header names in a single pass: spaces become underscores and
# letters are lower-cased, so the columns can be referenced from SQL unquoted.
normalised_names = [name.replace(" ", "_").lower() for name in cptcodes_df.columns]
cptcodes_df = cptcodes_df.toDF(*normalised_names)

# Expose the frame to SQL cells under the name "cptcodes", then preview it.
cptcodes_df.createOrReplaceTempView("cptcodes")
display(cptcodes_df)

procedure_code_category,cpt_codes,procedure_code_descriptions,code_status
AAA,34830,"Open repair of infrarenal aortic aneurysm or dissection, plus repair of associated arterial trauma, following unsuccessful endovascular repair; tube prosthesis",No Change
AAA,34831,"Open repair of infrarenal aortic aneurysm or dissection, plus repair of associated arterial trauma, following unsuccessful endovascular repair; aorto-bi-iliac prosthesis",No Change
AAA,34832,"Open repair of infrarenal aortic aneurysm or dissection, plus repair of associated arterial trauma, following unsuccessful endovascular repair; aorto-bifemoral prosthesis",No Change
AAA,35081,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for aneurysm, pseudoaneurysm, and associated occlusive disease, abdominal aorta",No Change
AAA,35082,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for ruptured aneurysm, abdominal aorta",No Change
AAA,35091,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for aneurysm, pseudoaneurysm, and associated occlusive disease, abdominal aorta involving visceral vessels (mesenteric, celiac, renal)",No Change
AAA,35092,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for ruptured aneurysm, abdominal aorta involving visceral vessels (mesenteric, celiac, renal)",No Change
AAA,35102,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for aneurysm, pseudoaneurysm, and associated occlusive disease, abdominal aorta involving iliac vessels (common, hypogastric, external)",No Change
AAA,35103,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for ruptured aneurysm, abdominal aorta involving iliac vessels (common, hypogastric, external)",No Change
AMP,23900,Interthoracoscapular amputation (forequarter),No Change


In [0]:
# Persist the CPT codes to the bronze layer as Parquet.
# "overwrite" replaces whatever an earlier run left at this path.
(
    cptcodes_df.write
    .format("parquet")
    .mode("overwrite")
    .save("/mnt/bronze/cpt_codes")
)

In [0]:
%sql
-- Register the bronze Parquet files as the table cpt_codes.
-- IF NOT EXISTS makes this a no-op when the table is already registered.
CREATE TABLE IF NOT EXISTS cpt_codes
USING PARQUET
LOCATION '/mnt/bronze/cpt_codes'


In [0]:
%sql
-- Preview the first ten rows of the bronze table.
SELECT *
FROM cpt_codes
LIMIT 10

procedure_code_category,cpt_codes,procedure_code_descriptions,code_status
AAA,34830,"Open repair of infrarenal aortic aneurysm or dissection, plus repair of associated arterial trauma, following unsuccessful endovascular repair; tube prosthesis",No Change
AAA,34831,"Open repair of infrarenal aortic aneurysm or dissection, plus repair of associated arterial trauma, following unsuccessful endovascular repair; aorto-bi-iliac prosthesis",No Change
AAA,34832,"Open repair of infrarenal aortic aneurysm or dissection, plus repair of associated arterial trauma, following unsuccessful endovascular repair; aorto-bifemoral prosthesis",No Change
AAA,35081,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for aneurysm, pseudoaneurysm, and associated occlusive disease, abdominal aorta",No Change
AAA,35082,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for ruptured aneurysm, abdominal aorta",No Change
AAA,35091,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for aneurysm, pseudoaneurysm, and associated occlusive disease, abdominal aorta involving visceral vessels (mesenteric, celiac, renal)",No Change
AAA,35092,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for ruptured aneurysm, abdominal aorta involving visceral vessels (mesenteric, celiac, renal)",No Change
AAA,35102,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for aneurysm, pseudoaneurysm, and associated occlusive disease, abdominal aorta involving iliac vessels (common, hypogastric, external)",No Change
AAA,35103,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for ruptured aneurysm, abdominal aorta involving iliac vessels (common, hypogastric, external)",No Change
AMP,23900,Interthoracoscapular amputation (forequarter),No Change


In [0]:
%sql
-- Data-quality view over the bronze table: every row is kept, and a row is
-- flagged as quarantined when its code or its description is missing.
CREATE OR REPLACE TEMP VIEW data_quality_check AS
SELECT
    cpt_codes,
    procedure_code_category,
    procedure_code_descriptions,
    code_status,
    -- IS NULL always yields true/false, so the flag itself is never NULL.
    (cpt_codes IS NULL OR procedure_code_descriptions IS NULL) AS is_quarantined
FROM cpt_codes




In [0]:
%sql
-- Preview the first ten rows of the data-quality view, including the quarantine flag.
SELECT *
FROM data_quality_check
LIMIT 10

cpt_codes,procedure_code_category,procedure_code_descriptions,code_status,is_quarantined
34830,AAA,"Open repair of infrarenal aortic aneurysm or dissection, plus repair of associated arterial trauma, following unsuccessful endovascular repair; tube prosthesis",No Change,False
34831,AAA,"Open repair of infrarenal aortic aneurysm or dissection, plus repair of associated arterial trauma, following unsuccessful endovascular repair; aorto-bi-iliac prosthesis",No Change,False
34832,AAA,"Open repair of infrarenal aortic aneurysm or dissection, plus repair of associated arterial trauma, following unsuccessful endovascular repair; aorto-bifemoral prosthesis",No Change,False
35081,AAA,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for aneurysm, pseudoaneurysm, and associated occlusive disease, abdominal aorta",No Change,False
35082,AAA,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for ruptured aneurysm, abdominal aorta",No Change,False
35091,AAA,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for aneurysm, pseudoaneurysm, and associated occlusive disease, abdominal aorta involving visceral vessels (mesenteric, celiac, renal)",No Change,False
35092,AAA,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for ruptured aneurysm, abdominal aorta involving visceral vessels (mesenteric, celiac, renal)",No Change,False
35102,AAA,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for aneurysm, pseudoaneurysm, and associated occlusive disease, abdominal aorta involving iliac vessels (common, hypogastric, external)",No Change,False
35103,AAA,"Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for ruptured aneurysm, abdominal aorta involving iliac vessels (common, hypogastric, external)",No Change,False
23900,AMP,Interthoracoscapular amputation (forequarter),No Change,False


In [0]:
%sql 
-- Make sure the silver schema exists before creating a table inside it.
CREATE SCHEMA IF NOT EXISTS silver;
-- Delta table holding versioned CPT codes. IF NOT EXISTS means re-running this
-- cell leaves an already-created table (and its rows) untouched.
CREATE TABLE IF NOT EXISTS silver.cptcodes (
  cpt_codes STRING,
  procedure_code_category STRING,
  procedure_code_descriptions STRING,
  code_status STRING,
  -- Data-quality flag copied from data_quality_check by the merge cell.
  is_quarantined BOOLEAN,
  -- Set to current_timestamp() when the merge cell inserts the row.
  audit_insertdate TIMESTAMP,
  -- Set on insert and again when the merge cell marks the row as no longer current.
  audit_modifieddate TIMESTAMP,
  -- Inserted as true; the merge cell sets it to false when it detects a change.
  is_current BOOLEAN
)
USING DELTA;


## Implementing SCD Type 2 on `silver.cptcodes`


In [0]:
%sql
-- SCD Type 2 load of data_quality_check into silver.cptcodes.
--
-- A single MERGE can apply only one action per source row, so a source row that
-- matches its current target row can expire that row but cannot also insert the
-- replacement. The source is therefore staged in two branches:
--   1. every source row keyed by its cpt_codes: expires the current version when
--      an attribute changed, or inserts the row when the code has no current version;
--   2. a second copy of each CHANGED row with a NULL merge key: it can never match,
--      so it is inserted as the new current version in the same run.
--
-- All change comparisons use the null-safe operator <=>, so a value changing
-- to or from NULL counts as a change (a plain != would evaluate to NULL and be skipped).
--
-- Source rows whose cpt_codes is NULL never satisfy the ON clause and therefore
-- always take the INSERT branch.
MERGE INTO silver.cptcodes AS target
USING (
  -- Branch 1: all source rows, matched to the target on their own code.
  SELECT
    src.cpt_codes AS merge_key,
    src.cpt_codes,
    src.procedure_code_category,
    src.procedure_code_descriptions,
    src.code_status,
    src.is_quarantined
  FROM data_quality_check AS src

  UNION ALL

  -- Branch 2: rows that differ from their current version; the NULL key forces an insert.
  SELECT
    CAST(NULL AS STRING) AS merge_key,
    src.cpt_codes,
    src.procedure_code_category,
    src.procedure_code_descriptions,
    src.code_status,
    src.is_quarantined
  FROM data_quality_check AS src
  INNER JOIN silver.cptcodes AS cur
    ON cur.cpt_codes = src.cpt_codes AND cur.is_current = true
  WHERE NOT (cur.procedure_code_category <=> src.procedure_code_category)
     OR NOT (cur.procedure_code_descriptions <=> src.procedure_code_descriptions)
     OR NOT (cur.code_status <=> src.code_status)
     OR NOT (cur.is_quarantined <=> src.is_quarantined)
) AS source
ON target.cpt_codes = source.merge_key AND target.is_current = true

-- Step 1: expire the current version when any tracked attribute changed.
WHEN MATCHED AND (
    NOT (target.procedure_code_category <=> source.procedure_code_category) OR
    NOT (target.procedure_code_descriptions <=> source.procedure_code_descriptions) OR
    NOT (target.code_status <=> source.code_status) OR
    NOT (target.is_quarantined <=> source.is_quarantined)
) THEN
  UPDATE SET
    target.is_current = false,
    target.audit_modifieddate = current_timestamp()

-- Step 2: insert new versions (branch 2) and codes with no current version (branch 1).
WHEN NOT MATCHED BY TARGET THEN
  INSERT (
    cpt_codes,
    procedure_code_category,
    procedure_code_descriptions,
    code_status,
    is_quarantined,
    audit_insertdate,
    audit_modifieddate,
    is_current
  )
  VALUES (
    source.cpt_codes,
    source.procedure_code_category,
    source.procedure_code_descriptions,
    source.code_status,
    source.is_quarantined,
    current_timestamp(),
    current_timestamp(),
    true
  );


num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
1161,0,0,1161
