In [0]:
%run ../.././start_up 

In [0]:
# Notebook-level setup: logger plus shortcuts into the pipeline configuration.
# `create_logger` and `pipeline_config` are not defined in the visible cells;
# presumably they are provided by the `%run` start-up notebook — TODO confirm.
#
# The logger is named after the entity this notebook builds
# (view_patient_treatment_journey) so that its log records are not attributed
# to the unrelated landing_to_bronze job.
logger = create_logger(notebook_name="view_patient_treatment_journey", log_level="DEBUG")
logger.info("🚀 Initializing view_patient_treatment_journey notebook")

# Extract frequently used config values into variables.
# NOTE(review): the visible cells below use hard-coded `healthcare.gold.*`
# names and do not read any of these values — confirm whether they (and
# `table_name`) are still needed before removing them.
catalog = pipeline_config["catalog"]
bronze_schema = pipeline_config["schemas"]["bronze"]
bronze_path = pipeline_config["paths"]["bronze_path"]
bronze_volume_path = pipeline_config["paths"]["bronze_volume_path"]
silver_schema = pipeline_config["schemas"]["silver"]
silver_path = pipeline_config["paths"]["silver_path"]
landing_schema = pipeline_config["schemas"]["landing"]
landing_path = pipeline_config["paths"]["landing_path"]
logs_schema = pipeline_config["schemas"]["logs"]
table_name = "dim_doctors"
logger.info("Extracted frequently used config values into variables")

In [0]:
from pyspark.sql.functions import current_timestamp
# ------------------------- Required Tables -------------------------
# Fully qualified names of the Gold tables checked for existence further down.
required_tables = [
    f"healthcare.gold.{gold_table}"
    for gold_table in ("dim_patients", "dim_visits", "fact_treatments")
]

# ------------------------- Error Logging Function -------------------------
def log_etl_error(entity_name, failure_stage, error_reason):
    """Report an ETL failure to the logger and to the Delta error-log table.

    One row (entity_name, failure_stage, error_reason, timestamp) is appended
    to ``healthcare.gold.etl_error_log``; the table is created by the write
    when it does not exist yet.

    Args:
        entity_name: Name of the entity whose processing failed.
        failure_stage: Label of the stage in which the failure happened.
        error_reason: Description of the failure. Non-string values (for
            example an exception object) are converted with ``str``; ``None``
            is stored as NULL.

    Raises:
        Whatever Spark raises if the DataFrame cannot be built or written;
        errors are not swallowed here.
    """
    error_table = "healthcare.gold.etl_error_log"
    logger.error(f"❌ Logging ETL error: [{entity_name}] - Stage: {failure_stage} - Reason: {error_reason}")

    # Normalise every field to text so the row always matches the string schema.
    row = tuple(
        None if value is None else str(value)
        for value in (entity_name, failure_stage, error_reason)
    )

    # An explicit schema avoids type inference on a single row, which fails
    # when a field is None.
    error_df = spark.createDataFrame(
        [row],
        "entity_name string, failure_stage string, error_reason string",
    ).withColumn("timestamp", current_timestamp())

    # The existence check is kept only for the informational log line.
    # NOTE(review): `logger.warn` is left as-is because the logger type is not
    # visible here; if it is a stdlib `logging.Logger`, prefer `.warning`.
    if not spark.catalog.tableExists(error_table):
        logger.warn("⚠️ Error log table not found, creating it now: healthcare.gold.etl_error_log")

    # Always append: saveAsTable in append mode creates a missing table, and
    # never overwriting means a concurrent writer cannot wipe rows that were
    # logged between the existence check and this write.
    error_df.write.format("delta") \
        .mode("append") \
        .saveAsTable(error_table)

# ------------------------- Table Existence Check -------------------------
logger.info("🧪 Checking if required Gold tables exist")
# Keep, in their original order, only the tables the Spark catalog cannot find.
missing_tables = list(
    filter(lambda name: not spark.catalog.tableExists(name), required_tables)
)



In [0]:
# patient_treatment_journey.py (Gold View Builder - Patient Treatment Journey)
#
# Creates (or replaces) the Gold view that summarises, per patient, the visit
# date range, visit/treatment counts and treatment cost totals. The view is
# only built when `missing_tables` (computed in an earlier cell) is empty.

# Single source of truth for the view name, so SQL and log messages agree.
view_entity = "view_patient_treatment_journey"
view_fqn = f"healthcare.gold.{view_entity}"

if missing_tables:
    reason = ", ".join(missing_tables)
    log_etl_error(
        entity_name=view_entity,
        failure_stage="check_tables",
        error_reason=reason
    )
    # Logged at error level because the view is not built.
    # NOTE(review): nothing is raised here, so execution continues past this
    # cell — confirm whether the run should fail instead.
    logger.error("🚨 Aborting view creation due to missing input tables")
else:
    logger.info("✅ All required input tables are present")

    # ------------------------- Logging Start -------------------------
    logger.info("🚀 Starting Gold View creation: view_patient_treatment_journey")

    # ------------------------- SQL for View -------------------------
    logger.info("🧠 Constructing SQL for patient treatment journey view")

    # LEFT JOINs keep patients without visits/treatments; their aggregates
    # come out as NULL (or 0 for the COUNTs).
    view_sql = f"""
    CREATE OR REPLACE VIEW {view_fqn} AS
    SELECT 
        p.patient_id,
        p.patient_name,
        p.gender,
        p.age,
        p.region,

        MIN(v.visit_date) AS first_visit_date,
        MAX(v.visit_date) AS last_visit_date,
        DATEDIFF(MAX(v.visit_date), MIN(v.visit_date)) AS treatment_duration_days,

        COUNT(DISTINCT v.visit_id) AS total_visits,
        COUNT(t.treatment_id) AS total_treatments,

        SUM(t.treatment_cost) AS total_treatment_cost,
        ROUND(AVG(t.treatment_cost), 2) AS avg_treatment_cost

        FROM healthcare.gold.dim_patients p
        LEFT JOIN healthcare.gold.dim_visits v ON p.patient_id = v.patient_id
        LEFT JOIN healthcare.gold.fact_treatments t ON v.visit_id = t.visit_id

        GROUP BY 
        p.patient_id,
        p.patient_name,
        p.gender,
        p.age,
        p.region
    """

    # ------------------------- Execute SQL -------------------------
    logger.info("📐 Executing SQL to create or replace view: healthcare.gold.view_patient_treatment_journey")
    try:
        spark.sql(view_sql)
    except Exception as exc:
        # Record the failure in the ETL error log like the missing-table
        # case, then re-raise so the notebook run still fails.
        log_etl_error(
            entity_name=view_entity,
            failure_stage="create_view",
            error_reason=str(exc)
        )
        raise

    # ------------------------- Preview View -------------------------
    logger.info(f"🔍 Previewing data from {view_entity}")
    preview_df = spark.sql(f"SELECT * FROM {view_fqn}")
    display(preview_df)

    logger.info(f"✅ Gold View created successfully: {view_entity}")