In [0]:
%run "/Workspace/Users/shoyofromconcrete@gmail.com/claims_risk_leakage/config"




In [0]:
%run "/Workspace/Users/shoyofromconcrete@gmail.com/claims_risk_leakage/notebooks/silver/utilities"

In [0]:
import time
import logging
import sys
import traceback
import builtins
import pyspark.sql.functions as F
from pyspark.sql.window import Window


In [0]:
# ----------------------------------------------------------
# Logger setup
# ----------------------------------------------------------
# Builds the notebook logger from CONFIG["logging"]:
#   - "logger_name": name passed to logging.getLogger
#   - "log_level":   level to apply; a level name in any case
#                    ("INFO", "info") or a numeric level (20)
logger_name = CONFIG["logging"]["logger_name"]
log_level = CONFIG["logging"]["log_level"]

logger = logging.getLogger(logger_name)

# logging.getLogger returns the same object for the same name, so only
# attach the stdout handler when the logger has none yet; otherwise
# re-running this cell would add a second handler and duplicate every line.
if not logger.handlers:
    handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter(
        "%(asctime)s | %(levelname)s | %(message)s"
    )
    handler.setFormatter(formatter)
    logger.addHandler(handler)

# Logger.setLevel accepts both level names and ints, so hand the configured
# value over directly instead of resolving it with getattr(logging, ...),
# which breaks on lower-case names and on numeric levels. Names are
# normalised to upper case because setLevel only knows the upper-case forms.
if isinstance(log_level, str):
    log_level = log_level.strip().upper()

logger.setLevel(log_level)


In [0]:
# Select the configured catalog first, then the silver schema inside it,
# so table names without a catalog/schema prefix resolve there.
spark.sql("USE CATALOG {}".format(CONFIG["catalog"]))
spark.sql("USE SCHEMA {}".format(CONFIG["schemas"]["silver"]))

In [0]:
# Policy silver pipeline: read the bronze policy table, add typed and
# normalised columns, tag every row with a data-quality status, and
# overwrite the silver clean table with the rows that pass.
# On any failure the error is logged with its traceback and re-raised so
# the notebook run is marked as failed.
logger.info("Starting POLICY Silver Layer")

start_time = time.time()

try:

    # ==========================================================
    # READ BRONZE
    # ==========================================================

    logger.info("Reading policy bronze table")

    # NOTE(review): the schema name "bronze" is hard-coded here while the
    # silver schema comes from CONFIG["schemas"] -- confirm whether a
    # configured bronze schema should be used instead.
    policy_bronze = spark.table(
        f"{CONFIG['catalog']}.bronze.{CONFIG['tables']['bronze']['policy']}"
    )

    # ==========================================================
    # TYPING
    # ==========================================================

    logger.info("Applying transformations and typing")

    # parse_date / parse_amount / normalize are not defined in this
    # notebook -- presumably they come from the utilities notebook run
    # above. Each result is added as a new suffixed column (_ts, _num,
    # _std); the raw bronze columns are left untouched.
    policy_typed = (
        policy_bronze
        .withColumn("policy_start_date_ts", parse_date("policy_start_date"))
        .withColumn("policy_end_date_ts", parse_date("policy_end_date"))
        .withColumn("coverage_limit_num", parse_amount("coverage_limit"))
        .withColumn("deductible_num", parse_amount("deductible"))
        .withColumn("premium_amount_num", parse_amount("premium_amount"))
        .withColumn("policy_status_std", normalize("policy_status"))
        .withColumn("policy_type_std", normalize("policy_type"))
        .withColumn("risk_region_std", normalize("risk_region"))
    )

    # ==========================================================
    # VALIDATION
    # ==========================================================

    logger.info("Running data quality validation")

    # The when-chain is evaluated top to bottom and the first matching rule
    # wins, so each row carries exactly one dq_status. The null-date rules
    # run before the range comparison, so that comparison only decides rows
    # where both dates are present.
    # Only policy_id, the two dates and coverage_limit_num are checked;
    # deductible_num and premium_amount_num are not validated here.
    # NOTE(review): the null checks assume the parse helpers yield null for
    # values they cannot parse -- confirm against the utilities notebook.
    policy_validated = policy_typed.withColumn(
        "dq_status",
        F.when(F.col("policy_id").isNull(), "FAIL_POLICY_ID")
        .when(F.col("policy_start_date_ts").isNull(), "FAIL_START_DATE")
        .when(F.col("policy_end_date_ts").isNull(), "FAIL_END_DATE")
        .when(
            F.col("policy_start_date_ts") > F.col("policy_end_date_ts"),
            "FAIL_INVALID_DATE_RANGE"
        )
        .when(F.col("coverage_limit_num").isNull(), "FAIL_COVERAGE")
        .otherwise("PASS")
    )

    # dq_status is never null (the chain ends in otherwise), so these two
    # filters split the validated rows into two disjoint sets.
    policy_clean = policy_validated.filter(F.col("dq_status") == "PASS")
    policy_quarantine = policy_validated.filter(F.col("dq_status") != "PASS")

    # ==========================================================
    # WRITE CLEAN TABLE
    # ==========================================================

    logger.info("Writing policy_clean_detailed")

    # Full refresh: overwrite mode replaces the table contents on every
    # run, and overwriteSchema lets the table schema change with them.
    policy_clean.write \
        .format("delta") \
        .mode("overwrite") \
        .option("overwriteSchema", "true") \
        .saveAsTable(CONFIG["tables"]["silver_clean"]["policy"])

    # ==========================================================
    # WRITE QUARANTINE
    # ==========================================================

    # NOTE(review): the quarantine write below is disabled, so rows that
    # fail validation are dropped from silver without being persisted
    # anywhere. Confirm this is intentional before relying on this output.

    # logger.info("Writing policy_quarantine")

    # policy_quarantine.write \
    #     .format("delta") \
    #     .mode("overwrite") \
    #     .option("overwriteSchema", "true") \
    #     .saveAsTable(CONFIG["tables"]["silver_quarantine"]["policy"])

    # ==========================================================
    # SUCCESS LOG
    # ==========================================================

    end_time = time.time()
    # builtins.round is called explicitly so the Python built-in is used
    # even if the name `round` has been rebound in the notebook namespace.
    duration = builtins.round(end_time - start_time, 2)

    logger.info(
        f"Policy Silver completed successfully | duration = {duration} sec"
    )

except Exception:

    # logger.exception logs at ERROR level and appends the active traceback
    # to the same record, so message and stack trace stay together.
    logger.exception("Policy Silver failed")
    # Bare raise re-raises the active exception unchanged so the run fails.
    raise

In [0]:
%sql
-- Display every column of the silver policy_clean_detailed table.
SELECT *
FROM claims_leakage.silver.policy_clean_detailed