In [0]:
from pyspark.sql.functions import*
from pyspark.sql.types import*
from pyspark.sql.window import*

In [0]:
%sql
-- Set the session defaults so that unqualified table names used later in this
-- notebook resolve to claims_leakage.silver.
USE CATALOG claims_leakage;
USE SCHEMA silver;

In [0]:
%run "/Workspace/Users/shoyofromconcrete@gmail.com/claims risk and leakage/silver/utilities"

In [0]:
# Load the raw policy records. The name is schema-qualified only, so the catalog
# comes from the session default (assumes the USE CATALOG cell above has run).
policy_bronze = spark.read.table("bronze.policy_bronze")


In [0]:
# Derive typed / standardised columns next to the raw ones.
# Each entry is (new column, helper, raw source column). The helpers come from
# the utilities notebook loaded via %run; their exact return types are not
# visible here -- presumably date, numeric and cleaned-string Columns
# respectively (TODO confirm in utilities).
typed_column_specs = [
    ("policy_start_date_ts", parse_date, "policy_start_date"),
    ("policy_end_date_ts", parse_date, "policy_end_date"),
    ("coverage_limit_num", parse_amount, "coverage_limit"),
    ("deductible_num", parse_amount, "deductible"),
    ("premium_amount_num", parse_amount, "premium_amount"),
    ("policy_status_std", normalize, "policy_status"),
    ("policy_type_std", normalize, "policy_type"),
    ("risk_region_std", normalize, "risk_region"),
]

# Always start from the untouched bronze frame so re-running the cell is idempotent.
policy_typed = policy_bronze
for typed_name, parse_fn, raw_name in typed_column_specs:
    policy_typed = policy_typed.withColumn(typed_name, parse_fn(raw_name))

display(policy_typed)


In [0]:
# Data-quality rules, evaluated in order: a row is tagged with the FIRST rule it
# violates, so the ordering below is significant. The date-range rule only sees
# rows whose two dates are both non-null because the null checks precede it.
dq_rules = [
    (col("policy_id").isNull(), "FAIL_POLICY_ID"),
    (col("policy_start_date_ts").isNull(), "FAIL_START_DATE"),
    (col("policy_end_date_ts").isNull(), "FAIL_END_DATE"),
    (col("policy_start_date_ts") > col("policy_end_date_ts"), "FAIL_INVALID_DATE_RANGE"),
    (col("coverage_limit_num").isNull(), "FAIL_COVERAGE"),
]

# Fold the rule list into a single CASE WHEN expression.
head_condition, head_label = dq_rules[0]
dq_status_expr = when(head_condition, head_label)
for rule_condition, rule_label in dq_rules[1:]:
    dq_status_expr = dq_status_expr.when(rule_condition, rule_label)

# Rows that violate no rule are marked "PASS", so dq_status is never null.
policy_validated = policy_typed.withColumn("dq_status", dq_status_expr.otherwise("PASS"))
display(policy_validated)


In [0]:
# Partition the validated rows: those that passed every DQ rule vs. the rest.
# dq_status is always populated, so the two frames together cover every row.
dq_passed = col("dq_status") == "PASS"
policy_clean = policy_validated.where(dq_passed)
policy_quarantine = policy_validated.where(~dq_passed)


In [0]:
# Inspect both partitions: accepted rows first, then the quarantined ones.
for dq_partition in (policy_clean, policy_quarantine):
    display(dq_partition)

In [0]:
# Persist the full-width clean frame (raw + derived columns + dq_status).
# Overwrite mode replaces the table contents on every run.
policy_clean_detailed = policy_clean
policy_clean_detailed.write.saveAsTable(
    "policy_clean_detailed",
    format="delta",
    mode="overwrite",
)

In [0]:
# Append the curated subset of clean policy columns to the policy_clean table.
#
# NOTE(review): "start_ts", "end_ts" and "is_current" are not created by any
# cell visible in this notebook (the derived date columns are named
# policy_start_date_ts / policy_end_date_ts). Unless bronze.policy_bronze
# already carries them, this select raises an AnalysisException -- confirm the
# intended source of these columns.
# NOTE(review): mode "append" adds the same rows again on every re-run, whereas
# policy_clean_detailed is overwritten -- confirm that accumulating rows is intended.
curated_policy_columns = [
    "policy_id",
    "policy_type_std",
    "policy_status_std",
    "coverage_limit_num",
    "deductible_num",
    "premium_amount_num",
    "risk_region_std",
    "start_ts",
    "end_ts",
    "is_current",
]

(
    policy_clean
    .select(*curated_policy_columns)
    .write
    .saveAsTable("policy_clean", format="delta", mode="append")
)


In [0]:
%sql
-- Sanity check: number of rows written to policy_clean_detailed.
SELECT
  COUNT(*)
FROM
  policy_clean_detailed


