In [0]:
from pyspark.sql.functions import*
from pyspark.sql.types import*
from pyspark.sql.window import*

In [0]:
%sql
-- Session defaults: unqualified table names in later cells resolve against
-- catalog claims_leakage and schema silver.
USE CATALOG claims_leakage;
USE SCHEMA silver;

In [0]:
%run "/Workspace/Users/shoyofromconcrete@gmail.com/claims risk and leakage/silver/utilities"

In [0]:
# Raw claims as landed in the bronze layer; the typing step below starts from this DataFrame.
claims_bronze = spark.read.table("bronze.claims_bronze")


In [0]:
# Derived columns added on top of the raw bronze columns (the originals are kept).
# parse_date / parse_amount / normalize are not defined in this notebook —
# presumably they come from the utilities notebook loaded via %run; confirm there
# what they return for unparseable input.
derived_columns = {
    "settlement_date_ts": parse_date("settlement_date"),
    "approved_amount_num": parse_amount("approved_amount"),
    "paid_amount_num": parse_amount("paid_amount"),
    "claim_status_std": normalize("claim_status"),
}

claims_typed = claims_bronze
for column_name, column_expr in derived_columns.items():
    claims_typed = claims_typed.withColumn(column_name, column_expr)


In [0]:
# Row-level data-quality verdict. The rules are evaluated top to bottom, so a row
# that breaks several rules is labelled with the first failing rule only; a row
# that breaks none is labelled "PASS".
dq_status_expr = (
    when(col("claim_id").isNull(), "FAIL_CLAIM_ID")
    .when(col("policy_id").isNull(), "FAIL_POLICY_ID")
    .when(col("approved_amount_num").isNull(), "FAIL_APPROVED_AMOUNT")
    .otherwise("PASS")
)

claims_validated = claims_typed.withColumn("dq_status", dq_status_expr)


In [0]:
# Split on the DQ verdict: passing rows continue down the pipeline,
# every other row goes to quarantine.
passed_dq = col("dq_status") == "PASS"

claims_clean = claims_validated.where(passed_dq)
claims_quarantine = claims_validated.where(~passed_dq)


In [0]:
# Keep a single row per claim_id: the one with the greatest start_ts.
# NOTE(review): start_ts is not derived anywhere in this notebook — presumably it
# is a column of the bronze table; confirm.
# NOTE(review): the ordering has no tie-breaker, so when several rows of a claim
# share the same start_ts the surviving row is whichever row_number() ranks first.
window_claim = Window.partitionBy("claim_id").orderBy(col("start_ts").desc())

ranked_claims = claims_clean.withColumn("rn", row_number().over(window_claim))
claims_snapshot = ranked_claims.where(col("rn") == 1).drop("rn")


In [0]:
# Replace the contents of silver.claims_clean with the deduplicated snapshot.
(
    claims_snapshot.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("silver.claims_clean")
)


In [0]:
# Replace the contents of silver.claims_quarantine with the rows that failed a DQ rule
# (their dq_status column records which rule).
(
    claims_quarantine.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("silver.claims_quarantine")
)


In [0]:
%sql
-- Preview of the persisted clean table. The unqualified name is resolved by the
-- SQL session (schema silver was selected at the top of the notebook); it is the
-- table, not the Python DataFrame variable that happens to share the name.
SELECT *
FROM claims_clean

In [0]:
%sql
-- Linkage summary for the clean claims table: total rows, rows without an
-- fnol_id, and rows with one.
SELECT
    COUNT(*)                               AS total_claims,
    SUM(IF(fnol_id IS NULL, 1, 0))         AS null_fnol_count,
    SUM(IF(fnol_id IS NOT NULL, 1, 0))     AS linked_claims
FROM silver.claims_clean;



In [0]:
%sql
-- Count claims whose policy_id disagrees with the canonical policy id of the FNOL
-- record they are linked to (inner join on fnol_id, so unlinked claims are excluded).
--
-- The comparison uses the null-safe equality operator <=>. With a plain != the
-- predicate evaluates to NULL whenever f.policy_id_canonical is NULL, and the row
-- silently drops out of the count; a linked FNOL with no canonical policy should
-- be reported as a mismatch, not hidden.
SELECT COUNT(*) AS mismatch_count
FROM silver.claims_clean c
JOIN silver.fnol_clean f
  ON c.fnol_id = f.fnol_id
WHERE NOT (c.policy_id <=> f.policy_id_canonical);


In [0]:
%sql
-- Number of rows in the clean claims table that carry no fnol_id.
SELECT COUNT(*)
FROM silver.claims_clean
WHERE isnull(fnol_id);




In [0]:
%sql
-- Linkage summary for the clean claims table: total rows, rows without an
-- fnol_id, and rows with one.
-- NOTE(review): this query is the same as the linkage summary run earlier in the
-- notebook; consider keeping only one copy.
SELECT
    COUNT(*)                               AS total_claims,
    SUM(IF(fnol_id IS NULL, 1, 0))         AS null_fnol_count,
    SUM(IF(fnol_id IS NOT NULL, 1, 0))     AS linked_claims
FROM silver.claims_clean;
