In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
import builtins
import traceback


# Marks the start of the test run in the cell output.
print("Unit Test Execution Started")


In [0]:
%run "/Workspace/Repos/shoyofromconcrete@gmail.com/Real_Time_Claims_Risk_Leakage_Detection_Platform-P_C_Insurance-/notebooks/silver/utilities"

In [0]:
%run "/Workspace/Repos/shoyofromconcrete@gmail.com/Real_Time_Claims_Risk_Leakage_Detection_Platform-P_C_Insurance-/config"

SECTION 1 — Utilities Tests

In [0]:
def test_parse_amount():
    """Check that ``parse_amount`` turns raw amount strings into numbers.

    Each case pairs a raw ``amount`` string with the value the ``parsed``
    column must hold for it: "3L" -> 300000, "10K" -> 10000, "1000" and
    "1,000" -> 1000, and the empty string -> null.

    Rows are compared by position, i.e. the collected rows are expected to
    come back in the order the cases were supplied.

    Raises:
        AssertionError: if any parsed value differs from its expectation.
    """
    cases = [
        ("3L", 300000),
        ("10K", 10000),
        ("1000", 1000),
        ("1,000", 1000),
        ("", None),
    ]

    # One-element tuples plus an explicit column name keep the schema
    # independent of dict-based inference.
    df = spark.createDataFrame([(raw,) for raw, _ in cases], ["amount"])

    result = df.withColumn("parsed", parse_amount("amount")).collect()

    for idx, (raw, expected) in enumerate(cases):
        actual = result[idx]["parsed"]
        if expected is None:
            # Null must be checked by identity, not with ``==``.
            assert actual is None, (
                f"parse_amount({raw!r}) returned {actual!r}, expected None"
            )
        else:
            assert actual == expected, (
                f"parse_amount({raw!r}) returned {actual!r}, expected {expected!r}"
            )

    print("✔ parse_amount test passed")


In [0]:
def test_normalize():
    """Check ``normalize`` on a padded lowercase value and a mixed-case value.

    " theft " must come back as "THEFT" and "Fire" as "FIRE"; rows are
    compared by position.

    Raises:
        AssertionError: if either normalized value differs from its expectation.
    """
    raw_values = spark.createDataFrame([(" theft ",), ("Fire",)], ["val"])
    normalized_rows = raw_values.withColumn("norm", normalize("val")).collect()

    # Expected outputs, listed in the same order as the input rows.
    for position, expected in enumerate(("THEFT", "FIRE")):
        assert normalized_rows[position]["norm"] == expected

    print("✔ normalize test passed")


SECTION 2 — Risk Engine Tests

In [0]:
def test_risk_score_logic():
    """Check the weighted risk score of a row that has every risk flag set.

    Builds a single row whose six flag columns are all 1, multiplies each flag
    by its weight from ``CONFIG["risk"]["weights"]``, adds the products into a
    ``risk_score`` column and requires the score to be at least 70.

    Raises:
        AssertionError: if the computed score is below 70.
    """
    # (flag column, weight key) pairs, in the order the terms are added up.
    flag_weight_pairs = (
        ("late_reporting_flag", "late_reporting"),
        ("high_fnol_amount_flag", "high_amount"),
        ("risky_loss_type_flag", "risky_loss_type"),
        ("risky_geo_flag", "risky_geo"),
        ("paid_gt_approved_flag", "paid_gt_approved"),
        ("paid_gt_coverage_flag", "paid_gt_coverage"),
    )
    flag_columns = [flag_name for flag_name, _ in flag_weight_pairs]

    # Single row with all flags on.
    all_flags_on = spark.createDataFrame([(1, 1, 1, 1, 1, 1)], flag_columns)

    weights = CONFIG["risk"]["weights"]

    # Accumulate the terms left to right. The builtin ``sum`` is deliberately
    # not used: the star import from pyspark.sql.functions shadows it.
    score_expr = None
    for flag_name, weight_key in flag_weight_pairs:
        term = col(flag_name) * weights[weight_key]
        score_expr = term if score_expr is None else score_expr + term

    scored_row = all_flags_on.withColumn("risk_score", score_expr).collect()[0]
    risk_score = scored_row["risk_score"]

    assert risk_score >= 70

    print("✔ risk scoring test passed")


SECTION 3 — Claims Generator Logic Test

In [0]:
from aws_lambda.claims_generator import generate_claim_from_fnol

def test_claim_generator():
    """Check the claim built by ``generate_claim_from_fnol`` from a minimal FNOL.

    The generated claim must carry over the FNOL's ``fnol_id`` and
    ``policy_id`` and must contain a ``claim_id`` key.

    Raises:
        AssertionError: if an identifier is not carried over or ``claim_id``
            is missing.
    """
    fnol_event = dict(fnol_id="FNOL123", policy_id="POL123")

    generated = generate_claim_from_fnol(fnol_event)

    # Compare against literals rather than the input dict, so the check does
    # not depend on whether the generator touches its argument.
    for key, expected in (("fnol_id", "FNOL123"), ("policy_id", "POL123")):
        assert generated[key] == expected
    assert "claim_id" in generated

    print("✔ claims generator test passed")


SECTION 4 — Data Quality Logic Test

In [0]:
def test_dq_logic():
    """Check the data-quality status rule on rows with a missing identifier.

    One row has a null ``claim_id`` and the other a null ``policy_id``. The
    rule labels a null ``claim_id`` as "FAIL_CLAIM_ID", otherwise a null
    ``policy_id`` as "FAIL_POLICY_ID", otherwise "PASS". Both failure labels
    must appear in the output.

    Raises:
        AssertionError: if either failure label is absent from the results.
    """
    column_names = ["claim_id", "policy_id", "approved_amount_num"]
    rows = [
        (None, "POL123", 1000.0),
        ("CLM1", None, 1000.0),
    ]
    claims = spark.createDataFrame(rows, column_names)

    missing_claim_id = col("claim_id").isNull()
    missing_policy_id = col("policy_id").isNull()
    # The claim_id check comes first, so it wins when both ids are null.
    dq_status_expr = (
        when(missing_claim_id, "FAIL_CLAIM_ID")
        .when(missing_policy_id, "FAIL_POLICY_ID")
        .otherwise("PASS")
    )

    statuses = []
    for row in claims.withColumn("dq_status", dq_status_expr).collect():
        statuses.append(row["dq_status"])

    for expected_status in ("FAIL_CLAIM_ID", "FAIL_POLICY_ID"):
        assert expected_status in statuses

    print("✔ DQ validation test passed")


In [0]:
# Test runner: executes every test function defined in this notebook.
#
# Each test runs in isolation so one failure does not hide the results of the
# others. Any exception (failed assertion or unexpected error) is reported with
# its traceback and recorded. If at least one test failed, an AssertionError is
# raised at the end so the notebook run is marked as failed instead of
# finishing as if everything had passed.
failed_tests = []

for test_fn in (
    test_parse_amount,
    test_normalize,
    test_risk_score_logic,
    test_claim_generator,
    test_dq_logic,
):
    try:
        test_fn()
    except Exception:
        # Not swallowed: the failure is recorded here and re-raised below.
        print("TEST FAILED")
        traceback.print_exc()
        failed_tests.append(test_fn.__name__)

if failed_tests:
    raise AssertionError("Failed tests: " + ", ".join(failed_tests))

print("\nALL TESTS PASSED SUCCESSFULLY")
