In [None]:
import dlt
from pyspark.sql.functions import col, to_timestamp, coalesce, monotonically_increasing_id, year, current_timestamp

@dlt.table(
    name="flood_dev.silver.floods_clean",
    comment="Flattened and cleaned version of the UK Environment Agency flood data.",
    table_properties={"quality": "silver"},
)
@dlt.expect_all_or_drop(
    {
        "non_null_alert_description": "alert_description IS NOT NULL",
        "non_null_severity": "severity IS NOT NULL",
        "valid_severity_level": "severityLevel BETWEEN 1 AND 5",
        "recent_event_time": "event_time IS NOT NULL AND year(event_time) >= 2010",
    }
)
def floods_clean():
    """Build the silver flood-alert table from the ``floods_raw`` dataset.

    Reads ``floods_raw`` through ``dlt.read``, projects a fixed set of
    columns (renaming most of them to snake_case), casts ``severityLevel``
    to integer, derives ``event_time``, and appends a ``record_id`` column.

    ``event_time`` is the first non-null value among ``timeSeverityChanged``,
    ``timeMessageChanged`` and ``timeRaised`` (in that order), parsed with
    ``to_timestamp``.

    The expectations declared on the decorator are evaluated against the
    returned columns; per ``expect_all_or_drop``, rows violating any of them
    are dropped from the target table.

    Returns:
        The projected DataFrame with the additional ``record_id`` column.
    """
    raw_alerts = dlt.read("floods_raw")

    # Pick the first populated timestamp, in priority order, then parse it.
    first_known_time = coalesce(
        col("timeSeverityChanged"),
        col("timeMessageChanged"),
        col("timeRaised"),
    )
    event_time = to_timestamp(first_known_time).alias("event_time")

    # Output column order matters for the table schema; keep it stable.
    projection = [
        col("description").alias("alert_description"),
        col("severity"),
        # Not aliased: the "valid_severity_level" expectation refers to
        # this column by its original camelCase name.
        col("severityLevel").cast("integer"),
        event_time,
        col("eaAreaName").alias("area_name"),
        col("eaRegionName").alias("region_name"),
        col("floodAreaID").alias("flood_area_id"),
        col("isTidal").alias("is_tidal"),
        col("message").alias("alert_message"),
        col("ingestion_time"),
        col("ingestion_id"),
    ]

    # NOTE(review): monotonically_increasing_id() yields unique but
    # non-consecutive ids that presumably differ between recomputations —
    # confirm nothing downstream treats record_id as a stable key.
    return raw_alerts.select(*projection).withColumn(
        "record_id", monotonically_increasing_id()
    )
