# Refresh DQ expectations

This notebook represents the initial creation/refresh of a set of data quality rules that are applied as DLT expectations in the data pipeline. It is executed as a task prior to each invocation of the DLT pipeline, but in reality it could either be run just once, or be managed by a data quality process/framework that stands alone from the pipeline.

In [0]:
# Data quality rules: each row names a target table, a human-readable rule
# name, a SQL boolean expression, and the action to take when a record fails.
schema = "target_table STRING, name STRING, expr STRING, failure_action STRING"

silver_table = "silver_turbine"

# (name, expr, failure_action) for every rule applied to the silver table.
# NOTE(review): "Wind speed is positive" actually accepts 0 (expr uses >= 0).
_silver_rules = [
    ("Turbine id is non-null", "(turbine_id IS NOT NULL)", "drop"),
    ("Wind speed is positive", "(wind_speed IS NOT NULL) AND (wind_speed >= 0)", "drop"),
    ("Wind direction between 0-360", "(wind_direction IS NOT NULL) AND (wind_direction >= 0) AND (wind_direction < 360)", "drop"),
    ("Power output is non-null", "power_output IS NOT NULL", "drop"),
]

# Prefix each rule with its target table so the tuples line up with `schema`.
silver_expectations = [(silver_table, *rule) for rule in _silver_rules]

# Persist the rules as a Delta table, replacing whatever was stored before.
df = spark.createDataFrame(silver_expectations, schema)
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("workspace.turbines.expectations")
)