In [0]:
# 02_silver_processing: Cleaned and enriched sensor data
# DLT reads from Bronze tables using dlt.read_stream() - dependencies handled automatically

import dlt
from pyspark.sql.functions import col

In [0]:
import dlt
from pyspark.sql.functions import col, broadcast

@dlt.table(
    name="machine_metadata",
    comment="Static metadata for production line machines and their monitored characteristics",
    table_properties={"quality": "silver"}
)
def machine_metadata():
    """Build the static machine lookup table from an in-code catalogue.

    Returns:
        A DataFrame created by ``spark.createDataFrame`` from one dict per
        machine (machine_id 1 through 6). No explicit schema is passed, so
        column types are inferred from the dict values.
    """
    # Keys shared by every machine record, in the order they are inserted
    # into each dict.
    field_names = (
        "machine_id",
        "machine_name",
        "machine_type",
        "location",
        "function",
        "monitored_sensors",
        "material_processed",
        "criticality",
    )

    # One tuple per machine, positionally aligned with ``field_names``.
    catalogue = (
        (
            1,
            "Main Drive Motor",
            "Electric motor",
            "Production Line A",
            "Primary power source for the production line",
            "temperature, vibration, power",
            "Steel / Aluminum alloys",
            "High",
        ),
        (
            2,
            "Conveyor Belt Motor",
            "Electric motor",
            "Material transport section",
            "Moves raw and semi-finished parts between stations",
            "temperature, vibration, speed",
            "Steel / Aluminum alloys",
            "Medium",
        ),
        (
            3,
            "Hydraulic Press",
            "Hydraulic press system",
            "Forming station",
            "Applies high pressure for shaping metal components",
            "temperature, pressure, cycle_count",
            "Steel / Aluminum alloys",
            "High",
        ),
        (
            4,
            "CNC Spindle",
            "CNC machining spindle",
            "Machining center",
            "High-precision machining of metal parts",
            "temperature, vibration, rpm",
            "Steel / Aluminum alloys",
            "High",
        ),
        (
            5,
            "Cooling System Pump",
            "Industrial cooling pump",
            "Cooling subsystem",
            "Circulates coolant to maintain safe operating temperatures",
            "temperature, flow_rate",
            "Coolant fluids",
            "Medium",
        ),
        (
            6,
            "Packaging Unit Motor",
            "Electric motor",
            "Packaging station",
            "Drives packaging and final product handling",
            "temperature, vibration, current",
            "Finished metal components",
            "Medium",
        ),
    )

    # Re-assemble the per-machine dicts that createDataFrame infers from.
    machines = [dict(zip(field_names, values)) for values in catalogue]
    return spark.createDataFrame(machines)

@dlt.table(
    name="silver_temperature",
    comment="Temperature readings enriched with machine metadata",
    table_properties={"quality": "silver"},
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_temperature_range", "temperature BETWEEN 20 AND 90")
def silver_temperature():
    """Stream bronze temperature readings and attach machine name and location.

    Returns:
        A streaming DataFrame with the columns ``machine_id``,
        ``machine_name``, ``location``, ``event_time`` (cast to timestamp)
        and ``temperature`` (the bronze ``value`` column, renamed).
    """
    # Static lookup side of the join; read before the stream is opened.
    machine_lookup = dlt.read("machine_metadata")

    readings = dlt.read_stream("bronze_temperature")
    readings = readings.withColumn("event_time", col("event_time").cast("timestamp"))
    readings = readings.withColumnRenamed("value", "temperature")

    # Left join: readings are kept even when no metadata row matches.
    enriched = readings.join(broadcast(machine_lookup), on="machine_id", how="left")

    output_columns = [
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "temperature",
    ]
    return enriched.select(*output_columns)

@dlt.table(
    name="silver_vibration",
    comment="Vibration readings enriched with machine metadata",
    table_properties={"quality": "silver"},
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_vibration_range", "vibration BETWEEN 0 AND 0.08")
def silver_vibration():
    """Stream bronze vibration readings and attach machine name and location.

    Returns:
        A streaming DataFrame with the columns ``machine_id``,
        ``machine_name``, ``location``, ``event_time`` (cast to timestamp)
        and ``vibration`` (the bronze ``value`` column, renamed).
    """
    # Static lookup side of the join; read before the stream is opened.
    machine_lookup = dlt.read("machine_metadata")

    readings = dlt.read_stream("bronze_vibration")
    readings = readings.withColumn("event_time", col("event_time").cast("timestamp"))
    readings = readings.withColumnRenamed("value", "vibration")

    # Left join: readings are kept even when no metadata row matches.
    enriched = readings.join(broadcast(machine_lookup), on="machine_id", how="left")

    output_columns = [
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "vibration",
    ]
    return enriched.select(*output_columns)

@dlt.table(
    name="silver_power",
    comment="Power consumption readings enriched with machine metadata",
    table_properties={"quality": "silver"},
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_power_range", "power_kw BETWEEN 5 AND 50")
def silver_power():
    """Stream bronze power readings and attach machine name and location.

    Returns:
        A streaming DataFrame with the columns ``machine_id``,
        ``machine_name``, ``location``, ``event_time`` (cast to timestamp)
        and ``power_kw`` (the bronze ``value`` column, renamed).
    """
    # Static lookup side of the join; read before the stream is opened.
    machine_lookup = dlt.read("machine_metadata")

    readings = dlt.read_stream("bronze_power")
    readings = readings.withColumn("event_time", col("event_time").cast("timestamp"))
    readings = readings.withColumnRenamed("value", "power_kw")

    # Left join: readings are kept even when no metadata row matches.
    enriched = readings.join(broadcast(machine_lookup), on="machine_id", how="left")

    output_columns = [
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "power_kw",
    ]
    return enriched.select(*output_columns)

@dlt.table(
    name="silver_pressure",
    comment="Pressure readings enriched with machine metadata",
    table_properties={"quality": "silver"},
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_pressure_range", "pressure_bar BETWEEN 100 AND 300")
def silver_pressure():
    """Stream bronze pressure readings and attach machine name and location.

    Returns:
        A streaming DataFrame with the columns ``machine_id``,
        ``machine_name``, ``location``, ``event_time`` (cast to timestamp)
        and ``pressure_bar`` (the bronze ``value`` column, renamed).
    """
    # Static lookup side of the join; read before the stream is opened.
    machine_lookup = dlt.read("machine_metadata")

    readings = dlt.read_stream("bronze_pressure")
    readings = readings.withColumn("event_time", col("event_time").cast("timestamp"))
    readings = readings.withColumnRenamed("value", "pressure_bar")

    # Left join: readings are kept even when no metadata row matches.
    enriched = readings.join(broadcast(machine_lookup), on="machine_id", how="left")

    output_columns = [
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "pressure_bar",
    ]
    return enriched.select(*output_columns)

@dlt.table(
    name="silver_flow_rate",
    comment="Flow rate readings enriched with machine metadata",
    table_properties={"quality": "silver"},
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_flow_range", "flow_rate_l_min BETWEEN 50 AND 200")
def silver_flow_rate():
    """Stream bronze flow-rate readings and attach machine name and location.

    Returns:
        A streaming DataFrame with the columns ``machine_id``,
        ``machine_name``, ``location``, ``event_time`` (cast to timestamp)
        and ``flow_rate_l_min`` (the bronze ``value`` column, renamed).
    """
    # Static lookup side of the join; read before the stream is opened.
    machine_lookup = dlt.read("machine_metadata")

    readings = dlt.read_stream("bronze_flow_rate")
    readings = readings.withColumn("event_time", col("event_time").cast("timestamp"))
    readings = readings.withColumnRenamed("value", "flow_rate_l_min")

    # Left join: readings are kept even when no metadata row matches.
    enriched = readings.join(broadcast(machine_lookup), on="machine_id", how="left")

    output_columns = [
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "flow_rate_l_min",
    ]
    return enriched.select(*output_columns)

@dlt.table(
    name="silver_rpm",
    comment="RPM readings enriched with machine metadata",
    table_properties={"quality": "silver"},
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_rpm_range", "rpm BETWEEN 500 AND 5000")
def silver_rpm():
    """Stream bronze RPM readings and attach machine name and location.

    Returns:
        A streaming DataFrame with the columns ``machine_id``,
        ``machine_name``, ``location``, ``event_time`` (cast to timestamp)
        and ``rpm`` (the bronze ``value`` column, renamed).
    """
    # Static lookup side of the join; read before the stream is opened.
    machine_lookup = dlt.read("machine_metadata")

    readings = dlt.read_stream("bronze_rpm")
    readings = readings.withColumn("event_time", col("event_time").cast("timestamp"))
    readings = readings.withColumnRenamed("value", "rpm")

    # Left join: readings are kept even when no metadata row matches.
    enriched = readings.join(broadcast(machine_lookup), on="machine_id", how="left")

    output_columns = [
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "rpm",
    ]
    return enriched.select(*output_columns)

@dlt.table(
    name="silver_speed",
    comment="Speed readings enriched with machine metadata",
    table_properties={"quality": "silver"},
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_speed_range", "speed_m_s BETWEEN 0.5 AND 2.5")
def silver_speed():
    """Stream bronze speed readings and attach machine name and location.

    Returns:
        A streaming DataFrame with the columns ``machine_id``,
        ``machine_name``, ``location``, ``event_time`` (cast to timestamp)
        and ``speed_m_s`` (the bronze ``value`` column, renamed).
    """
    # Static lookup side of the join; read before the stream is opened.
    machine_lookup = dlt.read("machine_metadata")

    readings = dlt.read_stream("bronze_speed")
    readings = readings.withColumn("event_time", col("event_time").cast("timestamp"))
    readings = readings.withColumnRenamed("value", "speed_m_s")

    # Left join: readings are kept even when no metadata row matches.
    enriched = readings.join(broadcast(machine_lookup), on="machine_id", how="left")

    output_columns = [
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "speed_m_s",
    ]
    return enriched.select(*output_columns)

@dlt.table(
    name="silver_current",
    comment="Current readings enriched with machine metadata",
    table_properties={"quality": "silver"},
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_current_range", "current_a BETWEEN 5 AND 20")
def silver_current():
    """Stream bronze current readings and attach machine name and location.

    Returns:
        A streaming DataFrame with the columns ``machine_id``,
        ``machine_name``, ``location``, ``event_time`` (cast to timestamp)
        and ``current_a`` (the bronze ``value`` column, renamed).
    """
    # Static lookup side of the join; read before the stream is opened.
    machine_lookup = dlt.read("machine_metadata")

    readings = dlt.read_stream("bronze_current")
    readings = readings.withColumn("event_time", col("event_time").cast("timestamp"))
    readings = readings.withColumnRenamed("value", "current_a")

    # Left join: readings are kept even when no metadata row matches.
    enriched = readings.join(broadcast(machine_lookup), on="machine_id", how="left")

    output_columns = [
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "current_a",
    ]
    return enriched.select(*output_columns)