In [0]:
# 02_silver_processing: Cleaned and enriched sensor data
# DLT reads from Bronze tables using dlt.read_stream() - dependencies handled automatically

import dlt
from pyspark.sql.functions import col

# -----------------------------
# Machine Metadata (static reference table)
# Built in-memory with spark.createDataFrame(); downstream tables read it with dlt.read() because it is static
# -----------------------------
@dlt.table(
    name="machine_metadata",
    comment="Static metadata for production line machines and their monitored characteristics",
    table_properties={"quality": "silver"}
)
def machine_metadata():
    """Return the static reference table of production-line machines.

    The six machine records are hard-coded below. Each record is turned
    into a dict keyed by ``field_names`` and the list of dicts is handed to
    ``spark.createDataFrame`` so the schema is inferred from the data.
    """
    # Keys of every record, in the order the values appear in each row below.
    field_names = (
        "machine_id",
        "machine_name",
        "machine_type",
        "location",
        "function",
        "monitored_sensors",
        "material_processed",
        "criticality",
    )

    # One tuple per machine; positions line up with field_names.
    rows = (
        (
            1,
            "Main Drive Motor",
            "Electric motor",
            "Production Line A",
            "Primary power source for the production line",
            "temperature, vibration, power",
            "Steel / Aluminum alloys",
            "High",
        ),
        (
            2,
            "Conveyor Belt Motor",
            "Electric motor",
            "Material transport section",
            "Moves raw and semi-finished parts between stations",
            "temperature, vibration, speed",
            "Steel / Aluminum alloys",
            "Medium",
        ),
        (
            3,
            "Hydraulic Press",
            "Hydraulic press system",
            "Forming station",
            "Applies high pressure for shaping metal components",
            "temperature, pressure, cycle_count",
            "Steel / Aluminum alloys",
            "High",
        ),
        (
            4,
            "CNC Spindle",
            "CNC machining spindle",
            "Machining center",
            "High-precision machining of metal parts",
            "temperature, vibration, rpm",
            "Steel / Aluminum alloys",
            "High",
        ),
        (
            5,
            "Cooling System Pump",
            "Industrial cooling pump",
            "Cooling subsystem",
            "Circulates coolant to maintain safe operating temperatures",
            "temperature, flow_rate",
            "Coolant fluids",
            "Medium",
        ),
        (
            6,
            "Packaging Unit Motor",
            "Electric motor",
            "Packaging station",
            "Drives packaging and final product handling",
            "temperature, vibration, current",
            "Finished metal components",
            "Medium",
        ),
    )

    records = [dict(zip(field_names, row)) for row in rows]
    return spark.createDataFrame(records)

# -----------------------------
# Temperature
# -----------------------------
@dlt.table(
    name="silver_temperature",
    comment="Temperature readings enriched with machine metadata",
    table_properties={"quality": "silver"}
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
# Expectations are evaluated against the DataFrame this function returns, in
# which the bronze `value` column has already been renamed. The range check
# therefore has to reference `temperature`, not `value`.
@dlt.expect("valid_temperature_range", "temperature BETWEEN 20 AND 90")
def silver_temperature():
    """Return cleaned temperature readings joined with machine metadata.

    Streams ``bronze_temperature``, casts ``event_time`` to a timestamp,
    renames ``value`` to ``temperature`` and left-joins the static
    ``machine_metadata`` table on ``machine_id``.

    Expectations:
        * ``valid_event_time`` drops rows whose ``event_time`` is NULL
          (including values that could not be cast to a timestamp).
        * ``valid_temperature_range`` records readings outside 20-90 as
          violations but keeps the rows.

    Returns:
        Streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``temperature``.
    """
    return (
        # dlt.read_stream() makes DLT materialize bronze_temperature first.
        dlt.read_stream("bronze_temperature")
           .withColumn("event_time", col("event_time").cast("timestamp"))
           .withColumnRenamed("value", "temperature")
           # dlt.read() (batch read) is used for the static reference data.
           .join(dlt.read("machine_metadata"), "machine_id", "left")
           .select(
               "machine_id",
               "machine_name",
               "location",
               "event_time",
               "temperature"
           )
    )

# -----------------------------
# Vibration
# -----------------------------
@dlt.table(
    name="silver_vibration",
    comment="Vibration readings enriched with machine metadata",
    table_properties={"quality": "silver"}
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
# Expectations are evaluated against the DataFrame this function returns, in
# which the bronze `value` column has already been renamed. The range check
# therefore has to reference `vibration`, not `value`.
@dlt.expect("valid_vibration_range", "vibration BETWEEN 0 AND 0.08")
def silver_vibration():
    """Return cleaned vibration readings joined with machine metadata.

    Streams ``bronze_vibration``, casts ``event_time`` to a timestamp,
    renames ``value`` to ``vibration`` and left-joins the static
    ``machine_metadata`` table on ``machine_id``.

    Expectations:
        * ``valid_event_time`` drops rows whose ``event_time`` is NULL
          (including values that could not be cast to a timestamp).
        * ``valid_vibration_range`` records readings outside 0-0.08 as
          violations but keeps the rows.

    Returns:
        Streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``vibration``.
    """
    return (
        dlt.read_stream("bronze_vibration")
           .withColumn("event_time", col("event_time").cast("timestamp"))
           .withColumnRenamed("value", "vibration")
           # Batch read of the static reference table (stream-static join).
           .join(dlt.read("machine_metadata"), "machine_id", "left")
           .select(
               "machine_id",
               "machine_name",
               "location",
               "event_time",
               "vibration"
           )
    )

# -----------------------------
# Power
# -----------------------------
@dlt.table(
    name="silver_power",
    comment="Power consumption readings enriched with machine metadata",
    table_properties={"quality": "silver"}
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
# Expectations are evaluated against the DataFrame this function returns, in
# which the bronze `value` column has already been renamed. The range check
# therefore has to reference `power_kw`, not `value`.
@dlt.expect("valid_power_range", "power_kw BETWEEN 5 AND 50")
def silver_power():
    """Return cleaned power readings joined with machine metadata.

    Streams ``bronze_power``, casts ``event_time`` to a timestamp, renames
    ``value`` to ``power_kw`` and left-joins the static ``machine_metadata``
    table on ``machine_id``.

    Expectations:
        * ``valid_event_time`` drops rows whose ``event_time`` is NULL
          (including values that could not be cast to a timestamp).
        * ``valid_power_range`` records readings outside 5-50 as
          violations but keeps the rows.

    Returns:
        Streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``power_kw``.
    """
    return (
        dlt.read_stream("bronze_power")
           .withColumn("event_time", col("event_time").cast("timestamp"))
           .withColumnRenamed("value", "power_kw")
           # Batch read of the static reference table (stream-static join).
           .join(dlt.read("machine_metadata"), "machine_id", "left")
           .select(
               "machine_id",
               "machine_name",
               "location",
               "event_time",
               "power_kw"
           )
    )

# -----------------------------
# Pressure
# -----------------------------
@dlt.table(
    name="silver_pressure",
    comment="Pressure readings enriched with machine metadata",
    table_properties={"quality": "silver"}
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
# Expectations are evaluated against the DataFrame this function returns, in
# which the bronze `value` column has already been renamed. The range check
# therefore has to reference `pressure_bar`, not `value`.
@dlt.expect("valid_pressure_range", "pressure_bar BETWEEN 100 AND 300")
def silver_pressure():
    """Return cleaned pressure readings joined with machine metadata.

    Streams ``bronze_pressure``, casts ``event_time`` to a timestamp,
    renames ``value`` to ``pressure_bar`` and left-joins the static
    ``machine_metadata`` table on ``machine_id``.

    Expectations:
        * ``valid_event_time`` drops rows whose ``event_time`` is NULL
          (including values that could not be cast to a timestamp).
        * ``valid_pressure_range`` records readings outside 100-300 as
          violations but keeps the rows.

    Returns:
        Streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``pressure_bar``.
    """
    return (
        dlt.read_stream("bronze_pressure")
           .withColumn("event_time", col("event_time").cast("timestamp"))
           .withColumnRenamed("value", "pressure_bar")
           # Batch read of the static reference table (stream-static join).
           .join(dlt.read("machine_metadata"), "machine_id", "left")
           .select(
               "machine_id",
               "machine_name",
               "location",
               "event_time",
               "pressure_bar"
           )
    )

# -----------------------------
# Flow Rate
# -----------------------------
@dlt.table(
    name="silver_flow_rate",
    comment="Flow rate readings enriched with machine metadata",
    table_properties={"quality": "silver"}
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
# Expectations are evaluated against the DataFrame this function returns, in
# which the bronze `value` column has already been renamed. The range check
# therefore has to reference `flow_rate_l_min`, not `value`.
@dlt.expect("valid_flow_range", "flow_rate_l_min BETWEEN 50 AND 200")
def silver_flow_rate():
    """Return cleaned flow-rate readings joined with machine metadata.

    Streams ``bronze_flow_rate``, casts ``event_time`` to a timestamp,
    renames ``value`` to ``flow_rate_l_min`` and left-joins the static
    ``machine_metadata`` table on ``machine_id``.

    Expectations:
        * ``valid_event_time`` drops rows whose ``event_time`` is NULL
          (including values that could not be cast to a timestamp).
        * ``valid_flow_range`` records readings outside 50-200 as
          violations but keeps the rows.

    Returns:
        Streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``flow_rate_l_min``.
    """
    return (
        dlt.read_stream("bronze_flow_rate")
           .withColumn("event_time", col("event_time").cast("timestamp"))
           .withColumnRenamed("value", "flow_rate_l_min")
           # Batch read of the static reference table (stream-static join).
           .join(dlt.read("machine_metadata"), "machine_id", "left")
           .select(
               "machine_id",
               "machine_name",
               "location",
               "event_time",
               "flow_rate_l_min"
           )
    )

# -----------------------------
# RPM
# -----------------------------
@dlt.table(
    name="silver_rpm",
    comment="RPM readings enriched with machine metadata",
    table_properties={"quality": "silver"}
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
# Expectations are evaluated against the DataFrame this function returns, in
# which the bronze `value` column has already been renamed. The range check
# therefore has to reference `rpm`, not `value`.
@dlt.expect("valid_rpm_range", "rpm BETWEEN 500 AND 5000")
def silver_rpm():
    """Return cleaned RPM readings joined with machine metadata.

    Streams ``bronze_rpm``, casts ``event_time`` to a timestamp, renames
    ``value`` to ``rpm`` and left-joins the static ``machine_metadata``
    table on ``machine_id``.

    Expectations:
        * ``valid_event_time`` drops rows whose ``event_time`` is NULL
          (including values that could not be cast to a timestamp).
        * ``valid_rpm_range`` records readings outside 500-5000 as
          violations but keeps the rows.

    Returns:
        Streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``rpm``.
    """
    return (
        dlt.read_stream("bronze_rpm")
           .withColumn("event_time", col("event_time").cast("timestamp"))
           .withColumnRenamed("value", "rpm")
           # Batch read of the static reference table (stream-static join).
           .join(dlt.read("machine_metadata"), "machine_id", "left")
           .select(
               "machine_id",
               "machine_name",
               "location",
               "event_time",
               "rpm"
           )
    )

# -----------------------------
# Speed
# -----------------------------
@dlt.table(
    name="silver_speed",
    comment="Speed readings enriched with machine metadata",
    table_properties={"quality": "silver"}
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
# Expectations are evaluated against the DataFrame this function returns, in
# which the bronze `value` column has already been renamed. The range check
# therefore has to reference `speed_m_s`, not `value`.
@dlt.expect("valid_speed_range", "speed_m_s BETWEEN 0.5 AND 2.5")
def silver_speed():
    """Return cleaned speed readings joined with machine metadata.

    Streams ``bronze_speed``, casts ``event_time`` to a timestamp, renames
    ``value`` to ``speed_m_s`` and left-joins the static
    ``machine_metadata`` table on ``machine_id``.

    Expectations:
        * ``valid_event_time`` drops rows whose ``event_time`` is NULL
          (including values that could not be cast to a timestamp).
        * ``valid_speed_range`` records readings outside 0.5-2.5 as
          violations but keeps the rows.

    Returns:
        Streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``speed_m_s``.
    """
    return (
        dlt.read_stream("bronze_speed")
           .withColumn("event_time", col("event_time").cast("timestamp"))
           .withColumnRenamed("value", "speed_m_s")
           # Batch read of the static reference table (stream-static join).
           .join(dlt.read("machine_metadata"), "machine_id", "left")
           .select(
               "machine_id",
               "machine_name",
               "location",
               "event_time",
               "speed_m_s"
           )
    )

# -----------------------------
# Current
# -----------------------------
@dlt.table(
    name="silver_current",
    comment="Current readings enriched with machine metadata",
    table_properties={"quality": "silver"}
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
# Expectations are evaluated against the DataFrame this function returns, in
# which the bronze `value` column has already been renamed. The range check
# therefore has to reference `current_a`, not `value`.
@dlt.expect("valid_current_range", "current_a BETWEEN 5 AND 20")
def silver_current():
    """Return cleaned current readings joined with machine metadata.

    Streams ``bronze_current``, casts ``event_time`` to a timestamp,
    renames ``value`` to ``current_a`` and left-joins the static
    ``machine_metadata`` table on ``machine_id``.

    Expectations:
        * ``valid_event_time`` drops rows whose ``event_time`` is NULL
          (including values that could not be cast to a timestamp).
        * ``valid_current_range`` records readings outside 5-20 as
          violations but keeps the rows.

    Returns:
        Streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``current_a``.
    """
    return (
        dlt.read_stream("bronze_current")
           .withColumn("event_time", col("event_time").cast("timestamp"))
           .withColumnRenamed("value", "current_a")
           # Batch read of the static reference table (stream-static join).
           .join(dlt.read("machine_metadata"), "machine_id", "left")
           .select(
               "machine_id",
               "machine_name",
               "location",
               "event_time",
               "current_a"
           )
    )