In [0]:
# ===========================================================
# 02_silver_processing.py
#
# Silver Layer - Enrich and clean Bronze PLC streams
# -----------------------------------------------------------
# Joins Bronze data with static machine_registry,
# performs data type standardization and derives useful fields.
# ===========================================================

import dlt
from pyspark.sql import Row
from pyspark.sql.functions import col, when, round as spark_round

# -----------------------------------------------------------
# STATIC TABLE: MACHINE REGISTRY
# -----------------------------------------------------------
@dlt.table(
    name="02_silver.machine_registry",
    comment="Static lookup table mapping plc_id to line and machine"
)
def machine_registry():
    """Build the static lookup table keyed by plc_id.

    The registry holds one row per (line, machine type) pair: three lines
    with four machine types each. plc_id runs from 1 to 12 in line-major
    order, and machine_name is "<machine_type>_<line_id>".
    """
    line_ids = ("L1", "L2", "L3")
    machine_types = ("Feeder", "DrillCutter", "Polisher", "Inspector")
    per_line = len(machine_types)

    registry_rows = [
        Row(
            # 1-based id: all machines of L1 first, then L2, then L3.
            plc_id=line_pos * per_line + type_pos + 1,
            line_id=line_id,
            machine_type=machine_type,
            machine_name=f"{machine_type}_{line_id}",
        )
        for line_pos, line_id in enumerate(line_ids)
        for type_pos, machine_type in enumerate(machine_types)
    ]
    return spark.createDataFrame(registry_rows)

# ===========================================================
# 1️⃣  FEEDER ENRICHED
# -----------------------------------------------------------
@dlt.table(
    name="02_silver.feeder_enriched",
    comment="Feeder PLC data joined with registry and cleaned."
)
def feeder_enriched():
    """Stream Feeder rows from Bronze and attach machine registry fields.

    Left-joins the Bronze feeder stream to the machine registry on plc_id
    and exposes the Bronze blank_count column under the name feed_count.
    """
    feeder_stream = dlt.read_stream("01_bronze.feeder_raw")
    registry = dlt.read("02_silver.machine_registry")

    # Columns carried through unchanged, in output order.
    passthrough_columns = (
        "event_time",
        "ingest_timestamp",
        "plc_id",
        "line_id",
        "machine_type",
        "machine_name",
    )

    with_registry = feeder_stream.join(registry, on="plc_id", how="left")
    return with_registry.select(
        *passthrough_columns,
        col("blank_count").alias("feed_count"),
    )

# ===========================================================
# 2️⃣  DRILL CUTTER ENRICHED
# -----------------------------------------------------------
@dlt.table(
    name="02_silver.drillcutter_enriched",
    comment="DrillCutter PLC data joined with registry and cleaned."
)
@dlt.expect("valid_temp", "temperature_c >= 0 AND temperature_c <= 700")
def drillcutter_enriched():
    """Stream DrillCutter rows from Bronze, enrich them and null negatives.

    Left-joins the Bronze drill cutter stream to the machine registry on
    plc_id, keeps the identifying columns plus the two sensor readings,
    and replaces negative temperature_c / vibration_mms values with NULL.
    The "valid_temp" expectation declares the 0..700 range for
    temperature_c.
    """
    drill_stream = dlt.read_stream("01_bronze.drillcutter_raw")
    registry = dlt.read("02_silver.machine_registry")

    output_columns = (
        "event_time",
        "ingest_timestamp",
        "plc_id",
        "line_id",
        "machine_type",
        "machine_name",
        "temperature_c",
        "vibration_mms",
    )
    cleaned = drill_stream.join(registry, on="plc_id", how="left").select(
        *output_columns
    )

    # Only negative readings are nulled here; no upper bound is applied in
    # code (the expectation above is the only place 700 appears).
    for sensor in ("temperature_c", "vibration_mms"):
        reading = col(sensor)
        cleaned = cleaned.withColumn(
            sensor, when(reading < 0, None).otherwise(reading)
        )
    return cleaned

# ===========================================================
# 3️⃣  POLISHER ENRICHED
# -----------------------------------------------------------
@dlt.table(
    name="02_silver.polisher_enriched",
    comment="Polisher PLC data joined with registry and cleaned."
)
@dlt.expect("valid_temp", "temperature_c >= 0 AND temperature_c<= 80")
def polisher_enriched():
    """Stream Polisher rows from Bronze, enrich them and null negatives.

    Left-joins the Bronze polisher stream to the machine registry on
    plc_id, keeps the identifying columns plus the two sensor readings,
    and replaces negative temperature_c / vibration_mms values with NULL.
    The "valid_temp" expectation declares the 0..80 range for
    temperature_c.
    """
    polisher_stream = dlt.read_stream("01_bronze.polisher_raw")
    registry = dlt.read("02_silver.machine_registry")

    output_columns = (
        "event_time",
        "ingest_timestamp",
        "plc_id",
        "line_id",
        "machine_type",
        "machine_name",
        "temperature_c",
        "vibration_mms",
    )
    cleaned = polisher_stream.join(registry, on="plc_id", how="left").select(
        *output_columns
    )

    # Negative readings become NULL; all other values pass through as-is.
    for sensor in ("temperature_c", "vibration_mms"):
        reading = col(sensor)
        cleaned = cleaned.withColumn(
            sensor, when(reading < 0, None).otherwise(reading)
        )
    return cleaned

# ===========================================================
# 4️⃣  INSPECTOR ENRICHED
# -----------------------------------------------------------
@dlt.table(
    name="02_silver.inspector_enriched",
    comment="Inspector PLC data joined with registry and enriched with defect rate."
)
def inspector_enriched():
    """Stream Inspector rows from Bronze and add a rounded defect rate.

    Left-joins the Bronze inspector stream to the machine registry on
    plc_id and derives defect_rate = defective_count / produced_count,
    rounded to 4 decimal places. When produced_count is not greater than
    zero the denominator is NULL, so defect_rate is NULL as well.
    """
    inspector_stream = dlt.read_stream("01_bronze.inspector_raw")
    registry = dlt.read("02_silver.machine_registry")

    produced = col("produced_count")
    defective = col("defective_count")

    # Guard the division: a non-positive produced_count yields a NULL
    # denominator instead of dividing by zero.
    safe_produced = when(produced > 0, produced).otherwise(None)
    defect_rate = spark_round(defective / safe_produced, 4).alias("defect_rate")

    with_registry = inspector_stream.join(registry, on="plc_id", how="left")
    return with_registry.select(
        "event_time",
        "ingest_timestamp",
        "plc_id",
        "line_id",
        "machine_type",
        "machine_name",
        produced,
        defective,
        defect_rate,
    )
