In [0]:
import dlt
from pyspark.sql.functions import col

In [0]:
import dlt

@dlt.table(
    name="machine_metadata",
    comment="Static metadata for production line machines and their monitored characteristics",
    table_properties={"quality": "silver"},
)
def machine_metadata():
    """Build the static lookup table describing the production-line machines.

    The six machine records are hard-coded here; each one carries an integer
    ``machine_id`` plus descriptive string attributes. The records are turned
    into dictionaries and handed to ``spark.createDataFrame`` without an
    explicit schema, so column types are inferred by Spark.

    Returns:
        A Spark DataFrame with one row per machine.
    """
    # Attribute names, in the order the values appear in each record below.
    field_names = (
        "machine_id",
        "machine_name",
        "machine_type",
        "location",
        "function",
        "monitored_sensors",
        "material_processed",
        "criticality",
    )

    # One tuple per machine; positions line up with field_names.
    catalog = (
        (
            1,
            "Main Drive Motor",
            "Electric motor",
            "Production Line A",
            "Primary power source for the production line",
            "temperature, vibration, power",
            "Steel / Aluminum alloys",
            "High",
        ),
        (
            2,
            "Conveyor Belt Motor",
            "Electric motor",
            "Material transport section",
            "Moves raw and semi-finished parts between stations",
            "temperature, vibration, speed",
            "Steel / Aluminum alloys",
            "Medium",
        ),
        (
            3,
            "Hydraulic Press",
            "Hydraulic press system",
            "Forming station",
            "Applies high pressure for shaping metal components",
            "temperature, pressure, cycle_count",
            "Steel / Aluminum alloys",
            "High",
        ),
        (
            4,
            "CNC Spindle",
            "CNC machining spindle",
            "Machining center",
            "High-precision machining of metal parts",
            "temperature, vibration, rpm",
            "Steel / Aluminum alloys",
            "High",
        ),
        (
            5,
            "Cooling System Pump",
            "Industrial cooling pump",
            "Cooling subsystem",
            "Circulates coolant to maintain safe operating temperatures",
            "temperature, flow_rate",
            "Coolant fluids",
            "Medium",
        ),
        (
            6,
            "Packaging Unit Motor",
            "Electric motor",
            "Packaging station",
            "Drives packaging and final product handling",
            "temperature, vibration, current",
            "Finished metal components",
            "Medium",
        ),
    )

    # Re-assemble each record into a dict keyed by attribute name.
    machines = [dict(zip(field_names, entry)) for entry in catalog]

    return spark.createDataFrame(machines)


In [0]:
# Temperature

@dlt.table(
    name="temperature",
    comment="Temperature readings enriched with machine metadata",
    table_properties={
        "quality": "silver",
    },
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_temperature_range", "temperature BETWEEN 20 AND 90")
def silver_temperature():
    """Stream temperature readings and attach machine name and location.

    Reads ``01_bronze.temperature`` as a stream, casts ``event_time`` to a
    timestamp, renames ``device_id`` to ``machine_id`` and left-joins the
    machine metadata table on that key.

    Expectations declared on the table: rows with a NULL ``event_time`` are
    dropped (``expect_or_drop``); the 20-90 temperature range is declared with
    plain ``expect``.

    Returns:
        A streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``temperature``.
    """
    # Normalise the raw feed so it lines up with the metadata join key.
    readings = (
        dlt.read_stream("01_bronze.temperature")
        .withColumn("event_time", col("event_time").cast("timestamp"))
        .withColumnRenamed("device_id", "machine_id")
    )
    machine_info = dlt.read("02_silver.machine_metadata")

    # Left join: readings without a matching metadata row are kept.
    enriched = readings.join(machine_info, on="machine_id", how="left")

    return enriched.select(
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "temperature",
    )

In [0]:
# Vibration

@dlt.table(
    name="vibration",
    comment="Vibration readings enriched with machine metadata",
    table_properties={
        "quality": "silver",
    },
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_vibration_range", "vibration BETWEEN 0 AND 0.1")
def silver_vibration():
    """Stream vibration readings and attach machine name and location.

    Reads ``01_bronze.vibration`` as a stream, casts ``event_time`` to a
    timestamp, renames ``device_id`` to ``machine_id`` and left-joins the
    machine metadata table on that key.

    Expectations declared on the table: rows with a NULL ``event_time`` are
    dropped (``expect_or_drop``); the 0-0.1 vibration range is declared with
    plain ``expect``.

    Returns:
        A streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``vibration``.
    """
    # Normalise the raw feed so it lines up with the metadata join key.
    readings = (
        dlt.read_stream("01_bronze.vibration")
        .withColumn("event_time", col("event_time").cast("timestamp"))
        .withColumnRenamed("device_id", "machine_id")
    )
    machine_info = dlt.read("02_silver.machine_metadata")

    # Left join: readings without a matching metadata row are kept.
    enriched = readings.join(machine_info, on="machine_id", how="left")

    return enriched.select(
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "vibration",
    )


In [0]:
# Power

@dlt.table(
    name="power",
    comment="Power consumption readings enriched with machine metadata",
    table_properties={
        "quality": "silver",
    },
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_power_range", "power BETWEEN 0 AND 100")
def silver_power():
    """Stream power readings and attach machine name and location.

    Reads ``01_bronze.power`` as a stream, casts ``event_time`` to a
    timestamp, renames ``device_id`` to ``machine_id`` and left-joins the
    machine metadata table on that key.

    Expectations declared on the table: rows with a NULL ``event_time`` are
    dropped (``expect_or_drop``); the 0-100 power range is declared with
    plain ``expect``.

    Returns:
        A streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``power``.
    """
    # Normalise the raw feed so it lines up with the metadata join key.
    readings = (
        dlt.read_stream("01_bronze.power")
        .withColumn("event_time", col("event_time").cast("timestamp"))
        .withColumnRenamed("device_id", "machine_id")
    )
    machine_info = dlt.read("02_silver.machine_metadata")

    # Left join: readings without a matching metadata row are kept.
    enriched = readings.join(machine_info, on="machine_id", how="left")

    return enriched.select(
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "power",
    )


In [0]:
# Pressure

@dlt.table(
    name="pressure",
    comment="Pressure readings enriched with machine metadata",
    table_properties={
        "quality": "silver",
    },
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_pressure_range", "pressure BETWEEN 50 AND 400")
def silver_pressure():
    """Stream pressure readings and attach machine name and location.

    Reads ``01_bronze.pressure`` as a stream, casts ``event_time`` to a
    timestamp, renames ``device_id`` to ``machine_id`` and left-joins the
    machine metadata table on that key.

    Expectations declared on the table: rows with a NULL ``event_time`` are
    dropped (``expect_or_drop``); the 50-400 pressure range is declared with
    plain ``expect``.

    Returns:
        A streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``pressure``.
    """
    # Normalise the raw feed so it lines up with the metadata join key.
    readings = (
        dlt.read_stream("01_bronze.pressure")
        .withColumn("event_time", col("event_time").cast("timestamp"))
        .withColumnRenamed("device_id", "machine_id")
    )
    machine_info = dlt.read("02_silver.machine_metadata")

    # Left join: readings without a matching metadata row are kept.
    enriched = readings.join(machine_info, on="machine_id", how="left")

    return enriched.select(
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "pressure",
    )

In [0]:
# Flow rate

@dlt.table(
    name="flow_rate",
    comment="Flow rate readings enriched with machine metadata",
    table_properties={
        "quality": "silver",
    },
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_flow_rate_range", "flow_rate BETWEEN 0 AND 300")
def silver_flow_rate():
    """Stream flow-rate readings and attach machine name and location.

    Reads ``01_bronze.flow_rate`` as a stream, casts ``event_time`` to a
    timestamp, renames ``device_id`` to ``machine_id`` and left-joins the
    machine metadata table on that key.

    Expectations declared on the table: rows with a NULL ``event_time`` are
    dropped (``expect_or_drop``); the 0-300 flow-rate range is declared with
    plain ``expect``.

    Returns:
        A streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``flow_rate``.
    """
    # Normalise the raw feed so it lines up with the metadata join key.
    readings = (
        dlt.read_stream("01_bronze.flow_rate")
        .withColumn("event_time", col("event_time").cast("timestamp"))
        .withColumnRenamed("device_id", "machine_id")
    )
    machine_info = dlt.read("02_silver.machine_metadata")

    # Left join: readings without a matching metadata row are kept.
    enriched = readings.join(machine_info, on="machine_id", how="left")

    return enriched.select(
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "flow_rate",
    )

In [0]:
# RPM

@dlt.table(
    name="rpm",
    comment="RPM readings enriched with machine metadata",
    table_properties={
        "quality": "silver",
    },
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_rpm_range", "rpm BETWEEN 0 AND 6000")
def silver_rpm():
    """Stream RPM readings and attach machine name and location.

    Reads ``01_bronze.rpm`` as a stream, casts ``event_time`` to a
    timestamp, renames ``device_id`` to ``machine_id`` and left-joins the
    machine metadata table on that key.

    Expectations declared on the table: rows with a NULL ``event_time`` are
    dropped (``expect_or_drop``); the 0-6000 rpm range is declared with
    plain ``expect``.

    Returns:
        A streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``rpm``.
    """
    # Normalise the raw feed so it lines up with the metadata join key.
    readings = (
        dlt.read_stream("01_bronze.rpm")
        .withColumn("event_time", col("event_time").cast("timestamp"))
        .withColumnRenamed("device_id", "machine_id")
    )
    machine_info = dlt.read("02_silver.machine_metadata")

    # Left join: readings without a matching metadata row are kept.
    enriched = readings.join(machine_info, on="machine_id", how="left")

    return enriched.select(
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "rpm",
    )

In [0]:
# Speed

@dlt.table(
    name="speed",
    comment="Speed readings enriched with machine metadata",
    table_properties={
        "quality": "silver",
    },
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_speed_range", "speed BETWEEN 0 AND 5")
def silver_speed():
    """Stream speed readings and attach machine name and location.

    Reads ``01_bronze.speed`` as a stream, casts ``event_time`` to a
    timestamp, renames ``device_id`` to ``machine_id`` and left-joins the
    machine metadata table on that key.

    Expectations declared on the table: rows with a NULL ``event_time`` are
    dropped (``expect_or_drop``); the 0-5 speed range is declared with
    plain ``expect``.

    Returns:
        A streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``speed``.
    """
    # Normalise the raw feed so it lines up with the metadata join key.
    readings = (
        dlt.read_stream("01_bronze.speed")
        .withColumn("event_time", col("event_time").cast("timestamp"))
        .withColumnRenamed("device_id", "machine_id")
    )
    machine_info = dlt.read("02_silver.machine_metadata")

    # Left join: readings without a matching metadata row are kept.
    enriched = readings.join(machine_info, on="machine_id", how="left")

    return enriched.select(
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "speed",
    )

In [0]:
# Current

@dlt.table(
    name="current",
    comment="Motor current readings enriched with machine metadata",
    table_properties={
        "quality": "silver",
    },
)
@dlt.expect_or_drop("valid_event_time", "event_time IS NOT NULL")
@dlt.expect("valid_current_range", "current BETWEEN 0 AND 30")
def silver_current():
    """Stream motor-current readings and attach machine name and location.

    Reads ``01_bronze.current`` as a stream, casts ``event_time`` to a
    timestamp, renames ``device_id`` to ``machine_id`` and left-joins the
    machine metadata table on that key.

    Expectations declared on the table: rows with a NULL ``event_time`` are
    dropped (``expect_or_drop``); the 0-30 current range is declared with
    plain ``expect``.

    Returns:
        A streaming DataFrame with columns ``machine_id``, ``machine_name``,
        ``location``, ``event_time`` and ``current``.
    """
    # Normalise the raw feed so it lines up with the metadata join key.
    readings = (
        dlt.read_stream("01_bronze.current")
        .withColumn("event_time", col("event_time").cast("timestamp"))
        .withColumnRenamed("device_id", "machine_id")
    )
    machine_info = dlt.read("02_silver.machine_metadata")

    # Left join: readings without a matching metadata row are kept.
    enriched = readings.join(machine_info, on="machine_id", how="left")

    return enriched.select(
        "machine_id",
        "machine_name",
        "location",
        "event_time",
        "current",
    )