# In [0]:
import dlt
from pyspark.sql.functions import avg, max

# ============================================
# Gold Table: Aggregated Machine Metrics
# ============================================
@dlt.table(
    name="gold_machine_metrics",
    comment="Aggregated machine metrics with metadata (avg temp + max vibration)"
)
def gold_machine_metrics():
    """Assemble the gold-level machine metrics table.

    Combines three inputs, all keyed on ``machine_id``:

    * ``silver_temperature`` (streaming read): average temperature per
      ``machine_id`` / ``machine_name``.
    * ``silver_vibration`` (batch read): maximum vibration per ``machine_id``.
    * ``machine_metadata`` (batch read): all columns except ``machine_name``.

    Returns:
        The temperature aggregate left-joined with the vibration aggregate
        and then left-joined with the metadata.
    """
    join_key = "machine_id"

    # Streaming side: temperature readings averaged per machine.
    # NOTE(review): the watermark is declared on "event_time", but the
    # grouping keys below do not include that column -- confirm the watermark
    # has the intended effect for this aggregation.
    mean_temperature = (
        dlt.read_stream("silver_temperature")
        .withWatermark("event_time", "2 minutes")
        .groupBy(join_key, "machine_name")
        .agg(avg("temperature").alias("avg_temperature"))
    )

    # Batch side: vibration is deliberately read as a batch table so that the
    # join below is not a stream-to-stream join.
    # `max` here is pyspark.sql.functions.max (imported at the top of the
    # file), not the Python builtin.
    peak_vibration = (
        dlt.read("silver_vibration")
        .groupBy(join_key)
        .agg(max("vibration").alias("max_vibration"))
    )

    # Batch metadata; machine_name is dropped to avoid a duplicate column,
    # since the temperature aggregate already carries it.
    machine_info = dlt.read("machine_metadata").drop("machine_name")

    # Left joins keep every row of the temperature aggregate even when a
    # machine has no vibration or metadata row.
    return (
        mean_temperature
        .join(peak_vibration, on=join_key, how="left")
        .join(machine_info, on=join_key, how="left")
    )
