In [None]:
from pyspark.sql.functions import col, to_timestamp, when, coalesce, lit

# Read the Bronze machine-failure Delta table from Unity Catalog.
# Adjust the three-part name (catalog.schema.table) for your workspace.
bronze_reader = spark.read.format("delta")
df_bronze = bronze_reader.table("main.default.bronze_machine_failure")

# count() is an action, so this triggers a Spark job over the Bronze table.
bronze_row_count = df_bronze.count()
print(f"Bronze records: {bronze_row_count}")

# Build the Silver DataFrame from Bronze: parse the timestamp, derive a 0/1
# failure flag, cast temperature to double, and drop rows missing a key field.

# A row is flagged as a failure when error_code is neither NULL nor "".
has_error_code = col("error_code").isNotNull() & (col("error_code") != "")

# Applied after the timestamp conversion below, so the timestamp check tests
# the converted value rather than the raw Bronze value.
has_required_fields = col("machine_id").isNotNull() & col("timestamp").isNotNull()

df_silver = (
    df_bronze
    .withColumn("timestamp", to_timestamp(col("timestamp")))
    .withColumn("is_failure", when(has_error_code, 1).otherwise(0))
    .withColumn("temperature", col("temperature").cast("double"))
    .filter(has_required_fields)
)

# Show data quality metrics.
# Both figures come from a single aggregation, so the Silver lineage is
# evaluated once here instead of once per count() call.
from pyspark.sql import functions as F

silver_metrics = df_silver.agg(
    F.count(F.lit(1)).alias("silver_records"),
    # when() without otherwise() yields NULL for non-matching rows and count()
    # skips NULLs, so this equals filter(is_failure == 1).count().
    F.count(F.when(F.col("is_failure") == 1, 1)).alias("failures"),
).first()  # an ungrouped aggregate always returns exactly one row

print(f"Silver records: {silver_metrics['silver_records']}")
print(f"Failures detected: {silver_metrics['failures']}")
display(df_silver.limit(10))

# Persist the Silver DataFrame as a Delta table in Unity Catalog.
# "overwrite" mode replaces the data currently held by the target table.
silver_writer = df_silver.write.format("delta").mode("overwrite")
silver_writer.saveAsTable("main.default.silver_machine_failure_clean")