In [None]:
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.classification import RandomForestClassifier
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml import Pipeline

# Read the Gold feature table stored in Delta format
df_gold = spark.read.load("/delta/gold/machine_features", format="delta")

# Columns fed to the model; "is_failure" is the label column
features = [
    "temperature",
    "pressure",
    "vibration",
    "rpm",
    "temp_roll_avg",
    "temp_diff",
    "pressure_roll_avg",
    "vibration_roll_avg",
]

# Keep only the label and the model inputs, then discard every row that
# has a null in any of those columns.
df_ml = df_gold.select(["is_failure", *features]).dropna()

# 80/20 train/test partition with a fixed seed
train, test = df_ml.randomSplit(weights=[0.8, 0.2], seed=42)

# Stage 1: pack the input columns into a single vector column "features"
assembler = VectorAssembler(
    inputCols=features,
    outputCol="features",
)

# Stage 2: random forest reading that vector column and the label
rf = RandomForestClassifier(
    labelCol="is_failure",
    featuresCol="features",
)

# Chain both stages so fitting and scoring each take a single call
pipeline = Pipeline(stages=[assembler, rf])

# Fit on the training partition, then score the held-out partition
model = pipeline.fit(train)
predictions = model.transform(test)

# Score the held-out predictions against the label. No metric is named
# here, so the evaluator's default metric is what gets reported.
evaluator = BinaryClassificationEvaluator(labelCol="is_failure")
auc = evaluator.evaluate(predictions)
print(f"Test AUC: {auc}")
