# ✅ 1. Load Your Best Model (Random Forest)

In [0]:
from pyspark.ml import PipelineModel

rf_model = PipelineModel.load("dbfs:/FileStore/models/rf_fraud_model")

# Load test data
test_df = spark.read.parquet("dbfs:/FileStore/tables/creditcard_balanced.parquet")  # or reload split if saved
_, test_df = test_df.randomSplit([0.8, 0.2], seed=42)

# ✅ 2. Generate Predictions

In [0]:
predictions = rf_model.transform(test_df)
predictions.select("prediction", "probability", "Class").show(20)


+----------+--------------------+-----+
|prediction|         probability|Class|
+----------+--------------------+-----+
|       0.0|[0.97451749948561...|    0|
|       0.0|[0.97265965591382...|    0|
|       0.0|[0.97425569773591...|    0|
|       0.0|[0.97451749948561...|    0|
|       0.0|[0.97265965591382...|    0|
|       0.0|[0.97451749948561...|    0|
|       0.0|[0.96898303127787...|    0|
|       0.0|[0.98177760493561...|    0|
|       0.0|[0.97486593991005...|    0|
|       0.0|[0.95843885569027...|    0|
|       0.0|[0.55685669360349...|    0|
|       0.0|[0.77312075219004...|    0|
|       0.0|[0.97624282729789...|    0|
|       0.0|[0.97006268524934...|    0|
|       0.0|[0.97070835909016...|    0|
|       0.0|[0.97486593991005...|    0|
|       0.0|[0.97451749948561...|    0|
|       0.0|[0.97624282729789...|    0|
|       0.0|[0.97451749948561...|    0|
|       0.0|[0.97451749948561...|    0|
+----------+--------------------+-----+
only showing top 20 rows



# ✅ 3. Evaluation Metrics: Precision, Recall, F1

In [0]:
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

evaluator = MulticlassClassificationEvaluator(labelCol="Class", predictionCol="prediction")

precision = evaluator.evaluate(predictions, {evaluator.metricName: "precisionByLabel"})
recall = evaluator.evaluate(predictions, {evaluator.metricName: "recallByLabel"})
f1 = evaluator.evaluate(predictions, {evaluator.metricName: "f1"})

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Precision: 0.9691
Recall: 0.9973
F1 Score: 0.9725



Precision: Out of all transactions the model flagged as fraud, 96.91% were actually fraud. High precision means few false alarms.
Recall: Out of all actual fraud transactions, the model correctly detected 99.73% of them. High recall means very few missed frauds.
F1 Score: The harmonic mean of precision and recall. It balances the two, giving you a single number to understand overall accuracy regarding fraud classification.

# ✅ 4. Confusion Matrix

In [0]:
from pyspark.sql.functions import col

predictions.groupBy("Class", "prediction").count().orderBy("Class", "prediction").show()


+-----+----------+-----+
|Class|prediction|count|
+-----+----------+-----+
|    0|       0.0|  376|
|    0|       1.0|    1|
|    1|       0.0|   12|
|    1|       1.0|   93|
+-----+----------+-----+



# ✅ 5. ROC Curve

In [0]:
from pyspark.ml.evaluation import BinaryClassificationEvaluator

evaluator_roc = BinaryClassificationEvaluator(labelCol="Class", metricName="areaUnderROC")
roc_auc = evaluator_roc.evaluate(predictions)
print(f"ROC AUC: {roc_auc:.4f}")


ROC AUC: 0.9859
