In [0]:
# Streaming source: the silver transactions table, opened via readStream so
# the cells below operate on a streaming DataFrame.
transactions_silver_stream = spark.readStream.table(
    "finance_fraudworkspace.silver_managed.transactions_silver"
)

In [0]:
# RULES-BASED FRAUD FEATURES

from pyspark.sql.functions import col, when, hour, window, count, sum

# Rule-based indicator columns added on top of the silver stream.
# Each is_* column is 1 when its rule fires and 0 otherwise.
features_df = (
    transactions_silver_stream
    # Amount strictly greater than 2500.
    .withColumn(
        "is_high_amount",
        when(col("amount") > 2500, 1).otherwise(0),
    )
    # Country is anything other than "US".
    .withColumn(
        "is_foreign_txn",
        when(col("country") != "US", 1).otherwise(0),
    )
    # Hour-of-day extracted from the transaction timestamp.
    .withColumn("txn_hour", hour("transaction_ts"))
    # Hours 0 through 5 inclusive are treated as night.
    .withColumn(
        "is_night_txn",
        when(col("txn_hour").between(0, 5), 1).otherwise(0),
    )
)

In [0]:
from pyspark.sql.functions import col, when

# Weighted rule score and binary fraud flag.
#   fraud_score = 0.6 * is_high_amount + 0.4 * is_foreign_txn
#   is_fraud    = 1 when fraud_score is strictly greater than 0.7, else 0
# With these weights a single rule (0.6 or 0.4) never exceeds 0.7, so is_fraud
# is 1 only when both rules fire (score 1.0).
# NOTE(review): is_night_txn is computed upstream but does not contribute to
# the score here -- confirm whether that is intentional.
fraud_scored_stream = (
    features_df
    .withColumn(
        "fraud_score",
        0.6 * col("is_high_amount") + 0.4 * col("is_foreign_txn"),
    )
    .withColumn("is_fraud", when(col("fraud_score") > 0.7, 1).otherwise(0))
)

In [0]:
# WRITING TO THE GOLD DELTA TABLE
# The checkpoint directory stores the stream's progress so a re-run resumes
# from where the previous run stopped instead of reprocessing the source.
checkpoint_path = "/Volumes/finance_fraudworkspace/gold_managed/gold_volume/fraud_detection_checkpoints"

# toTable() starts the streaming query and returns a StreamingQuery handle
# immediately; the write itself proceeds in the background.
# trigger(availableNow=True) processes the data currently available and then
# stops the query on its own.
query = (
    fraud_scored_stream.writeStream
        .format("delta")
        .outputMode("append")
        .option("checkpointLocation", checkpoint_path)
        .trigger(availableNow=True)
        .toTable("finance_fraudworkspace.gold_managed.fraud_detection_gold")
)

# Block until the availableNow run has finished. Without this, later cells
# that read the gold table can run while the write is still in progress and
# see a missing, empty, or partially written table. Any failure inside the
# stream is also raised here instead of being lost in the background.
query.awaitTermination()

In [0]:
# DISPLAY FRAUD-SCORED TRANSACTIONS FROM THE GOLD TABLE
# This is a batch read (spark.read), so it shows the table contents as of the
# moment this cell runs; it does not update as new rows arrive.
(
    spark.read
         .table("finance_fraudworkspace.gold_managed.fraud_detection_gold")
         .display()
)