In [0]:
# Base path of the Databricks volume that holds the raw landing files.
# Nothing is created here; this only stores the path. The batch ingest cells
# append a dataset folder name (e.g. "/customers") to this value when loading.
raw_path = "/Volumes/finance_fraudworkspace/bronze/raw_volume"

In [0]:
# List the top-level entries of the raw volume to see which dataset folders have landed.
raw_volume_entries = dbutils.fs.ls("/Volumes/finance_fraudworkspace/bronze/raw_volume")
display(raw_volume_entries)

path,name,size,modificationTime
dbfs:/Volumes/finance_fraudworkspace/bronze/raw_volume/accounts/,accounts/,0,1769273137000
dbfs:/Volumes/finance_fraudworkspace/bronze/raw_volume/customers/,customers/,0,1769273117000
dbfs:/Volumes/finance_fraudworkspace/bronze/raw_volume/merchants/,merchants/,0,1769273125000
dbfs:/Volumes/finance_fraudworkspace/bronze/raw_volume/raw_volume/,raw_volume/,0,1769602018000
dbfs:/Volumes/finance_fraudworkspace/bronze/raw_volume/transactions/,transactions/,0,1769273145000


In [0]:
# BRONZE INGEST: CUSTOMERS
# Read the customers CSV folder (first row is the header, column types are
# inferred by Spark) and replace the managed Delta bronze table with its contents.
customers_df = (
    spark.read
    .format("csv")
    .options(header=True, inferSchema=True)
    .load(f"{raw_path}/customers")
)

# "overwrite" replaces whatever the table held before, so re-running this cell
# does not append duplicate copies of the file contents.
(
    customers_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("finance_fraudworkspace.bronze_managed.customers_bronze")
)

display(customers_df)


Customer_id,Full_name,DOB,Email,Risk_level,Created_timestamp
C001,Alice Brown,1988-05-12,alice_brown@outlook.com,LOW,10-01-2023 09:15
C002,Bob Smith,1975-09-22,,MEDIUM,14-02-2023 11:20
C003,Charlie Lee,,charlielee@gmail.com,HIGH,01-03-2023 14:05
C004,David Wilson,1985-03-18,davidwilson1985@gmail.com,,12-04-2023 16:45
C005,,2000-07-25,evadunne@gmail.com,HIGH,20-05-2023 10:30
C006,Fatima Khan,1998-12-01,fatimakhxn@gmail.com,LOW,18-06-2023 08:10
C006,Fatima Khan,1998-12-01,fatimakhxn@gmail.com,LOW,18-06-2023 08:10


In [0]:
# BRONZE INGEST: ACCOUNTS
# Read the accounts CSV folder (first row is the header, column types are
# inferred by Spark) and replace the managed Delta bronze table with its contents.
accounts_df = (
    spark.read
    .format("csv")
    .options(header=True, inferSchema=True)
    .load(f"{raw_path}/accounts")
)

# "overwrite" replaces whatever the table held before, so re-running this cell
# does not append duplicate copies of the file contents.
(
    accounts_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("finance_fraudworkspace.bronze_managed.accounts_bronze")
)

display(accounts_df)

Account_id,Customer_id,Account_status,Credit_limit,Updated_ts
A001,C001,ACTIVE,10000.0,2024-01-01
A002,C002,ACTIVE,5000.0,2024-01-01
A003,C003,BLOCKED,3000.0,2024-01-01
A004,C004,ACTIVE,,2024-01-01
A005,C005,ACTIVE,7000.0,2024-01-01
A002,C002,BLOCKED,5000.0,2024-02-15
A006,C211,ACTIVE,4000.0,2024-01-01


In [0]:
# BRONZE INGEST: MERCHANTS
# Read the merchants CSV folder (first row is the header, column types are
# inferred by Spark) and replace the managed Delta bronze table with its contents.
merchants_df = (
    spark.read
    .format("csv")
    .options(header=True, inferSchema=True)
    .load(f"{raw_path}/merchants")
)

# "overwrite" replaces whatever the table held before, so re-running this cell
# does not append duplicate copies of the file contents.
(
    merchants_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("finance_fraudworkspace.bronze_managed.merchants_bronze")
)

display(merchants_df)

Merchant_id,Merchant_name,Merchant_category,Merchant_country_code,Risk_level
M001,Amazon,Retail,USA,LOW
M002,OnlineCasino,Gambling,MT,HIGH
M003,Airline,Travel,UK,MEDIUM
M004,ElectronicShop,Retail,DE,LOW
M005,CryptoExhange,Finance,,HIGH
M006,UnknownStore,,USA,


In [0]:
# BRONZE INGEST: TRANSACTIONS (Auto Loader, incremental)
#
# Reads JSON files from the transactions landing folder with Auto Loader and
# appends them to the managed Delta bronze table. The checkpoint records which
# files were already ingested, so re-running this cell only picks up new files.

# Dedicated variable for the transactions folder: the shared `raw_path` base is
# deliberately NOT reassigned here, because the batch ingest cells build their
# source paths from it (f"{raw_path}/customers", ...) and would point at the
# wrong folder if they were re-run after this cell.
transactions_path = "/Volumes/finance_fraudworkspace/bronze/raw_volume/transactions"
checkpoint_path = "/Volumes/finance_fraudworkspace/bronze/raw_volume/checkpoints/transactions_bronze"
schema_path = "/Volumes/finance_fraudworkspace/bronze/raw_volume/schemas/transactions_bronze"

transactions_stream = (
    spark.readStream
        .format("cloudFiles")  # Auto Loader
        .option("cloudFiles.format", "json")
        # Auto Loader persists the inferred schema here between runs.
        .option("cloudFiles.schemaLocation", schema_path)
        .load(transactions_path)
)

query = (
    transactions_stream.writeStream
        .format("delta")
        .outputMode("append")
        .option("checkpointLocation", checkpoint_path)
        # Process everything currently available, then stop (batch-style run).
        .trigger(availableNow=True)
        .toTable("finance_fraudworkspace.bronze_managed.transactions_bronze")
)

# toTable() starts the query in the background and returns immediately.
# Block until the availableNow run has finished so that later cells (and
# "Run All") read the table only after the ingest has completed.
query.awaitTermination()

In [0]:
# Show the current contents of the transactions bronze table.
transactions_bronze_df = spark.table("finance_fraudworkspace.bronze_managed.transactions_bronze")
display(transactions_bronze_df)

account_id,amount,channel,country,currency,merchant_id,transaction_id,transaction_ts,_rescued_data
A777,850.0,ONLINE,US,USD,M009,T9101,2026-01-27T18:00:00,
A777,1250.0,ONLINE,US,USD,M009,T9102,2026-01-27T18:01:30,
A888,95.0,POS,US,USD,M010,T9103,2026-01-27T18:02:10,
A777,2100.0,ONLINE,US,USD,M011,T9104,2026-01-27T18:03:00,
A999,20.0,POS,US,USD,M012,T9105,2026-01-27T18:04:20,
A777,3300.0,ONLINE,US,USD,M009,T9106,2026-01-27T18:05:10,
A888,4700.0,ONLINE,US,USD,M010,T9107,2026-01-27T18:06:40,
A777,5200.0,ONLINE,US,USD,M009,T9108,2026-01-27T18:07:30,
A999,45.0,POS,US,USD,M012,T9109,2026-01-27T18:08:15,
A777,7600.0,ONLINE,US,USD,M011,T9110,2026-01-27T18:09:50,
