In [0]:
%run /Workspace/Users/saijananig4@gmail.com/real-time-banking-etl/src/config/00_config_and_helpers

**Define Schema**

In [0]:
# Name of the notebook widget that selects the target environment.
ENV_WIDGET = "env"

# Create the widget with "dev" as its default, then read the current value.
dbutils.widgets.text(ENV_WIDGET, "dev")
env = dbutils.widgets.get(ENV_WIDGET)

# Look up the settings for that environment.
# NOTE(review): get_env_config is presumably provided by the %run'd
# config/helpers notebook — confirm.
cfg = get_env_config(env)
print("Running env:", cfg["ENV"])


In [0]:
from pyspark.sql import functions as F

# Folder the streaming reader loads raw transaction files from.
RAW_SOURCE = "{}/transactions/incoming/".format(cfg["RAW_BUCKET"])

# Delta storage path for the bronze data, and the table name it is saved under.
# NOTE(review): path / tbl / checkpoint are presumably helpers from the
# %run'd config notebook — confirm their exact output format there.
BRONZE_PATH = path(cfg, "bronze", "transactions")
BRONZE_TBL = tbl(cfg, "bronze_transactions")

# Streaming checkpoint location and Auto Loader schema-tracking location.
CHECKPOINT = checkpoint(cfg, "bronze_transactions")
SCHEMA_LOC = checkpoint(cfg, "bronze_transactions_schema")

# Explicit DDL schema applied to the incoming records. transaction_timestamp is
# read as a plain string here and parsed into a timestamp downstream.
schema = """
transaction_id string,
account_id string,
transaction_timestamp string,
transaction_type string,
amount double,
merchant_category string,
channel string,
city string,
ingest_time timestamp,
batch_id long
"""

# Echo every resolved location so a run's output shows where data is going.
for _label, _value in (
    ("RAW_SOURCE", RAW_SOURCE),
    ("BRONZE_PATH", BRONZE_PATH),
    ("BRONZE_TBL", BRONZE_TBL),
    ("CHECKPOINT", CHECKPOINT),
    ("SCHEMA_LOC", SCHEMA_LOC),
):
    print(f"{_label}:", _value)


**Auto Loader Stream -> Bronze Delta**

In [0]:
# --- Read: Auto Loader ("cloudFiles") over the raw JSON landing folder -------
autoloader_reader = (
    spark.readStream
    .format("cloudFiles")
    .option("cloudFiles.format", "json")
    .option("cloudFiles.schemaLocation", SCHEMA_LOC)
    .schema(schema)
)
bronze_stream = autoloader_reader.load(RAW_SOURCE)

# --- Transform ----------------------------------------------------------------
# Parse the string event time into a real timestamp column, drop the raw string
# column, and stamp each row with the time it was processed into bronze.
parsed_event_ts = F.to_timestamp(F.col("transaction_timestamp"))
bronze_out = (
    bronze_stream
    .withColumn("transaction_ts", parsed_event_ts)
    .drop("transaction_timestamp")
    .withColumn("bronze_ingest_time", F.current_timestamp())
)

# --- Write: append to the bronze Delta path -----------------------------------
bronze_writer = (
    bronze_out.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", CHECKPOINT)
    # availableNow: process what is currently available, then stop on its own.
    .trigger(availableNow=True)
)
q = bronze_writer.start(BRONZE_PATH)

# Block until the run finishes so later cells see the complete bronze data.
q.awaitTermination()
print("✅ Bronze ingestion complete for env:", cfg["ENV"])


In [0]:
# Batch-read everything currently stored at the bronze Delta path.
df_bronze = spark.read.load(BRONZE_PATH, format="delta")

# Replace the registered table with that snapshot. "overwriteSchema" lets the
# table's schema be replaced along with its data on each run.
table_writer = (
    df_bronze.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
table_writer.saveAsTable(BRONZE_TBL)

print("✅ Bronze UC table updated:", BRONZE_TBL)

# Row count comes from the path-based DataFrame, not from the table just written.
bronze_row_count = df_bronze.count()
print("Bronze row count:", bronze_row_count)

In [0]:
# Sanity check: count the rows in the registered bronze table.
#
# This previously ran as a %sql cell against the literal table
# finance_data.dev.bronze_transactions, so it always queried dev regardless of
# the `env` widget. Querying BRONZE_TBL (the name the table was saved under in
# this notebook) keeps the check on the environment actually being run.
display(spark.sql(f"SELECT COUNT(*) FROM {BRONZE_TBL}"))