In [0]:
from pyspark.sql.functions import current_timestamp, current_date, lit

# Name of the database that the bronze tables are written to.
BRONZE_DB = "bronze"

# IF NOT EXISTS makes this statement a no-op when the database is already
# present, so the cell can be re-run without failing.
create_db_sql = f"CREATE DATABASE IF NOT EXISTS {BRONZE_DB}"
spark.sql(create_db_sql)

print("Bronze database ready.")

In [0]:
def promote_to_bronze(
    source_table: str,
    target_table: str,
    partition_col: str = None,
    source_db: str = "default"
) -> None:
    """Append a source table to the bronze database as a Delta table.

    Reads ``{source_db}.{source_table}``, adds three audit columns
    (``ingestion_timestamp``, ``ingestion_date`` and ``source_table``) and
    appends the result to ``{BRONZE_DB}.{target_table}``.

    Args:
        source_table: Name of the table to read from ``source_db``.
        target_table: Name of the table to write in the ``BRONZE_DB`` database.
        partition_col: Column to partition the write by. ``None`` (or any
            other falsy value) writes without an explicit partitioning.
        source_db: Database that holds ``source_table``. Defaults to
            ``"default"``, which is where this function always read from
            before the parameter existed.

    Note:
        The write mode is ``append``: every call adds the current contents
        of the source table again, so re-running a call appends the same
        source rows a second time.
    """
    # Build both qualified names once so the log lines always agree with the
    # tables that are actually read and written.
    source_name = f"{source_db}.{source_table}"
    target_name = f"{BRONZE_DB}.{target_table}"

    print(f"Promoting {source_name} to {target_name}")

    raw_df = spark.table(source_name)

    # Audit columns. NOTE(review): withColumn replaces a column of the same
    # name, so a source column called e.g. "source_table" would be overwritten
    # here — confirm no source table uses these names.
    bronze_df = (
        raw_df
        .withColumn("ingestion_timestamp", current_timestamp())
        .withColumn("ingestion_date", current_date())
        .withColumn("source_table", lit(source_table))
    )

    # mergeSchema lets an append add columns that the existing Delta table
    # does not have yet instead of failing on the schema mismatch.
    writer = (
        bronze_df.write
        .format("delta")
        .mode("append")
        .option("mergeSchema", "true")
    )

    if partition_col:
        writer = writer.partitionBy(partition_col)

    writer.saveAsTable(target_name)

    print(f"Appended to {target_name}")

In [0]:
# These four tables keep their name in bronze and are written without an
# explicit partition column.
for table_name in ("customers", "accounts", "merchants", "exchange_rates"):
    promote_to_bronze(table_name, table_name)

# Transactions are written partitioned by the ingestion_date column.
promote_to_bronze("transactions", "transactions", partition_col="ingestion_date")

print("Bronze ingestion complete.")