In [0]:
from pyspark.sql.types import *
from pyspark.sql.functions import col, current_timestamp, lit

# Database that receives every table written by this notebook.
BRONZE_DB = "bronze"

# IF NOT EXISTS keeps this cell safe to re-run on an existing database.
create_db_stmt = f"CREATE DATABASE IF NOT EXISTS {BRONZE_DB}"
spark.sql(create_db_stmt)

print("Bronze database ready.")

In [0]:
# Each schema maps a column name to the Spark type it is cast to.
# Insertion order matters: enforce_schema selects the columns in this order.

# Columns kept for the customers table.
customer_schema = dict(
    customer_id=StringType(),
    first_name=StringType(),
    last_name=StringType(),
    date_of_birth=DateType(),
    country=StringType(),
    created_at=DateType(),
)

# Columns kept for the accounts table.
account_schema = dict(
    account_id=StringType(),
    customer_id=StringType(),
    account_type=StringType(),
    currency=StringType(),
    opened_at=DateType(),
    status=StringType(),
)

# Columns kept for the merchants table.
merchant_schema = dict(
    merchant_id=StringType(),
    merchant_name=StringType(),
    category=StringType(),
    country=StringType(),
)

# Columns kept for the exchange_rates table.
exchange_schema = dict(
    date=DateType(),
    currency=StringType(),
    rate_to_usd=DoubleType(),
)

# Columns kept for the transactions table.
# NOTE(review): amount is cast to a double; confirm binary floating point is
# acceptable for monetary values here (a DecimalType would avoid rounding drift).
transaction_schema = dict(
    transaction_id=StringType(),
    account_id=StringType(),
    merchant_id=StringType(),
    amount=DoubleType(),
    currency=StringType(),
    transaction_type=StringType(),
    transaction_timestamp=TimestampType(),
)

In [0]:
def enforce_schema(
    df,
    schema_dict
):
    """Project ``df`` onto the columns of ``schema_dict``, cast to the declared types.

    Args:
        df: DataFrame to project; it must expose every column named in
            ``schema_dict``.
        schema_dict: Mapping of column name -> Spark data type. Iteration
            order determines the order of the output columns.

    Returns:
        A DataFrame containing only the columns listed in ``schema_dict``;
        any other column of ``df`` is dropped.
    """
    # `col` comes from the notebook's import cell, so no function-level import
    # is needed. The explicit alias pins each output column to its schema key
    # instead of relying on Spark's implicit naming of cast expressions.
    return df.select([
        col(col_name).cast(dtype).alias(col_name)
        for col_name, dtype in schema_dict.items()
    ])

In [0]:

def promote_to_bronze(source_table, target_table, schema_dict, partition_col=None):
    """Rebuild a bronze Delta table from a table in the ``default`` database.

    Reads ``default.{source_table}``, keeps and casts the columns listed in
    ``schema_dict``, adds the audit columns ``ingestion_timestamp`` and
    ``source_table``, and overwrites ``{BRONZE_DB}.{target_table}``.

    Args:
        source_table: Table name inside the ``default`` database.
        target_table: Table name to (re)create inside ``BRONZE_DB``.
        schema_dict: Mapping of column name -> Spark data type, passed to
            ``enforce_schema``.
        partition_col: Optional column name, or list/tuple of names, to
            partition the table by. A column that ``schema_dict`` declares as
            ``TimestampType`` is not used directly: a ``<name>_date`` column
            (the timestamp cast to a date, in the Spark session time zone) is
            added and used as the partition column instead, because
            partitioning on raw timestamps creates roughly one partition per
            distinct value.

    Raises:
        ValueError: If a derived ``<name>_date`` column would replace a column
            that already exists.
    """
    target_name = f"{BRONZE_DB}.{target_table}"

    print(f"Promoting default.{source_table} to {target_name}")

    raw_df = spark.table(f"default.{source_table}")

    structured_df = enforce_schema(raw_df, schema_dict)

    final_df = (
        structured_df
            .withColumn("ingestion_timestamp", current_timestamp())
            .withColumn("source_table", lit(source_table))
    )

    # Accept a single name or a list/tuple of names; falsy means "unpartitioned".
    if not partition_col:
        requested_cols = []
    elif isinstance(partition_col, str):
        requested_cols = [partition_col]
    else:
        requested_cols = list(partition_col)

    partition_cols = []
    for name in requested_cols:
        if isinstance(schema_dict.get(name), TimestampType):
            date_name = f"{name}_date"
            # withColumn would silently replace a same-named column.
            if date_name in final_df.columns:
                raise ValueError(
                    f"Cannot derive partition column '{date_name}': "
                    "a column with that name already exists"
                )
            final_df = final_df.withColumn(date_name, col(name).cast(DateType()))
            partition_cols.append(date_name)
        else:
            partition_cols.append(name)

    writer = (
        final_df.write
            .format("delta")
            .mode("overwrite")
            # Without this, overwriting an existing table whose schema or
            # partitioning differs from final_df fails instead of replacing it.
            .option("overwriteSchema", "true")
    )

    if partition_cols:
        writer = writer.partitionBy(*partition_cols)

    writer.saveAsTable(target_name)

    print(f"Created {target_name}")

In [0]:
# One entry per table to promote: (table name, schema, partition column).
# Source and target table names are identical for every entry.
# NOTE(review): transactions is partitioned on a raw timestamp column; confirm
# the resulting partition count is acceptable.
bronze_tables = [
    ("customers", customer_schema, None),
    ("accounts", account_schema, None),
    ("merchants", merchant_schema, None),
    ("exchange_rates", exchange_schema, None),
    ("transactions", transaction_schema, "transaction_timestamp"),
]

for table_name, table_schema, partition_by in bronze_tables:
    promote_to_bronze(
        table_name,
        table_name,
        table_schema,
        partition_col=partition_by,
    )

print("Bronze ingestion complete.")