In [0]:
%run ../functions/utils_silver

In [0]:
# Notebook configuration, read by the cells further down.
# Bronze table that is read as the source of this load.
table_name = "milbom_bronze.bakehouse_sales_customers"
# Silver table that is created (if missing) and written to.
target_table = "milbom_silver.bakehouse_customers"
# Column(s) identifying one customer; passed to the bronze reader helper.
business_keys = ["customerID"]
# Column handed to the bronze reader helper as its ordering column.
order_column = "_ingestion_timestamp"


In [0]:
%sql
-- CREATE SCHEMA IF NOT EXISTS milbom_silver;


# (LGPD-Compliant)
## Sensitive Columns (REMOVED in Silver)
- first_name
- last_name
- email_address
- phone_number
- address

In [0]:
# DDL for the silver customers table. CREATE TABLE IF NOT EXISTS leaves an
# already existing table untouched, so this cell can be re-run safely.
# Only non-sensitive customer attributes plus the ingestion metadata columns
# are declared here.
create_customers_table_ddl = f"""
CREATE TABLE IF NOT EXISTS {target_table} (
    customerID BIGINT PRIMARY KEY COMMENT 'Unique identifier for each customer',
    city STRING COMMENT 'Customer city of residence',
    state STRING COMMENT 'Customer state of residence',
    country STRING COMMENT 'Customer country of residence',
    continent STRING COMMENT 'Customer continent of residence',
    postal_zip_code BIGINT COMMENT 'Postal or ZIP code of the customer address',
    gender STRING COMMENT 'Customer gender',
    _ingestion_timestamp TIMESTAMP COMMENT 'Timestamp of data ingestion',
    _ingestion_date DATE COMMENT 'Date of data ingestion'
)
USING DELTA
COMMENT 'Silver layer table containing master data for sales customers';
"""

# Run the statement against the active Spark session.
spark.sql(create_customers_table_ddl)



In [0]:
# Read the bronze customers table through the shared helper, using the
# business keys and ordering column configured at the top of the notebook.
# NOTE(review): presumably this returns the latest row per business key --
# confirm against the helper's definition in utils_silver.
df_bronze = read_latest_bronze_table(
    table_name=table_name,
    business_keys=business_keys,
    order_column=order_column
)

# Keep only the non-sensitive attributes and the ingestion metadata; the
# personal columns (names, e-mail, phone, address) are intentionally not
# selected, so they never reach the silver table.
df_silver = (
    df_bronze
    .select(
        col("customerID").cast("bigint"),
        col("city"),
        col("state"),
        col("country"),
        col("continent"),
        # Cast to BIGINT to match the `postal_zip_code BIGINT` column declared
        # in the target table DDL. The previous cast to string disagreed with
        # that schema and relied on an implicit cast at write time.
        col("postal_zip_code").cast("bigint"),
        col("gender"),
        col("_ingestion_timestamp"),
        col("_ingestion_date")
    )
    # De-duplicate on the configured business keys rather than a hard-coded
    # column name, so this stays in sync with the reader call above.
    .dropDuplicates(business_keys)
)

# Build the merge predicate from the same business keys; for the current
# configuration this yields "target.customerID = source.customerID".
merge_condition = " AND ".join(
    f"target.{key} = source.{key}" for key in business_keys
)

# Hand the prepared frame to the shared writer helper.
# NOTE(review): presumably this performs a Delta MERGE into target_table and
# optimizes the table afterwards when optimize=True -- confirm in utils_silver.
write_silver_table(
    df=df_silver,
    target_table=target_table,
    merge_condition=merge_condition,
    optimize=True
)
