In [0]:
%run ../functions/utils_silver

In [0]:
# Bronze source table and silver target table for the store (franchise) master data.
table_name = "milbom_bronze.bakehouse_sales_franchises"
target_table = "milbom_silver.bakehouse_store"

# Key column(s) and ordering column passed to read_latest_bronze_table further down.
business_keys = ["franchiseID"]
order_column = "_ingestion_timestamp"

# NOTE(review): single-space placeholder that no visible cell of this notebook reads;
# presumably stands for "no partitioning" -- confirm before relying on it.
partition_column = " "

In [0]:
%sql
-- Make sure the silver schema exists; IF NOT EXISTS keeps notebook re-runs harmless.
CREATE SCHEMA IF NOT EXISTS milbom_silver;


In [0]:
# DDL for the silver store table. IF NOT EXISTS means this only has an effect the
# first time the notebook runs; an already existing table is left untouched.
# NOTE(review): PRIMARY KEY constraints on Databricks are, as far as documented,
# informational (not enforced) and need Unity Catalog -- confirm for this workspace.
create_store_table_sql = f"""
CREATE TABLE IF NOT EXISTS {target_table} (
    franchiseID BIGINT PRIMARY KEY COMMENT 'Unique identifier for each store',
    name STRING COMMENT 'Name of the store',
    city STRING COMMENT 'City where the store is located',
    district STRING COMMENT 'District where the store is located',
    zipcode STRING COMMENT 'ZIP code of the store location',
    country STRING COMMENT 'Country of the store',
    size STRING COMMENT 'Size of the store (e.g., small, medium, large)',
    longitude DOUBLE COMMENT 'Longitude coordinate of the store location',
    latitude DOUBLE COMMENT 'Latitude coordinate of the store location',
    supplierID BIGINT COMMENT 'Unique identifier for the store supplier',
    _ingestion_timestamp TIMESTAMP COMMENT 'Timestamp of data ingestion',
    _ingestion_date DATE COMMENT 'Date of data ingestion'
)
USING DELTA
COMMENT 'Silver layer table containing master data for store';
"""

spark.sql(create_store_table_sql)



In [0]:
# Load the bronze franchise rows through the shared helper brought in by the
# utils_silver %run; the business keys and ordering column come from the config cell.
df_bronze = read_latest_bronze_table(
    table_name=table_name,
    business_keys=business_keys,
    order_column=order_column
)

# Project and cast the columns in the same order as the CREATE TABLE statement
# (longitude, latitude, supplierID), so the frame lines up with the target table
# even if the writer matches columns by position rather than by name.
df_silver = (
    df_bronze
    .select(
        col("franchiseID").cast("bigint"),
        col("name"),
        col("city"),
        col("district"),
        col("zipcode"),
        col("country"),
        col("size"),
        col("longitude").cast("double"),
        col("latitude").cast("double"),
        col("supplierID").cast("bigint"),
        col("_ingestion_timestamp"),
        col("_ingestion_date")
    )
    # De-duplicate on the configured business keys instead of a hard-coded column
    # name, so the key is declared in exactly one place.
    .dropDuplicates(business_keys)
)

# Derive the merge predicate from the configured business keys so it cannot drift
# from the key definition; for ["franchiseID"] this yields
# "target.franchiseID = source.franchiseID".
merge_condition = " AND ".join(
    f"target.{key} = source.{key}" for key in business_keys
)

# Hand the prepared frame to the shared writer helper (from utils_silver) together
# with the target table, the merge predicate and the optimize flag.
write_silver_table(
    df=df_silver,
    target_table=target_table,
    merge_condition=merge_condition,
    optimize=True
)
