# 01_bronze_to_silver - Bronze to Silver curation (Microsoft Fabric Lakehouse)

**Purpose**  
Curate Dataverse/Commerce data replicated by **Link to Microsoft Fabric** (Bronze, DV-managed Delta) into clean, standardised **Silver** Delta tables in the same Lakehouse.  
Silver is designed to be stable, type-consistent, CDC-aware, and ready for **Gold** denormalisation.

**Key characteristics**  
- Minimal, modular PySpark.  
- Idempotent writes (overwrite snapshot for simplicity; easy to switch to merge/upsert).  
- UTC timestamps, schema normalisation, light dedupe.  
- Works even if some Bronze tables are not present (skips gracefully).

> **Note:** The exact source column names can vary between environments. This notebook follows common Commerce/F&O patterns and your POC logic. Adjust mappings in one place if needed.


In [15]:
# ==============================
# Configuration & parameters
# ==============================

# OneLake connection: source and target database names (both point at the same Lakehouse here).
SOURCE_DB = "ci_lakehouse"
TARGET_DB = "ci_lakehouse"
# Make the target database current so unqualified table names resolve against it.
spark.sql(f"USE `{TARGET_DB}`") # type: ignore

# Silver Delta tables live under their own folder of the Lakehouse (Fabric treats "Tables" specially)
# and each one is registered in the metastore with an explicit LOCATION.
SILVER_SUFFIX = "_silver"             # appended to every Silver table name
SILVER_ROOT = "Files/Tables_silver"   # folder under the Lakehouse

# Write strategy toggle: True = snapshot overwrite (simple); False is reserved for a merge/upsert pattern.
# TODO: replace overwrite with merge/upsert when schema is stable
USE_OVERWRITE_SNAPSHOT = True

# Bronze tables expected from Link to Fabric (as in the POC). Each name maps to its own,
# currently empty, per-table options dict.
BRONZE_TABLES = {
    bronze_name: {}
    for bronze_name in (
        "logisticslocation",
        "logisticspostaladdress",
        "logisticselectronicaddress",
        "dirpartytable",
        "dirpersonname",
        "custtable",
        "salestable",
        "salesline",
        "retailchanneltable",
        "retailloyaltycard",
        "retailloyaltycardtier",
        "retailloyaltycardrewardpointtrans",
        "retailaffiliation",
        # Optionally available in some environments; left here for future Silver expansions:
        # "inventtable",
        # "retailstoretable",
    )
}

# Silver uses snake_case column names for readability/consistency.
# Standard precision/scale applied when numeric fields are cast to decimal.
DECIMAL_SCALE = 4
DECIMAL_PRECISION = 18


StatementMeta(, 3915ba56-80c0-46fd-9659-7433a84a5a5b, 4, Finished, Available, Finished)

In [16]:
# ==============================
# Utilities
# ==============================
from pyspark.sql import functions as F, Window as W # type: ignore
from pyspark.sql import types as T # type: ignore

def log(msg: str) -> None:
    """Print a notebook progress message tagged with the ``[silver]`` prefix."""
    line = "[silver] {}".format(msg)
    print(line)

def table_exists(name: str) -> bool:
    """Return True if the Spark catalog reports that table ``name`` exists.

    Any error raised by the catalog lookup is reported through ``log`` and
    treated as "table not available". Callers therefore keep their
    skip-gracefully behaviour, while the real cause (for example a catalog or
    permission failure) stays visible in the notebook output instead of being
    indistinguishable from a missing table.
    """
    try:
        return bool(spark.catalog.tableExists(name)) # type: ignore
    except Exception as exc:  # best-effort: a failed lookup must not abort the notebook
        log(f"WARN: could not check table '{name}' ({type(exc).__name__}: {exc})")
        return False

def safe_read_table(name: str):
    """Return table ``name`` as a DataFrame, or None (after logging a SKIP line) when it is absent."""
    if table_exists(name):
        return spark.table(name) # type: ignore
    log(f"SKIP: table not found -> {name}")
    return None

def ensure_dir(path: str) -> None:
    """Defensive, best-effort hook called before a Delta write.

    NOTE(review): ``path`` is not used by the body. The function only asks the
    Spark session state for a new Hadoop configuration and discards it; any
    exception is ignored. Folder creation is left to the write itself.
    """
    try:
        # In Fabric, writing creates folders automatically; this is defensive.
        session_state = spark._jsparkSession.sessionState() # type: ignore
        session_state.newHadoopConf()
    except Exception:
        return

def to_utc_ts(col):
    """Return the column named ``col`` cast to Spark's timestamp type.

    No time-zone conversion is performed here; the source value is assumed to
    already be UTC (or castable as such).
    """
    source_column = F.col(col)
    return source_column.cast("timestamp")

def cast_decimal(col, precision=DECIMAL_PRECISION, scale=DECIMAL_SCALE):
    """Return the column named ``col`` cast to ``DecimalType(precision, scale)``.

    The defaults are the notebook-wide DECIMAL_PRECISION / DECIMAL_SCALE settings.
    """
    decimal_type = T.DecimalType(precision, scale)
    return F.col(col).cast(decimal_type)

def dedupe_latest(df, pk_cols, order_cols):
    """Keep one row per primary key: the row that sorts first by ``order_cols`` descending.

    Args:
        df: DataFrame to de-duplicate.
        pk_cols: Key column name, or a list/tuple of key column names.
        order_cols: Ordering column name, or a list/tuple of ordering column
            names; every ordering column is sorted descending.

    Returns:
        A DataFrame with the same columns as ``df`` and a single row per
        distinct key.

    Rows that tie on every ordering column are separated only by
    ``row_number``, so which of them is kept is arbitrary.
    """
    # Accept a bare column name for either argument; a plain string would
    # otherwise be iterated character by character.
    if not isinstance(pk_cols, (list, tuple)):
        pk_cols = [pk_cols]
    if not isinstance(order_cols, (list, tuple)):
        order_cols = [order_cols]
    window = W.partitionBy(*[F.col(c) for c in pk_cols]).orderBy(*[F.col(c).desc() for c in order_cols])
    return (df
            .withColumn("_rn", F.row_number().over(window))  # 1 = latest row within the key
            .filter(F.col("_rn") == 1)
            .drop("_rn")
           )



def exclude_soft_deleted(df):
    """Drop rows flagged as soft deleted (`IsDelete` = 1 / true).

    The flag column is located by exact name first and then case-insensitively,
    so a differently cased flag column (for example ``isdelete``) is still
    honoured. When no such column exists, ``df`` is returned unchanged.

    A NULL flag counts as "not deleted". A flag value that cannot be cast to
    boolean yields NULL in the filter, so that row is dropped.
    """
    columns = df.columns
    if "IsDelete" in columns:
        flag_name = "IsDelete"
    else:
        flag_name = next((c for c in columns if c.lower() == "isdelete"), None)
    if flag_name is None:
        return df
    is_delete = F.col(flag_name)
    normalized = (
        F.when(is_delete.isNull(), F.lit(False))
         .otherwise(is_delete.cast("boolean"))
    )
    return df.filter(~normalized)


def register_delta_table(table_name: str, path: str) -> None:
    """Register ``table_name`` in TARGET_DB as a Delta table located at ``path``.

    Uses CREATE TABLE IF NOT EXISTS, so an existing registration is left as is.
    """
    create_stmt = f"""
        CREATE TABLE IF NOT EXISTS `{TARGET_DB}`.`{table_name}`
        USING DELTA
        LOCATION '{path}'
    """
    spark.sql(create_stmt) # type: ignore


def write_delta(df, table_name: str, partition_cols=None) -> None:
    """Write ``df`` as a Silver Delta table at its fixed location and register/refresh it.

    Args:
        df: DataFrame to persist.
        table_name: Silver table name; also the folder name under SILVER_ROOT.
        partition_cols: Optional iterable of column names to partition by.

    The data is always written in ``overwrite`` mode with ``mergeSchema``
    enabled. MERGE/upsert is not implemented yet, so when
    USE_OVERWRITE_SNAPSHOT is False a warning is logged and the snapshot
    overwrite is used anyway.
    """
    path = f"{SILVER_ROOT}/{table_name}"
    ensure_dir(path)
    if not USE_OVERWRITE_SNAPSHOT:
        # In a future iteration we can switch to MERGE logic per PK; until then make the fallback visible.
        log(f"WARN: merge/upsert not implemented; overwriting snapshot for {table_name}.")
    writer = (df.write
              .format("delta")
              .option("mergeSchema", "true")
              .mode("overwrite"))
    if partition_cols:
        writer = writer.partitionBy(*partition_cols)
    writer.save(path)
    register_delta_table(table_name, path)
    # Refresh the same database-qualified name that register_delta_table creates.
    spark.sql(f"REFRESH TABLE `{TARGET_DB}`.`{table_name}`") # type: ignore
    log(f"Wrote Silver table: {table_name} -> {path}")

def assert_pk_unique(df, pk_cols, table_label: str):
    """Log whether ``pk_cols`` uniquely identify the rows of ``df``.

    Non-fatal: duplicate keys only produce a WARN log line carrying the number
    of key values that occur more than once; nothing is raised.
    """
    key_columns = [F.col(c) for c in pk_cols]
    rows_per_key = df.groupBy(*key_columns).count()
    n_dup = rows_per_key.filter(F.col("count") > 1).count()
    if n_dup == 0:
        log(f"OK: {table_label} PK uniqueness holds.")
        return
    log(f"WARN: {table_label} has {n_dup} duplicate PK(s). Consider investigating before proceeding.")

# Mapping from instanceRelationType to party type per your POC.
# Keys are the numeric instanceRelationType codes; values are the party type labels.
# NOTE(review): the numeric codes come from the POC and may be environment-specific - confirm before reuse.
PARTY_TYPE_MAP = {
    13271: "Person",
    2077:  "Organization",
    6926:  "Team",
    8363:  "OperatingUnit",
    9027:  "LegalEntity",
}


StatementMeta(, 1da2d576-075a-4cba-b017-6df6df2006e7, 7, Finished, Available, Finished)

StatementMeta(, 3915ba56-80c0-46fd-9659-7433a84a5a5b, 5, Finished, Available, Finished)

## Logistics: Locations & Postal Addresses - `postal_address_silver`

- Join `logisticslocation` with `logisticspostaladdress` on **location recid**.  
- Persist all address versions with `valid_from_utc`/`valid_to_utc` and a computed `is_current` flag.

In [17]:
# Postal addresses: logisticslocation joined to logisticspostaladdress on the location recid.
loc_df = safe_read_table("logisticslocation")
pad_df = safe_read_table("logisticspostaladdress")

if loc_df is None or pad_df is None:
    log("SKIP: postal_address_silver - required Bronze tables missing.")
else:
    location_columns = [
        F.col("locationid").alias("location_id"),
        F.col("description").alias("location_description"),
        F.col("ispostaladdress").alias("is_postal_address"),
        F.col("recid").alias("location_recid"),
    ]
    locations = loc_df.select(*location_columns)

    # (bronze column, silver column) pairs, listed in output order around the two validity timestamps.
    postal_before_validity = [
        ("address", "address_line"),
        ("city", "city"),
        ("countryregionid", "country_region_id"),
        ("county", "county"),
        ("districtname", "district_name"),
        ("isprivate", "is_private"),
        ("latitude", "latitude"),
        ("longitude", "longitude"),
        ("postbox", "post_box"),
        ("state", "state"),
        ("street", "street"),
        ("streetnumber", "street_number"),
        ("timezone", "timezone"),
    ]
    postal_after_validity = [
        ("zipcode", "postal_code"),
        ("privateforparty", "private_for_party"),
        ("location", "location_recid"),
    ]
    postal_columns = (
        [F.col(src).alias(dst) for src, dst in postal_before_validity]
        + [
            to_utc_ts("validfrom").alias("valid_from_utc"),
            to_utc_ts("validto").alias("valid_to_utc"),
        ]
        + [F.col(src).alias(dst) for src, dst in postal_after_validity]
    )
    postal = pad_df.select(*postal_columns)

    # A version is current when "now" lies inside its validity window (bounds inclusive).
    now_ts = F.current_timestamp()
    within_validity = (F.col("valid_from_utc") <= now_ts) & (F.col("valid_to_utc") >= now_ts)

    joined = locations.alias("L").join(postal.alias("P"), on="location_recid", how="inner")
    postal_entity = joined.withColumn("is_current", within_validity)

    write_delta(postal_entity, f"postal_address{SILVER_SUFFIX}")


StatementMeta(, 1da2d576-075a-4cba-b017-6df6df2006e7, 8, Finished, Available, Finished)

[silver] Wrote Silver table: postal_address_silver -> Files/Tables_silver/postal_address_silver


StatementMeta(, 3915ba56-80c0-46fd-9659-7433a84a5a5b, 6, Finished, Available, Finished)

[silver] Wrote Silver table: postal_address_silver -> Files/Tables_silver/postal_address_silver


## Party: Current Person Name - helper table `dirpersonname_current_silver`

- Keep **current** name per `person` based on validity window and the **latest** `validfrom`.

In [18]:
# Current person name: one row per person, taken from the name versions valid right now.
dpn_df = safe_read_table("dirpersonname")

if dpn_df is None:
    log("SKIP: dirpersonname_current_silver - source table missing.")
else:
    name_versions = dpn_df.select(
        F.col("firstname").alias("person_first_name"),
        F.col("middlename").alias("person_middle_name"),
        F.col("lastnameprefix").alias("person_last_name_prefix"),
        F.col("lastname").alias("person_last_name"),
        to_utc_ts("validto").alias("valid_to_utc"),
        to_utc_ts("validfrom").alias("valid_from_utc"),
        F.col("recid").alias("dirpersonname_recid"),
        F.col("person").alias("person_recid"),
    )

    # Keep only versions whose validity window contains "now" (bounds inclusive).
    now_ts = F.current_timestamp()
    valid_now = (F.col("valid_from_utc") <= now_ts) & (F.col("valid_to_utc") >= now_ts)

    # If several versions are valid at once, the one with the latest valid_from wins.
    dpn_cur = dedupe_latest(
        name_versions.filter(valid_now),
        pk_cols=["person_recid"],
        order_cols=["valid_from_utc"],
    )

    write_delta(dpn_cur, f"dirpersonname_current{SILVER_SUFFIX}")


StatementMeta(, 3915ba56-80c0-46fd-9659-7433a84a5a5b, 7, Finished, Available, Finished)

[silver] Wrote Silver table: dirpersonname_current_silver -> Files/Tables_silver/dirpersonname_current_silver


## Party: DirParty base - `dirparty_silver`

- Base from `dirpartytable`.  
- Left-join **primary** email/phone by reference keys stored on `dirpartytable`.  
- Enrich with **current** person name if `party` is a person.


In [19]:
# DirParty base: dirpartytable enriched with primary email/phone and the current person name.
dpt_df = safe_read_table("dirpartytable")
lea_df = safe_read_table("logisticselectronicaddress")  # email + phone in the same table
dpn_current = safe_read_table(f"dirpersonname_current{SILVER_SUFFIX}")  # produced above

if dpt_df is not None:
    # Prepare email & phone lookups from the same electronic address table.
    # Each lookup gets its own column names so both can be joined onto the party row.
    email_lkp = None
    phone_lkp = None
    if lea_df is not None:
        email_lkp = (lea_df
            .select(
                F.col("recid").alias("email_recid"),
                F.col("locator").alias("primary_email"),
                F.col("description").alias("primary_email_description"),
                F.col("isinstantmessage").alias("primary_email_is_im"),
                F.col("electronicaddressroles").alias("primary_email_purpose"),
                F.col("isprimary").alias("email_is_primary")
            )
        )
        phone_lkp = (lea_df
            .select(
                F.col("recid").alias("phone_recid"),
                F.col("locator").alias("primary_phone"),
                F.col("description").alias("primary_phone_description"),
                F.col("locatorextension").alias("primary_phone_extension"),
                F.col("ismobilephone").alias("primary_phone_is_mobile"),
                F.col("electronicaddressroles").alias("primary_phone_purpose")
            )
        )

    # Derive party_type from PARTY_TYPE_MAP so the code -> label mapping lives in one place.
    # Codes that are not in the map (including NULL) fall through to the empty string.
    rel_type = F.col("instance_relation_type")
    party_type_col = None
    for type_code, type_label in PARTY_TYPE_MAP.items():
        matches_code = rel_type == F.lit(type_code)
        if party_type_col is None:
            party_type_col = F.when(matches_code, type_label)
        else:
            party_type_col = party_type_col.when(matches_code, type_label)
    party_type_col = F.lit("") if party_type_col is None else party_type_col.otherwise("")

    party = (dpt_df
        .select(
            F.col("partynumber").alias("party_number"),
            F.col("name").alias("name"),
            F.col("namealias").alias("name_alias"),
            F.col("knownas").alias("known_as"),
            F.col("addressbooknames").alias("address_books"),
            F.col("languageid").alias("language_id"),
            F.col("instancerelationtype").alias("instance_relation_type"),
            F.col("dataareaid").alias("data_area_id"),
            F.col("primaryaddresslocation").alias("primary_address_location_recid"),
            F.col("recid").alias("party_recid"),
            F.col("primarycontactphone").alias("primary_contact_phone_recid"),
            F.col("primarycontactemail").alias("primary_contact_email_recid"),
        )
        .withColumn("party_type", party_type_col)
    )

    # Left joins keep every party even when a lookup has no matching row.
    if email_lkp is not None:
        party = party.join(email_lkp, party["primary_contact_email_recid"] == email_lkp["email_recid"], "left")
    if phone_lkp is not None:
        party = party.join(phone_lkp, party["primary_contact_phone_recid"] == phone_lkp["phone_recid"], "left")
    if dpn_current is not None:
        party = party.join(dpn_current, party["party_recid"] == dpn_current["person_recid"], "left")

    write_delta(party, f"dirparty{SILVER_SUFFIX}")

else:
    log("SKIP: dirparty_silver - source table missing.")


StatementMeta(, 1da2d576-075a-4cba-b017-6df6df2006e7, 10, Finished, Available, Finished)

[silver] Wrote Silver table: dirparty_silver -> Files/Tables_silver/dirparty_silver


StatementMeta(, 3915ba56-80c0-46fd-9659-7433a84a5a5b, 8, Finished, Available, Finished)

[silver] Wrote Silver table: dirparty_silver -> Files/Tables_silver/dirparty_silver


## Customer: `customer_silver`

- `custtable` joined to `dirparty_silver` (via `party` -> `party_recid`).  
- **Current** postal address joined by `primary_address_location_recid` and `is_current = true`.


In [20]:
# Customer - Silver curation (prod)
# custtable joined to dirparty_silver on the party recid, optionally enriched with the
# current postal address, then reduced to one row per (data_area_id, account_num).

# Do not chain .alias() onto safe_read_table(): it returns None when the table is missing,
# and the guard below must be the one to handle that case.
cust_df = safe_read_table("custtable")
dirparty_silver = safe_read_table(f"dirparty{SILVER_SUFFIX}")
postal_silver = safe_read_table(f"postal_address{SILVER_SUFFIX}")

if cust_df is not None and dirparty_silver is not None:
    C = cust_df.alias("C").select(
        F.col("accountnum").alias("account_num"),
        F.col("custgroup").alias("cust_group"),
        F.col("party").alias("party_recid"),
        F.col("recid").alias("customer_recid"),
        F.col("dataareaid").alias("data_area_id"),
    )

    P = (dirparty_silver.alias("P")
         .select(
             F.col("party_recid").alias("p_party_recid"),
             F.col("party_type"),
             F.col("name"),
             F.col("known_as"),
             F.col("language_id"),
             F.col("primary_email"),
             F.col("primary_phone"),
             F.col("primary_address_location_recid"),
             # bring person names through so Gold can use them
             F.col("person_first_name"),
             F.col("person_middle_name"),
             F.col("person_last_name"),
         ))

    # Bracket access avoids any clash between column names and DataFrame attributes.
    customers = (C
        .join(P, C["party_recid"] == P["p_party_recid"], "inner")
        .select(
            C["account_num"],
            C["cust_group"],
            C["party_recid"],
            C["customer_recid"],
            C["data_area_id"],
            P["party_type"],
            P["name"],
            P["known_as"],
            P["language_id"],
            P["primary_email"],
            P["primary_phone"],
            P["primary_address_location_recid"],
            P["person_first_name"],
            P["person_middle_name"],
            P["person_last_name"],
        ))

    if postal_silver is not None:
        A = postal_silver.alias("A").select(
            "location_recid", "is_current",
            "address_line", "street", "street_number", "city", "state",
            "county", "postal_code", "country_region_id", "latitude", "longitude",
        )
        # Only address versions flagged as current are attached; customers without one are kept.
        customers = customers.join(
            A,
            (customers["primary_address_location_recid"] == A["location_recid"]) &
            (A["is_current"] == F.lit(True)),
            "left",
        )

    # Collapse any fan-out from the joins back to one row per customer key.
    customers = dedupe_latest(
        customers,
        pk_cols=["data_area_id", "account_num"],
        order_cols=["party_recid"],
    )

    assert_pk_unique(customers, ["data_area_id", "account_num"], "customer_silver")
    write_delta(customers, f"customer{SILVER_SUFFIX}")

else:
    log("SKIP: customer_silver - required sources missing.")


StatementMeta(, 1da2d576-075a-4cba-b017-6df6df2006e7, 11, Finished, Available, Finished)

[silver] OK: customer_silver PK uniqueness holds.
[silver] Wrote Silver table: customer_silver -> Files/Tables_silver/customer_silver


StatementMeta(, 3915ba56-80c0-46fd-9659-7433a84a5a5b, 9, Finished, Available, Finished)

[silver] OK: customer_silver PK uniqueness holds.
[silver] Wrote Silver table: customer_silver -> Files/Tables_silver/customer_silver


## Sales: `salestable_silver`, `salesline_silver`, `retailchannel_silver`

- Light standardisation, UTC casting, modest dedupe.  
- Keep raw header/line structure for flexible joins in `02_silver_to_gold`.

In [21]:
# Sales header, sales line and retail channel: each table is curated independently
# and skipped on its own when its Bronze source is missing.
st_df = safe_read_table("salestable")
sl_df = safe_read_table("salesline")
rc_df = safe_read_table("retailchanneltable")

# --- Sales headers -> salestable_silver ---
if st_df is None:
    log("SKIP: salestable_silver - source table missing.")
else:
    header_columns = [
        F.col(src).alias(dst)
        for src, dst in (
            ("custaccount", "cust_account"),
            ("salesid", "sales_id"),
            ("retailchanneltable", "retail_channel_recid"),
            ("dataareaid", "data_area_id"),
        )
    ]
    sales_hdr = dedupe_latest(
        st_df.select(*header_columns),
        pk_cols=["sales_id", "data_area_id"],
        order_cols=["cust_account"],
    )
    write_delta(sales_hdr, f"salestable{SILVER_SUFFIX}")

# --- Sales lines -> salesline_silver ---
if sl_df is None:
    log("SKIP: salesline_silver - source table missing.")
else:
    line_columns = [
        F.col("itemid").alias("item_id"),
        F.col("recid").alias("line_recid"),
        cast_decimal("lineamount").alias("line_amount"),
        cast_decimal("linedisc").alias("line_discount"),
        cast_decimal("costprice").alias("cost_price"),
        F.col("currencycode").alias("currency_code"),
        F.col("linenum").alias("line_num"),
        F.col("priceunit").alias("price_unit"),
        F.col("salesid").alias("sales_id"),
        cast_decimal("salesprice").alias("sales_price"),
        cast_decimal("salesqty").alias("sales_qty"),
        F.col("salesunit").alias("sales_unit"),
        F.col("salesstatus").alias("sales_status"),
        F.col("dataareaid").alias("data_area_id"),
        to_utc_ts("createddatetime").alias("created_datetime_utc"),
        F.lit("Purchase").alias("event_type"),  # constant tag added to every sales line
        F.col("name").alias("product_name"),
    ]
    sales_line = dedupe_latest(
        sl_df.select(*line_columns),
        pk_cols=["data_area_id", "sales_id", "line_recid"],
        order_cols=["created_datetime_utc"],
    )
    write_delta(sales_line, f"salesline{SILVER_SUFFIX}")

# --- Retail channels -> retailchannel_silver ---
if rc_df is None:
    log("SKIP: retailchannel_silver - source table missing.")
else:
    channel_columns = rc_df.select(
        F.col("retailchannelid").alias("channel_id"),
        F.col("recid").alias("channel_recid"),
    )
    retail_channel = channel_columns.dropDuplicates(["channel_recid"])  # stable key for joins
    write_delta(retail_channel, f"retailchannel{SILVER_SUFFIX}")


StatementMeta(, 1da2d576-075a-4cba-b017-6df6df2006e7, 12, Finished, Available, Finished)

[silver] Wrote Silver table: salestable_silver -> Files/Tables_silver/salestable_silver
[silver] Wrote Silver table: salesline_silver -> Files/Tables_silver/salesline_silver
[silver] Wrote Silver table: retailchannel_silver -> Files/Tables_silver/retailchannel_silver


StatementMeta(, 3915ba56-80c0-46fd-9659-7433a84a5a5b, 10, Finished, Available, Finished)

[silver] Wrote Silver table: salestable_silver -> Files/Tables_silver/salestable_silver
[silver] Wrote Silver table: salesline_silver -> Files/Tables_silver/salesline_silver
[silver] Wrote Silver table: retailchannel_silver -> Files/Tables_silver/retailchannel_silver


## Loyalty: `loyalty_card_silver`

- Source `retailloyaltycard`; drop soft-deleted rows.  
- Link to `customer_silver` for `customer_id` (built from `data_area_id` + `account_num`) via `party_recid`; the join itself does not currently match on `data_area_id`.  
- Retain enrolment/block timestamps, operating unit, and dedupe by latest card change.


In [22]:
# Loyalty card - Silver curation
loyalty_card_df = safe_read_table("retailloyaltycard")
customer_silver_df = None
if table_exists(f"customer{SILVER_SUFFIX}"):
    customer_silver_df = spark.table(f"customer{SILVER_SUFFIX}") # type: ignore

if loyalty_card_df is None:
    log("SKIP: loyalty_card_silver - source table missing.")
else:
    # Soft-deleted rows are removed before any projection so they never reach Silver.
    loyalty_card_df = exclude_soft_deleted(loyalty_card_df)

    card_columns = [
        F.col("recid").alias("card_recid"),
        F.col("cardnumber").alias("card_number"),
        F.col("party").alias("party_recid"),
        F.col("dataareaid").alias("data_area_id"),
        F.col("omoperatingunitid").alias("om_operating_unit_id"),
        to_utc_ts("loyaltyenrollmentdate").alias("enrolment_date_utc"),
        to_utc_ts("loyaltyblockeddate").alias("blocked_date_utc"),
        to_utc_ts("modifieddatetime").alias("row_modified_utc"),
        to_utc_ts("createddatetime").alias("created_datetime_utc"),
    ]
    loyalty_cards = loyalty_card_df.select(*card_columns)

    if customer_silver_df is None:
        # No customer table available: keep the column so the schema stays the same.
        loyalty_cards = loyalty_cards.withColumn("customer_id", F.lit(None).cast("string"))
    else:
        # customer_id = lower-cased "<data_area_id>_<account_num>".
        customer_id_expr = F.lower(F.concat_ws("_", F.col("data_area_id"), F.col("account_num")))
        # NOTE(review): the lookup is keyed on party_recid only; when a party has several
        # customer rows, dropDuplicates keeps an arbitrary one - confirm this is intended.
        cust_lookup = (
            customer_silver_df
            .select(
                F.col("party_recid").alias("cust_party_recid"),
                customer_id_expr.alias("customer_id"),
            )
            .dropDuplicates(["cust_party_recid"])
        )
        party_match = loyalty_cards["party_recid"] == cust_lookup["cust_party_recid"]
        loyalty_cards = loyalty_cards.join(cust_lookup, party_match, "left").drop("cust_party_recid")

    # Latest version per card: modification time first, then creation, then enrolment.
    latest_change_ts = F.coalesce(
        F.col("row_modified_utc"),
        F.col("created_datetime_utc"),
        F.col("enrolment_date_utc"),
    )
    loyalty_cards = dedupe_latest(
        loyalty_cards.withColumn("_order_ts", latest_change_ts),
        ["card_recid"],
        ["_order_ts"],
    )
    loyalty_cards = loyalty_cards.drop("_order_ts", "created_datetime_utc")

    assert_pk_unique(loyalty_cards, ["card_recid"], "loyalty_card_silver")
    write_delta(loyalty_cards, f"loyalty_card{SILVER_SUFFIX}")


StatementMeta(, 1da2d576-075a-4cba-b017-6df6df2006e7, 13, Finished, Available, Finished)

[silver] OK: loyalty_card_silver PK uniqueness holds.
[silver] Wrote Silver table: loyalty_card_silver -> Files/Tables_silver/loyalty_card_silver


StatementMeta(, 3915ba56-80c0-46fd-9659-7433a84a5a5b, 11, Finished, Available, Finished)

[silver] OK: loyalty_card_silver PK uniqueness holds.
[silver] Wrote Silver table: loyalty_card_silver -> Files/Tables_silver/loyalty_card_silver


## Loyalty: `loyalty_card_tier_silver`

- Source `retailloyaltycardtier`; keep history, normalise open-ended `valid_to`.  
- Compute `is_current` snapshot using UTC now.  
- Preserve affiliation reference for later program lookups.


In [23]:
# Loyalty card tier - Silver curation
loyalty_tier_df = safe_read_table("retailloyaltycardtier")

if loyalty_tier_df is None:
    log("SKIP: loyalty_card_tier_silver - source table missing.")
else:
    loyalty_tier_df = exclude_soft_deleted(loyalty_tier_df)

    tier_columns = [
        F.col("recid").alias("rec_id"),
        F.col("loyaltycard").alias("card_recid"),
        F.col("loyaltytier").alias("loyalty_tier"),
        F.col("affiliation").alias("affiliation_id"),
        F.col("dataareaid").alias("data_area_id"),
        to_utc_ts("validfrom").alias("valid_from_utc"),
        to_utc_ts("validto").alias("valid_to_utc_raw"),
        to_utc_ts("modifieddatetime").alias("row_modified_utc"),
    ]
    tiers = loyalty_tier_df.select(*tier_columns)

    # Open-ended validity: a NULL valid_to, or one at/beyond either sentinel, is stored as NULL.
    sentinel_low = F.to_timestamp(F.lit("1900-01-01 00:00:00"))
    sentinel_high = F.to_timestamp(F.lit("9999-12-30 00:00:00"))
    raw_valid_to = F.col("valid_to_utc_raw")
    is_open_ended = (
        raw_valid_to.isNull()
        | (raw_valid_to <= sentinel_low)
        | (raw_valid_to >= sentinel_high)
    )
    normalised_valid_to = F.when(is_open_ended, F.lit(None).cast("timestamp")).otherwise(raw_valid_to)
    tiers = tiers.withColumn("valid_to_utc", normalised_valid_to).drop("valid_to_utc_raw")

    # is_current: already started and not yet ended (an open-ended valid_to counts as not ended).
    current_ts = F.current_timestamp()
    has_started = F.col("valid_from_utc") <= current_ts
    not_ended = F.coalesce(F.col("valid_to_utc"), current_ts) >= current_ts
    tiers = tiers.withColumn("is_current", has_started & not_ended)

    # Latest version per tier row: modification time, else valid_to, else valid_from.
    latest_change_ts = F.coalesce(
        F.col("row_modified_utc"),
        F.col("valid_to_utc"),
        F.col("valid_from_utc"),
    )
    tiers = dedupe_latest(tiers.withColumn("_order_ts", latest_change_ts), ["rec_id"], ["_order_ts"])
    tiers = tiers.drop("_order_ts")

    assert_pk_unique(tiers, ["rec_id"], "loyalty_card_tier_silver")
    write_delta(tiers, f"loyalty_card_tier{SILVER_SUFFIX}")


StatementMeta(, 1da2d576-075a-4cba-b017-6df6df2006e7, 14, Finished, Available, Finished)

[silver] OK: loyalty_card_tier_silver PK uniqueness holds.
[silver] Wrote Silver table: loyalty_card_tier_silver -> Files/Tables_silver/loyalty_card_tier_silver


StatementMeta(, 3915ba56-80c0-46fd-9659-7433a84a5a5b, 12, Finished, Available, Finished)

[silver] OK: loyalty_card_tier_silver PK uniqueness holds.
[silver] Wrote Silver table: loyalty_card_tier_silver -> Files/Tables_silver/loyalty_card_tier_silver


## Loyalty: `loyalty_point_trans_silver`

- Source `retailloyaltycardrewardpointtrans`; clean soft deletes and cast decimals.  
- Join cards by `card_number` to carry `card_recid` (the join does not currently match on `data_area_id`).  
- Derive `event_timestamp_utc` from entry/start/modification times.


In [24]:
# Loyalty point transactions - Silver curation
loyalty_trans_df = safe_read_table("retailloyaltycardrewardpointtrans")
loyalty_card_silver_df = None
if table_exists(f"loyalty_card{SILVER_SUFFIX}"):
    loyalty_card_silver_df = spark.table(f"loyalty_card{SILVER_SUFFIX}") # type: ignore

if loyalty_trans_df is None:
    log("SKIP: loyalty_point_trans_silver - source table missing.")
else:
    loyalty_trans_df = exclude_soft_deleted(loyalty_trans_df)

    trans_columns = [
        F.col("recid").alias("rec_id"),
        F.col("cardnumber").alias("card_number"),
        F.col("dataareaid").alias("data_area_id"),
        F.col("rewardpoint").alias("reward_point_id"),
        cast_decimal("rewardpointamountqty").alias("points_delta"),
        cast_decimal("remaining").alias("remaining_balance"),
        to_utc_ts("expirationdate").alias("expiration_date"),
        F.col("salesid").alias("sales_id"),
        F.col("storeid").alias("store_id"),
        F.col("terminalid").alias("terminal_id"),
        F.col("transactionid").alias("transaction_id"),
        F.col("staffid").alias("staff_id"),
        F.col("loyaltytransactiontype").alias("loyalty_transaction_type"),
        F.col("loyaltytier").alias("loyalty_tier"),
        to_utc_ts("entrydate").alias("entry_date_utc"),
        to_utc_ts("startdate").alias("start_date_utc"),
        to_utc_ts("modifieddatetime").alias("row_modified_utc"),
    ]
    trans = loyalty_trans_df.select(*trans_columns)

    if loyalty_card_silver_df is None:
        # No card table available: keep the column so the schema stays the same.
        trans = trans.withColumn("card_recid", F.lit(None).cast("long"))
    else:
        # NOTE(review): the lookup is keyed on card_number only; if a number maps to several
        # card rows, dropDuplicates keeps an arbitrary one - confirm this is intended.
        card_lookup = (
            loyalty_card_silver_df
            .select("card_recid", "card_number")
            .dropDuplicates(["card_number"])
        )
        trans = trans.join(card_lookup, on="card_number", how="left")

    # Event time: entry date, else start date, else row modification time.
    event_ts = F.coalesce(
        F.col("entry_date_utc"),
        F.col("start_date_utc"),
        F.col("row_modified_utc"),
    )
    trans = trans.withColumn("event_timestamp_utc", event_ts)

    # Final projection fixes the Silver column order; card_number and the raw
    # entry/start timestamps are intentionally left out.
    output_columns = [
        "rec_id",
        "card_recid",
        "reward_point_id",
        "points_delta",
        "remaining_balance",
        "expiration_date",
        "sales_id",
        "store_id",
        "terminal_id",
        "transaction_id",
        "staff_id",
        "loyalty_transaction_type",
        "loyalty_tier",
        "data_area_id",
        "event_timestamp_utc",
        "row_modified_utc",
    ]
    trans = trans.select(*output_columns)

    # Latest version per transaction row: modification time, else event time.
    latest_change_ts = F.coalesce(F.col("row_modified_utc"), F.col("event_timestamp_utc"))
    trans = dedupe_latest(trans.withColumn("_order_ts", latest_change_ts), ["rec_id"], ["_order_ts"])
    trans = trans.drop("_order_ts")

    assert_pk_unique(trans, ["rec_id"], "loyalty_point_trans_silver")
    write_delta(trans, f"loyalty_point_trans{SILVER_SUFFIX}")


StatementMeta(, 1da2d576-075a-4cba-b017-6df6df2006e7, 15, Finished, Available, Finished)

[silver] OK: loyalty_point_trans_silver PK uniqueness holds.
[silver] Wrote Silver table: loyalty_point_trans_silver -> Files/Tables_silver/loyalty_point_trans_silver


StatementMeta(, 3915ba56-80c0-46fd-9659-7433a84a5a5b, 13, Finished, Available, Finished)

[silver] OK: loyalty_point_trans_silver PK uniqueness holds.
[silver] Wrote Silver table: loyalty_point_trans_silver -> Files/Tables_silver/loyalty_point_trans_silver


## Loyalty: `loyalty_program_affiliation_silver`

- Source `retailaffiliation`; drop soft deletes.  
- Surface program name, type, and key flags per `recid`.  
- Provides lookup for Gold `supporting_LoyaltyPrograms`.


In [25]:
# Loyalty program affiliation - Silver curation (optional)
loyalty_program_df = safe_read_table("retailaffiliation")

if loyalty_program_df is None:
    log("SKIP: loyalty_program_affiliation_silver - source table missing.")
else:
    loyalty_program_df = exclude_soft_deleted(loyalty_program_df)

    # (bronze column, silver column) pairs copied as-is, in output order.
    program_renames = (
        ("recid", "program_id"),
        ("name", "program_name"),
        ("affiliationtype", "affiliation_type"),
        ("poolrelatedcards", "pool_related_cards"),
        ("hideinchannels", "hide_in_channels"),
        ("dataareaid", "data_area_id"),
    )
    program_columns = [F.col(src).alias(dst) for src, dst in program_renames]
    program_columns.append(to_utc_ts("modifieddatetime").alias("row_modified_utc"))

    # One row per program_id; which duplicate survives is not specified.
    programs = loyalty_program_df.select(*program_columns).dropDuplicates(["program_id"])

    assert_pk_unique(programs, ["program_id"], "loyalty_program_affiliation_silver")
    write_delta(programs, f"loyalty_program_affiliation{SILVER_SUFFIX}")


StatementMeta(, 1da2d576-075a-4cba-b017-6df6df2006e7, 16, Finished, Available, Finished)

[silver] OK: loyalty_program_affiliation_silver PK uniqueness holds.
[silver] Wrote Silver table: loyalty_program_affiliation_silver -> Files/Tables_silver/loyalty_program_affiliation_silver


StatementMeta(, 3915ba56-80c0-46fd-9659-7433a84a5a5b, 14, Finished, Available, Finished)

[silver] OK: loyalty_program_affiliation_silver PK uniqueness holds.
[silver] Wrote Silver table: loyalty_program_affiliation_silver -> Files/Tables_silver/loyalty_program_affiliation_silver


## Quality checks & summary (quick)

In [26]:
from itertools import chain

def count_or_zero(name: str) -> int:
    """Return the row count of table ``name``, or 0 if ``table_exists`` says it is absent."""
    if not table_exists(name):
        return 0
    return spark.table(name).count() # type: ignore

# Row-count summary: one (table name, count) tuple per Silver table that exists;
# tables that were never created are logged as skipped.
summary = []
for t in [
    f"{base}{SILVER_SUFFIX}"
    for base in (
        "postal_address",
        "dirpersonname_current",
        "dirparty",
        "customer",
        "salestable",
        "salesline",
        "retailchannel",
        "loyalty_card",
        "loyalty_card_tier",
        "loyalty_point_trans",
        "loyalty_program_affiliation",
    )
]:
    if not table_exists(t):
        log(f"Table {t} not created (skipped).")
        continue
    c = count_or_zero(t)
    summary.append((t, c))
    log(f"Table {t} count = {c}")

# Example PK uniqueness checks (non-fatal warnings)
# Each table is checked inside its own try/except: if assert_pk_unique (or the
# table read) raises for one table, the error is logged and the remaining
# tables are still checked. Previously a single try wrapped every check, so the
# first exception silently skipped all the checks after it.
for _base, _pk_cols in [
    ("customer", ["data_area_id", "account_num"]),
    ("salestable", ["data_area_id", "sales_id"]),
    ("salesline", ["data_area_id", "sales_id", "line_recid"]),
    ("loyalty_card", ["card_recid"]),
    ("loyalty_card_tier", ["rec_id"]),
    ("loyalty_point_trans", ["rec_id"]),
    ("loyalty_program_affiliation", ["program_id"]),
]:
    # Table name follows the configured suffix; the label passed to
    # assert_pk_unique keeps the literal "_silver" used in the existing log lines.
    _table = f"{_base}{SILVER_SUFFIX}"
    try:
        if table_exists(_table):
            assert_pk_unique(spark.table(_table), _pk_cols, f"{_base}_silver") # type: ignore
    except Exception as e:
        # Deliberately broad: this cell is a reporting boundary and must not fail the run.
        log(f"PK check error (non-fatal): {e}")


StatementMeta(, 1da2d576-075a-4cba-b017-6df6df2006e7, 17, Finished, Available, Finished)

[silver] Table postal_address_silver count = 1927
[silver] Table dirpersonname_current_silver count = 777
[silver] Table dirparty_silver count = 1478
[silver] Table customer_silver count = 338
[silver] Table salestable_silver count = 14618
[silver] Table salesline_silver count = 185716
[silver] Table retailchannel_silver count = 49
[silver] Table loyalty_card_silver count = 6
[silver] Table loyalty_card_tier_silver count = 8
[silver] Table loyalty_point_trans_silver count = 4
[silver] Table loyalty_program_affiliation_silver count = 5
[silver] OK: customer_silver PK uniqueness holds.
[silver] OK: salestable_silver PK uniqueness holds.
[silver] OK: salesline_silver PK uniqueness holds.
[silver] OK: loyalty_card_silver PK uniqueness holds.
[silver] OK: loyalty_card_tier_silver PK uniqueness holds.
[silver] OK: loyalty_point_trans_silver PK uniqueness holds.
[silver] OK: loyalty_program_affiliation_silver PK uniqueness holds.


StatementMeta(, 3915ba56-80c0-46fd-9659-7433a84a5a5b, 15, Finished, Available, Finished)

[silver] Table postal_address_silver count = 1927
[silver] Table dirpersonname_current_silver count = 777
[silver] Table dirparty_silver count = 1478
[silver] Table customer_silver count = 338
[silver] Table salestable_silver count = 14618
[silver] Table salesline_silver count = 185716
[silver] Table retailchannel_silver count = 49
[silver] Table loyalty_card_silver count = 6
[silver] Table loyalty_card_tier_silver count = 8
[silver] Table loyalty_point_trans_silver count = 4
[silver] Table loyalty_program_affiliation_silver count = 5
[silver] OK: customer_silver PK uniqueness holds.
[silver] OK: salestable_silver PK uniqueness holds.
[silver] OK: salesline_silver PK uniqueness holds.
[silver] OK: loyalty_card_silver PK uniqueness holds.
[silver] OK: loyalty_card_tier_silver PK uniqueness holds.
[silver] OK: loyalty_point_trans_silver PK uniqueness holds.
[silver] OK: loyalty_program_affiliation_silver PK uniqueness holds.
