### Read required Silver tables

In [0]:
from pyspark.sql import functions as F

# Schema prefixes for the Silver and Gold layers.
SILVER_DB = "olist_ecommerce.silver"
GOLD_DB = "olist_ecommerce.gold"


def _read_silver(table_name):
    """Return the table `table_name` from the SILVER_DB schema as a DataFrame."""
    return spark.table(f"{SILVER_DB}.{table_name}")


# Source tables used to build the sales fact.
order_items_df = _read_silver("order_items")
orders_df = _read_silver("orders")
products_df = _read_silver("products")
sellers_df = _read_silver("sellers")
customers_df = _read_silver("customers")

### Join core tables

In [0]:
# Narrow each lookup table to its join key plus the attributes the fact needs.
orders_lookup = orders_df.select(
    "order_id",
    "customer_id",
    "purchase_timestamp",
    "order_status",
    "is_late_delivery",
)
products_lookup = products_df.select(
    "product_id",
    "product_category_name",
    "product_weight_g",
    "product_volume_cm3",
)
sellers_lookup = sellers_df.select("seller_id", "seller_state")
customers_lookup = customers_df.select("customer_id", "customer_unique_id")

# One row per order item. Left joins keep every item row even when a lookup
# has no matching key (the looked-up columns are then null).
# NOTE(review): customer_id is presumably supplied by the orders projection,
# so the orders join has to run before the customers join — confirm against
# the order_items schema.
fact_sales_df = (
    order_items_df
    .join(orders_lookup, on="order_id", how="left")
    .join(products_lookup, on="product_id", how="left")
    .join(sellers_lookup, on="seller_id", how="left")
    .join(customers_lookup, on="customer_id", how="left")
)

### Add audit & partition columns

In [0]:
# Calendar date derived from the purchase timestamp.
sales_date_col = F.to_date(F.col("purchase_timestamp"))
# Timestamp recorded for this run of the notebook.
ingestion_ts_col = F.current_timestamp()

# withColumn replaces a same-named column if one already exists, so
# re-running this cell does not duplicate columns.
fact_sales_df = fact_sales_df.withColumn("sales_date", sales_date_col)
fact_sales_df = fact_sales_df.withColumn("ingestion_ts", ingestion_ts_col)

### Final column selection

In [0]:
# Output columns of the fact table, grouped the same way they are ordered.
id_columns = [
    "order_id",
    "order_item_id",
    "product_id",
    "seller_id",
    "customer_id",
    "customer_unique_id",
]
descriptive_columns = ["product_category_name", "seller_state"]
value_columns = ["price", "freight_value", "item_total_value", "freight_ratio"]
product_size_columns = ["product_weight_g", "product_volume_cm3"]
order_and_audit_columns = [
    "order_status",
    "is_late_delivery",
    "sales_date",
    "ingestion_ts",
]

fact_sales_columns = (
    id_columns
    + descriptive_columns
    + value_columns
    + product_size_columns
    + order_and_audit_columns
)

# Project to exactly these columns, in this order; everything else
# (e.g. purchase_timestamp) is dropped here.
fact_sales_final = fact_sales_df.select(*fact_sales_columns)

### Write to Gold

In [0]:
# Persist the fact as a Delta table in the Gold schema. The target name is
# built from GOLD_DB so the schema is defined in exactly one place; it
# resolves to "olist_ecommerce.gold.fact_sales".
# mode("overwrite") replaces the table contents on every run.
(
    fact_sales_final.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(f"{GOLD_DB}.fact_sales")
)