### Read Silver tables

In [0]:
from pyspark.sql import functions as F

# Schema that holds the Silver-layer source tables.
SILVER_DB = "olist_ecommerce.silver"


def _read_silver(table_name):
    """Return the table `table_name` from the Silver schema as a DataFrame."""
    return spark.table(f"{SILVER_DB}.{table_name}")


# Source DataFrames used by the rest of the notebook.
orders_df = _read_silver("orders")
order_items_df = _read_silver("order_items")
payments_df = _read_silver("payments")
customers_df = _read_silver("customers")

### Aggregate order_items → order level

In [0]:
# Per-order item metrics: summed price, summed freight, and the number of
# non-null order_item_id values.
_item_metrics = [
    F.sum("price").alias("order_value"),
    F.sum("freight_value").alias("total_freight"),
    F.count("order_item_id").alias("total_items"),
]

# Collapse order_items to one row per order_id.
order_items_agg = order_items_df.groupBy("order_id").agg(*_item_metrics)

### Aggregate payments → order level

In [0]:
# Per-order payment metrics: summed payment value, number of distinct
# payment types, and the largest payment_installments value.
_payment_metrics = [
    F.sum("payment_value").alias("total_payment"),
    F.countDistinct("payment_type").alias("payment_methods_used"),
    F.max("payment_installments").alias("installments_count"),
]

# Collapse payments to one row per order_id.
payments_agg = payments_df.groupBy("order_id").agg(*_payment_metrics)

### Join everything to orders

In [0]:
# Only the customer key and its unique-id column are carried into the fact table.
_customer_keys_df = customers_df.select("customer_id", "customer_unique_id")

# Left joins keep every row of orders_df; orders with no match in an
# aggregate or in customers get nulls in the joined columns.
fact_orders_df = orders_df.join(order_items_agg, on="order_id", how="left")
fact_orders_df = fact_orders_df.join(payments_agg, on="order_id", how="left")
fact_orders_df = fact_orders_df.join(_customer_keys_df, on="customer_id", how="left")

### Derived business metrics

In [0]:
# Freight as a fraction of order value. The when() has no otherwise() branch,
# so rows whose order_value is null or not greater than zero get null.
_has_positive_value = F.col("order_value") > 0
_freight_share = F.col("total_freight") / F.col("order_value")

fact_orders_df = fact_orders_df.withColumn(
    "avg_freight_ratio",
    F.when(_has_positive_value, _freight_share),
)

### Write to Gold

In [0]:
# Save the fact table in Delta format under the Gold schema, using
# overwrite mode so existing table contents are replaced.
(
    fact_orders_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("olist_ecommerce.gold.fact_orders")
)
