# Gold Layer - Fact Detail

In [0]:
%run ./00_Setup_Environment

In [0]:
# Line-level sales rows from the silver layer; this is the driving dataset
# for the fact table built in the next cell.
df_sales_detail = spark.read.table("silver.sales_detail")

# Gold-layer lookups used to resolve surrogate keys (order_key, product_key).
df_fact_sales = spark.read.table("gold.fact_sales")
df_dim_product = spark.read.table("gold.dim_product")


In [0]:
# Build the detail-grain fact: one row per silver sales-detail line that has
# a matching order in gold.fact_sales AND a matching product in
# gold.dim_product (both joins are inner, so unmatched lines are dropped).

# Join predicates, named so the join chain below reads top to bottom.
_same_order = col("sd.order_id") == col("fs.order_id")
_same_product = col("sd.product_id") == col("p.product_id")

# Cost and profit expressions, shared by the two derived measures.
_total_cost = col("p.standard_cost") * col("sd.quantity")
_gross_profit = col("sd.line_total") - _total_cost
_has_cost_basis = (col("p.standard_cost") > 0) & (col("sd.quantity") > 0)

# Sequential surrogate key assigned in detail_id order.
# NOTE(review): the window has no partitionBy, so Spark is expected to sort
# every row in a single partition - confirm this is acceptable at current volume.
_detail_key = row_number().over(Window.orderBy(col("sd.detail_id")))

# Columns carried over unchanged from the silver detail rows.
_detail_columns = [
    col(f"sd.{name}")
    for name in (
        "order_id",
        "quantity",
        "unit_price",
        "discount_percent",
        "subtotal",
        "discount_amount",
        "line_total",
        "detail_modified_date",
    )
]

df_fact_detail = (
    df_sales_detail.alias("sd")
    .join(
        # Only the key pair is needed from the order fact.
        df_fact_sales.select("order_key", "order_id").alias("fs"),
        _same_order,
        "inner",
    )
    .join(df_dim_product.alias("p"), _same_product, "inner")
    .select(
        _detail_key.alias("detail_key"),
        # Surrogate keys resolved through the two joins.
        col("fs.order_key"),
        col("p.product_key"),
        *_detail_columns,
        # Line total minus (standard cost x quantity).
        _gross_profit.alias("gross_profit"),
        # Gross profit as a percentage of total standard cost; left NULL when
        # the cost or the quantity is not positive, which avoids dividing by zero.
        # NOTE(review): the divisor is cost, not revenue - confirm that this is
        # the intended definition of "profit margin".
        when(_has_cost_basis, _gross_profit / _total_cost * 100)
        .otherwise(None)
        .alias("profit_margin_percentage"),
    )
)

In [0]:
#df_fact_detail.limit(10).display()

In [0]:
# Publish the detail fact twice - as Delta files under the gold path and as
# the catalog table gold.fact_detail - then log the row count.
# gold_path and log_etl are not defined in this notebook (presumably they come
# from the %run setup notebook - verify there).
path = f"{gold_path}/fact_detail"

# df_fact_detail is lazy: without persisting, each of the three actions below
# (two writes and a count) would re-run both joins and the global row_number()
# sort, and the copies / logged count could disagree if the recomputation
# differs. Persisting lets all three reuse a single materialization.
df_fact_detail.persist()
try:
    (
        df_fact_detail.write
        .mode("overwrite")
        .format("delta")
        .option("overwriteSchema", "true")
        .save(path)
    )

    # Format is spelled out so the catalog table matches the path write above
    # rather than depending on the session's default data source.
    (
        df_fact_detail.write
        .mode("overwrite")
        .format("delta")
        .option("overwriteSchema", "true")
        .saveAsTable("gold.fact_detail")
    )

    count = df_fact_detail.count()
finally:
    # Always release the cached data, even when a write fails.
    df_fact_detail.unpersist()

# Reached only when both writes and the count succeeded.
log_etl("fact_detail", "gold", "SUCCESS", count)

print(f"gold.fact_detail criada com {count} registros")