# Gold Layer - Data Marts e Agregações

In [0]:
%run ./00_Setup_Environment

In [0]:
# Build the session-scoped temporary view `temp_date_customer`, which the
# mart queries below join to by `order_key`.
#
# The view takes order-header columns from gold.fact_sales and enriches them
# with calendar attributes from gold.dim_date (matched on
# order_date_key = date_key) and customer attributes from gold.dim_customer
# (matched on customer_key).
#
# Both joins are inner joins, so an order with no matching date or customer
# row does not appear in the view.
# NOTE(review): presumably date_key and customer_key are unique in their
# dimensions, giving one view row per fact_sales row - confirm.
spark.sql("""
    CREATE OR REPLACE TEMPORARY VIEW temp_date_customer AS
    SELECT 
        s.order_key,
        s.order_id,
        s.order_date_key,
        s.is_online_order,
        s.order_status,
        d.year,
        d.month,
        d.month_name,
        d.date,
        d.quarter,
        d.year_quarter,
        c.customer_key,
        c.customer_id,
        c.is_current
    FROM gold.fact_sales s
    JOIN gold.dim_date d
        ON s.order_date_key = d.date_key
    JOIN gold.dim_customer c
        ON s.customer_key = c.customer_key
""")

In [0]:
# Sales-by-category mart: one row per (product_category, year, month).
#
# Source rows come from gold.fact_detail, joined to gold.dim_product on
# product_key for the category and to the temp_date_customer view on
# order_key for the calendar columns. The view must already exist in this
# Spark session.
#
# Output columns:
#   total_orders  - distinct order_id values in the group
#   total_units   - sum of quantity
#   total_revenue - sum of line_total, rounded to 2 decimals
#   total_profit  - sum of gross_profit, rounded to 2 decimals
#   avg_margin    - plain (unweighted) mean of the per-row
#                   profit_margin_percentage, rounded to 2 decimals
# NOTE(review): avg_margin is not revenue-weighted, unlike a
# SUM(gross_profit) / SUM(line_total) ratio - confirm this is intended.
df_sales_by_category = spark.sql("""
    SELECT 
        p.product_category,
        d.year,
        d.month,
        d.month_name,
        COUNT(DISTINCT f.order_id) AS total_orders,
        SUM(f.quantity) AS total_units,
        ROUND(SUM(f.line_total), 2) AS total_revenue,
        ROUND(SUM(f.gross_profit), 2) AS total_profit,
        ROUND(AVG(f.profit_margin_percentage), 2) AS avg_margin
    FROM gold.fact_detail f
    JOIN gold.dim_product p
        ON f.product_key = p.product_key
    JOIN temp_date_customer d
        ON f.order_key = d.order_key
    GROUP BY p.product_category, d.year, d.month, d.month_name
    ORDER BY d.year DESC, d.month DESC, total_revenue DESC
""")

# Preview is commented out; uncomment to inspect the first rows.
#df_sales_by_category.limit(10).display()

In [0]:
# Save the sales-by-category mart in two places:
#   1. as Delta files under {gold_path}/mart_sales_by_category
#   2. as the metastore table gold.mart_sales_by_category
# Both writes fully overwrite existing data and allow the schema to change.
path = f"{gold_path}/mart_sales_by_category"
df_sales_by_category.write \
    .mode("overwrite") \
    .format("delta") \
    .option("overwriteSchema", "true") \
    .save(path)

df_sales_by_category.write.mode("overwrite").option("overwriteSchema", "true").saveAsTable("gold.mart_sales_by_category")
# Confirmation message, matching the other mart save cells in this notebook.
print("gold.mart_sales_by_category criada")

In [0]:
# Top-customers mart: the 100 customers with the highest total spend.
#
# Source rows are order lines from gold.fact_detail (alias f), joined on
# order_key to the temp_date_customer view (alias o), which supplies the
# customer_id and order date. The view must already exist in this session.
#
# Output columns (one row per customer_id):
#   total_orders             - distinct order_id values
#   total_units              - sum of quantity
#   total_spent              - sum of line_total, rounded to 2 decimals
#   avg_order_value          - total spend divided by distinct orders
#   last_purchase_date       - most recent order date
#   days_since_last_purchase - days between today and last_purchase_date
df_top_customers = spark.sql("""
    SELECT
        o.customer_id,
        COUNT(DISTINCT f.order_id) AS total_orders,
        SUM(f.quantity) AS total_units,
        ROUND(SUM(f.line_total), 2) AS total_spent,
        -- Revenue per order. AVG(line_total) would be the average detail-line
        -- value, because fact_detail may hold several lines per order.
        ROUND(
            SUM(f.line_total) / NULLIF(COUNT(DISTINCT f.order_id), 0),
            2
        ) AS avg_order_value,
        MAX(o.date) AS last_purchase_date,
        DATEDIFF(CURRENT_DATE(), MAX(o.date)) AS days_since_last_purchase
    FROM gold.fact_detail f
    JOIN temp_date_customer o
        ON f.order_key = o.order_key
    -- Group by customer_id only. Adding is_current to the grouping would
    -- split a customer whose orders reference both current and non-current
    -- dim_customer rows into two output rows with partial totals.
    GROUP BY o.customer_id
    ORDER BY total_spent DESC
    LIMIT 100
""")
df_top_customers.limit(10).display()

In [0]:
# Save the top-customers mart.
path = f"{gold_path}/mart_top_customers"

# 1) Delta files at the gold-layer path (full overwrite, schema may change).
(
    df_top_customers.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .save(path)
)

# 2) Metastore table with the same overwrite semantics.
(
    df_top_customers.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("gold.mart_top_customers")
)
print("gold.mart_top_customers criada")

In [0]:
# Monthly-performance mart: one row per calendar month.
#
# Source rows are order lines from gold.fact_detail (alias f), joined on
# order_key to the temp_date_customer view (alias d), which supplies the
# calendar columns, customer_key and the is_online_order flag. The view must
# already exist in this session.
#
# Output columns:
#   orders           - distinct order_id values
#   unique_customers - distinct customer_key values
#   units_sold       - sum of quantity
#   revenue          - sum of line_total, rounded to 2 decimals
#   gross_profit     - sum of gross_profit, rounded to 2 decimals
#   profit_margin    - gross_profit / revenue * 100; NULL when revenue is 0
#   online_orders    - distinct orders where is_online_order is true
#   store_orders     - distinct orders where is_online_order is false
# Orders whose is_online_order is NULL fall into neither channel column.
df_monthly_performance = spark.sql("""
    SELECT
        d.year,
        d.month,
        d.month_name,
        d.quarter,
        d.year_quarter,
        COUNT(DISTINCT f.order_id) AS orders,
        COUNT(DISTINCT d.customer_key) AS unique_customers,
        SUM(f.quantity) AS units_sold,
        ROUND(SUM(f.line_total), 2) AS revenue,
        ROUND(SUM(f.gross_profit), 2) AS gross_profit,
        ROUND((SUM(f.gross_profit) / NULLIF(SUM(f.line_total), 0)) * 100, 2) AS profit_margin,
        -- Count distinct orders per channel. The rows here are detail lines,
        -- so summing a 1/0 flag would count lines and would not reconcile
        -- with the `orders` column.
        COUNT(DISTINCT CASE WHEN d.is_online_order THEN f.order_id END) AS online_orders,
        COUNT(DISTINCT CASE WHEN NOT d.is_online_order THEN f.order_id END) AS store_orders
    FROM gold.fact_detail f
    JOIN temp_date_customer d ON f.order_key = d.order_key
    GROUP BY d.year, d.month, d.month_name, d.quarter, d.year_quarter
    ORDER BY d.year DESC, d.month DESC
""")
df_monthly_performance.limit(10).display()

In [0]:
# Save the monthly-performance mart.
path = f"{gold_path}/mart_monthly_performance"

# 1) Delta files at the gold-layer path (full overwrite, schema may change).
(
    df_monthly_performance.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .save(path)
)

# 2) Metastore table with the same overwrite semantics.
(
    df_monthly_performance.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("gold.mart_monthly_performance")
)
print("gold.mart_monthly_performance criada")