# Silver Layer - Transformação de Vendas: Order

In [0]:
%run ./00_Setup_Environment

In [0]:
# Bronze input: the sales order header table.
df_header = spark.read.table("bronze.sales_order_header")

# Silver input: customer addresses, already transformed upstream.
df_customer_address = spark.read.table("silver.customer_address")

In [0]:
#df_header.limit(10).display()

In [0]:
# Build the silver orders DataFrame: the bronze order header left-joined to the
# customer address table on customer id, so every header row is kept even when
# no address row matches.


def _to_decimal_18_2(source_column, target_name):
    """Replace ',' with '.' in source_column, cast to decimal(18,2), alias it."""
    normalized = regexp_replace(col(source_column), ",", ".")
    return normalized.cast("decimal(18,2)").alias(target_name)


# Conditions reused in the projection below.
_bills_to_main = col("h.BillToAddressID") == col("ca.main_address_id")
_ships_to_shipping = col("h.ShipToAddressID") == col("ca.shipping_address_id")
_order_not_in_future = col("h.OrderDate") <= current_date()

_order_columns = [
    # Identifier
    col("h.SalesOrderID").alias("order_id"),
    # Foreign keys
    col("h.CustomerID").alias("customer_id"),
    col("h.ShipToAddressID").alias("ship_to_address_id"),
    col("h.BillToAddressID").alias("bill_to_address_id"),
    # Dates
    col("h.OrderDate").cast("date").alias("order_date"),
    col("h.ShipDate").cast("date").alias("ship_date"),
    col("h.DueDate").cast("date").alias("due_date"),
    year(col("h.OrderDate")).alias("order_year"),
    month(col("h.OrderDate")).alias("order_month"),
    # Addresses
    # bill_address has no fallback: it is null unless the bill-to id equals
    # the customer's main address id.
    when(_bills_to_main, col("ca.main_address")).alias("bill_address"),
    # ship_address falls back to the main address when the ship-to id is not
    # the customer's shipping address id.
    when(_ships_to_shipping, col("ca.shipping_address"))
    .otherwise(col("ca.main_address"))
    .alias("ship_address"),
    # Order information
    col("h.Status").alias("order_status"),
    col("h.OnlineOrderFlag").alias("is_online_order"),
    col("h.PurchaseOrderNumber").alias("purchase_order_number"),
    # Order totals (from the header)
    _to_decimal_18_2("h.SubTotal", "order_subtotal"),
    _to_decimal_18_2("h.TaxAmt", "order_tax"),
    _to_decimal_18_2("h.Freight", "order_freight"),
    _to_decimal_18_2("h.TotalDue", "order_total"),
    # Metadata
    col("h.ModifiedDate").cast("timestamp").alias("header_modified_date"),
    current_timestamp().alias("processed_timestamp"),
    # True when the order date is not after the current date, False otherwise.
    when(_order_not_in_future, True).otherwise(False).alias("valid_date"),
]

df_silver_orders = (
    df_header.alias("h")
    .join(
        df_customer_address.alias("ca"),
        col("h.CustomerID") == col("ca.customer_id"),
        "left",
    )
    .select(*_order_columns)
)


In [0]:
# Show the transformed orders in the notebook output for visual inspection
# (presumably the Databricks DataFrame display helper; confirm).
df_silver_orders.display()

In [0]:
# Persist the silver orders and record the load:
#   1. write the DataFrame in Delta format under {silver_path}/sales_orders,
#      partitioned by order_year and order_month;
#   2. switch to adventureworks.silver and write it again as the table
#      "sales_orders";
#   3. count the rows, report them through log_etl and print a summary.
# silver_path and log_etl are not defined in this notebook; presumably they
# come from the setup notebook run at the top (confirm).
path = f"{silver_path}/sales_orders"

# The writer chain is wrapped in parentheses so each call sits on its own line
# without backslashes. The previous version was missing a backslash after
# partitionBy(...), which detached .save(path) and made the cell fail to parse.
(
    df_silver_orders.write
    .mode("overwrite")
    .format("delta")
    .option("overwriteSchema", "true")
    .partitionBy("order_year", "order_month")
    .save(path)
)

spark.sql("""USE adventureworks.silver""")
# NOTE(review): unlike the path write above, this table write sets no explicit
# format or partitioning; confirm that difference is intended.
df_silver_orders.write.mode("overwrite").option("overwriteSchema", "true").saveAsTable("sales_orders")

# NOTE(review): the count is taken from the DataFrame after both writes, not
# from the written output; confirm the extra evaluation is acceptable.
count = df_silver_orders.count()
log_etl("orders", "silver", "SUCCESS", count)

print(f" Silver Sales Orders: {count} registros")