# Bronze to Silver ELT: Order Data Transformation

This notebook cleans and enriches order data from the bronze layer (`apjtechup.bronze.orders_raw`) to build the silver-layer `orders_clean` table, then derives a set of temporary analytics views from it. The transformation includes:

- Data cleaning and standardization
- Temporal analysis (year, month, quarter, day of week)
- Order status and payment categorization
- Order value analysis
- Discount analysis
- Data quality metrics calculation
- Table optimization


In [0]:
-- Select the working catalog and schema so the unqualified table and view
-- names used in the following cells resolve to apjtechup.silver
USE CATALOG apjtechup;
USE DATABASE silver;


In [0]:
-- Build (or fully rebuild) the silver-layer orders_clean table from bronze orders_raw.
-- CREATE OR REPLACE swaps the whole table on every run: this is a full refresh,
-- not an incremental insert.
-- Rows are dropped when order_id, customer_id or order_date is NULL, or when
-- total_amount is NULL or negative (a NULL fails the >= 0 comparison).
CREATE OR REPLACE TABLE orders_clean AS
WITH normalized AS (
    -- Standardize the free-text columns exactly once so that the stored value and
    -- every categorization derived from it are guaranteed to use the same cleaned text
    SELECT
        o.order_id,
        o.customer_id,
        o.order_date,
        UPPER(TRIM(o.order_status)) AS order_status,
        LOWER(TRIM(o.payment_method)) AS payment_method,
        UPPER(TRIM(o.payment_status)) AS payment_status,
        TRIM(o.shipping_address) AS shipping_address,
        TRIM(o.billing_address) AS billing_address,
        o.total_amount,
        o.tax_amount,
        o.shipping_cost,
        o.discount_amount,
        o.created_at,
        o.updated_at
    FROM apjtechup.bronze.orders_raw AS o
    WHERE o.order_id IS NOT NULL
      AND o.customer_id IS NOT NULL
      AND o.order_date IS NOT NULL
      AND o.total_amount >= 0
)
SELECT
    n.order_id,
    n.customer_id,
    n.order_date,
    -- Temporal breakdown of order_date
    DATE(n.order_date) AS order_date_only,
    YEAR(n.order_date) AS order_year,
    MONTH(n.order_date) AS order_month,
    QUARTER(n.order_date) AS order_quarter,
    -- DAYOFWEEK numbers days 1 (Sunday) through 7 (Saturday)
    CASE DAYOFWEEK(n.order_date)
        WHEN 1 THEN 'Sunday'
        WHEN 2 THEN 'Monday'
        WHEN 3 THEN 'Tuesday'
        WHEN 4 THEN 'Wednesday'
        WHEN 5 THEN 'Thursday'
        WHEN 6 THEN 'Friday'
        WHEN 7 THEN 'Saturday'
    END AS order_day_of_week,
    HOUR(n.order_date) AS order_hour,
    n.order_status,
    -- Order status categorization (NULL or unrecognized statuses fall into 'Other')
    CASE
        WHEN n.order_status IN ('PENDING', 'PROCESSING') THEN 'In Progress'
        WHEN n.order_status IN ('SHIPPED', 'DELIVERED') THEN 'Completed'
        WHEN n.order_status IN ('CANCELLED', 'RETURNED') THEN 'Cancelled/Returned'
        ELSE 'Other'
    END AS order_status_category,
    n.payment_method,
    -- Payment method categorization (NULL or unrecognized methods fall into 'Other')
    CASE
        WHEN n.payment_method IN ('credit_card', 'debit_card') THEN 'Card Payment'
        WHEN n.payment_method IN ('paypal', 'apple_pay', 'google_pay') THEN 'Digital Wallet'
        ELSE 'Other'
    END AS payment_method_category,
    n.payment_status,
    n.shipping_address,
    n.billing_address,
    -- TRUE only when both trimmed addresses are present and equal;
    -- a NULL on either side yields FALSE
    CASE
        WHEN n.shipping_address = n.billing_address THEN TRUE
        ELSE FALSE
    END AS same_billing_shipping,
    n.total_amount,
    n.tax_amount,
    n.shipping_cost,
    n.discount_amount,
    -- Net amount = total minus tax; a missing tax_amount is treated as 0
    ROUND(n.total_amount - COALESCE(n.tax_amount, 0), 2) AS net_amount,
    -- Order value tier: <50 Small, <150 Medium, <500 Large, otherwise Enterprise
    CASE
        WHEN n.total_amount < 50 THEN 'Small'
        WHEN n.total_amount < 150 THEN 'Medium'
        WHEN n.total_amount < 500 THEN 'Large'
        ELSE 'Enterprise'
    END AS order_value_tier,
    -- A missing discount_amount counts as "no discount"
    CASE WHEN COALESCE(n.discount_amount, 0) > 0 THEN TRUE ELSE FALSE END AS has_discount,
    -- Discount as a percentage of total_amount; 0 when there is no discount
    -- or when total_amount is 0 (avoids division by zero)
    CASE
        WHEN n.total_amount > 0 AND COALESCE(n.discount_amount, 0) > 0
        THEN ROUND((n.discount_amount / n.total_amount) * 100, 2)
        ELSE 0
    END AS discount_percentage,
    n.created_at,
    n.updated_at,
    -- Time at which this rebuild ran
    CURRENT_TIMESTAMP() AS processing_timestamp
FROM normalized AS n;


In [0]:
-- Order quality metrics: a single-row summary of orders_clean covering order
-- counts per status category, revenue, discount usage and counts per value tier
CREATE OR REPLACE TEMPORARY VIEW order_quality_metrics AS
SELECT
    COUNT(*) AS total_orders,
    -- Counts per status category
    COUNT(*) FILTER (WHERE order_status_category = 'Completed') AS completed_orders,
    COUNT(*) FILTER (WHERE order_status_category = 'In Progress') AS in_progress_orders,
    COUNT(*) FILTER (WHERE order_status_category = 'Cancelled/Returned') AS cancelled_orders,
    -- Revenue figures
    ROUND(AVG(total_amount), 2) AS avg_order_value,
    ROUND(SUM(total_amount), 2) AS total_revenue,
    -- Discount usage; the average is taken over discounted orders only
    COUNT(*) FILTER (WHERE has_discount) AS orders_with_discount,
    ROUND(AVG(discount_percentage) FILTER (WHERE has_discount), 2) AS avg_discount_percentage,
    -- Counts per order value tier
    COUNT(*) FILTER (WHERE order_value_tier = 'Small') AS small_orders,
    COUNT(*) FILTER (WHERE order_value_tier = 'Medium') AS medium_orders,
    COUNT(*) FILTER (WHERE order_value_tier = 'Large') AS large_orders,
    COUNT(*) FILTER (WHERE order_value_tier = 'Enterprise') AS enterprise_orders
FROM orders_clean;


In [0]:
-- Payment method analysis: order volume, average order value and completion
-- rate for every (payment_method_category, payment_method) pair, busiest first
CREATE OR REPLACE TEMPORARY VIEW payment_method_analysis AS
SELECT
    payment_method_category,
    payment_method,
    COUNT(*) AS order_count,
    ROUND(AVG(total_amount), 2) AS avg_order_value,
    COUNT(*) FILTER (WHERE order_status_category = 'Completed') AS completed_count,
    -- Share of the group's orders that are 'Completed', as a percentage
    ROUND(
        (COUNT(*) FILTER (WHERE order_status_category = 'Completed')) * 100.0 / COUNT(*),
        2
    ) AS completion_rate
FROM orders_clean
GROUP BY
    payment_method_category,
    payment_method
ORDER BY order_count DESC;


In [0]:
-- Day-of-week patterns: order volume and revenue per weekday name,
-- listed Monday through Sunday
CREATE OR REPLACE TEMPORARY VIEW temporal_patterns AS
SELECT
    order_day_of_week,
    COUNT(*) AS order_count,
    ROUND(AVG(total_amount), 2) AS avg_order_value,
    ROUND(SUM(total_amount), 2) AS total_revenue
FROM orders_clean
GROUP BY order_day_of_week
ORDER BY
    -- Map weekday names to a Monday-first position; sorting the names
    -- directly would order them alphabetically
    CASE
        WHEN order_day_of_week = 'Monday' THEN 1
        WHEN order_day_of_week = 'Tuesday' THEN 2
        WHEN order_day_of_week = 'Wednesday' THEN 3
        WHEN order_day_of_week = 'Thursday' THEN 4
        WHEN order_day_of_week = 'Friday' THEN 5
        WHEN order_day_of_week = 'Saturday' THEN 6
        WHEN order_day_of_week = 'Sunday' THEN 7
    END;


In [0]:
-- Hourly patterns: order volume and average order value for each
-- hour-of-day value found in orders_clean, in ascending hour order
CREATE OR REPLACE TEMPORARY VIEW hourly_patterns AS
SELECT
    order_hour,
    COUNT(*) AS order_count,
    ROUND(AVG(total_amount), 2) AS avg_order_value
FROM orders_clean
GROUP BY order_hour
ORDER BY order_hour;


In [0]:
-- Monthly trends: order volume, revenue and number of distinct customers
-- for each (order_year, order_month), in chronological order
CREATE OR REPLACE TEMPORARY VIEW monthly_trends AS
SELECT
    order_year,
    order_month,
    COUNT(*) AS order_count,
    ROUND(AVG(total_amount), 2) AS avg_order_value,
    ROUND(SUM(total_amount), 2) AS total_revenue,
    COUNT(DISTINCT customer_id) AS unique_customers
FROM orders_clean
GROUP BY
    order_year,
    order_month
ORDER BY
    order_year,
    order_month;


In [0]:
-- Display analytics
-- Each one-row label SELECT is followed by the query returning the matching result set.
-- NOTE(review): a notebook cell holding several statements may render only the
-- last statement's result — confirm in the target environment and split this
-- cell into one cell per query if every result set should be visible.
SELECT 'Order Quality Metrics' AS metric_type;
SELECT * FROM order_quality_metrics;

SELECT 'Payment Method Analysis' AS metric_type;
SELECT * FROM payment_method_analysis;

SELECT 'Day of Week Patterns' AS metric_type;
SELECT * FROM temporal_patterns;

SELECT 'Hourly Patterns (Top 10)' AS metric_type;
-- order_hour breaks ties on order_count so the rows kept by LIMIT 10 are the
-- same on every run
SELECT * FROM hourly_patterns ORDER BY order_count DESC, order_hour ASC LIMIT 10;

SELECT 'Monthly Trends' AS metric_type;
-- Most recent 12 (year, month) groups, newest first
SELECT * FROM monthly_trends ORDER BY order_year DESC, order_month DESC LIMIT 12;


In [0]:
-- Post-load maintenance for orders_clean

-- Run OPTIMIZE on the freshly rebuilt table
OPTIMIZE orders_clean;

-- Recompute table statistics after the rebuild
ANALYZE TABLE orders_clean
COMPUTE STATISTICS;
