# Master Bronze to Silver ELT Process

This notebook orchestrates the entire Bronze to Silver data transformation process. It executes all the individual transformation notebooks in the correct order and handles additional tables not covered in the individual notebooks.

The process includes:
1. Customer data transformation
2. Product data transformation
3. Order data transformation
4. Order items transformation
5. Inventory transformation
6. Reviews transformation
7. Web events transformation

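The transformations defined in this notebook filter out rows that fail basic data quality checks. All Silver tables are then optimized and analyzed in a single final step.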


In [0]:
-- Emit a marker row recording when this pipeline run began.
SELECT
    'Starting Bronze to Silver ELT Process' AS status,
    CURRENT_TIMESTAMP() AS start_time;


In [0]:
-- Step 1: customers (reference data, processed first).
-- Emits a progress row; the transformation itself runs in the next cell.
SELECT
    'Transforming Customers...' AS step,
    CURRENT_TIMESTAMP() AS timestamp;


In [0]:
%run ./01_bronze_to_silver_customers

In [0]:
-- Step 2: products (reference data).
-- Emits a progress row; the transformation itself runs in a later cell.
SELECT
    'Transforming Products...' AS step,
    CURRENT_TIMESTAMP() AS timestamp;


Run the products transformation notebook


In [0]:
%run ./02_bronze_to_silver_products

In [0]:
-- Step 3: orders (transaction data).
-- Emits a progress row; the transformation itself runs in a later cell.
SELECT
    'Transforming Orders...' AS step,
    CURRENT_TIMESTAMP() AS timestamp;


Run the orders transformation notebook


In [0]:
%run ./03_bronze_to_silver_orders

In [0]:
-- Step 4: order items (large transaction table).
-- Rebuilds the Silver order_items_clean table from the Bronze order items,
-- enriched with product and category attributes and derived profit metrics.
SELECT
    'Transforming Order Items...' AS step,
    CURRENT_TIMESTAMP() AS timestamp;

-- Set the session's current catalog/schema. Later cells that use unqualified
-- table names resolve against this setting.
USE CATALOG apjtechup;
USE SCHEMA silver;

CREATE OR REPLACE TABLE apjtechup.silver.order_items_clean AS
SELECT
    item.order_item_id,
    item.order_id,
    item.product_id,
    -- Product/category attributes are NULL when the LEFT JOINs find no match.
    TRIM(prod.product_name) AS product_name,
    TRIM(cat.category_name) AS category_name,
    TRIM(prod.brand) AS brand,
    item.quantity,
    item.unit_price,
    item.discount_amount,
    item.total_amount,
    prod.cost AS unit_cost,
    -- Profit = line total minus (unit cost x quantity); NULL if cost is NULL.
    ROUND(item.total_amount - (prod.cost * item.quantity), 2) AS item_profit,
    -- Margin as a percentage of the line total; 0 when the total is not positive.
    CASE
        WHEN item.total_amount > 0
            THEN ROUND(((item.total_amount - (prod.cost * item.quantity)) / item.total_amount) * 100, 2)
        ELSE 0
    END AS item_margin,
    -- A missing discount counts as no discount.
    COALESCE(item.discount_amount, 0) > 0 AS is_discounted,
    CASE
        WHEN item.quantity = 1 THEN 'Single'
        WHEN item.quantity <= 3 THEN 'Small'
        WHEN item.quantity <= 10 THEN 'Medium'
        ELSE 'Bulk'
    END AS quantity_tier,
    item.created_at,
    CURRENT_TIMESTAMP() AS processing_timestamp
FROM apjtechup.bronze.order_items_raw AS item
LEFT JOIN apjtechup.bronze.products_raw AS prod
    ON item.product_id = prod.product_id
LEFT JOIN apjtechup.bronze.categories_raw AS cat
    ON prod.category_id = cat.category_id
-- Data quality filters: keys must be present, quantity positive, price non-negative.
WHERE item.order_item_id IS NOT NULL
  AND item.order_id IS NOT NULL
  AND item.product_id IS NOT NULL
  AND item.quantity > 0
  AND item.unit_price >= 0;


In [0]:
-- Step 5: inventory.
-- Rebuilds the Silver inventory_clean table from the Bronze inventory rows,
-- enriched with product/category names and derived stock indicators.
SELECT
    'Transforming Inventory...' AS step,
    CURRENT_TIMESTAMP() AS timestamp;

-- The target is fully qualified (like the Bronze sources) so this cell writes
-- to apjtechup.silver even when the USE statements of an earlier cell have
-- not been executed in the current session.
CREATE OR REPLACE TABLE apjtechup.silver.inventory_clean AS
SELECT
    i.inventory_id,
    i.product_id,
    -- Product/category names are NULL when the LEFT JOINs find no match.
    TRIM(p.product_name) AS product_name,
    TRIM(c.category_name) AS category_name,
    i.warehouse_id,
    -- Region is derived purely from warehouse_id ranges.
    CASE
        WHEN i.warehouse_id <= 5 THEN 'East'
        WHEN i.warehouse_id <= 10 THEN 'Central'
        WHEN i.warehouse_id <= 15 THEN 'West'
        ELSE 'International'
    END AS warehouse_region,
    i.quantity_on_hand,
    i.quantity_reserved,
    i.quantity_available,
    i.reorder_level,
    -- NOTE(review): a NULL quantity_available or reorder_level fails every
    -- comparison and falls through to 'High Stock' - confirm this is intended.
    CASE
        WHEN i.quantity_available <= 0 THEN 'Out of Stock'
        WHEN i.quantity_available <= i.reorder_level THEN 'Low Stock'
        WHEN i.quantity_available <= i.reorder_level * 2 THEN 'Normal Stock'
        ELSE 'High Stock'
    END AS stock_status,
    -- Rough days-of-supply estimate.
    -- NOTE(review): this divides available units by (unit cost x 30), not by
    -- a sales rate; revisit once demand data is available.
    CASE
        WHEN p.cost > 0 AND i.quantity_available > 0
            THEN ROUND(i.quantity_available / (p.cost * 30), 1)
        ELSE 0
    END AS days_of_supply,
    -- Turnover is approximated from stock level relative to reorder level.
    CASE
        WHEN i.quantity_available <= i.reorder_level * 0.5 THEN 'Fast Moving'
        WHEN i.quantity_available <= i.reorder_level * 2 THEN 'Normal Moving'
        ELSE 'Slow Moving'
    END AS turnover_category,
    -- CASE (not a bare comparison) so NULL inputs yield FALSE instead of NULL.
    CASE WHEN i.quantity_available <= i.reorder_level THEN TRUE ELSE FALSE END AS needs_reorder,
    CASE WHEN i.quantity_available >= i.reorder_level * 5 THEN TRUE ELSE FALSE END AS overstocked,
    i.last_updated,
    i.created_at,
    CURRENT_TIMESTAMP() AS processing_timestamp
FROM apjtechup.bronze.inventory_raw AS i
LEFT JOIN apjtechup.bronze.products_raw AS p
    ON i.product_id = p.product_id
LEFT JOIN apjtechup.bronze.categories_raw AS c
    ON p.category_id = c.category_id
-- Data quality filters: keys must be present and on-hand stock non-negative.
WHERE i.inventory_id IS NOT NULL
  AND i.product_id IS NOT NULL
  AND i.quantity_on_hand >= 0;


In [0]:
-- Step 6: reviews.
-- Rebuilds the Silver reviews_clean table from the Bronze reviews, enriched
-- with product/category names and rating-derived classifications.
SELECT
    'Transforming Reviews...' AS step,
    CURRENT_TIMESTAMP() AS timestamp;

-- The target is fully qualified (like the Bronze sources) so this cell writes
-- to apjtechup.silver even when the USE statements of an earlier cell have
-- not been executed in the current session.
CREATE OR REPLACE TABLE apjtechup.silver.reviews_clean AS
SELECT
    r.review_id,
    r.product_id,
    -- Product/category names are NULL when the LEFT JOINs find no match.
    TRIM(p.product_name) AS product_name,
    TRIM(c.category_name) AS category_name,
    r.customer_id,
    r.rating,
    CASE
        WHEN r.rating >= 4 THEN 'Positive'
        WHEN r.rating >= 3 THEN 'Neutral'
        ELSE 'Negative'
    END AS rating_category,
    r.review_text,
    -- A missing review text counts as length 0.
    LENGTH(COALESCE(r.review_text, '')) AS review_text_length,
    -- Simple sentiment score derived from the rating alone (no text analysis).
    -- Ratings other than the exact values 1-5 score 0.0.
    CASE
        WHEN r.rating = 5 THEN 1.0
        WHEN r.rating = 4 THEN 0.5
        WHEN r.rating = 3 THEN 0.0
        WHEN r.rating = 2 THEN -0.5
        WHEN r.rating = 1 THEN -1.0
        ELSE 0.0
    END AS sentiment_score,
    -- Uses the same thresholds as rating_category.
    CASE
        WHEN r.rating >= 4 THEN 'Positive'
        WHEN r.rating >= 3 THEN 'Neutral'
        ELSE 'Negative'
    END AS sentiment_category,
    r.review_date,
    YEAR(r.review_date) AS review_year,
    MONTH(r.review_date) AS review_month,
    r.verified_purchase,
    r.helpful_votes,
    -- NULL helpful_votes falls through to 'Not Helpful'.
    CASE
        WHEN r.helpful_votes >= 20 THEN 'Very Helpful'
        WHEN r.helpful_votes >= 5 THEN 'Helpful'
        WHEN r.helpful_votes >= 1 THEN 'Somewhat Helpful'
        ELSE 'Not Helpful'
    END AS helpfulness_tier,
    -- Placeholder: days between the fixed date 2023-01-01 and the review date.
    -- NOTE(review): this is not based on an actual purchase date; replace once
    -- the purchase date is available to this query.
    DATEDIFF(r.review_date, '2023-01-01') AS days_since_purchase,
    -- Recency is relative to the day the job runs.
    CASE
        WHEN DATEDIFF(CURRENT_DATE(), r.review_date) <= 30 THEN 'Recent'
        WHEN DATEDIFF(CURRENT_DATE(), r.review_date) <= 90 THEN 'Moderate'
        ELSE 'Old'
    END AS review_recency,
    r.created_at,
    CURRENT_TIMESTAMP() AS processing_timestamp
FROM apjtechup.bronze.reviews_raw AS r
LEFT JOIN apjtechup.bronze.products_raw AS p
    ON r.product_id = p.product_id
LEFT JOIN apjtechup.bronze.categories_raw AS c
    ON p.category_id = c.category_id
-- Data quality filters: keys must be present and the rating within 1-5.
WHERE r.review_id IS NOT NULL
  AND r.product_id IS NOT NULL
  AND r.rating BETWEEN 1 AND 5;


In [0]:
-- Step 7: web events.
-- Rebuilds the Silver web_events_clean table from the Bronze web events,
-- adding event/page categories, calendar parts, and device, browser, OS and
-- referrer classifications derived from the user agent and referrer strings.
SELECT
    'Transforming Web Events...' AS step,
    CURRENT_TIMESTAMP() AS timestamp;

-- The target is fully qualified (like the Bronze sources) so this cell writes
-- to apjtechup.silver even when the USE statements of an earlier cell have
-- not been executed in the current session.
CREATE OR REPLACE TABLE apjtechup.silver.web_events_clean AS
SELECT
    we.event_id,
    we.session_id,
    we.customer_id,
    LOWER(TRIM(we.event_type)) AS event_type,
    CASE
        WHEN LOWER(TRIM(we.event_type)) IN ('page_view', 'product_view') THEN 'Browsing'
        WHEN LOWER(TRIM(we.event_type)) IN ('add_to_cart', 'remove_from_cart') THEN 'Cart Activity'
        WHEN LOWER(TRIM(we.event_type)) IN ('checkout_start', 'purchase') THEN 'Purchase'
        WHEN LOWER(TRIM(we.event_type)) IN ('search', 'filter') THEN 'Search'
        ELSE 'Other'
    END AS event_category,
    we.page_url,
    -- First matching URL fragment wins; a NULL page_url yields 'Other Page'.
    CASE
        WHEN we.page_url LIKE '%/product%' THEN 'Product Page'
        WHEN we.page_url LIKE '%/category%' THEN 'Category Page'
        WHEN we.page_url LIKE '%/cart%' THEN 'Cart Page'
        WHEN we.page_url LIKE '%/checkout%' THEN 'Checkout Page'
        WHEN we.page_url LIKE '%/search%' THEN 'Search Page'
        WHEN we.page_url = '/home' THEN 'Home Page'
        ELSE 'Other Page'
    END AS page_category,
    we.product_id,
    -- NULL when the event has no product or the LEFT JOINs find no match.
    TRIM(c.category_name) AS product_category,
    we.timestamp,
    DATE(we.timestamp) AS event_date,
    HOUR(we.timestamp) AS event_hour,
    -- DAYOFWEEK returns 1 for Sunday through 7 for Saturday.
    CASE DAYOFWEEK(we.timestamp)
        WHEN 1 THEN 'Sunday'
        WHEN 2 THEN 'Monday'
        WHEN 3 THEN 'Tuesday'
        WHEN 4 THEN 'Wednesday'
        WHEN 5 THEN 'Thursday'
        WHEN 6 THEN 'Friday'
        WHEN 7 THEN 'Saturday'
    END AS event_day_of_week,
    we.ip_address,
    we.user_agent,
    -- Tablet is tested before Mobile: iPad user agents also carry a "Mobile"
    -- token and would otherwise never reach the Tablet branch.
    -- A NULL user agent falls through to 'Desktop'.
    CASE
        WHEN LOWER(we.user_agent) LIKE '%tablet%'
          OR LOWER(we.user_agent) LIKE '%ipad%' THEN 'Tablet'
        WHEN LOWER(we.user_agent) LIKE '%mobile%'
          OR LOWER(we.user_agent) LIKE '%android%'
          OR LOWER(we.user_agent) LIKE '%iphone%' THEN 'Mobile'
        ELSE 'Desktop'
    END AS device_type,
    -- Most specific token first: Edge user agents also contain "Chrome" and
    -- "Safari", and Chrome user agents also contain "Safari". Chromium-based
    -- Edge identifies itself as "Edg/", legacy Edge as "Edge/".
    CASE
        WHEN LOWER(we.user_agent) LIKE '%edge%'
          OR LOWER(we.user_agent) LIKE '%edg/%' THEN 'Edge'
        WHEN LOWER(we.user_agent) LIKE '%chrome%' THEN 'Chrome'
        WHEN LOWER(we.user_agent) LIKE '%firefox%' THEN 'Firefox'
        WHEN LOWER(we.user_agent) LIKE '%safari%' THEN 'Safari'
        ELSE 'Other'
    END AS browser,
    -- Android is tested before Linux (Android user agents contain "Linux"),
    -- and iOS devices before MacOS (their user agents contain
    -- "like Mac OS X" and usually no literal "ios").
    CASE
        WHEN LOWER(we.user_agent) LIKE '%windows%' THEN 'Windows'
        WHEN LOWER(we.user_agent) LIKE '%android%' THEN 'Android'
        WHEN LOWER(we.user_agent) LIKE '%iphone%'
          OR LOWER(we.user_agent) LIKE '%ipad%'
          OR LOWER(we.user_agent) LIKE '%ipod%'
          OR LOWER(we.user_agent) LIKE '%ios%' THEN 'iOS'
        WHEN LOWER(we.user_agent) LIKE '%mac%' THEN 'MacOS'
        WHEN LOWER(we.user_agent) LIKE '%linux%' THEN 'Linux'
        ELSE 'Other'
    END AS operating_system,
    we.referrer,
    -- Only a NULL referrer counts as direct traffic.
    CASE
        WHEN we.referrer IS NULL THEN 'Direct'
        WHEN LOWER(we.referrer) LIKE '%google%' THEN 'Google'
        WHEN LOWER(we.referrer) LIKE '%facebook%' THEN 'Facebook'
        WHEN LOWER(we.referrer) LIKE '%twitter%' THEN 'Twitter'
        WHEN LOWER(we.referrer) LIKE '%instagram%' THEN 'Instagram'
        ELSE 'Other Referrer'
    END AS referrer_category,
    -- TRUE exactly when device_type is 'Mobile' (same branch order as above).
    CASE
        WHEN LOWER(we.user_agent) LIKE '%tablet%'
          OR LOWER(we.user_agent) LIKE '%ipad%' THEN FALSE
        WHEN LOWER(we.user_agent) LIKE '%mobile%'
          OR LOWER(we.user_agent) LIKE '%android%'
          OR LOWER(we.user_agent) LIKE '%iphone%' THEN TRUE
        ELSE FALSE
    END AS is_mobile,
    -- Constant placeholder; a real value would need session-level analysis.
    'Unknown' AS session_duration_bucket,
    we.created_at,
    CURRENT_TIMESTAMP() AS processing_timestamp
FROM apjtechup.bronze.web_events_raw AS we
LEFT JOIN apjtechup.bronze.products_raw AS p
    ON we.product_id = p.product_id
LEFT JOIN apjtechup.bronze.categories_raw AS c
    ON p.category_id = c.category_id
-- Data quality filters: the event must have an id and a timestamp.
WHERE we.event_id IS NOT NULL
  AND we.timestamp IS NOT NULL;


In [0]:
-- Maintenance pass over every Silver table: OPTIMIZE each table, then
-- refresh its statistics. Table names are unqualified, so they resolve
-- against the session's current catalog and schema.
SELECT
    'Optimizing all Silver tables...' AS step,
    CURRENT_TIMESTAMP() AS timestamp;

-- Customers
OPTIMIZE customers_clean;
ANALYZE TABLE customers_clean COMPUTE STATISTICS;

-- Products
OPTIMIZE products_clean;
ANALYZE TABLE products_clean COMPUTE STATISTICS;

-- Orders
OPTIMIZE orders_clean;
ANALYZE TABLE orders_clean COMPUTE STATISTICS;

-- Order items
OPTIMIZE order_items_clean;
ANALYZE TABLE order_items_clean COMPUTE STATISTICS;

-- Inventory
OPTIMIZE inventory_clean;
ANALYZE TABLE inventory_clean COMPUTE STATISTICS;

-- Reviews
OPTIMIZE reviews_clean;
ANALYZE TABLE reviews_clean COMPUTE STATISTICS;

-- Web events
OPTIMIZE web_events_clean;
ANALYZE TABLE web_events_clean COMPUTE STATISTICS;

In [0]:
-- Completion marker for the pipeline run.
SELECT
    'Bronze to Silver ELT Process Complete' AS status,
    CURRENT_TIMESTAMP() AS end_time;

-- Per-table summary, largest tables first.
-- NOTE(review): silver_table_summary is not created in this notebook;
-- presumably one of the %run notebooks defines it - confirm.
SELECT *
FROM silver_table_summary
ORDER BY record_count DESC;
