# Bronze to Silver ELT: Customer Data Transformation

This notebook cleans and enriches customer data from the bronze layer (`apjtechup.bronze.customers_raw`) to build the silver-layer `customers_clean` table. The transformation includes:

- Data cleaning and standardization
- Field enrichment and derivation
- Data quality metrics calculation
- Table optimization


In [0]:
-- Session defaults: unqualified table names in the cells below resolve
-- against catalog apjtechup, database silver.
USE CATALOG apjtechup;
USE DATABASE silver;


In [0]:
-- Build the silver-layer customers_clean table from the bronze customer feed.
--
-- CREATE OR REPLACE rebuilds the whole table on every run; this is a full
-- refresh, not an incremental insert. A source row is kept only when it has
-- a customer_id, an email containing '@', and non-blank first and last names.
--
-- Age and registration tenure are computed once in the CTE, so age_group and
-- customer_tier are always derived from the same values stored in the age
-- and days_since_registration columns. (age_group was previously bucketed on
-- a bare year difference, which moved customers into the next bracket before
-- their birthday and contradicted the age column.)
CREATE OR REPLACE TABLE customers_clean AS
WITH prepared AS (
    SELECT
        c.customer_id,
        TRIM(UPPER(c.first_name)) AS first_name,
        TRIM(UPPER(c.last_name)) AS last_name,
        LOWER(TRIM(c.email)) AS email,
        c.phone,
        c.registration_date,
        c.birth_date,
        -- Age in completed years: year difference, minus one when this
        -- year's birthday has not happened yet. NULL when birth_date is NULL.
        CASE
            WHEN c.birth_date IS NOT NULL
            THEN YEAR(CURRENT_DATE()) - YEAR(c.birth_date) -
                 CASE
                     WHEN MONTH(CURRENT_DATE()) < MONTH(c.birth_date)
                       OR (MONTH(CURRENT_DATE()) = MONTH(c.birth_date)
                           AND DAY(CURRENT_DATE()) < DAY(c.birth_date))
                     THEN 1
                     ELSE 0
                 END
            ELSE NULL
        END AS age,
        c.gender,
        c.address_line1,
        c.address_line2,
        c.city,
        c.state,
        c.country,
        c.postal_code,
        -- Tenure in days; NULL when registration_date is NULL.
        DATEDIFF(CURRENT_DATE(), c.registration_date) AS days_since_registration,
        c.source_system,
        c.created_at
    FROM apjtechup.bronze.customers_raw AS c
    WHERE c.customer_id IS NOT NULL
      AND c.email IS NOT NULL
      AND c.email LIKE '%@%'  -- Basic email validation
      AND LENGTH(TRIM(c.first_name)) > 0
      AND LENGTH(TRIM(c.last_name)) > 0
)
SELECT
    p.customer_id,
    p.first_name,
    p.last_name,
    CONCAT(p.first_name, ' ', p.last_name) AS full_name,
    p.email,
    -- Text after the first '@'. The guard mirrors the CTE filter so the
    -- array index is never evaluated on a value without '@'.
    CASE
        WHEN p.email LIKE '%@%' THEN SPLIT(p.email, '@')[1]
        ELSE NULL
    END AS email_domain,
    p.phone,
    -- Clean phone numbers (remove non-numeric characters)
    REGEXP_REPLACE(p.phone, '[^0-9]', '') AS phone_cleaned,
    p.registration_date,
    p.birth_date,
    p.age,
    -- Age bracket, derived from the birthday-adjusted age above.
    -- NOTE(review): ages below 18 also land in '18-24', as before; confirm
    -- whether minors need their own bucket.
    CASE
        WHEN p.age IS NULL THEN 'Unknown'
        WHEN p.age < 25 THEN '18-24'
        WHEN p.age < 35 THEN '25-34'
        WHEN p.age < 45 THEN '35-44'
        WHEN p.age < 55 THEN '45-54'
        WHEN p.age < 65 THEN '55-64'
        ELSE '65+'
    END AS age_group,
    COALESCE(UPPER(p.gender), 'Unknown') AS gender,
    TRIM(p.address_line1) AS address_line1,
    TRIM(p.address_line2) AS address_line2,
    TRIM(UPPER(p.city)) AS city,
    TRIM(UPPER(p.state)) AS state,
    TRIM(UPPER(p.country)) AS country,
    p.postal_code,
    YEAR(p.registration_date) AS registration_year,
    MONTH(p.registration_date) AS registration_month,
    p.days_since_registration,
    -- Customer tier based on registration tenure (days).
    -- NOTE(review): a NULL registration_date fails every comparison and
    -- falls through to 'New', as before; confirm that is intended.
    CASE
        WHEN p.days_since_registration >= 730 THEN 'Gold'
        WHEN p.days_since_registration >= 365 THEN 'Silver'
        WHEN p.days_since_registration >= 90 THEN 'Bronze'
        ELSE 'New'
    END AS customer_tier,
    TRIM(LOWER(p.source_system)) AS source_system,
    p.created_at,
    CURRENT_TIMESTAMP() AS updated_at
FROM prepared AS p;


In [0]:
-- Single-row summary of customers_clean: completeness counts for the derived
-- fields, average tenure, and the customer count in each tier.
CREATE OR REPLACE TEMPORARY VIEW customer_quality_metrics AS
SELECT
    COUNT(*) AS total_customers,
    COUNT_IF(email_domain IS NOT NULL) AS valid_emails,
    -- A phone counts as valid when at least 10 digits remain after cleaning.
    COUNT_IF(phone_cleaned IS NOT NULL AND LENGTH(phone_cleaned) >= 10) AS valid_phones,
    COUNT_IF(age IS NOT NULL) AS customers_with_age,
    COUNT_IF(age_group <> 'Unknown') AS customers_with_age_group,
    AVG(days_since_registration) AS avg_days_since_registration,
    COUNT_IF(customer_tier = 'Gold') AS gold_customers,
    COUNT_IF(customer_tier = 'Silver') AS silver_customers,
    COUNT_IF(customer_tier = 'Bronze') AS bronze_customers,
    COUNT_IF(customer_tier = 'New') AS new_customers
FROM customers_clean;


In [0]:
-- Show the quality summary; columns are listed explicitly, in the order the
-- view defines them.
SELECT
    total_customers,
    valid_emails,
    valid_phones,
    customers_with_age,
    customers_with_age_group,
    avg_days_since_registration,
    gold_customers,
    silver_customers,
    bronze_customers,
    new_customers
FROM customer_quality_metrics;


In [0]:
-- Post-load maintenance for the freshly rebuilt table.

-- Run OPTIMIZE on the table's data files.
OPTIMIZE customers_clean;

-- Recompute table statistics.
ANALYZE TABLE customers_clean
    COMPUTE STATISTICS;
