# Silver to Gold: Product Performance

This notebook transforms product data from the silver layer into analytical metrics in the gold layer. It creates comprehensive product performance analytics that can be used for inventory management, sales analysis, and product strategy.

## Metrics Overview

1. Sales Performance
   - Quantity sold and revenue
   - Profit margins
   - Recent sales trends

2. Inventory Metrics
   - Stock levels and turnover
   - Stockout tracking
   - Days of supply

3. Customer Behavior
   - Unique customers
   - Repeat purchase rates
   - Customer retention

4. Review & Rating Analysis
   - Average ratings
   - Rating distribution
   - Sentiment analysis

5. Web Analytics
   - Product views
   - Conversion rates
   - Cart abandonment

6. Performance Classification
   - ABC analysis
   - Velocity categories
   - Product lifecycle stages

## Dependencies
- Silver layer tables: products_clean, order_items_clean, orders_clean, inventory_clean, reviews_clean, web_events_clean
- Gold layer output: product_performance


In [0]:
-- Session setup: select the apjtechup catalog and its gold database so the
-- unqualified table names used in the cells below resolve against them.
USE CATALOG apjtechup;
USE DATABASE gold;


## Create Product Performance Table

The following query creates a comprehensive product performance table that combines sales, inventory, customer behavior, review, and web analytics metrics, with one row per active product.


In [0]:
-- Create product performance table
--
-- Builds gold.product_performance with one row per active product in
-- silver.products_clean. Each CTE pre-aggregates one silver source to
-- product grain; the final SELECT LEFT JOINs them onto the product list, so
-- products without sales / inventory / reviews / web events still appear,
-- with their metrics defaulted to 0 through COALESCE.
CREATE OR REPLACE TABLE product_performance using iceberg
AS
WITH product_sales AS (
    -- Lifetime and trailing-30-day sales per product.
    SELECT 
        oi.product_id,
        SUM(oi.quantity) as total_quantity_sold,
        SUM(oi.total_amount) as total_revenue,
        SUM(oi.item_profit) as total_profit,
        COUNT(DISTINCT oi.order_id) as total_orders,
        COUNT(DISTINCT o.customer_id) as unique_customers,
        AVG(oi.unit_price) as avg_selling_price,
        -- Last 30 days metrics
        SUM(CASE WHEN o.order_date_only >= CURRENT_DATE() - INTERVAL 30 DAYS THEN oi.quantity ELSE 0 END) as units_sold_last_30d,
        SUM(CASE WHEN o.order_date_only >= CURRENT_DATE() - INTERVAL 30 DAYS THEN oi.total_amount ELSE 0 END) as revenue_last_30d
    FROM apjtechup.silver.order_items_clean oi
    JOIN apjtechup.silver.orders_clean o ON oi.order_id = o.order_id
    GROUP BY oi.product_id
),
product_inventory AS (
    -- Stock statistics aggregated over every inventory row of the product.
    -- NOTE(review): current_stock_level is the AVERAGE of quantity_available
    -- across all rows, not the latest value -- confirm this is intended.
    -- NOTE(review): stockout_days counts rows flagged 'Out of Stock'; this is
    -- a day count only if inventory_clean holds one row per product per day
    -- -- TODO confirm.
    SELECT 
        product_id,
        AVG(quantity_on_hand) as avg_stock_level,
        MAX(quantity_on_hand) as max_stock_level,
        MIN(quantity_on_hand) as min_stock_level,
        AVG(quantity_available) as current_stock_level,
        COUNT(CASE WHEN stock_status = 'Out of Stock' THEN 1 END) as stockout_days,
        AVG(days_of_supply) as avg_days_supply
    FROM apjtechup.silver.inventory_clean
    GROUP BY product_id
),
product_reviews AS (
    -- Review volume, mean rating, per-star counts and mean sentiment.
    SELECT 
        product_id,
        COUNT(*) as total_reviews,
        AVG(rating) as avg_rating,
        COUNT(CASE WHEN rating = 1 THEN 1 END) as rating_1_star,
        COUNT(CASE WHEN rating = 2 THEN 1 END) as rating_2_star,
        COUNT(CASE WHEN rating = 3 THEN 1 END) as rating_3_star,
        COUNT(CASE WHEN rating = 4 THEN 1 END) as rating_4_star,
        COUNT(CASE WHEN rating = 5 THEN 1 END) as rating_5_star,
        AVG(sentiment_score) as avg_sentiment_score
    FROM apjtechup.silver.reviews_clean
    GROUP BY product_id
),
product_web_metrics AS (
    -- Web event counts per product; events without a product_id are excluded.
    SELECT 
        product_id,
        COUNT(CASE WHEN event_type = 'product_view' THEN 1 END) as product_views,
        COUNT(CASE WHEN event_type = 'add_to_cart' THEN 1 END) as add_to_cart_events,
        COUNT(DISTINCT customer_id) as unique_viewers
    FROM apjtechup.silver.web_events_clean
    WHERE product_id IS NOT NULL
    GROUP BY product_id
),
repeat_customers AS (
    -- Per product: how many of its buyers are repeat customers. A customer is
    -- "repeat" when they have more than one distinct order in orders_clean
    -- overall (any product), as computed by the cc subquery.
    SELECT 
        oi.product_id,
        -- Count DISTINCT customers, not joined order-item rows: a customer who
        -- bought this product on several lines/orders must count once, otherwise
        -- the rates derived from this column can exceed 100%.
        COUNT(DISTINCT CASE WHEN cc.customer_order_count > 1 THEN o.customer_id END) as repeat_customers,
        COUNT(DISTINCT o.customer_id) as total_customers
    FROM apjtechup.silver.order_items_clean oi
    JOIN apjtechup.silver.orders_clean o ON oi.order_id = o.order_id
    JOIN (
        SELECT customer_id, COUNT(DISTINCT order_id) as customer_order_count
        FROM apjtechup.silver.orders_clean
        GROUP BY customer_id
    ) cc ON o.customer_id = cc.customer_id
    GROUP BY oi.product_id
)
SELECT 
    p.product_id,
    p.product_name_clean as product_name,
    p.category_name,
    p.category_path,
    p.brand,
    p.supplier_name,
    
    -- Basic Product Info
    p.price as current_price,
    p.cost,
    p.profit_margin,
    p.price_tier,
    
    -- Sales Performance
    COALESCE(ps.total_quantity_sold, 0) as total_quantity_sold,
    ROUND(COALESCE(ps.total_revenue, 0), 2) as total_revenue,
    ROUND(COALESCE(ps.total_profit, 0), 2) as total_profit,
    COALESCE(ps.units_sold_last_30d, 0) as units_sold_last_30d,
    ROUND(COALESCE(ps.revenue_last_30d, 0), 2) as revenue_last_30d,
    
    -- Inventory Metrics
    ROUND(COALESCE(pi.current_stock_level, 0), 0) as current_stock_level,
    ROUND(COALESCE(pi.avg_stock_level, 0), 2) as average_stock_level,
    COALESCE(pi.stockout_days, 0) as stockout_days,
    -- Turnover = lifetime units sold / average stock; 0 when either is missing or zero.
    CASE 
        WHEN COALESCE(pi.avg_stock_level, 0) > 0 AND COALESCE(ps.total_quantity_sold, 0) > 0
        THEN ROUND(COALESCE(ps.total_quantity_sold, 0) / COALESCE(pi.avg_stock_level, 1), 2)
        ELSE 0
    END as inventory_turnover_ratio,
    ROUND(COALESCE(pi.avg_days_supply, 0), 1) as days_supply_current,
    
    -- Customer Metrics
    COALESCE(ps.unique_customers, 0) as unique_customers,
    -- Percentage of the product's buyers that are repeat customers.
    CASE 
        WHEN COALESCE(rc.total_customers, 0) > 0
        THEN ROUND(COALESCE(rc.repeat_customers, 0) * 100.0 / COALESCE(rc.total_customers, 1), 2)
        ELSE 0
    END as repeat_purchase_rate,
    -- Same numerator, with product_sales.unique_customers as the denominator.
    CASE 
        WHEN COALESCE(ps.unique_customers, 0) > 0
        THEN ROUND(COALESCE(rc.repeat_customers, 0) * 100.0 / COALESCE(ps.unique_customers, 1), 2)
        ELSE 0
    END as customer_retention_rate,
    
    -- Review & Rating Metrics
    COALESCE(pr.total_reviews, 0) as total_reviews,
    ROUND(COALESCE(pr.avg_rating, 0), 2) as average_rating,
    COALESCE(pr.rating_1_star, 0) as rating_distribution_1_star,
    COALESCE(pr.rating_2_star, 0) as rating_distribution_2_star,
    COALESCE(pr.rating_3_star, 0) as rating_distribution_3_star,
    COALESCE(pr.rating_4_star, 0) as rating_distribution_4_star,
    COALESCE(pr.rating_5_star, 0) as rating_distribution_5_star,
    ROUND(COALESCE(pr.avg_sentiment_score, 0), 2) as review_sentiment_score,
    
    -- Trend Analysis (simplified)
    -- Compares the last 30 days' units with the lifetime total:
    -- more than 30% => 'Growing', more than 10% => 'Stable', otherwise 'Declining'
    -- (products with no sales therefore land in 'Declining').
    CASE 
        WHEN COALESCE(ps.units_sold_last_30d, 0) > COALESCE(ps.total_quantity_sold, 0) * 0.3 THEN 'Growing'
        WHEN COALESCE(ps.units_sold_last_30d, 0) > COALESCE(ps.total_quantity_sold, 0) * 0.1 THEN 'Stable'
        ELSE 'Declining'
    END as sales_trend_30d,
    'Stable' as price_trend_30d, -- Placeholder
    CASE 
        WHEN COALESCE(pi.stockout_days, 0) > 10 THEN 'Declining'
        WHEN COALESCE(pi.current_stock_level, 0) > COALESCE(pi.avg_stock_level, 0) * 1.5 THEN 'Increasing'
        ELSE 'Stable'
    END as inventory_trend,
    -- NOTE(review): random value, so it changes on every rebuild of the table.
    ROUND(RANDOM() * 5, 2) as demand_volatility_score, -- Placeholder - would need historical data
    
    -- Performance Classification
    -- Tiers and ABC classes use fixed revenue thresholds (10000 / 5000 / 1000);
    -- the top two tiers additionally require a minimum average rating.
    CASE 
        WHEN COALESCE(ps.total_revenue, 0) >= 10000 AND COALESCE(pr.avg_rating, 0) >= 4.0 THEN 'Star'
        WHEN COALESCE(ps.total_revenue, 0) >= 5000 AND COALESCE(pr.avg_rating, 0) >= 3.5 THEN 'Strong'
        WHEN COALESCE(ps.total_revenue, 0) >= 1000 THEN 'Average'
        ELSE 'Weak'
    END as performance_tier,
    CASE 
        WHEN COALESCE(ps.total_revenue, 0) >= 10000 THEN 'A'
        WHEN COALESCE(ps.total_revenue, 0) >= 1000 THEN 'B'
        ELSE 'C'
    END as abc_classification,
    CASE 
        WHEN COALESCE(ps.units_sold_last_30d, 0) >= 100 THEN 'Fast Moving'
        WHEN COALESCE(ps.units_sold_last_30d, 0) >= 10 THEN 'Normal Moving'
        ELSE 'Slow Moving'
    END as velocity_category,
    p.product_lifecycle_stage as lifecycle_stage,
    
    -- Web Analytics
    COALESCE(pwm.product_views, 0) as product_views,
    -- Units sold per 100 product views; 0 when the product has no views.
    CASE 
        WHEN COALESCE(pwm.product_views, 0) > 0
        THEN ROUND(COALESCE(ps.total_quantity_sold, 0) * 100.0 / COALESCE(pwm.product_views, 1), 2)
        ELSE 0
    END as conversion_rate,
    -- NOTE(review): compares add-to-cart EVENTS with UNITS sold, so the result
    -- is negative whenever units sold exceed add-to-cart events -- confirm
    -- whether that is acceptable downstream.
    CASE 
        WHEN COALESCE(pwm.add_to_cart_events, 0) > 0
        THEN ROUND((COALESCE(pwm.add_to_cart_events, 0) - COALESCE(ps.total_quantity_sold, 0)) * 100.0 / 
                   COALESCE(pwm.add_to_cart_events, 1), 2)
        ELSE 0
    END as cart_abandonment_rate,
    
    -- Metadata
    CURRENT_TIMESTAMP() as created_at,
    CURRENT_TIMESTAMP() as updated_at
FROM apjtechup.silver.products_clean p
LEFT JOIN product_sales ps ON p.product_id = ps.product_id
LEFT JOIN product_inventory pi ON p.product_id = pi.product_id
LEFT JOIN product_reviews pr ON p.product_id = pr.product_id
LEFT JOIN product_web_metrics pwm ON p.product_id = pwm.product_id
LEFT JOIN repeat_customers rc ON p.product_id = rc.product_id
WHERE p.is_active = TRUE;


## Generate Summary Statistics

Create a temporary summary view over the product performance table and display its contents.


In [0]:
-- Portfolio-level roll-up of product_performance: a single row holding the
-- product count, revenue totals/averages, the mean rating, and how many
-- products fall into each performance tier and ABC class.
CREATE OR REPLACE TEMPORARY VIEW product_performance_summary AS
SELECT
    COUNT(*)                                               AS total_products,
    ROUND(SUM(total_revenue), 2)                           AS total_revenue_all_products,
    ROUND(AVG(total_revenue), 2)                           AS avg_revenue_per_product,
    ROUND(AVG(average_rating), 2)                          AS avg_product_rating,
    -- Products per performance tier
    COUNT(*) FILTER (WHERE performance_tier = 'Star')      AS star_products,
    COUNT(*) FILTER (WHERE performance_tier = 'Strong')    AS strong_products,
    COUNT(*) FILTER (WHERE performance_tier = 'Average')   AS average_products,
    COUNT(*) FILTER (WHERE performance_tier = 'Weak')      AS weak_products,
    -- Products per ABC class
    COUNT(*) FILTER (WHERE abc_classification = 'A')       AS class_a_products,
    COUNT(*) FILTER (WHERE abc_classification = 'B')       AS class_b_products,
    COUNT(*) FILTER (WHERE abc_classification = 'C')       AS class_c_products
FROM product_performance;

-- Report header followed by the summary row
SELECT 'Product Performance Summary' AS report_type;
SELECT
    total_products,
    total_revenue_all_products,
    avg_revenue_per_product,
    avg_product_rating,
    star_products,
    strong_products,
    average_products,
    weak_products,
    class_a_products,
    class_b_products,
    class_c_products
FROM product_performance_summary;


## Top Product Analysis

Analyzing the top performing products by revenue and their characteristics.


In [0]:
-- Top performing products
-- Lists the ten products with the highest total_revenue, together with their
-- tier, volume, rating and velocity attributes.
SELECT 'Top 10 Products by Revenue' as report_type;
SELECT 
    product_name,
    category_name,
    brand,
    performance_tier,
    total_revenue,
    total_quantity_sold,
    average_rating,
    total_reviews,
    velocity_category
FROM product_performance 
-- product_id breaks revenue ties so the LIMIT returns the same rows on every run.
ORDER BY total_revenue DESC, product_id
LIMIT 10;


## Category Analysis

Analyzing product performance by category.


In [0]:
-- Category performance
-- Aggregates product_performance per category and shows the ten categories
-- with the highest summed revenue.
SELECT 'Category Performance' as report_type;
SELECT 
    category_name,
    COUNT(*) as product_count,
    ROUND(SUM(total_revenue), 2) as total_revenue,
    ROUND(AVG(total_revenue), 2) as avg_revenue_per_product,
    ROUND(AVG(average_rating), 2) as avg_rating,
    COUNT(CASE WHEN performance_tier = 'Star' THEN 1 END) as star_products
FROM product_performance
GROUP BY category_name
-- NOTE(review): total_revenue names both a source column and the aggregate
-- alias above; presumably this sorts by the per-category sum -- confirm.
-- category_name breaks revenue ties so the LIMIT is deterministic.
ORDER BY total_revenue DESC, category_name
LIMIT 10;


## Brand Analysis

Analyzing product performance by brand.


In [0]:
-- Brand performance
-- Aggregates product_performance per brand and shows the ten brands with the
-- highest summed revenue, including how many of their products are in the
-- 'Star' or 'Strong' tier.
SELECT 'Brand Performance' as report_type;
SELECT 
    brand,
    COUNT(*) as product_count,
    ROUND(SUM(total_revenue), 2) as total_revenue,
    ROUND(AVG(average_rating), 2) as avg_rating,
    COUNT(CASE WHEN performance_tier IN ('Star', 'Strong') THEN 1 END) as top_tier_products
FROM product_performance
GROUP BY brand
-- NOTE(review): total_revenue names both a source column and the aggregate
-- alias above; presumably this sorts by the per-brand sum -- confirm.
-- brand breaks revenue ties so the LIMIT is deterministic.
ORDER BY total_revenue DESC, brand
LIMIT 10;


## Optimize Table

Optimizing the product performance table for better query performance.


In [0]:
-- Optimize table
-- Runs OPTIMIZE on the freshly rebuilt product_performance table.
-- NOTE(review): the table is created USING iceberg; confirm that OPTIMIZE is
-- supported for that table format on the target runtime.
OPTIMIZE product_performance;
