Stage 3: SQL Analytics (DuckDB)
 
**Description:** In this stage, we simulate a modern **Data Lakehouse** architecture. Instead of loading data into a traditional heavy database (like PostgreSQL), we use **DuckDB** â€” an in-process OLAP SQL engine. This allows us to run complex SQL queries directly on our optimized `Parquet` files.

**Business Objectives:**
1. **Market Segmentation:** Analyze average pricing and nutrient density across the ML-generated clusters.
2. **Value Analysis:** Identify the top 3 "Best Value" products in each category using Window Functions.
3. **Inventory Risk:** Detect premium products with critical stock levels to generate alerts for the Supply Chain team.

**Tech Stack:** `DuckDB`, `SQL`, `Pandas`

In [2]:
import duckdb
import pandas as pd

# --- Configuration ---
# Connecting to the "Gold Layer" (Cleaned & Enriched Data)
INPUT_FILE = '../data/processed/supplements_enriched.parquet'

print("Connecting to Data Lakehouse via DuckDB...")

# Initialize in-memory database connection
con = duckdb.connect()

# ==============================================================================
# REPORT 1: Market Segmentation Overview
# Business Question: "How does pricing and potency differ across our customer segments?"
# ==============================================================================
print("\n--- [Report 1] Market Segmentation Overview ---")

query_overview = f"""
SELECT 
    segment_name,
    COUNT(*) as total_products,
    ROUND(AVG(price_eur), 2) as avg_price_eur,
    ROUND(AVG(vitamin_c_mg), 0) as avg_vit_c_mg,
    ROUND(AVG(magnesium_mg), 0) as avg_magnesium_mg
FROM '{INPUT_FILE}'
GROUP BY segment_name
ORDER BY avg_price_eur DESC
"""

df_overview = con.execute(query_overview).df()
display(df_overview)

# ==============================================================================
# REPORT 2: Best Value Products (Top 3 per Category)
# Business Question: "Which products offer the most active ingredients per Euro?"
# Technical Skill: Window Functions (ROW_NUMBER)
# ==============================================================================
print("\n--- [Report 2] Top 3 'Best Value' Products per Category ---")

query_ranking = f"""
WITH RankedProducts AS (
    SELECT 
        product_name,
        category,
        price_eur,
        price_per_1000mg,
        -- Partition by Category to restart ranking for each product type
        ROW_NUMBER() OVER (PARTITION BY category ORDER BY price_per_1000mg ASC) as rank_id
    FROM '{INPUT_FILE}'
)
SELECT 
    category,
    product_name,
    price_eur,
    ROUND(price_per_1000mg, 2) as eur_per_1000mg_active
FROM RankedProducts
WHERE rank_id <= 3
ORDER BY category, eur_per_1000mg_active
"""

df_ranking = con.execute(query_ranking).df()
display(df_ranking)

# ==============================================================================
# REPORT 3: Inventory Risk Alert
# Business Question: "Which Premium products are running low on stock?"
# ==============================================================================
print("\n--- [Report 3] Low Stock Alert (Premium Segment) ---")

query_stock = f"""
SELECT 
    product_name,
    stock_level,
    price_eur,
    last_updated
FROM '{INPUT_FILE}'
WHERE segment_name LIKE '%Premium%' 
  AND stock_level < 20
ORDER BY stock_level ASC
LIMIT 10
"""

df_stock = con.execute(query_stock).df()
display(df_stock)

Connecting to Data Lakehouse via DuckDB...

--- [Report 1] Market Segmentation Overview ---


Unnamed: 0,segment_name,total_products,avg_price_eur,avg_vit_c_mg,avg_magnesium_mg
0,High-Potency Premium,813,50.54,242.0,147.0
1,Balanced / Standard,553,29.84,333.0,178.0
2,Budget Essentials,631,28.37,178.0,123.0



--- [Report 2] Top 3 'Best Value' Products per Category ---


Unnamed: 0,category,product_name,price_eur,eur_per_1000mg_active
0,Magnesium,NutriLife Magnesium Ultra,16.29,26.95
1,Magnesium,NutriLife Omega-3 Plus,15.63,27.74
2,Magnesium,NutriLife Multivitamin Plus,16.39,28.29
3,Multivitamin,NutriLife Omega-3 Ultra,15.5,25.25
4,Multivitamin,NutriLife Magnesium Pro,16.16,26.83
5,Multivitamin,NutriLife Omega-3 Pro,16.33,27.57
6,Omega-3,NutriLife Multivitamin Basic,56.29,-1628.19
7,Omega-3,NutriLife Vitamin C Basic,15.18,24.92
8,Omega-3,NutriLife Omega-3 Basic,15.41,30.93
9,Vitamin C,NutriLife Zinc Plus,15.95,26.03



--- [Report 3] Low Stock Alert (Premium Segment) ---


Unnamed: 0,product_name,stock_level,price_eur,last_updated
0,NutriLife Omega-3 Pro,0,46.61,2025-12-06 21:32:33.602978
1,NutriLife Magnesium Plus,1,53.86,2025-12-17 21:32:33.602934
2,NutriLife Vitamin C Ultra,1,56.7,2025-12-08 21:32:33.603041
3,NutriLife Magnesium Pro,2,52.7,2025-12-27 21:32:33.602116
4,NutriLife Magnesium Pro,2,49.29,2025-12-21 21:32:33.602580
5,NutriLife Multivitamin Ultra,3,43.07,2025-12-26 21:32:33.601867
6,NutriLife Zinc Ultra,3,48.7,2025-12-11 21:32:33.602682
7,NutriLife Vitamin D3 Plus,3,57.41,2025-12-16 21:32:33.602726
8,NutriLife Magnesium Plus,3,47.67,2025-12-06 21:32:33.603792
9,NutriLife Omega-3 Basic,4,47.51,2025-12-08 21:32:33.603242
