## 2: SILVER LAYER - FEATURE ENGINEERING

In [0]:
# Section banner: the title line followed by a 70-character rule, emitted in one call.
print("🎯  2: FEATURE ENGINEERING & SILVER LAYER", "=" * 70, sep="\n")


🎯  2: FEATURE ENGINEERING & SILVER LAYER


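### SET CATALOG FIRST!

The cells below refer to tables by schema-qualified names such as `bronze.orders`, so the session catalog must be switched to `brazil_project` before anything else runs.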

In [0]:
# Switch the session to the project catalog; the unqualified `bronze` schema
# used below is resolved relative to it.
spark.sql("USE CATALOG brazil_project")

# Read the active catalog and schema back from the session in a single query
# to confirm the switch took effect.
current_catalog, current_schema = spark.sql(
    "SELECT current_catalog(), current_database()"
).first()

print(f"✅ Catalog: {current_catalog}")
print(f"✅ Schema: {current_schema}")

# Best-effort listing of the bronze tables: any failure is reported together
# with a hint instead of aborting the cell.
print("\n🔍 Checking bronze tables...")
try:
    bronze_tables = spark.sql("SHOW TABLES IN bronze").collect()
    print(f"✅ Found {len(bronze_tables)} tables in bronze:")
    for table_row in bronze_tables:
        print(f"   - {table_row.tableName}")
except Exception as e:
    print(f"❌ Error: {e}")
    print("\n⚠️ Make sure you ran part 1 notebook first!")

✅ Catalog: brazil_project
✅ Schema: default

🔍 Checking bronze tables...
✅ Found 8 tables in bronze:
   - customers
   - geolocation
   - order_items
   - order_payments
   - order_reviews
   - orders
   - products
   - sellers


In [0]:
print("\n0️⃣ SETTING CATALOG CONTEXT")

# Point the session at the project catalog before touching any schema.
spark.sql("USE CATALOG brazil_project")
print("✅ Using catalog: brazil_project")

# The silver schema is only created once the bronze layer is confirmed to
# contain at least one table; otherwise the reader is sent back to Part 1.
bronze_tables = spark.sql("SHOW TABLES IN bronze").collect()
if bronze_tables:
    print(f"✅ Found {len(bronze_tables)} tables in bronze layer")

    spark.sql("CREATE SCHEMA IF NOT EXISTS silver")
    print("✅ Silver schema ready")
else:
    print("❌ ERROR: No tables found in bronze layer!")
    print("   Please run Part 1 notebook first.")


0️⃣ SETTING CATALOG CONTEXT
✅ Using catalog: brazil_project
✅ Found 8 tables in bronze layer
✅ Silver schema ready


## 1: CREATE CUSTOMER 360 VIEW

In [0]:
print("\n1️⃣ BUILDING CUSTOMER 360 VIEW")

# Single fully qualified name so the write, the row count and the sample all
# address the same table regardless of the session's current catalog.
CUSTOMER_360_TABLE = "brazil_project.silver.customer_360_simple"

# Builds one row per bronze.customers record carrying order aggregates,
# RFM scores (each 1-5) and a 90-day churn flag.
#
# Recency and churn are measured against the newest purchase date present in
# bronze.orders rather than CURRENT_DATE(): the orders are historical, so
# anchoring on the run date places every customer in the oldest recency bucket,
# flags every customer as churned, and makes the result depend on when the
# notebook is executed.
#
# Customers with no joined order items have a NULL last_order_date, so DATEDIFF
# yields NULL and both CASE expressions fall through to ELSE
# (recency_score = 1, churned_90d = 0).
#
# NOTE(review): aggregates are grouped by customer_id; if bronze.customers also
# carries a person-level key, repeat purchases may be split across rows — confirm.
customer_360_query = """
WITH recency_anchor AS (
    -- Newest purchase date in the data; the reference point for recency/churn.
    SELECT TO_DATE(MAX(order_purchase_timestamp)) AS as_of_date
    FROM bronze.orders
),

customer_orders AS (
    SELECT 
        o.customer_id,
        COUNT(DISTINCT o.order_id) as total_orders,
        SUM(oi.price) as total_spent,
        -- Per-order value: the join yields one row per item, so AVG(oi.price)
        -- would be the average item price, not the average order value.
        SUM(oi.price) / COUNT(DISTINCT o.order_id) as avg_order_value,
        MIN(o.order_purchase_timestamp) as first_order_date,
        MAX(o.order_purchase_timestamp) as last_order_date
    FROM bronze.orders o
    JOIN bronze.order_items oi ON o.order_id = oi.order_id
    GROUP BY o.customer_id
)

SELECT 
    c.customer_id,
    c.customer_city,
    c.customer_state,
    COALESCE(co.total_orders, 0) as total_orders,
    COALESCE(co.total_spent, 0) as total_spent,
    COALESCE(co.avg_order_value, 0) as avg_order_value,
    co.first_order_date,
    co.last_order_date,
    
    -- RFM Scores
    CASE 
        WHEN DATEDIFF(a.as_of_date, co.last_order_date) <= 30 THEN 5
        WHEN DATEDIFF(a.as_of_date, co.last_order_date) <= 60 THEN 4
        WHEN DATEDIFF(a.as_of_date, co.last_order_date) <= 90 THEN 3
        WHEN DATEDIFF(a.as_of_date, co.last_order_date) <= 180 THEN 2
        ELSE 1
    END as recency_score,
    
    CASE 
        WHEN co.total_orders >= 10 THEN 5
        WHEN co.total_orders >= 5 THEN 4
        WHEN co.total_orders >= 3 THEN 3
        WHEN co.total_orders >= 2 THEN 2
        ELSE 1
    END as frequency_score,
    
    CASE 
        WHEN co.total_spent >= 1000 THEN 5
        WHEN co.total_spent >= 500 THEN 4
        WHEN co.total_spent >= 200 THEN 3
        WHEN co.total_spent >= 100 THEN 2
        ELSE 1
    END as monetary_score,
    
    -- Churn flag: no purchase within 90 days of the anchor date
    CASE 
        WHEN DATEDIFF(a.as_of_date, co.last_order_date) > 90 THEN 1
        ELSE 0
    END as churned_90d
    
FROM bronze.customers c
LEFT JOIN customer_orders co ON c.customer_id = co.customer_id
-- recency_anchor always has exactly one row (ungrouped aggregate), so this
-- attaches the anchor date to every customer without changing the row count.
CROSS JOIN recency_anchor a
"""

# Execute and save. Column names and order match the previous definition, so
# overwriting an existing table needs no schema migration.
customer_360_df = spark.sql(customer_360_query)
customer_360_df.write.mode("overwrite").saveAsTable(CUSTOMER_360_TABLE)

# Count the persisted table: calling .count() on the lazy query DataFrame would
# re-run the whole join and aggregation a second time.
customer_360_saved = spark.table(CUSTOMER_360_TABLE)
print(f"✅ Customer 360 view created: {customer_360_saved.count():,} customers")

# Show sample
print("\n📊 Sample from silver.customer_360_simple:")
customer_360_saved.limit(5).show()




1️⃣ BUILDING CUSTOMER 360 VIEW
✅ Customer 360 view created: 99,441 customers

📊 Sample from silver.customer_360_simple:
+--------------------+--------------------+--------------+------------+-----------+---------------+-------------------+-------------------+-------------+---------------+--------------+-----------+
|         customer_id|       customer_city|customer_state|total_orders|total_spent|avg_order_value|   first_order_date|    last_order_date|recency_score|frequency_score|monetary_score|churned_90d|
+--------------------+--------------------+--------------+------------+-----------+---------------+-------------------+-------------------+-------------+---------------+--------------+-----------+
|06b8999e2fba1a1fb...|              franca|            SP|           1|     124.99|         124.99|2017-05-16 15:05:35|2017-05-16 15:05:35|            1|              1|             2|          1|
|18955e83d337fd6b2...|sao bernardo do c...|            SP|           1|      289.0|        