In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
from delta.tables import *

# Configuration
catalog_name = "main"
database_name = "retail_lakehouse"

# Paths
silver_path = f"/Volumes/{catalog_name}/{database_name}/silver"
gold_path = f"/Volumes/{catalog_name}/{database_name}/gold"

# Create gold directory
try:
    dbutils.fs.mkdirs(gold_path)
except:
    print("Note: dbutils not available - running in local environment")

# Use the database
try:
    spark.sql(f"USE {database_name}")
except:
    print(f"Note: Database {database_name} may not exist yet")

print(f"Silver Path: {silver_path}")
print(f"Gold Path: {gold_path}")
print(f"Using database: {database_name}")

# COMMAND ----------

def create_daily_sales_summary():
    """Create daily sales summary for executive reporting"""
    
    print("Creating Daily Sales Summary...")
    
    daily_sales = spark.sql(f"""
    SELECT 
        transaction_date,
        year,
        month,
        month_name,
        quarter,
        day_name,
        is_weekend,
        
        -- Transaction Metrics
        COUNT(*) as total_transactions,
        COUNT(DISTINCT customer_id) as unique_customers,
        COUNT(DISTINCT product_id) as unique_products,
        COUNT(DISTINCT store_id) as active_stores,
        
        -- Revenue Metrics
        ROUND(SUM(final_amount), 2) as total_revenue,
        ROUND(AVG(final_amount), 2) as avg_transaction_value,
        ROUND(MIN(final_amount), 2) as min_transaction_value,
        ROUND(MAX(final_amount), 2) as max_transaction_value,
        
        -- Quantity Metrics
        SUM(quantity) as total_items_sold,
        ROUND(AVG(quantity), 2) as avg_items_per_transaction,
        
        -- Discount Metrics
        ROUND(SUM(discount_amount), 2) as total_discounts,
        ROUND(AVG(discount_percent), 2) as avg_discount_percent,
        COUNT(CASE WHEN discount_percent > 0 THEN 1 END) as discounted_transactions,
        
        -- Estimated Profit
        ROUND(SUM(estimated_profit), 2) as estimated_total_profit,
        ROUND(AVG(estimated_profit), 2) as avg_profit_per_transaction,
        
        -- Customer Segments
        COUNT(CASE WHEN customer_segment = 'Premium' THEN 1 END) as premium_transactions,
        COUNT(CASE WHEN customer_segment = 'Standard' THEN 1 END) as standard_transactions,
        COUNT(CASE WHEN customer_segment = 'Budget' THEN 1 END) as budget_transactions,
        COUNT(CASE WHEN customer_segment = 'VIP' THEN 1 END) as vip_transactions,
        
        -- Transaction Size Distribution
        COUNT(CASE WHEN transaction_size = 'Small' THEN 1 END) as small_transactions,
        COUNT(CASE WHEN transaction_size = 'Medium' THEN 1 END) as medium_transactions,
        COUNT(CASE WHEN transaction_size = 'Large' THEN 1 END) as large_transactions,
        COUNT(CASE WHEN transaction_size = 'Very Large' THEN 1 END) as very_large_transactions,
        
        -- Time of Day Distribution
        COUNT(CASE WHEN time_of_day = 'Morning' THEN 1 END) as morning_transactions,
        COUNT(CASE WHEN time_of_day = 'Afternoon' THEN 1 END) as afternoon_transactions,
        COUNT(CASE WHEN time_of_day = 'Evening' THEN 1 END) as evening_transactions,
        COUNT(CASE WHEN time_of_day = 'Night' THEN 1 END) as night_transactions,
        
        -- Processing timestamp
        CURRENT_TIMESTAMP() as gold_processing_time
        
    FROM delta.`{silver_path}/sales_silver`
    WHERE data_quality_flag = 'Good'
    GROUP BY transaction_date, year, month, month_name, quarter, day_name, is_weekend
    ORDER BY transaction_date DESC
    """)
    
    return daily_sales

# COMMAND ----------

def create_monthly_sales_summary():
    """Create monthly sales summary for strategic planning"""
    
    print("Creating Monthly Sales Summary...")
    
    monthly_sales = spark.sql(f"""
    SELECT 
        year,
        month,
        month_name,
        quarter,
        CONCAT(year, '-', LPAD(CAST(month AS STRING), 2, '0')) as year_month,
        
        -- Transaction Metrics
        COUNT(*) as total_transactions,
        COUNT(DISTINCT customer_id) as unique_customers,
        COUNT(DISTINCT product_id) as unique_products,
        COUNT(DISTINCT transaction_date) as active_days,
        ROUND(COUNT(*) * 1.0 / COUNT(DISTINCT transaction_date), 2) as avg_transactions_per_day,
        
        -- Revenue Metrics
        ROUND(SUM(final_amount), 2) as total_revenue,
        ROUND(AVG(final_amount), 2) as avg_transaction_value,
        ROUND(SUM(final_amount) / COUNT(DISTINCT transaction_date), 2) as avg_daily_revenue,
        
        -- Growth Metrics (will be calculated via window functions)
        LAG(SUM(final_amount)) OVER (ORDER BY year, month) as prev_month_revenue,
        ROUND((SUM(final_amount) - LAG(SUM(final_amount)) OVER (ORDER BY year, month)) / 
              NULLIF(LAG(SUM(final_amount)) OVER (ORDER BY year, month), 0) * 100, 2) as revenue_growth_percent,
        
        -- Customer Metrics
        ROUND(SUM(final_amount) / NULLIF(COUNT(DISTINCT customer_id), 0), 2) as revenue_per_customer,
        COUNT(*) * 1.0 / NULLIF(COUNT(DISTINCT customer_id), 0) as avg_transactions_per_customer,
        
        -- Product Performance
        SUM(quantity) as total_items_sold,
        ROUND(SUM(final_amount) / NULLIF(SUM(quantity), 0), 2) as avg_price_per_item,
        
        -- Profitability
        ROUND(SUM(estimated_profit), 2) as estimated_total_profit,
        ROUND(SUM(estimated_profit) / NULLIF(SUM(final_amount), 0) * 100, 2) as profit_margin_percent,
        
        -- Discount Impact
        ROUND(SUM(discount_amount), 2) as total_discounts_given,
        ROUND(SUM(discount_amount) / NULLIF(SUM(final_amount), 0) * 100, 2) as discount_percentage_of_revenue,
        
        CURRENT_TIMESTAMP() as gold_processing_time
        
    FROM delta.`{silver_path}/sales_silver`
    WHERE data_quality_flag = 'Good'
    GROUP BY year, month, month_name, quarter
    ORDER BY year DESC, month DESC
    """)
    
    return monthly_sales

# COMMAND ----------

def create_product_performance():
    """Create product performance summary"""
    
    print("Creating Product Performance Analysis...")
    
    product_performance = spark.sql(f"""
    SELECT 
        p.product_id,
        p.product_name,
        p.category,
        p.sub_category,
        p.brand,
        p.price_category,
        p.selling_price,
        p.cost_price,
        p.margin_percentage,
        
        -- Sales Performance
        COUNT(s.transaction_id) as total_transactions,
        COALESCE(SUM(s.quantity), 0) as total_quantity_sold,
        COALESCE(ROUND(SUM(s.final_amount), 2), 0.0) as total_revenue,
        COALESCE(ROUND(AVG(s.final_amount), 2), 0.0) as avg_transaction_value,
        
        -- Time-based metrics
        MIN(s.transaction_date) as first_sale_date,
        MAX(s.transaction_date) as last_sale_date,
        COALESCE(DATEDIFF(MAX(s.transaction_date), MIN(s.transaction_date)), 0) as days_in_market,
        
        -- Customer reach
        COUNT(DISTINCT s.customer_id) as unique_customers,
        COUNT(DISTINCT s.store_id) as stores_sold_in,
        
        -- Performance ratios
        COALESCE(ROUND(SUM(s.final_amount) / NULLIF(COUNT(s.transaction_id), 0), 2), 0.0) as revenue_per_transaction,
        COALESCE(ROUND(SUM(s.quantity) / NULLIF(COUNT(s.transaction_id), 0), 2), 0.0) as avg_quantity_per_transaction,
        COALESCE(ROUND(SUM(s.final_amount) / NULLIF(SUM(s.quantity), 0), 2), 0.0) as avg_selling_price,
        
        -- Profitability
        COALESCE(ROUND(SUM(s.estimated_profit), 2), 0.0) as total_estimated_profit,
        COALESCE(ROUND(SUM(s.estimated_profit) / NULLIF(SUM(s.final_amount), 0) * 100, 2), 0.0) as profit_margin_realized,
        
        -- Ranking metrics (will be added via window functions)
        ROW_NUMBER() OVER (ORDER BY SUM(s.final_amount) DESC NULLS LAST) as revenue_rank,
        ROW_NUMBER() OVER (ORDER BY SUM(s.quantity) DESC NULLS LAST) as quantity_rank,
        ROW_NUMBER() OVER (ORDER BY COUNT(s.transaction_id) DESC NULLS LAST) as transaction_rank,
        
        CURRENT_TIMESTAMP() as gold_processing_time
        
    FROM delta.`{silver_path}/product_silver` p
    LEFT JOIN delta.`{silver_path}/sales_silver` s ON p.product_id = s.product_id
    WHERE s.data_quality_flag = 'Good' OR s.data_quality_flag IS NULL
    GROUP BY p.product_id, p.product_name, p.category, p.sub_category, p.brand, 
             p.price_category, p.selling_price, p.cost_price, p.margin_percentage
    ORDER BY total_revenue DESC NULLS LAST
    """)
    
    return product_performance

# COMMAND ----------

# MAGIC %md
# MAGIC ## Customer Analytics

# COMMAND ----------

def create_customer_analytics():
    """Create customer behavior analytics"""
    
    print("Creating Customer Analytics...")
    
    customer_analytics = spark.sql(f"""
    SELECT 
        c.customer_id,
        c.first_name,
        c.last_name,
        c.email,
        c.age,
        c.age_group,
        c.customer_segment,
        c.tenure_category,
        c.customer_tenure_years,
        c.registration_date,
        
        -- Purchase Behavior
        COUNT(s.transaction_id) as total_transactions,
        COALESCE(SUM(s.quantity), 0) as total_items_purchased,
        COALESCE(ROUND(SUM(s.final_amount), 2), 0.0) as total_lifetime_value,
        COALESCE(ROUND(AVG(s.final_amount), 2), 0.0) as avg_transaction_value,
        COALESCE(ROUND(SUM(s.final_amount) / NULLIF(COUNT(s.transaction_id), 0), 2), 0.0) as avg_order_value,
        
        -- Frequency Analysis
        MIN(s.transaction_date) as first_purchase_date,
        MAX(s.transaction_date) as last_purchase_date,
        COALESCE(DATEDIFF(MAX(s.transaction_date), MIN(s.transaction_date)), 0) as purchase_span_days,
        COALESCE(ROUND(COUNT(s.transaction_id) * 30.0 / NULLIF(DATEDIFF(MAX(s.transaction_date), MIN(s.transaction_date)) + 1, 0), 2), 0.0) as avg_purchases_per_month,
        
        -- Recency, Frequency, Monetary (RFM) Components
        COALESCE(DATEDIFF(CURRENT_DATE(), MAX(s.transaction_date)), 9999) as days_since_last_purchase,
        COUNT(s.transaction_id) as purchase_frequency,
        COALESCE(ROUND(SUM(s.final_amount), 2), 0.0) as monetary_value,
        
        -- Product Preferences
        COUNT(DISTINCT s.product_id) as unique_products_purchased,
        COUNT(DISTINCT s.category) as unique_categories_purchased,
        
        -- Store Preferences
        COUNT(DISTINCT s.store_id) as unique_stores_visited,
        
        -- Discount Usage
        COUNT(CASE WHEN s.discount_percent > 0 THEN 1 END) as discounted_purchases,
        COALESCE(ROUND(COUNT(CASE WHEN s.discount_percent > 0 THEN 1 END) * 100.0 / NULLIF(COUNT(s.transaction_id), 0), 2), 0.0) as discount_usage_rate,
        COALESCE(ROUND(AVG(s.discount_percent), 2), 0.0) as avg_discount_percent,
        
        -- Seasonality (quarters with purchases)
        COUNT(DISTINCT s.quarter) as quarters_active,
        
        -- Customer Value Segment
        CASE 
            WHEN SUM(s.final_amount) >= 1000 AND COUNT(s.transaction_id) >= 10 THEN 'High Value High Frequency'
            WHEN SUM(s.final_amount) >= 1000 AND COUNT(s.transaction_id) < 10 THEN 'High Value Low Frequency'
            WHEN SUM(s.final_amount) < 1000 AND COUNT(s.transaction_id) >= 10 THEN 'Low Value High Frequency'
            WHEN SUM(s.final_amount) < 1000 AND COUNT(s.transaction_id) < 10 THEN 'Low Value Low Frequency'
            ELSE 'Other'
        END as customer_value_segment,
        
        CURRENT_TIMESTAMP() as gold_processing_time
        
    FROM delta.`{silver_path}/customer_silver` c
    LEFT JOIN delta.`{silver_path}/sales_silver` s ON c.customer_id = s.customer_id
    WHERE c.data_quality_flag IN ('Excellent', 'Good') OR c.data_quality_flag IS NULL
    GROUP BY c.customer_id, c.first_name, c.last_name, c.email, c.age, c.age_group,
             c.customer_segment, c.tenure_category, c.customer_tenure_years, c.registration_date
    ORDER BY total_lifetime_value DESC NULLS LAST
    """)
    
    return customer_analytics

# COMMAND ----------

def create_store_performance():
    """Create store performance summary"""
    
    print("Creating Store Performance Analysis...")
    
    store_performance = spark.sql(f"""
    SELECT 
        store_id,
        store_name,
        region,
        
        -- Transaction Metrics
        COUNT(*) as total_transactions,
        COUNT(DISTINCT customer_id) as unique_customers,
        COUNT(DISTINCT product_id) as unique_products,
        
        -- Revenue Metrics
        ROUND(SUM(final_amount), 2) as total_revenue,
        ROUND(AVG(final_amount), 2) as avg_transaction_value,
        ROUND(MIN(final_amount), 2) as min_transaction_value,
        ROUND(MAX(final_amount), 2) as max_transaction_value,
        
        -- Quantity Metrics
        SUM(quantity) as total_items_sold,
        ROUND(AVG(quantity), 2) as avg_items_per_transaction,
        
        -- Time-based metrics
        MIN(transaction_date) as first_transaction_date,
        MAX(transaction_date) as last_transaction_date,
        COUNT(DISTINCT transaction_date) as active_days,
        
        -- Customer Segments
        COUNT(CASE WHEN customer_segment = 'Premium' THEN 1 END) as premium_transactions,
        COUNT(CASE WHEN customer_segment = 'Standard' THEN 1 END) as standard_transactions,
        COUNT(CASE WHEN customer_segment = 'Budget' THEN 1 END) as budget_transactions,
        COUNT(CASE WHEN customer_segment = 'VIP' THEN 1 END) as vip_transactions,
        
        -- Performance ratios
        ROUND(COUNT(*) * 1.0 / COUNT(DISTINCT transaction_date), 2) as avg_transactions_per_day,
        ROUND(SUM(final_amount) / COUNT(DISTINCT transaction_date), 2) as avg_daily_revenue,
        ROUND(SUM(final_amount) / COUNT(DISTINCT customer_id), 2) as revenue_per_customer,
        
        -- Ranking metrics
        ROW_NUMBER() OVER (ORDER BY SUM(final_amount) DESC) as revenue_rank_by_region,
        ROW_NUMBER() OVER (PARTITION BY region ORDER BY SUM(final_amount) DESC) as revenue_rank_within_region,
        
        CURRENT_TIMESTAMP() as gold_processing_time
        
    FROM delta.`{silver_path}/sales_silver`
    WHERE data_quality_flag = 'Good'
    GROUP BY store_id, store_name, region
    ORDER BY total_revenue DESC
    """)
    
    return store_performance

# COMMAND ----------

# Process all aggregations
print("Starting Gold Layer Aggregation Processing...")

# Create daily sales summary
daily_sales_df = create_daily_sales_summary()

# Create monthly sales summary
monthly_sales_df = create_monthly_sales_summary()

# Create product performance analysis
product_performance_df = create_product_performance()

# Create customer analytics
customer_analytics_df = create_customer_analytics()

# Create store performance analysis
store_performance_df = create_store_performance()

print Gold layer aggregations completed!")

# COMMAND ----------

# MAGIC %md
# MAGIC ## Write Gold Layer Tables

# COMMAND ----------

# Write daily sales summary
print("Writing Daily Sales Summary...")
try:
    if daily_sales_df.count() > 0:
        (daily_sales_df
         .write
         .mode("overwrite")
         .option("overwriteSchema", "true")
         .format("delta")
         .save(f"{gold_path}/daily_sales_summary")
        )
        print(" Daily Sales Summary saved successfully!")
    else:
        print("No daily sales data to write")
except Exception as e:
    print(f"Error writing daily sales summary: {e}")

# Write monthly sales summary
print("Writing Monthly Sales Summary...")
try:
    if monthly_sales_df.count() > 0:
        (monthly_sales_df
         .write
         .mode("overwrite")
         .option("overwriteSchema", "true")
         .format("delta")
         .save(f"{gold_path}/monthly_sales_summary")
        )
        print("Monthly Sales Summary saved successfully!")
    else:
        print(" No monthly sales data to write")
except Exception as e:
    print(f"Error writing monthly sales summary: {e}")

# Write product performance
print("Writing Product Performance...")
try:
    if product_performance_df.count() > 0:
        (product_performance_df
         .write
         .mode("overwrite")
         .option("overwriteSchema", "true")
         .format("delta")
         .save(f"{gold_path}/product_performance")
        )
        print(" Product Performance saved successfully!")
    else:
        print(" No product performance data to write")
except Exception as e:
    print(f"Error writing product performance: {e}")

# Write customer analytics
print("Writing Customer Analytics...")
try:
    if customer_analytics_df.count() > 0:
        (customer_analytics_df
         .write
         .mode("overwrite")
         .option("overwriteSchema", "true")
         .format("delta")
         .save(f"{gold_path}/customer_analytics")
        )
        print(" Customer Analytics saved successfully!")
    else:
        print(" No customer analytics data to write")
except Exception as e:
    print(f"Error writing customer analytics: {e}")

# Write store performance
print("Writing Store Performance...")
try:
    if store_performance_df.count() > 0:
        (store_performance_df
         .write
         .mode("overwrite")
         .option("overwriteSchema", "true")
         .format("delta")
         .save(f"{gold_path}/store_performance")
        )
        print(" Store Performance saved successfully!")
    else:
        print(" No store performance data to write")
except Exception as e:
    print(f"Error writing store performance: {e}")

print(" Gold layer data saving process completed!")

# COMMAND ----------

# Create Delta tables for gold layer
print("Creating Gold Layer Delta Tables...")

try:
    # Daily Sales Summary Table
    spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {database_name}.daily_sales_summary
    USING DELTA
    LOCATION '{gold_path}/daily_sales_summary'
    """)
    print(" Daily Sales Summary table created!")
except Exception as e:
    print(f"Error creating daily sales summary table: {e}")

try:
    # Monthly Sales Summary Table
    spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {database_name}.monthly_sales_summary
    USING DELTA
    LOCATION '{gold_path}/monthly_sales_summary'
    """)
    print(" Monthly Sales Summary table created!")
except Exception as e:
    print(f"Error creating monthly sales summary table: {e}")

try:
    # Product Performance Table
    spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {database_name}.product_performance
    USING DELTA
    LOCATION '{gold_path}/product_performance'
    """)
    print(" Product Performance table created!")
except Exception as e:
    print(f"Error creating product performance table: {e}")

try:
    # Customer Analytics Table
    spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {database_name}.customer_analytics
    USING DELTA
    LOCATION '{gold_path}/customer_analytics'
    """)
    print(" Customer Analytics table created!")
except Exception as e:
    print(f"Error creating customer analytics table: {e}")

try:
    # Store Performance Table
    spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {database_name}.store_performance
    USING DELTA
    LOCATION '{gold_path}/store_performance'
    """)
    print(" Store Performance table created!")
except Exception as e:
    print(f"Error creating store performance table: {e}")

print(" Gold layer tables creation process completed!")

# COMMAND ----------

# Validate gold layer data
print("=== GOLD LAYER DATA VALIDATION ===\n")

try:
    # Daily Sales Summary validation
    print("📊 Daily Sales Summary Validation:")
    daily_count = spark.sql(f"SELECT COUNT(*) as count FROM delta.`{gold_path}/daily_sales_summary`").collect()[0]['count']
    print(f"   Records: {daily_count:,}")
    
    if daily_count > 0:
        daily_sample = spark.sql(f"SELECT transaction_date, total_revenue, total_transactions FROM delta.`{gold_path}/daily_sales_summary` ORDER BY transaction_date DESC LIMIT 3")
        daily_sample.show()
except Exception as e:
    print(f"Error validating daily sales summary: {e}")

try:
    # Monthly Sales Summary validation
    print("📈 Monthly Sales Summary Validation:")
    monthly_count = spark.sql(f"SELECT COUNT(*) as count FROM delta.`{gold_path}/monthly_sales_summary`").collect()[0]['count']
    print(f"   Records: {monthly_count:,}")
    
    if monthly_count > 0:
        monthly_sample = spark.sql(f"SELECT year_month, total_revenue, revenue_growth_percent FROM delta.`{gold_path}/monthly_sales_summary` ORDER BY year DESC, month DESC LIMIT 3")
        monthly_sample.show()
except Exception as e:
    print(f"Error validating monthly sales summary: {e}")

try:
    # Top performing products
    print("🛍️ Top Performing Products:")
    top_products = spark.sql(f"""
    SELECT product_name, category, total_revenue, total_quantity_sold 
    FROM delta.`{gold_path}/product_performance` 
    ORDER BY total_revenue DESC 
    LIMIT 5
    """)
    top_products.show()
except Exception as e:
    print(f"Error showing top products: {e}")

try:
    # Top customers by lifetime value
    print("👥 Top Customers by Lifetime Value:")
    top_customers = spark.sql(f"""
    SELECT first_name, last_name, total_lifetime_value, total_transactions
    FROM delta.`{gold_path}/customer_analytics`
    ORDER BY total_lifetime_value DESC
    LIMIT 5
    """)
    top_customers.show()
except Exception as e:
    print(f"Error showing top customers: {e}")

# COMMAND ----------

# MAGIC %md
# MAGIC ## Business Insights Summary

# COMMAND ----------

print("=== BUSINESS INSIGHTS SUMMARY ===\n")

try:
    # Overall performance metrics
    print(" Overall Performance Metrics:")
    overall_metrics = spark.sql(f"""
    SELECT 
        COUNT(*) as total_days,
        ROUND(SUM(total_revenue), 2) as total_revenue,
        ROUND(AVG(total_revenue), 2) as avg_daily_revenue,
        SUM(total_transactions) as total_transactions,
        ROUND(AVG(avg_transaction_value), 2) as avg_transaction_value
    FROM delta.`{gold_path}/daily_sales_summary`
    """)
    overall_metrics.show()
except Exception as e:
    print(f"Error calculating overall metrics: {e}")

try:
    # Revenue by category
    print("Revenue by Category:")
    category_revenue = spark.sql(f"""
    SELECT 
        category,
        ROUND(SUM(total_revenue), 2) as category_revenue,
        ROUND(SUM(total_revenue) * 100.0 / (SELECT SUM(total_revenue) FROM delta.`{gold_path}/product_performance`), 2) as revenue_percentage
    FROM delta.`{gold_path}/product_performance`
    GROUP BY category
    ORDER BY category_revenue DESC
    """)
    category_revenue.show()
except Exception as e:
    print(f"Error calculating category revenue: {e}")

try:
    # Customer segment analysis
    print(" Customer Segment Analysis:")
    segment_analysis = spark.sql(f"""
    SELECT 
        customer_segment,
        COUNT(*) as customer_count,
        ROUND(AVG(total_lifetime_value), 2) as avg_lifetime_value,
        SUM(total_transactions) as total_transactions,
        ROUND(SUM(total_lifetime_value), 2) as total_revenue_contribution
    FROM delta.`{gold_path}/customer_analytics`
    GROUP BY customer_segment
    ORDER BY avg_lifetime_value DESC
    """)
    segment_analysis.show()
except Exception as e:
    print(f"Error analyzing customer segments: {e}")

# COMMAND ----------

print(" Gold Layer Processing Completed!")
print("\n Summary:")
print(" Daily sales summary created")
print(" Monthly sales summary created")
print(" Product performance analysis completed")
print(" Customer analytics generated")
print(" Store performance metrics calculated")
print(" Business insights extracted")
print(" Ready for reporting and dashboarding!")

# Final record counts
try:
    daily_count = spark.sql(f"SELECT COUNT(*) FROM delta.`{gold_path}/daily_sales_summary`").collect()[0][0]
    monthly_count = spark.sql(f"SELECT COUNT(*) FROM delta.`{gold_path}/monthly_sales_summary`").collect()[0][0]
    product_count = spark.sql(f"SELECT COUNT(*) FROM delta.`{gold_path}/product_performance`").collect()[0][0]
    customer_count = spark.sql(f"SELECT COUNT(*) FROM delta.`{gold_path}/customer_analytics`").collect()[0][0]
    store_count = spark.sql(f"SELECT COUNT(*) FROM delta.`{gold_path}/store_performance`").collect()[0][0]

    print(f"\n Final Gold Layer Record Counts:")
    print(f"   Daily Sales Summary: {daily_count:,}")
    print(f"   Monthly Sales Summary: {monthly_count:,}")
    print(f"   Product Performance: {product_count:,}")
    print(f"   Customer Analytics: {customer_count:,}")
    print(f"   Store Performance: {store_count:,}")
except Exception as e:
    print(f"Could not get final record counts: {e}")

Silver Path: /Volumes/main/retail_lakehouse/silver
Gold Path: /Volumes/main/retail_lakehouse/gold
Using database: retail_lakehouse
🚀 Starting Gold Layer Aggregation Processing...
Creating Daily Sales Summary...
Creating Monthly Sales Summary...
Creating Product Performance Analysis...
Creating Customer Analytics...
Creating Store Performance Analysis...
✅ Gold layer aggregations completed!
Writing Daily Sales Summary...
✅ Daily Sales Summary saved successfully!
Writing Monthly Sales Summary...
✅ Monthly Sales Summary saved successfully!
Writing Product Performance...
✅ Product Performance saved successfully!
Writing Customer Analytics...
✅ Customer Analytics saved successfully!
Writing Store Performance...
✅ Store Performance saved successfully!
✅ Gold layer data saving process completed!
Creating Gold Layer Delta Tables...
Error creating daily sales summary table: [RequestId=0af24052-e0fd-4f42-abf3-3af765a426e5 ErrorClass=INVALID_PARAMETER_VALUE.INVALID_PARAMETER_VALUE] Missing cloud 