-- ============================================================================
# GOLD LAYER - Complex Analytics and Aggregations
-- ============================================================================

In [1]:
# Make sure the `gold` database exists; IF NOT EXISTS keeps this cell safe to re-run.
spark.sql("CREATE DATABASE IF NOT EXISTS gold")
# Print the list of databases so the result of the statement above can be checked by eye.
spark.sql("SHOW DATABASES").show()

25/10/28 08:32:06 WARN MetricsConfig: Cannot locate configuration: tried hadoop-metrics2-s3a-file-system.properties,hadoop-metrics2.properties


+---------+
|namespace|
+---------+
|   bronze|
|  default|
|     gold|
|   silver|
+---------+



In [2]:
def create_query_in_location(location, query):
    """Run a SQL statement with a gold-layer LOCATION clause appended.

    The clause ``LOCATION 's3a://data/gold/<location>'`` is added on a new
    line after ``query``; the combined statement is executed through the
    ambient ``spark`` session and the resulting DataFrame is printed with
    ``.show()``.

    Args:
        location: Path segment placed after ``s3a://data/gold/``. It is
            interpolated into the statement as-is (no quoting or escaping).
        query: SQL text that the LOCATION clause is appended to.

    Returns:
        None. The only effects are the executed statement and the printed
        result.
    """
    # `spark` is not a parameter; it is looked up from the enclosing scope.
    statement = query + f"\n LOCATION 's3a://data/gold/{location}'"
    result = spark.sql(statement)
    result.show()

In [3]:
# CREATE TABLE statement for gold.customer_360: a Parquet table partitioned by
# analysis_date. The statement carries no LOCATION clause of its own;
# create_query_in_location appends one before executing it.
# The backslash just before the closing quotes is a line continuation inside
# the literal, so the string ends on the same logical line as the final ")"
# rather than with a newline.
customer_360 = """
               CREATE TABLE IF NOT EXISTS gold.customer_360
               (
                   customer_key
                   BIGINT
                   NOT
                   NULL,
                   customer_id
                   BIGINT
                   NOT
                   NULL,
                   analysis_date
                   DATE
                   NOT
                   NULL,

                   -- Demographics
                   customer_age
                   INT,
                   customer_segment
                   STRING,
                   lifetime_stage
                   STRING, -- new, active, at_risk, churned, reactivated

                   -- Purchase Behavior (All-Time)
                   first_purchase_date
                   DATE,
                   last_purchase_date
                   DATE,
                   total_orders
                   BIGINT,
                   total_items_purchased
                   BIGINT,
                   total_revenue
                   DECIMAL
               (
                   15,
                   2
               ),
                   total_profit DECIMAL
               (
                   15,
                   2
               ),
                   avg_order_value DECIMAL
               (
                   10,
                   2
               ),
                   avg_items_per_order DECIMAL
               (
                   8,
                   2
               ),

                   -- Purchase Behavior (Last 30 Days)
                   orders_last_30d INT,
                   revenue_last_30d DECIMAL
               (
                   12,
                   2
               ),

                   -- Purchase Behavior (Last 90 Days)
                   orders_last_90d INT,
                   revenue_last_90d DECIMAL
               (
                   12,
                   2
               ),

                   -- Purchase Behavior (Last 365 Days)
                   orders_last_365d INT,
                   revenue_last_365d DECIMAL
               (
                   12,
                   2
               ),
                   unique_products_purchased_365d INT,
                   unique_categories_purchased_365d INT,

                   -- Channel Preferences
                   primary_channel STRING,
                   channel_diversity_score DECIMAL
               (
                   3,
                   2
               ),
                   online_order_percentage DECIMAL
               (
                   5,
                   2
               ),
                   mobile_order_percentage DECIMAL
               (
                   5,
                   2
               ),
                   instore_order_percentage DECIMAL
               (
                   5,
                   2
               ),

                   -- Product Preferences
                   most_purchased_category STRING,
                   most_purchased_brand STRING,
                   avg_price_point DECIMAL
               (
                   10,
                   2
               ),
                   premium_product_percentage DECIMAL
               (
                   5,
                   2
               ),

                   -- Loyalty Metrics
                   loyalty_tier STRING,
                   total_loyalty_points_earned BIGINT,
                   total_loyalty_points_redeemed BIGINT,
                   loyalty_points_balance BIGINT,

                   -- Subscription Metrics
                   has_active_subscription BOOLEAN,
                   subscription_count INT,
                   subscription_mrr DECIMAL
               (
                   12,
                   2
               ),

                   -- Engagement Metrics
                   days_since_last_purchase INT,
                   purchase_frequency_days DECIMAL
               (
                   8,
                   2
               ),
                   recency_score INT,
                   frequency_score INT,
                   monetary_score INT,
                   rfm_segment STRING,

                   -- Support Metrics
                   total_support_interactions BIGINT,
                   avg_satisfaction_rating DECIMAL
               (
                   3,
                   2
               ),
                   unresolved_issues_count INT,

                   -- Return Behavior
                   total_returns BIGINT,
                   return_rate DECIMAL
               (
                   5,
                   2
               ),
                   total_return_value DECIMAL
               (
                   12,
                   2
               ),

                   -- Discount Sensitivity
                   orders_with_discount BIGINT,
                   discount_usage_rate DECIMAL
               (
                   5,
                   2
               ),
                   avg_discount_percentage DECIMAL
               (
                   5,
                   2
               ),

                   -- Predictions & Scores
                   clv_prediction DECIMAL
               (
                   12,
                   2
               ),
                   churn_probability DECIMAL
               (
                   3,
                   2
               ),
                   next_purchase_probability DECIMAL
               (
                   3,
                   2
               ),
                   predicted_next_purchase_date DATE,

                   -- Marketing Response
                   campaign_response_rate DECIMAL
               (
                   5,
                   2
               ),
                   email_open_rate DECIMAL
               (
                   5,
                   2
               ),
                   email_click_rate DECIMAL
               (
                   5,
                   2
               ),

                   -- Metadata
                   created_timestamp TIMESTAMP,
                   updated_timestamp TIMESTAMP
                   )
                   USING PARQUET
                   PARTITIONED BY
               (
                   analysis_date
               ) \
               """
# Execute the DDL with LOCATION 's3a://data/gold/customer_360' appended by the helper.
create_query_in_location(query=customer_360, location="customer_360")

25/10/28 08:32:09 WARN SessionState: METASTORE_FILTER_HOOK will be ignored, since hive.security.authorization.manager is set to instance of HiveAuthorizerFactory.


++
||
++
++



In [4]:
# CREATE TABLE statement for gold.product_performance: a Parquet table
# partitioned by (analysis_date, time_period). The statement carries no
# LOCATION clause of its own; create_query_in_location appends one before
# executing it.
# The backslash just before the closing quotes is a line continuation inside
# the literal, so the string ends on the same logical line as the final ")"
# rather than with a newline.
product_performance = """
                      CREATE TABLE IF NOT EXISTS gold.product_performance
                      (
                          product_key
                          BIGINT
                          NOT
                          NULL,
                          product_id
                          BIGINT
                          NOT
                          NULL,
                          analysis_date
                          DATE
                          NOT
                          NULL,
                          time_period
                          STRING
                          NOT
                          NULL, -- daily, weekly, monthly, quarterly, yearly

                          -- Product Info
                          product_name
                          STRING,
                          category_level1
                          STRING,
                          category_level2
                          STRING,
                          brand
                          STRING,

                          -- Sales Metrics
                          total_units_sold
                          BIGINT,
                          total_transactions
                          BIGINT,
                          total_customers
                          BIGINT,
                          new_customers
                          BIGINT,
                          repeat_customers
                          BIGINT,

                          -- Revenue Metrics
                          gross_revenue
                          DECIMAL
                      (
                          15,
                          2
                      ),
                          net_revenue DECIMAL
                      (
                          15,
                          2
                      ),
                          total_discounts DECIMAL
                      (
                          12,
                          2
                      ),
                          avg_selling_price DECIMAL
                      (
                          10,
                          2
                      ),
                          avg_discount_percentage DECIMAL
                      (
                          5,
                          2
                      ),

                          -- Profitability Metrics
                          total_cost DECIMAL
                      (
                          15,
                          2
                      ),
                          gross_profit DECIMAL
                      (
                          15,
                          2
                      ),
                          gross_margin_percentage DECIMAL
                      (
                          5,
                          2
                      ),
                          contribution_margin DECIMAL
                      (
                          15,
                          2
                      ),

                          -- Returns Analysis
                          units_returned BIGINT,
                          return_rate DECIMAL
                      (
                          5,
                          2
                      ),
                          return_value DECIMAL
                      (
                          12,
                          2
                      ),
                          top_return_reason STRING,

                          -- Inventory Metrics
                          avg_inventory_on_hand DECIMAL
                      (
                          12,
                          2
                      ),
                          stockout_days INT,
                          stockout_rate DECIMAL
                      (
                          5,
                          2
                      ),
                          inventory_turnover DECIMAL
                      (
                          8,
                          2
                      ),
                          days_inventory_outstanding DECIMAL
                      (
                          8,
                          2
                      ),

                          -- Velocity Metrics
                          units_per_day DECIMAL
                      (
                          10,
                          2
                      ),
                          revenue_per_day DECIMAL
                      (
                          12,
                          2
                      ),
                          sales_velocity_trend STRING, -- accelerating, stable, declining

                      -- Market Position
                          category_revenue_rank INT,
                          category_units_rank INT,
                          overall_revenue_rank INT,
                          category_market_share DECIMAL
                      (
                          5,
                          2
                      ),

                          -- Customer Metrics
                          penetration_rate DECIMAL
                      (
                          5,
                          2
                      ),
                          repeat_purchase_rate DECIMAL
                      (
                          5,
                          2
                      ),
                          avg_units_per_customer DECIMAL
                      (
                          8,
                          2
                      ),

                          -- Price Elasticity
                          price_elasticity_coefficient DECIMAL
                      (
                          8,
                          4
                      ),
                          optimal_price_point DECIMAL
                      (
                          10,
                          2
                      ),

                          -- Cross-Sell Analysis
                          basket_attachment_rate DECIMAL
                      (
                          5,
                          2
                      ),
                          avg_basket_size_with_product DECIMAL
                      (
                          8,
                          2
                      ),
                          top_cross_sell_products ARRAY<STRING>,

                          -- Trend Analysis
                          revenue_growth_rate DECIMAL
                      (
                          8,
                          2
                      ),
                          volume_growth_rate DECIMAL
                      (
                          8,
                          2
                      ),
                          market_share_change DECIMAL
                      (
                          8,
                          2
                      ),

                          -- Flags
                          is_trending BOOLEAN,
                          is_seasonal BOOLEAN,
                          is_low_performer BOOLEAN,
                          is_high_performer BOOLEAN,
                          requires_reorder BOOLEAN,

                          -- Metadata
                          created_timestamp TIMESTAMP
                          )
                          USING PARQUET
                          PARTITIONED BY
                      (
                          analysis_date,
                          time_period
                      ) \
                      """
# Execute the DDL with LOCATION 's3a://data/gold/product_performance' appended by the helper.
create_query_in_location(query=product_performance, location="product_performance")

++
||
++
++



In [5]:
# CREATE TABLE statement for gold.cohort_analysis: a Parquet table partitioned
# by cohort_month. The statement carries no LOCATION clause of its own;
# create_query_in_location appends one before executing it.
# The backslash just before the closing quotes is a line continuation inside
# the literal, so the string ends on the same logical line as the final ")"
# rather than with a newline.
cohort_analysis = """
                  CREATE TABLE IF NOT EXISTS gold.cohort_analysis
                  (
                      cohort_month
                      DATE
                      NOT
                      NULL,
                      months_since_first_purchase
                      INT
                      NOT
                      NULL,

                      -- Cohort Size
                      cohort_size
                      BIGINT,
                      customers_active
                      INT,

                      -- Retention Metrics
                      retention_rate
                      DECIMAL
                  (
                      5,
                      2
                  ),
                      cumulative_retention_rate DECIMAL
                  (
                      5,
                      2
                  ),
                      churn_rate DECIMAL
                  (
                      5,
                      2
                  ),
                      cumulative_churn_rate DECIMAL
                  (
                      5,
                      2
                  ),

                      -- Revenue Metrics
                      cohort_revenue DECIMAL
                  (
                      15,
                      2
                  ),
                      cumulative_cohort_revenue DECIMAL
                  (
                      15,
                      2
                  ),
                      avg_revenue_per_customer DECIMAL
                  (
                      10,
                      2
                  ),
                      cumulative_avg_revenue_per_customer DECIMAL
                  (
                      10,
                      2
                  ),

                      -- Order Metrics
                      total_orders BIGINT,
                      cumulative_orders BIGINT,
                      avg_orders_per_customer DECIMAL
                  (
                      8,
                      2
                  ),
                      cumulative_avg_orders_per_customer DECIMAL
                  (
                      8,
                      2
                  ),

                      -- Behavioral Metrics
                      repeat_purchase_rate DECIMAL
                  (
                      5,
                      2
                  ),
                      avg_purchase_frequency DECIMAL
                  (
                      8,
                      2
                  ),
                      avg_days_between_purchases DECIMAL
                  (
                      8,
                      2
                  ),

                      -- Channel Distribution
                      online_percentage DECIMAL
                  (
                      5,
                      2
                  ),
                      mobile_percentage DECIMAL
                  (
                      5,
                      2
                  ),
                      instore_percentage DECIMAL
                  (
                      5,
                      2
                  ),

                      -- Subscription Adoption
                      subscription_adoption_rate DECIMAL
                  (
                      5,
                      2
                  ),
                      avg_subscription_value DECIMAL
                  (
                      10,
                      2
                  ),

                      -- Metadata
                      created_timestamp TIMESTAMP
                      )
                      USING PARQUET
                      PARTITIONED BY
                  (
                      cohort_month
                  ) \
                  """
# Execute the DDL with LOCATION 's3a://data/gold/cohort_analysis' appended by the helper.
create_query_in_location(query=cohort_analysis, location="cohort_analysis")

++
||
++
++



In [6]:
# CREATE TABLE statement for gold.channel_attribution: a Parquet table
# partitioned by attribution_date. Besides scalar columns it declares three
# MAP<STRING, DECIMAL(12,2)> columns and one ARRAY<STRUCT<...>> column
# (journey_touchpoints). The statement carries no LOCATION clause of its own;
# create_query_in_location appends one before executing it.
# The backslash just before the closing quotes is a line continuation inside
# the literal, so the string ends on the same logical line as the final ")"
# rather than with a newline.
channel_attribution = """
                      CREATE TABLE IF NOT EXISTS gold.channel_attribution
                      (
                          attribution_date
                          DATE
                          NOT
                          NULL,
                          customer_id
                          BIGINT
                          NOT
                          NULL,
                          transaction_id
                          BIGINT
                          NOT
                          NULL,

                          -- Transaction Info
                          transaction_revenue
                          DECIMAL
                      (
                          12,
                          2
                      ),
                          transaction_profit DECIMAL
                      (
                          12,
                          2
                      ),

                          -- Attribution Model: First Touch
                          first_touch_channel STRING,
                          first_touch_campaign_id BIGINT,
                          first_touch_attribution_revenue DECIMAL
                      (
                          12,
                          2
                      ),

                          -- Attribution Model: Last Touch
                          last_touch_channel STRING,
                          last_touch_campaign_id BIGINT,
                          last_touch_attribution_revenue DECIMAL
                      (
                          12,
                          2
                      ),

                          -- Attribution Model: Linear
                          touchpoint_count INT,
                          linear_attribution_per_touchpoint DECIMAL
                      (
                          12,
                          2
                      ),

                          -- Attribution Model: Time Decay
                          time_decay_attribution_map MAP <STRING, DECIMAL
                      (
                          12,
                          2
                      )>,

                          -- Attribution Model: Position Based
                          position_based_attribution_map MAP <STRING, DECIMAL
                      (
                          12,
                          2
                      )>,

                          -- Attribution Model: Data-Driven (ML-based)
                          data_driven_attribution_map MAP <STRING, DECIMAL
                      (
                          12,
                          2
                      )>,

                          -- Customer Journey
                          journey_length_days INT,
                          journey_touchpoints ARRAY<STRUCT<
                          channel: STRING,
                          timestamp : TIMESTAMP,
                          campaign_id: BIGINT,
                          event_type: STRING
                          >>,

                          -- Metadata
                          created_timestamp TIMESTAMP
                          )
                          USING PARQUET
                          PARTITIONED BY
                      (
                          attribution_date
                      ) \
                      """
# Execute the DDL with LOCATION 's3a://data/gold/channel_attribution' appended by the helper.
create_query_in_location(query=channel_attribution, location="channel_attribution")

++
||
++
++



In [7]:
# NOTE(review): this cell repeats the gold.channel_attribution definition and
# call from the preceding cell line for line. Because the statement uses
# CREATE TABLE IF NOT EXISTS, re-running it presumably changes nothing once the
# table exists - TODO confirm whether a different gold table was meant here or
# whether this cell can be dropped.
#
# CREATE TABLE statement for gold.channel_attribution: a Parquet table
# partitioned by attribution_date. The statement carries no LOCATION clause of
# its own; create_query_in_location appends one before executing it.
# The backslash just before the closing quotes is a line continuation inside
# the literal, so the string ends on the same logical line as the final ")"
# rather than with a newline.
channel_attribution = """
                      CREATE TABLE IF NOT EXISTS gold.channel_attribution
                      (
                          attribution_date
                          DATE
                          NOT
                          NULL,
                          customer_id
                          BIGINT
                          NOT
                          NULL,
                          transaction_id
                          BIGINT
                          NOT
                          NULL,

                          -- Transaction Info
                          transaction_revenue
                          DECIMAL
                      (
                          12,
                          2
                      ),
                          transaction_profit DECIMAL
                      (
                          12,
                          2
                      ),

                          -- Attribution Model: First Touch
                          first_touch_channel STRING,
                          first_touch_campaign_id BIGINT,
                          first_touch_attribution_revenue DECIMAL
                      (
                          12,
                          2
                      ),

                          -- Attribution Model: Last Touch
                          last_touch_channel STRING,
                          last_touch_campaign_id BIGINT,
                          last_touch_attribution_revenue DECIMAL
                      (
                          12,
                          2
                      ),

                          -- Attribution Model: Linear
                          touchpoint_count INT,
                          linear_attribution_per_touchpoint DECIMAL
                      (
                          12,
                          2
                      ),

                          -- Attribution Model: Time Decay
                          time_decay_attribution_map MAP <STRING, DECIMAL
                      (
                          12,
                          2
                      )>,

                          -- Attribution Model: Position Based
                          position_based_attribution_map MAP <STRING, DECIMAL
                      (
                          12,
                          2
                      )>,

                          -- Attribution Model: Data-Driven (ML-based)
                          data_driven_attribution_map MAP <STRING, DECIMAL
                      (
                          12,
                          2
                      )>,

                          -- Customer Journey
                          journey_length_days INT,
                          journey_touchpoints ARRAY<STRUCT<
                          channel: STRING,
                          timestamp : TIMESTAMP,
                          campaign_id: BIGINT,
                          event_type: STRING
                          >>,

                          -- Metadata
                          created_timestamp TIMESTAMP
                          )
                          USING PARQUET
                          PARTITIONED BY
                      (
                          attribution_date
                      ) \
                      """
# Execute the DDL with LOCATION 's3a://data/gold/channel_attribution' appended by the helper.
create_query_in_location(query=channel_attribution, location="channel_attribution")

++
||
++
++



In [8]:
# CREATE TABLE statement for gold.category_brand_performance: a Parquet table
# partitioned by (analysis_date, time_period). The statement carries no
# LOCATION clause of its own; create_query_in_location appends one before
# executing it.
# The backslash just before the closing quotes is a line continuation inside
# the literal, so the string ends on the same logical line as the final ")"
# rather than with a newline.
category_brand_performance = """
                             CREATE TABLE IF NOT EXISTS gold.category_brand_performance
                             (
                                 analysis_date
                                 DATE
                                 NOT
                                 NULL,
                                 time_period
                                 STRING
                                 NOT
                                 NULL, -- weekly, monthly, quarterly, yearly
                                 category_level1
                                 STRING,
                                 category_level2
                                 STRING,
                                 category_level3
                                 STRING,
                                 brand
                                 STRING,

                                 -- Sales Metrics
                                 total_revenue
                                 DECIMAL
                             (
                                 15,
                                 2
                             ),
                                 total_units_sold BIGINT,
                                 total_transactions BIGINT,
                                 unique_customers BIGINT,

                                 -- Average Metrics
                                 avg_transaction_value DECIMAL
                             (
                                 10,
                                 2
                             ),
                                 avg_units_per_transaction DECIMAL
                             (
                                 8,
                                 2
                             ),
                                 avg_price_point DECIMAL
                             (
                                 10,
                                 2
                             ),

                                 -- Profitability
                                 total_profit DECIMAL
                             (
                                 15,
                                 2
                             ),
                                 profit_margin_percentage DECIMAL
                             (
                                 5,
                                 2
                             ),

                                 -- Market Share
                                 category_revenue_percentage DECIMAL
                             (
                                 5,
                                 2
                             ),
                                 brand_revenue_percentage DECIMAL
                             (
                                 5,
                                 2
                             ),

                                 -- Growth Metrics
                                 revenue_growth_wow DECIMAL
                             (
                                 8,
                                 2
                             ), -- Week over Week
                                 revenue_growth_mom DECIMAL
                             (
                                 8,
                                 2
                             ), -- Month over Month
                                 revenue_growth_yoy DECIMAL
                             (
                                 8,
                                 2
                             ), -- Year over Year
                                 units_growth_yoy DECIMAL
                             (
                                 8,
                                 2
                             ),

                                 -- Customer Penetration
                                 customer_penetration_rate DECIMAL
                             (
                                 5,
                                 2
                             ),
                                 new_customer_percentage DECIMAL
                             (
                                 5,
                                 2
                             ),
                                 repeat_customer_rate DECIMAL
                             (
                                 5,
                                 2
                             ),

                                 -- Price & Promotion
                                 avg_discount_percentage DECIMAL
                             (
                                 5,
                                 2
                             ),
                                 promotional_sales_percentage DECIMAL
                             (
                                 5,
                                 2
                             ),
                                 price_index DECIMAL
                             (
                                 8,
                                 2
                             ),

                                 -- Rankings
                                 revenue_rank_in_category INT,
                                 units_rank_in_category INT,
                                 growth_rank_in_category INT,

                                 -- Metadata
                                 created_timestamp TIMESTAMP
                                 )
                                 USING PARQUET
                                 PARTITIONED BY
                             (
                                 analysis_date,
                                 time_period
                             ) \
                             """
# Execute the DDL with LOCATION 's3a://data/gold/category_brand_performance' appended by the helper.
create_query_in_location(query=category_brand_performance, location="category_brand_performance")

++
||
++
++



In [9]:
# DDL for gold.store_performance (the "Store Performance Dashboard" table).
#
# Only analysis_date and store_id are declared NOT NULL; every other column is
# left nullable. Column groups, in order: store info, sales, traffic, staff,
# inventory, customer satisfaction, comparative metrics, trend indicators and
# metadata. The table is declared USING PARQUET and partitioned by
# analysis_date.
#
# The backslash on the last line of the literal is a Python line continuation
# inside the string: it drops the final newline, and the helper then appends
# its own "\n LOCATION ..." clause.
#
# NOTE(review): avg_satisfaction_rating is DECIMAL(3,2), i.e. at most 9.99 --
# presumably the rating scale tops out below 10; confirm against the source.
# NOTE(review): the DDL does not say whether the DECIMAL(5,2) rate columns
# (conversion_rate, stockout_rate, ...) hold fractions or percentages; verify
# against the job that populates them.
store_performance = """
                    -- Gold: Store Performance Dashboard
                    CREATE TABLE IF NOT EXISTS gold.store_performance
                    (
                        analysis_date
                        DATE
                        NOT
                        NULL,
                        store_id
                        BIGINT
                        NOT
                        NULL,

                        -- Store Info
                        store_name
                        STRING,
                        store_region
                        STRING,
                        store_tier
                        STRING,

                        -- Sales Metrics
                        daily_revenue
                        DECIMAL
                    (
                        12,
                        2
                    ),
                        daily_transactions INT,
                        daily_customers INT,
                        avg_transaction_value DECIMAL
                    (
                        10,
                        2
                    ),
                        revenue_per_square_foot DECIMAL
                    (
                        10,
                        2
                    ),

                        -- Traffic Metrics
                        foot_traffic INT,
                        conversion_rate DECIMAL
                    (
                        5,
                        2
                    ),

                        -- Staff Metrics
                        staff_count INT,
                        revenue_per_employee DECIMAL
                    (
                        10,
                        2
                    ),

                        -- Inventory Metrics
                        inventory_value DECIMAL
                    (
                        15,
                        2
                    ),
                        stockout_rate DECIMAL
                    (
                        5,
                        2
                    ),
                        inventory_turnover DECIMAL
                    (
                        8,
                        2
                    ),

                        -- Customer Satisfaction
                        avg_satisfaction_rating DECIMAL
                    (
                        3,
                        2
                    ),
                        nps_score DECIMAL
                    (
                        5,
                        2
                    ),

                        -- Comparative Metrics
                        revenue_vs_target DECIMAL
                    (
                        8,
                        2
                    ),
                        revenue_rank_in_region INT,
                        revenue_percentile DECIMAL
                    (
                        5,
                        2
                    ),

                        -- Trend Indicators
                        revenue_trend_7d STRING, -- up, down, stable
                        revenue_growth_wow DECIMAL
                    (
                        8,
                        2
                    ),
                        revenue_growth_yoy DECIMAL
                    (
                        8,
                        2
                    ),

                        -- Metadata
                        created_timestamp TIMESTAMP
                        )
                        USING PARQUET
                        PARTITIONED BY
                    (
                        analysis_date
                    ) \
                    """
# Execute the statement; the helper places the table under
# s3a://data/gold/store_performance. With IF NOT EXISTS in the DDL, re-running
# this cell leaves an already-existing table untouched.
create_query_in_location(query=store_performance, location="store_performance")

++
||
++
++



In [10]:
# DDL for gold.subscription_health (the "Subscription Health Metrics" table).
#
# Only analysis_date and plan_id are declared NOT NULL; every other column is
# left nullable. Column groups, in order: plan info, subscriber metrics,
# revenue metrics (mrr = Monthly Recurring Revenue, arr = Annual Recurring
# Revenue), churn, retention, cohort performance, engagement, growth,
# cancellation analysis and metadata. The table is declared USING PARQUET and
# partitioned by analysis_date.
#
# quick_ratio is documented inline in the SQL as
# (New MRR + Expansion) / (Churned MRR + Contraction).
#
# The backslash on the last line of the literal is a Python line continuation
# inside the string: it drops the final newline, and the helper then appends
# its own "\n LOCATION ..." clause.
#
# NOTE(review): active_subscribers is BIGINT while the other subscriber counts
# (new/cancelled/paused/net change) are INT -- confirm the narrower type is
# intended for those columns.
# NOTE(review): the DDL does not say whether the DECIMAL(5,2) churn/retention
# rate columns hold fractions or percentages; verify against the populating job.
subscription_health = """
                      -- Gold: Subscription Health Metrics
                      CREATE TABLE IF NOT EXISTS gold.subscription_health
                      (
                          analysis_date
                          DATE
                          NOT
                          NULL,
                          plan_id
                          BIGINT
                          NOT
                          NULL,

                          -- Plan Info
                          plan_name
                          STRING,
                          subscription_type
                          STRING,
                          billing_frequency
                          STRING,

                          -- Subscriber Metrics
                          active_subscribers
                          BIGINT,
                          new_subscribers_mtd
                          INT,
                          cancelled_subscribers_mtd
                          INT,
                          paused_subscribers
                          INT,
                          net_subscriber_change
                          INT,

                          -- Revenue Metrics
                          mrr
                          DECIMAL
                      (
                          15,
                          2
                      ), -- Monthly Recurring Revenue
                          arr DECIMAL
                      (
                          15,
                          2
                      ), -- Annual Recurring Revenue
                          avg_revenue_per_subscriber DECIMAL
                      (
                          10,
                          2
                      ),
                          lifetime_value_avg DECIMAL
                      (
                          12,
                          2
                      ),

                          -- Churn Metrics
                          churn_rate_monthly DECIMAL
                      (
                          5,
                          2
                      ),
                          churn_rate_annual DECIMAL
                      (
                          5,
                          2
                      ),
                          revenue_churn_rate DECIMAL
                      (
                          5,
                          2
                      ),
                          reactivation_rate DECIMAL
                      (
                          5,
                          2
                      ),

                          -- Retention Metrics
                          retention_rate_30d DECIMAL
                      (
                          5,
                          2
                      ),
                          retention_rate_90d DECIMAL
                      (
                          5,
                          2
                      ),
                          retention_rate_365d DECIMAL
                      (
                          5,
                          2
                      ),

                          -- Cohort Performance
                          cohort_0_3_months_retention DECIMAL
                      (
                          5,
                          2
                      ),
                          cohort_3_6_months_retention DECIMAL
                      (
                          5,
                          2
                      ),
                          cohort_6_12_months_retention DECIMAL
                      (
                          5,
                          2
                      ),
                          cohort_12plus_months_retention DECIMAL
                      (
                          5,
                          2
                      ),

                          -- Engagement
                          avg_subscription_duration_days DECIMAL
                      (
                          10,
                          2
                      ),
                          avg_payments_before_churn DECIMAL
                      (
                          8,
                          2
                      ),

                          -- Growth Metrics
                          subscriber_growth_rate DECIMAL
                      (
                          8,
                          2
                      ),
                          mrr_growth_rate DECIMAL
                      (
                          8,
                          2
                      ),
                          quick_ratio DECIMAL
                      (
                          8,
                          2
                      ), -- (New MRR + Expansion) / (Churned MRR + Contraction)

                      -- Cancellation Analysis
                          top_cancellation_reason STRING,
                          avg_days_to_cancel DECIMAL
                      (
                          10,
                          2
                      ),
                          voluntary_churn_rate DECIMAL
                      (
                          5,
                          2
                      ),
                          involuntary_churn_rate DECIMAL
                      (
                          5,
                          2
                      ),

                          -- Metadata
                          created_timestamp TIMESTAMP
                          )
                          USING PARQUET
                          PARTITIONED BY
                      (
                          analysis_date
                      ) \
                      """
# Execute the statement; the helper places the table under
# s3a://data/gold/subscription_health. With IF NOT EXISTS in the DDL,
# re-running this cell leaves an already-existing table untouched.
create_query_in_location(query=subscription_health, location="subscription_health")

++
||
++
++



In [11]:
# DDL for gold.basket_analysis (the "Basket Analysis (Market Basket)" table).
#
# analysis_date, product_id_a and product_id_b are declared NOT NULL; every
# other column is left nullable. Column groups, in order: product info,
# association counts, association rules, revenue impact, recommendation score
# and metadata. The table is declared USING PARQUET and partitioned by
# analysis_date.
#
# The association-rule columns are documented inline in the SQL:
#   support           = P(A ∩ B)
#   confidence_a_to_b = P(B|A)
#   confidence_b_to_a = P(A|B)
#   lift              = P(A ∩ B) / (P(A) * P(B))
#
# The backslash on the last line of the literal is a Python line continuation
# inside the string: it drops the final newline, and the helper then appends
# its own "\n LOCATION ..." clause.
#
# NOTE(review): the DDL gives no formula for conviction,
# incremental_basket_value or recommendation_score, and no threshold for
# is_strong_association -- look these up in the job that populates the table.
# NOTE(review): presumably one row per (analysis_date, product pair); whether
# both orderings (a,b) and (b,a) are stored is not visible here -- confirm.
basket_analysis = """
                  -- Gold: Basket Analysis (Market Basket)
                  CREATE TABLE IF NOT EXISTS gold.basket_analysis
                  (
                      analysis_date
                      DATE
                      NOT
                      NULL,
                      product_id_a
                      BIGINT
                      NOT
                      NULL,
                      product_id_b
                      BIGINT
                      NOT
                      NULL,

                      -- Product Info
                      product_name_a
                      STRING,
                      product_name_b
                      STRING,
                      category_a
                      STRING,
                      category_b
                      STRING,

                      -- Association Metrics
                      transactions_with_a
                      BIGINT,
                      transactions_with_b
                      BIGINT,
                      transactions_with_both
                      BIGINT,
                      total_transactions
                      BIGINT,

                      -- Association Rules
                      support
                      DECIMAL
                  (
                      8,
                      6
                  ), -- P(A ∩ B)
                      confidence_a_to_b DECIMAL
                  (
                      8,
                      6
                  ), -- P(B|A)
                      confidence_b_to_a DECIMAL
                  (
                      8,
                      6
                  ), -- P(A|B)
                      lift DECIMAL
                  (
                      10,
                      4
                  ), -- P(A ∩ B) / (P(A) * P(B))
                      conviction DECIMAL
                  (
                      10,
                      4
                  ),

                      -- Revenue Impact
                      avg_basket_value_with_both DECIMAL
                  (
                      10,
                      2
                  ),
                      avg_basket_value_with_a_only DECIMAL
                  (
                      10,
                      2
                  ),
                      incremental_basket_value DECIMAL
                  (
                      10,
                      2
                  ),

                      -- Recommendation Score
                      recommendation_score DECIMAL
                  (
                      5,
                      2
                  ),
                      is_strong_association BOOLEAN,

                      -- Metadata
                      created_timestamp TIMESTAMP
                      )
                      USING PARQUET
                      PARTITIONED BY
                  (
                      analysis_date
                  ) \
                  """
# Execute the statement; the helper places the table under
# s3a://data/gold/basket_analysis. With IF NOT EXISTS in the DDL, re-running
# this cell leaves an already-existing table untouched.
create_query_in_location(query=basket_analysis, location="basket_analysis")

++
||
++
++



In [12]:
# DDL for gold.campaign_roi_analysis (the "Marketing Campaign ROI Analysis"
# table).
#
# Only analysis_date and campaign_id are declared NOT NULL; every other column
# is left nullable. Column groups, in order: campaign info, investment,
# reach & engagement, revenue attribution, profitability, ROI metrics
# (roas = Return on Ad Spend), customer lifetime value impact, efficiency
# metrics, comparison and metadata. The table is declared USING PARQUET and
# partitioned by analysis_date.
#
# cost_per_impression and revenue_per_impression are declared with scale 4
# (DECIMAL(10,4)); the remaining DECIMAL columns in this table use scale 2.
#
# The backslash on the last line of the literal is a Python line continuation
# inside the string: it drops the final newline, and the helper then appends
# its own "\n LOCATION ..." clause.
#
# NOTE(review): the DDL does not say whether the DECIMAL(5,2) rate columns
# (budget_utilization_rate, click_through_rate, conversion_rate) hold
# fractions or percentages; verify against the job that populates them.
campaign_roi_analysis = """
                        -- Gold: Marketing Campaign ROI Analysis
                        CREATE TABLE IF NOT EXISTS gold.campaign_roi_analysis
                        (
                            analysis_date
                            DATE
                            NOT
                            NULL,
                            campaign_id
                            BIGINT
                            NOT
                            NULL,

                            -- Campaign Info
                            campaign_name
                            STRING,
                            campaign_type
                            STRING,
                            channel
                            STRING,
                            start_date
                            DATE,
                            end_date
                            DATE,
                            campaign_duration_days
                            INT,

                            -- Investment
                            total_budget
                            DECIMAL
                        (
                            12,
                            2
                        ),
                            actual_spend DECIMAL
                        (
                            12,
                            2
                        ),
                            budget_utilization_rate DECIMAL
                        (
                            5,
                            2
                        ),

                            -- Reach & Engagement
                            total_impressions BIGINT,
                            total_clicks BIGINT,
                            total_conversions BIGINT,
                            unique_customers_reached BIGINT,
                            click_through_rate DECIMAL
                        (
                            5,
                            2
                        ),
                            conversion_rate DECIMAL
                        (
                            5,
                            2
                        ),

                            -- Revenue Attribution
                            attributed_revenue DECIMAL
                        (
                            15,
                            2
                        ),
                            attributed_transactions BIGINT,
                            new_customer_revenue DECIMAL
                        (
                            12,
                            2
                        ),
                            existing_customer_revenue DECIMAL
                        (
                            12,
                            2
                        ),

                            -- Profitability
                            attributed_profit DECIMAL
                        (
                            12,
                            2
                        ),
                            cost_per_impression DECIMAL
                        (
                            10,
                            4
                        ),
                            cost_per_click DECIMAL
                        (
                            10,
                            2
                        ),
                            cost_per_acquisition DECIMAL
                        (
                            10,
                            2
                        ),

                            -- ROI Metrics
                            roi_percentage DECIMAL
                        (
                            8,
                            2
                        ),
                            roas DECIMAL
                        (
                            8,
                            2
                        ), -- Return on Ad Spend
                            incremental_revenue DECIMAL
                        (
                            12,
                            2
                        ),
                            incremental_profit DECIMAL
                        (
                            12,
                            2
                        ),

                            -- Customer Lifetime Value Impact
                            avg_clv_of_acquired_customers DECIMAL
                        (
                            10,
                            2
                        ),
                            estimated_lifetime_roi DECIMAL
                        (
                            8,
                            2
                        ),

                            -- Efficiency Metrics
                            revenue_per_impression DECIMAL
                        (
                            10,
                            4
                        ),
                            profit_per_dollar_spent DECIMAL
                        (
                            10,
                            2
                        ),

                            -- Comparison
                            roi_vs_channel_avg DECIMAL
                        (
                            8,
                            2
                        ),
                            performance_rank_in_channel INT,

                            -- Metadata
                            created_timestamp TIMESTAMP
                            )
                            USING PARQUET
                            PARTITIONED BY
                        (
                            analysis_date
                        ) \
                        """
# Execute the statement; the helper places the table under
# s3a://data/gold/campaign_roi. With IF NOT EXISTS in the DDL, re-running this
# cell leaves an already-existing table untouched.
# NOTE(review): the location "campaign_roi" does not match the table name
# campaign_roi_analysis, whereas the store_performance, subscription_health
# and basket_analysis cells pass the table name as the location. Confirm this
# is intentional before changing it -- a table that already exists keeps the
# path it was created with.
create_query_in_location(query=campaign_roi_analysis, location="campaign_roi")

++
||
++
++



In [13]:
# List the tables registered in the gold database. show()'s defaults are
# spelled out: at most 20 rows, and cell values longer than 20 characters are
# cut short with "..." (which is why long table names appear abbreviated).
spark.sql("SHOW TABLES IN gold").show(n=20, truncate=True)

+---------+--------------------+-----------+
|namespace|           tableName|isTemporary|
+---------+--------------------+-----------+
|     gold|     basket_analysis|      false|
|     gold|campaign_roi_anal...|      false|
|     gold|category_brand_pe...|      false|
|     gold| channel_attribution|      false|
|     gold|     cohort_analysis|      false|
|     gold|        customer_360|      false|
|     gold| product_performance|      false|
|     gold|   store_performance|      false|
|     gold| subscription_health|      false|
+---------+--------------------+-----------+

