In [0]:
%run ../utils/common

In [0]:
# Declare a text widget named "environment" (empty default value, empty label)
# and read its current value. The value is used further down as the key into
# `settings` to pick the target catalog.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Resolve the catalog name configured for the selected environment.
# `settings` is not defined in this notebook — presumably it is provided by the
# `%run ../utils/common` cell; confirm there.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    # The "environment" widget defaults to "", so an unset or mistyped value would
    # otherwise surface as a bare `KeyError: ''` with no hint about what to fix.
    raise KeyError(
        f"No 'catalog_name' configured for environment {environment!r}; "
        "set the 'environment' widget to a key defined in `settings`."
    ) from exc

In [0]:
# Create the target table on first run (no-op when it already exists).
# One row holds the 80/85/90/95th percentile values (pct_80..pct_95) for a
# (business_unit, mch5_id) pair, tagged with the snapshot date (calday) and the
# start/end of the observation window, which are stored as strings.
# The table properties switch on Delta auto-optimize (optimized write and
# auto-compaction) for this table.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_gold_vcm_dmt.f_upper_bound_corrected_sale 
(
    calday DATE,                 
    start_date STRING,        
    end_date STRING,             
    business_unit STRING,       
    mch5_id STRING,              
    mch5_desc STRING,           
    pct_80 DOUBLE,              
    pct_85 DOUBLE,            
    pct_90 DOUBLE,               
    pct_95 DOUBLE               
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
""")

In [0]:
# Observation window: the 365 days that end on the last day of the previous month.
#
# The month is anchored on CURRENT_TIMESTAMP() + 7 hours, the same shifted clock the
# INSERT below uses for `calday`. The previous form, DATE(CURRENT_DATE()) + INTERVAL
# 7 HOURS, added the offset to midnight of the session date, so it could never move
# the date (or the month) and the window could disagree with `calday` around a month
# boundary.
# NOTE(review): the +7h shift presumably converts a UTC session clock to UTC+7 local
# time — confirm the cluster's session time zone.
#
# Both bounds come from a single query so they are derived from the same instant.
window_bounds = spark.sql("""
    SELECT
        DATE_SUB(month_start, 365) AS start_date,
        DATE_SUB(month_start, 1) AS end_date
    FROM (
        SELECT DATE(DATE_TRUNC('MONTH', CURRENT_TIMESTAMP() + INTERVAL 7 HOURS)) AS month_start
    ) AS m
""").first()

# Downstream SQL interpolates these as 'YYYY-MM-DD' string literals.
start_date = window_bounds["start_date"].strftime('%Y-%m-%d')
end_date = window_bounds["end_date"].strftime('%Y-%m-%d')

In [0]:
# Make the load re-runnable: remove any rows already written for the snapshot date
# before the INSERT below writes them again.
#
# The predicate must be the exact expression the INSERT uses for `calday`
# (DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) - 1). The previous predicate,
# DATE_SUB(CURRENT_DATE() + INTERVAL 7 HOURS, 1), added 7 hours to midnight of the
# session date and therefore always resolved to session-date - 1. During the last
# 7 hours of the session-clock day that is one day earlier than the INSERT's calday,
# so a run deleted the previous snapshot and a re-run duplicated the current one.
spark.sql(f"""DELETE FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_upper_bound_corrected_sale
WHERE calday = DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) - 1
""")

In [0]:
# Compute and append the percentile snapshot.
#
# _DATA: one row per a_store_sku_daily record with calday in [start_date, end_date],
#   enriched with business_unit (d_store) and mch5_id / mch5_desc (d_product).
#   - Records whose store has no business_unit or whose product has no mch5_id are
#     dropped, so the two LEFT JOINs effectively behave as inner joins.
#   - Sub-query `d` keeps the 28 latest lunar_calday rows of every lunar_year from
#     d_time_lunar_v2; the `d.calday IS NULL` filter turns that join into an
#     anti-join, i.e. sales on those calendar days are excluded.
#     NOTE(review): presumably this removes the run-up to Lunar New Year — confirm.
#   - normal_sale_qty = base_sale_qty - base_promotion_coupon_qty - base_sale_qty_sll,
#     with NULLs treated as 0 and every operand, as well as the result, floored at 0.
#
# MAIN: over the rows with normal_sale_qty > 0, the 80/85/90/95th percentiles of
#   normal_sale_qty per (business_unit, mch5_id). The percentiles are evaluated as
#   window functions and the rows are then collapsed with DISTINCT.
#   NOTE(review): a GROUP BY would avoid the per-row window evaluation, but it is
#   only equivalent if mch5_desc is unique per mch5_id — verify before changing.
#
# Final SELECT: stamps every row with calday = date of (current timestamp + 7 hours)
#   minus one day, plus the window bounds as strings. The INSERT is positional, so
#   the column order produced here must match the table definition.
spark.sql(f"""
        INSERT INTO {catalog_name}.udp_wcm_gold_vcm_dmt.f_upper_bound_corrected_sale
        WITH _DATA AS (
            SELECT
                a.calday, a.store_id, a.product_id,
                b.business_unit, c.mch5_id, c.mch5_desc,
                IFNULL(a.revenue.base_sale_qty, 0) base_sale_qty,
                IFNULL(a.revenue.base_promotion_coupon_qty, 0) base_promotion_coupon_qty,
                GREATEST(GREATEST(IFNULL(a.revenue.base_sale_qty, 0), 0) - GREATEST(IFNULL(a.revenue.base_promotion_coupon_qty, 0), 0) - GREATEST(IFNULL(a.revenue.base_sale_qty_sll, 0), 0), 0) normal_sale_qty
            FROM {catalog_name}.udp_wcm_gold_vcm_dmt.a_store_sku_daily a
            LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_store b
                ON a.store_id = b.store_id
            LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product c
                ON a.product_id = c.product_id
            LEFT JOIN (
                SELECT * 
                FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_time_lunar_v2
                QUALIFY ROW_NUMBER() OVER(PARTITION BY lunar_year ORDER BY lunar_calday DESC) <= 28
             ) d
                ON a.calday = d.calday
            WHERE a.calday >= '{start_date}' AND a.calday <= '{end_date}'
                AND d.calday IS NULL
                AND b.business_unit IS NOT NULL AND c.mch5_id IS NOT NULL
        ),
        MAIN AS (
            SELECT
                DISTINCT
                business_unit, 
                mch5_id, 
                mch5_desc,
                percentile(normal_sale_qty, 0.8) OVER(PARTITION BY business_unit, mch5_id) pct_80,
                percentile(normal_sale_qty, 0.85) OVER(PARTITION BY business_unit, mch5_id) pct_85,
                percentile(normal_sale_qty, 0.9) OVER(PARTITION BY business_unit, mch5_id) pct_90,
                percentile(normal_sale_qty, 0.95) OVER(PARTITION BY business_unit, mch5_id) pct_95
            FROM _DATA
            WHERE normal_sale_qty > 0
        )
        SELECT
            DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) - 1 AS calday,
            '{start_date}' AS start_date, 
            '{end_date}' AS end_date,
            *
        FROM MAIN;
""")