In [0]:
%run ../utils/common

In [0]:
# Job parameters, registered as Databricks text widgets in the order listed.
# Each entry is (widget name, default value).
_job_widget_defaults = (
    ("environment", "DEV"),
    ("target_dataset", "VCM_DMT_PRD"),
    ("target_table", "f_corrected_sale"),
    ("metadata_schema", "udp_wcm_metadata_dev"),
    (
        "dependency_table",
        "VCM_DMT_PRD.A_STORE_SKU_DAILY, VCM_DMT_PRD.A_STORE_SKU_CURRENT, VCM_DWH_PRD.D_PROMOTION_FREE_GIFT_CURRENT, VCM_DWH_PRD.F_COGS_MIN",
    ),
)
for _widget_name, _widget_default in _job_widget_defaults:
    dbutils.widgets.text(_widget_name, _widget_default)

In [0]:
# Widgets naming the date column and the row-id column; each entry is
# (widget name, default value).
for _field_widget, _field_default in (("field_calday", "CALDAY"), ("field_id", "HASH_ID")):
    dbutils.widgets.text(_field_widget, _field_default)

In [0]:
# Resolve the catalog for the selected environment.
# `environment` is read from its widget here because this cell runs before the
# cell that loads the remaining widget values; without this line a fresh
# "Run All" would hit an undefined (or stale) `environment`.
# NOTE(review): `settings` is not defined in this notebook — presumably it is
# provided by the `../utils/common` notebook run at the top; confirm.
environment = dbutils.widgets.get("environment")
catalog_name = settings[environment]['catalog_name']

In [0]:
# Load the job parameters from their widgets (same order as they are declared).
environment, target_dataset, target_table, metadata_schema = (
    dbutils.widgets.get(widget_key)
    for widget_key in ("environment", "target_dataset", "target_table", "metadata_schema")
)

# The dependency widget holds a comma-separated list of table names; normalise
# each name (trim, upper-case) and render the list as a quoted, comma-separated
# string: 'A','B','C'.
dependency_table = (
    "'"
    + "','".join(
        raw_name.strip().upper()
        for raw_name in dbutils.widgets.get("dependency_table").split(",")
    )
    + "'"
)

# Echo the resolved parameters, one per line, for the run log.
print(
    f"environment: {environment}",
    f"target_dataset: {target_dataset}",
    f"target_table: {target_table}",
    f"catalog_name: {catalog_name}",
    f"metadata_schema: {metadata_schema}",
    f"dependency_table: {dependency_table}",
    sep="\n",
)

In [0]:
# Load the date-column and id-column names from their widgets, then echo them
# one per line for the run log.
field_calday, field_id = (
    dbutils.widgets.get(widget_key) for widget_key in ("field_calday", "field_id")
)

print(f"field_calday: {field_calday}", f"field_id: {field_id}", sep="\n")

In [0]:
%run "../common/common_etl_load"

In [0]:
# Create the target table {catalog_name}.udp_wcm_gold_vcm_dmt.f_corrected_sale
# if it does not exist yet; when the table is already there the statement
# leaves it untouched (CREATE TABLE IF NOT EXISTS).
# The table holds one row per (store_id, product_id, calday) candidate with the
# raw quantities, the promotion / out-of-stock / outlier flags and the
# corrected base sale quantity computed further down in this notebook.
# The two table properties switch on Delta auto-optimize (optimized writes and
# auto compaction).
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_gold_vcm_dmt.f_corrected_sale
(
  first_sale_stock_day DATE,
  inactive_range INT,
  store_id STRING,
  product_id STRING,
  calday DATE,
  base_sale_qty FLOAT,
  promotion_qty FLOAT,
  promotion_coupon_qty FLOAT,
  closing_stock_qty FLOAT,
  is_promotion INT,
  is_oos INT,
  upper_outlier FLOAT,
  lower_outlier FLOAT,
  is_outlier INT,
  is_corrected INT,
  avg_corrected_base_sale_qty_rollback_30day FLOAT,
  corrected_base_sale_qty FLOAT,
  note STRING,
  addr_numbr STRING
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
 """)

In [0]:
# Build the list of calendar days to (re)process: the seven consecutive days
# ending yesterday, sorted ascending. "Today" is derived from
# CURRENT_TIMESTAMP() shifted by +7 hours.
# NOTE(review): the +7h shift presumably converts a UTC session clock to the
# UTC+7 business day — confirm against the cluster's session time zone.
# .collect() brings the rows to the driver; each row exposes the day as `.calday`.
calday_list = spark.sql(f"""
select EXPLODE(SEQUENCE(DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) -7, DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) - 1)) AS calday ORDER BY calday""").collect()
print(calday_list)

In [0]:
# Rebuild f_corrected_sale one calendar day at a time, in calday_list order
# (ascending). Every iteration reads rows that are already stored in
# f_corrected_sale for earlier days (180-day percentiles, 30-day average,
# 29-day out-of-stock history), so walking the days oldest-first lets each day
# see the freshly rebuilt rows of the days before it.
for target in calday_list:
    # Drop whatever was previously written for this day so the INSERT below
    # does not duplicate it.
    spark.sql(f""" DELETE FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_corrected_sale WHERE calday = DATE('{target.calday}') """)

    # Recompute the day and append it. CTEs, in order of appearance:
    #   store_sku         - rows of a_store_sku_current whose first_sale_stock_day
    #                       is set and is on or before the day, stamped with the day.
    #   percentiles       - per store/product Q1 and Q3 of corrected_base_sale_qty
    #                       over the previous 180 days.
    #   outliers          - fences: lower = q1 - 1.5*(q3-q1); upper = q3 + 1.5*(q3-q1),
    #                       floored at 0 when cost_per_unit (f_cogs_min, this day)
    #                       exceeds 300000, otherwise floored at pct_95 from the latest
    #                       f_upper_bound_corrected_sale snapshot (0 when missing).
    #   avg_corrected_base_sale_qty_rollback_30day
    #                     - per store/product mean corrected_base_sale_qty over the
    #                       previous 30 days.
    #   a_store_sku_daily_prev / _olala / a_store_sku_daily
    #                     - the day's quantities from a_store_sku_daily, plus the same
    #                       sales converted to component products through
    #                       d_product_bom_winare (is_box = 0), summed per
    #                       day/store/product.
    #   full_store_sku    - store_sku left-joined to those quantities (store ids
    #                       remapped through d_franchise_config); sale and promotion
    #                       quantities that are NULL or <= 0 become 0.
    #   sale_stock_check  - adds is_promotion and is_oos.
    #   check_outlier     - adds upper/lower fences and is_outlier, populated only
    #                       once the day is >= 30 days after first_sale_stock_day.
    #   check_inactive    - inactive_range: sum of is_oos over the previous 29 days
    #                       already stored plus this day.
    #   check_corrected   - adds is_corrected.
    # The final SELECT picks corrected_base_sale_qty: 0.03 for pairs younger than
    # 30 days that are not a box component, 0 when inactive_range >= 30, the
    # 30-day average on promotion / out-of-stock days, the upper fence for
    # outliers, otherwise the day's base_sale_qty.
    # NOTE(review): the INSERT has no column list, so values are presumably
    # matched to the table's columns by position; the output order depends on how
    # `a.*` expands across the USING joins — confirm it lines up with the table
    # definition before reordering any SELECT list or join here.
    spark.sql(f""" 
              INSERT INTO {catalog_name}.udp_wcm_gold_vcm_dmt.f_corrected_sale
              WITH
              store_sku AS (
                SELECT  DATE('{target.calday}') as calday, * FROM {catalog_name}.udp_wcm_gold_vcm_dmt.a_store_sku_current WHERE first_sale_stock_day IS NOT NULL 
                AND ( date('{target.calday}')) >= first_sale_stock_day
              ),
              percentiles AS (
                  SELECT a.store_id, a.product_id, MAX(a.q1) q1, MAX(a.q3) q3
                  FROM (
                    SELECT 
                     store_id, product_id,
                        PERCENTILE(corrected_base_sale_qty, array(0.25))[0] AS q1,
                        PERCENTILE(corrected_base_sale_qty, array(0.75))[0] AS q3
                    FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_corrected_sale
                    WHERE calday BETWEEN DATE_SUB( DATE('{target.calday}'),180) AND DATE_SUB( DATE('{target.calday}'),1) GROUP BY 1,2
            ) a 
            GROUP BY 1,2
            ),
            outliers AS (
            SELECT    
                a.*,
                IF(IFNULL(b.cost_per_unit, 0) > 300000, GREATEST(a.q3 + 1.5*(a.q3-a.q1), 0), GREATEST(a.q3 + 1.5*(a.q3-a.q1), IFNULL(e.pct_95, 0))) upper_outlier,
                a.q1 - 1.5*(a.q3-a.q1) lower_outlier
            FROM percentiles a
            LEFT JOIN (SELECT store_id, product_id, cost_per_unit  FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_cogs_min WHERE CALDAY = DATE('{target.calday}')) b
                ON a.store_id = b.store_id AND a.product_id = b.product_id
            LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_store c
                ON a.store_id = c.store_id
            LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product d
                ON a.product_id = d.product_id
            LEFT JOIN (
                SELECT * 
                FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_upper_bound_corrected_sale 
                WHERE calday = (SELECT MAX(calday) FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_upper_bound_corrected_sale)
            ) e
                ON c.business_unit = e.business_unit AND d.mch5_id = e.mch5_id
        ),
        avg_corrected_base_sale_qty_rollback_30day AS (
            SELECT     
                store_id,
                product_id,
                AVG(IFNULL(corrected_base_sale_qty,0)) avg_corrected_base_sale_qty_rollback_30day
            FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_corrected_sale
            WHERE calday BETWEEN DATE_SUB( DATE('{target.calday}'),30) AND DATE_SUB( DATE('{target.calday}'),1)
            GROUP BY 1,2
        ),
        a_store_sku_daily_prev AS (
            SELECT
                calday, store_id, product_id,
                IFNULL(revenue.base_sale_qty, 0) - IFNULL(revenue.base_sale_qty_sll, 0) base_sale_qty, 
                IFNULL(revenue.base_promotion_qty, 0) base_promotion_qty,
                IFNULL(revenue.base_promotion_coupon_qty, 0) base_promotion_coupon_qty,
                IFNULL(stock.closing_stock_quantity, 0) closing_stock_quantity
            FROM {catalog_name}.udp_wcm_gold_vcm_dmt.a_store_sku_daily
            WHERE calday = DATE('{target.calday}') 
        ),
        a_store_sku_daily_olala AS (
            SELECT 
                a.calday, a.store_id, b.component_id product_id,
                a.base_sale_qty * (b.component_quantity / b.base_quantity) + a.base_promotion_qty * (b.component_quantity / b.base_quantity) base_sale_qty,
                0 base_promotion_qty,
                0 base_promotion_coupon_qty,
                0 closing_stock_quantity
            FROM a_store_sku_daily_prev a
            INNER JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product_bom_winare b
                ON a.product_id = b.product_id
            WHERE b.is_box = 0
        ),
        a_store_sku_daily AS (
            SELECT
                calday, store_id, product_id,
                SUM(base_sale_qty) base_sale_qty,
                SUM(base_promotion_qty) base_promotion_qty,
                SUM(base_promotion_coupon_qty) base_promotion_coupon_qty,
                SUM(closing_stock_quantity) closing_stock_quantity
            FROM (
                SELECT * FROM a_store_sku_daily_prev
                UNION ALL
                SELECT * FROM a_store_sku_daily_olala
            )
            GROUP BY 1,2,3
        ),
        full_store_sku AS (
            SELECT    
                cat.calday,
                cat.store_id,
                cat.addr_numbr,
                cat.product_id,
                cat.first_sale_stock_day,
                IF(IFNULL(a.base_sale_qty, 0) <= 0, 0, a.base_sale_qty) base_sale_qty,
                IF(IFNULL(a.base_promotion_qty, 0) <= 0, 0, a.base_promotion_qty) promotion_qty,
                IF(IFNULL(a.base_promotion_coupon_qty, 0) <= 0, 0, a.base_promotion_coupon_qty) promotion_coupon_qty,
                IFNULL(a.closing_stock_quantity, 0) closing_stock_qty
            FROM store_sku cat
            LEFT JOIN ( 
                SELECT 
                    * EXCEPT(fc.store_id, a.store_id), 
                    IFNULL(fc.store_id, a.store_id) as store_id 
                FROM a_store_sku_daily a
                LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_franchise_config fc ON a.store_id = fc.ref_fc_store
            ) a
                USING (store_id, product_id)
        ),
        sale_stock_check AS (
            SELECT	  	
                a.*,
                IF(a.promotion_coupon_qty > 0.5 * a.base_sale_qty OR (b.STORE_ID IS NOT NULL AND a.base_sale_qty > 0), 1, 0) is_promotion,
                IF(a.base_sale_qty = 0 AND a.closing_stock_qty <= 0, 1, 0) is_oos
            FROM full_store_sku a
            LEFT JOIN (SELECT * FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_promotion_free_gift_current_his WHERE calday = DATE('{target.calday}')) b
                USING(store_id, product_id)
        ),
        check_outlier AS (
            SELECT    
                a.*,
                IF(DATE('{target.calday}') >= DATE_ADD(a.first_sale_stock_day, 30), upper_outlier, NULL) upper_outlier,
                IF(DATE('{target.calday}') >= DATE_ADD(a.first_sale_stock_day, 30), lower_outlier, NULL) lower_outlier,
                IF(DATE('{target.calday}') >= DATE_ADD(a.first_sale_stock_day,30), IF(base_sale_qty > upper_outlier, 1, 0), NULL) is_outlier              
            FROM sale_stock_check a
            LEFT JOIN outliers b
                USING (store_id, product_id)
        ),
        check_inactive AS (
            SELECT		
                a.store_id,
                a.product_id,
                SUM(is_oos) inactive_range
            FROM (
                SELECT calday, store_id, product_id, is_oos 
                FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_corrected_sale
                WHERE calday BETWEEN DATE('{target.calday}') - 29 AND DATE('{target.calday}') - 1
                UNION ALL 
                SELECT calday, store_id, product_id, is_oos 
                FROM sale_stock_check
            ) a
            GROUP BY	
                a.store_id,
                a.product_id
        ),
        check_corrected AS (
            SELECT	
                *,
                IF(DATE('{target.calday}') >= DATE_ADD(first_sale_stock_day, 30),
                IF(is_promotion = 1 OR is_oos = 1 OR is_outlier = 1, 1, 0),
                IF(is_promotion = 1 OR is_oos = 1, 1, 0)) is_corrected
            FROM check_outlier
        )
        SELECT    
            a.first_sale_stock_day, b.inactive_range, a.* EXCEPT(first_sale_stock_day, addr_numbr),
            IF(b.inactive_range >= 30, 0, c.avg_corrected_base_sale_qty_rollback_30day) avg_corrected_base_sale_qty_rollback_30day,
            CASE	
                WHEN DATE('{target.calday}') < DATE_ADD(a.first_sale_stock_day, 30) AND d.product_id IS NULL THEN 0.03 
                WHEN b.inactive_range >= 30 THEN 0
                WHEN is_promotion = 1 OR is_oos = 1 THEN IFNULL(c.avg_corrected_base_sale_qty_rollback_30day, 0)
                WHEN is_outlier = 1 THEN a.upper_outlier
                ELSE base_sale_qty 
            END corrected_base_sale_qty,
            CASE	
                WHEN DATE('{target.calday}') < DATE_ADD(a.first_sale_stock_day,30) AND d.product_id IS NULL THEN "INITIAL CORRECTED SALES"
                WHEN b.inactive_range >= 30 THEN "TEMPORARY INNACTIVE"
                ELSE NULL 
            END note,
            addr_numbr
        FROM check_corrected a
        LEFT JOIN avg_corrected_base_sale_qty_rollback_30day c USING (store_id, product_id)
        LEFT JOIN CHECK_INACTIVE b ON a.store_id = b.store_id AND a.product_id = b.product_id
        LEFT JOIN (SELECT DISTINCT component_id AS product_id FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_product_bom_winare WHERE is_box = 1) d ON a.product_id = d.product_id
                """)
    

In [0]:
%run "../common/common_etl_update"