In [0]:
%run ../utils/common

In [0]:
# Notebook parameter: declare an "environment" text widget (empty default,
# empty label) and read back whatever value the caller/job supplied.
ENVIRONMENT_WIDGET = "environment"
dbutils.widgets.text(ENVIRONMENT_WIDGET, "", "")
environment = dbutils.widgets.get(ENVIRONMENT_WIDGET)

In [0]:
# Resolve the catalog that every table reference below is prefixed with.
# NOTE(review): `settings` is not defined in this notebook; presumably it is
# provided by `%run ../utils/common` -- confirm.
# The "environment" widget defaults to an empty string, so an unset parameter
# would otherwise surface as a bare `KeyError: ''`. Re-raise the same exception
# type with a message that says what is actually wrong.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    raise KeyError(
        f"No 'catalog_name' configured for environment {environment!r}; "
        "set the 'environment' widget to a key present in settings"
    ) from exc

In [0]:
# Rebuild {catalog_name}.udp_wcm_gold_vcm_dwh.d_dynamic_assorment from scratch
# (CREATE OR REPLACE ... AS). The result holds one row per
# (store_id, mch5_id, product_id) with an `is_keeping` flag.
#
# NOTE(review): `init_date` is interpolated into the SQL but is not defined in
# this notebook; presumably it comes from `%run ../utils/common` -- confirm.
#
# CTE walk-through:
#   store_sku                - store x product pairs from d_store_sku_list_simulation
#                              restricted to business_unit '2000', mch2_id in
#                              ('102','201','202','203'), asm_type 'CORE3' and
#                              concept <> 'RURAL' (case-insensitive; NULL concept is kept).
#                              is_top_store = 1 when the store appears in
#                              d_mdq_pog_core3_store.
#   asm_active               - ACTIVE rows of d_assortment_scd for those pairs, with the
#                              earliest start_date per product (also per concept and per
#                              concept/region; only min_listing_sku is used downstream).
#   a_store_sku_daily        - per store/product over the 30 days ending at init_date:
#                              closing stock on init_date and avg_sale = SUM(cogs_pos) / 30.
#   agg_store_level          - per-store total of avg_sale, divided by 1,000,000.
#   store_sale_cluster       - buckets avg_sale_store into TOP / 12-14 / 10-12 / 08-10 / FLOP.
#                              Every band is lower-inclusive. TOP uses `>= 14`: with the
#                              previous `> 14`, a store at exactly 14 matched no WHEN
#                              branch and got a NULL cluster, hence no SKU quota.
#   sale_concept_region_qlkv - average avg_sale per concept/region/qlkv/product.
#   agg_qlkv_level           - attaches that average to every store_sku row
#                              (its `rn` column is not read by later CTEs).
#   no_sku_mch5              - UNPIVOTs no_sku_distribution_import (model 'PA3') into
#                              (mch5_id, store_cluster, no_sku) and renames MID_* labels
#                              to the labels produced by store_sale_cluster.
#   mapping                  - joins everything per store/product; is_new_listing = 1 when
#                              min_listing_sku is later than init_date - 90 days.
#   new_listing_priority     - rows that are new listings, top stores or in cluster 'TOP';
#                              these are always kept (is_keeping = 1 in `main`).
#   all_listing              - ranks SKUs inside each store/mch5 by avg_sale_compare DESC;
#                              rows with rn <= no_sku_keeping are kept.
#   main + final QUALIFY     - UNION ALL of both paths, de-duplicated per
#                              store/mch5/product preferring is_keeping = 1.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog_name}.udp_wcm_gold_vcm_dwh.d_dynamic_assorment
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
    AS
    WITH store_sku AS ( 
        SELECT 
            a.store_id, 
            b.concept,
            b.region_domain_vn region, 
            b.city,
            b.qlkv, 
            a.product_id, 
            c.product_name,
            c.mch5_id, 
            c.mch5_desc,
            a.asm_type,
            c.temperature,
            IF(d.store_id IS NULL, 0,1) is_top_store    
        FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_store_sku_list_simulation a
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_store b
            USING(store_id)
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product c
            USING(product_id)
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_mdq_pog_core3_store d
           USING(store_id)
        WHERE b.business_unit = '2000'
            AND c.mch2_id IN ('102','201','202','203')
            AND a.asm_type = 'CORE3'
            AND UPPER(IFNULL(b.concept, 'NULL')) <> 'RURAL'
    ),
    asm_active AS (
        SELECT
            DISTINCT
            a.store_id,
            a.product_id,
            b.region,
            b.concept,
            MIN(a.start_date) OVER(PARTITION BY a.product_id) min_listing_sku,
            MIN(a.start_date) OVER(PARTITION BY b.concept, a.product_id) min_listing_concept,
            MIN(a.start_date) OVER(PARTITION BY b.concept, b.region, a.product_id) min_listing_concept_region
        FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_assortment_scd a
        INNER JOIN store_sku b ON a.store_id = b.store_id AND a.product_id = b.product_id
        WHERE a.status = 'ACTIVE'
    ),
    a_store_sku_daily AS (
        SELECT
            b.concept, 
            b.region_domain_vn region, 
            b.qlkv,  
            a.store_id, 
            c.mch5_id, 
            a.product_id,
            SUM(IF(calday = '{init_date}', a.stock.closing_stock_value, 0)) closing_stock_val,
            SUM(IF(calday = '{init_date}', a.stock.closing_stock_quantity, 0)) closing_stock_qty,
            SUM(IFNULL(revenue.cogs_pos, 0))/30 avg_sale
        FROM {catalog_name}.udp_wcm_gold_vcm_dmt.a_store_sku_daily a
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_store b 
            USING(store_id)
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product c
            USING(product_id)
        INNER JOIN (SELECT DISTINCT store_id FROM store_sku) d
            USING(store_id)
        WHERE a.calday >= DATEADD('{init_date}',-29) AND a.calday <= '{init_date}'
            AND c.mch2_id IN ('102','201','202','203')
        GROUP  BY 1,2,3,4,5,6
    ),  
    agg_store_level AS (
        SELECT store_id, SUM(avg_sale)/1000000 avg_sale_store
        FROM a_store_sku_daily
        GROUP BY 1 
    ),
    store_sale_cluster AS (
        SELECT 
            store_id,
            avg_sale_store,
            CASE 
                WHEN avg_sale_store >= 14 THEN 'TOP'
                WHEN avg_sale_store >= 12 AND avg_sale_store < 14 THEN '12-14'
                WHEN avg_sale_store >= 10 AND avg_sale_store < 12 THEN '10-12'
                WHEN avg_sale_store >= 8 AND avg_sale_store < 10 THEN '08-10'
                WHEN avg_sale_store < 8 THEN 'FLOP'
            END store_cluster
        FROM agg_store_level
    ),
    sale_concept_region_qlkv AS (
        SELECT 
            a.concept, 
            a.region, 
            a.qlkv, 
            a.product_id, 
            AVG(a.avg_sale) avg_sale
        FROM a_store_sku_daily a
        GROUP BY 1,2,3,4
    ),
    agg_qlkv_level AS (
        SELECT
            a.store_id, 
            a.mch5_id,
            a.product_id,
            b.avg_sale, 
            ROW_NUMBER() OVER (PARTITION BY a.store_id, a.mch5_id ORDER BY IFNULL(b.avg_sale, 0) DESC, IFNULL(a1.avg_sale, 0)) AS rn
        FROM store_sku a
        LEFT JOIN a_store_sku_daily a1
            ON a.store_id = a1.store_id AND a.product_id = a1.product_id
        LEFT JOIN sale_concept_region_qlkv b
            ON a.concept = b.concept AND a.region = b.region AND a.qlkv = b.qlkv AND a.product_id = b.product_id
    ), 
    no_sku_mch5 AS (
        SELECT 
            mch5_id, 
            CASE 
                WHEN store_cluster = 'MID_12_14' THEN '12-14'
                WHEN store_cluster = 'MID_10_12' THEN '10-12'
                WHEN store_cluster = 'MID_8_10' THEN '08-10'
                ELSE store_cluster 
            END AS store_cluster, 
            no_sku
        FROM (
            SELECT mch5_id, store_cluster, no_sku
            FROM {catalog_name}.udp_wcm_gold_wcm_fin_biztrans.no_sku_distribution_import 
            UNPIVOT(
                no_sku FOR store_cluster IN (TOP, MID_12_14, MID_10_12, MID_8_10, FLOP)
            )
            WHERE model = 'PA3'
        )
    ),
    mapping AS (
        SELECT
            a.region,
            a.city,
            a.concept,
            a.store_id,
            b.store_cluster,
            a.mch5_id,
            a.mch5_desc,
            a.product_id,
            a.product_name,
            a.is_top_store,
            c.no_sku no_sku_keeping,
            IFNULL(a1.avg_sale, 0) avg_sale_store,
            IFNULL(a2.avg_sale, 0) avg_sale_qlkv,
            CASE 
            WHEN IFNULL(a1.avg_sale, 0) = 0 OR IFNULL(a2.avg_sale, 0) = 0 
                THEN IFNULL(a1.avg_sale, 0) + IFNULL(a2.avg_sale, 0)
            ELSE (a1.avg_sale + a2.avg_sale) / 2 
            END avg_sale_store_qlkv,
            d.min_listing_sku,
            IF(min_listing_sku > DATEADD( '{init_date}',-90), 1, 0) is_new_listing,
            COUNT(*) OVER(PARTITION BY a.store_id, a.mch5_id) cnt_sku_active
        FROM store_sku a
        LEFT JOIN a_store_sku_daily a1
            USING(store_id,product_id)
        LEFT JOIN agg_qlkv_level a2
            USING(store_id,product_id)
        LEFT JOIN store_sale_cluster b 
            USING(store_id)
        LEFT JOIN no_sku_mch5 c
            ON b.store_cluster = c.store_cluster AND a.mch5_id = c.mch5_id
        LEFT JOIN asm_active d
            USING(store_id,product_id)
    ),
    new_listing_priority AS (
        SELECT 
            *,
            IF(no_sku_keeping > 2, avg_sale_store_qlkv, avg_sale_qlkv) avg_sale_compare
        FROM mapping
        WHERE is_new_listing = 1 or is_top_store = 1 OR store_cluster = 'TOP'
    ),
    all_listing AS (
        SELECT 
            a.*,
            IF(a.no_sku_keeping > 2, a.avg_sale_store_qlkv, a.avg_sale_qlkv) avg_sale_compare,
            ROW_NUMBER() OVER(PARTITION BY a.store_id, a.mch5_id ORDER BY IF(a.no_sku_keeping > 2, a.avg_sale_store_qlkv, a.avg_sale_qlkv) DESC) rn
        FROM mapping a
    ),
    main AS (
        SELECT 
            * ,1 is_keeping
        FROM new_listing_priority
        UNION ALL
        SELECT 
            * EXCEPT(rn), IF(rn <= no_sku_keeping, 1, 0) is_keeping
        FROM all_listing
    )
    SELECT *
    FROM main
    QUALIFY ROW_NUMBER() OVER(PARTITION BY store_id, mch5_id, product_id ORDER BY is_keeping DESC) = 1;
""")

In [0]:
# Create the history table that stores one snapshot of d_dynamic_assorment per
# `calday`, if it does not exist yet.
#
# `is_top_store` is declared here because the snapshot INSERT later in this
# notebook names it in its column list; without the column, the INSERT fails
# on any table created by this statement.
# NOTE: CREATE TABLE IF NOT EXISTS leaves an already-existing table untouched,
# so a table created earlier without `is_top_store` still needs a one-off
# ALTER TABLE ... ADD COLUMNS.
spark.sql(f"""
CREATE  TABLE  IF NOT EXISTS {catalog_name}.udp_wcm_gold_vcm_dwh.d_dynamic_assorment_his (
  calday DATE,
  init_date DATE,
  region STRING,
  city STRING,
  concept STRING,
  store_id STRING,
  store_cluster STRING,
  mch5_id STRING,
  mch5_desc STRING,
  product_id STRING,
  product_name STRING,
  is_top_store INT,
  no_sku_keeping BIGINT,
  avg_sale_store DOUBLE,
  avg_sale_qlkv DOUBLE,
  avg_sale_store_qlkv DOUBLE,
  min_listing_sku DATE,
  is_new_listing INT,
  cnt_sku_active BIGINT,
  avg_sale_compare DOUBLE,
  is_keeping INT
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
);
""")

In [0]:
# Remove any history rows already written for "today" so that re-running the
# notebook on the same day does not duplicate the snapshot. "Today" is the
# current timestamp shifted by +7 hours, truncated to a date.
# NOTE(review): the +7h shift presumably converts a UTC session clock to
# Vietnam local time -- confirm the cluster's session time zone.
purge_today_sql = f"""DELETE FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_dynamic_assorment_his WHERE calday = DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS);
"""
spark.sql(purge_today_sql)

In [0]:
# Append the freshly built d_dynamic_assorment table to the history table,
# stamping every row with today's date (current timestamp + 7 hours) as
# `calday` and with the `init_date` used for the build.
# NOTE(review): `init_date` is not defined in this notebook; presumably it is
# provided by `%run ../utils/common` -- confirm.
snapshot_insert_sql = f"""
INSERT INTO {catalog_name}.udp_wcm_gold_vcm_dwh.d_dynamic_assorment_his(calday,init_date,region,city,concept,store_id,store_cluster,mch5_id,mch5_desc,product_id,product_name,is_top_store,no_sku_keeping,avg_sale_store,avg_sale_qlkv,avg_sale_store_qlkv,min_listing_sku,is_new_listing,cnt_sku_active,avg_sale_compare,is_keeping)
    SELECT
        DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) calday,
        '{init_date}',
        region,
        city,
        concept,
        store_id,
        store_cluster,
        mch5_id,
        mch5_desc,
        product_id,
        product_name,
        is_top_store,
        no_sku_keeping,
        avg_sale_store,
        avg_sale_qlkv,
        avg_sale_store_qlkv,
        min_listing_sku,
        is_new_listing,
        cnt_sku_active,
        avg_sale_compare,
        is_keeping
        FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_dynamic_assorment;
"""
spark.sql(snapshot_insert_sql)