In [0]:
%run ../utils/common

In [0]:
# Declare the "environment" notebook parameter as a free-text widget with an
# empty default value and an empty label, then read back the value supplied
# for this run.
dbutils.widgets.text(name="environment", defaultValue="", label="")
environment = dbutils.widgets.get("environment")

In [0]:
# Look up the catalog that every table reference below is qualified with.
# `settings` is not defined in this notebook; it presumably comes from the
# `%run ../utils/common` include (TODO confirm). The widget defaults to an
# empty string, so an unset or misspelled environment lands here: re-raise the
# KeyError with a message that says what to fix instead of a bare KeyError('').
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    raise KeyError(
        f"No 'catalog_name' is configured for environment {environment!r}; "
        "set the 'environment' widget to a key defined in settings."
    ) from exc

In [0]:
# Register the session-scoped SQL function fn_MIN_CONTINUOUS_DATE(arr ARRAY<DATE>).
#
# It returns the first day of the most recent run of consecutive calendar days
# found in `arr`:
#   1. explode the array and number the dates in ascending order (RN);
#   2. dates that belong to the same run of consecutive days share the same
#      value of date_sub(DT, RN), so grouping by it yields one group per run;
#   3. take the start (MIN) of every run and keep the latest one.
#
# The numbering trick only works on unique dates: a repeated date gets its own
# row number, which shifts date_sub(DT, RN) and splits the run it belongs to.
# The array is therefore de-duplicated with array_distinct() before exploding.
spark.sql("""
CREATE OR REPLACE TEMPORARY FUNCTION fn_MIN_CONTINUOUS_DATE(arr ARRAY<DATE>)
RETURNS DATE
RETURN (
    SELECT MIN(DT)
    FROM (
        SELECT DT, ROW_NUMBER() OVER (ORDER BY DT) AS RN
        FROM (
            -- de-duplicate first: repeated dates would break the run detection
            SELECT explode(array_distinct(arr)) AS DT
        ) AS exploded_arr
    ) AS numbered_arr
    -- consecutive days minus their row number collapse to one constant per run
    GROUP BY date_sub(DT, RN)
    -- latest run first; its MIN(DT) is the start of that run
    ORDER BY MIN(DT) DESC
    LIMIT 1
)
""")

In [0]:
# Rebuild {catalog_name}.udp_wcm_gold_vcm_dwh.d_vendor_sku_prev from scratch.
#
# The statement picks one vendor / order unit per region and product for the
# assortments that are active on the run date, and attaches a roll-out date.
# The run date is the current timestamp shifted by +7 hours (presumably a
# UTC-to-local-time adjustment -- confirm against the session time zone).
#
# Requires the temporary function fn_MIN_CONTINUOUS_DATE to be registered in
# the current Spark session before this cell runs.
#
# Notes on the non-obvious parts:
#   * The assortment validity filter uses the same shifted run date as the
#     roll-out logic. It used to be CURRENT_DATE() + 7 hours, and adding seven
#     hours to midnight never changes the date, so the shift had no effect.
#   * Both ROW_NUMBER() rankings end with explicit tie-breakers so that the
#     row kept by `rn = 1` is not arbitrary when candidates tie on every
#     business criterion.
#   * The roll-out date is computed from a de-duplicated set of days, because
#     the run detection in fn_MIN_CONTINUOUS_DATE numbers the dates with
#     ROW_NUMBER() and repeated days would split a run.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog_name}.udp_wcm_gold_vcm_dwh.d_vendor_sku_prev
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
 AS
WITH
-- Active assortment rows valid on the run date, limited to asrt_group 'DC_WCM',
-- distribution channel '90' and products whose status is not 'Z9'
-- (a NULL product status is kept).
assorment AS (
    SELECT 
        a.start_date, TRIM(b.name) assortment_name, a.store_id, 
        c.mch3_id, c.mch3_name, a.product_id, c.product_name, c.manufacturer_id, c.manufacturer_name, c.sub_manufacturer_id, c.sub_manufacturer_name,
        a.assortment
    FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_assortment_scd a
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_assortment_desc b
        ON a.assortment = b.assortment
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product c
        ON a.product_id = c.product_id
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_store d
        ON a.store_id = d.store_id
    WHERE a.status = 'ACTIVE' 
        -- same shifted run date as in check_list_in below
        AND DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) BETWEEN a.start_date AND a.end_date
        AND b.asrt_group = 'DC_WCM'
        AND d.distribution_channel = '90'
        AND IFNULL(c.product_status, 'NULL') <> 'Z9'
),
-- Vendor master with leading zeros stripped from the vendor id.
pvendor AS (
    SELECT LTRIM('0', vendor_id) vendor_id, company_name vendor_name
    FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_vendor_master
),
-- Distinct product list per region. The region is the assortment name without
-- its first two characters; start_date is the latest start date per
-- assortment name and product.
assorment_full as (
   SELECT DISTINCT
    RIGHT(a.assortment_name, LENGTH(a.assortment_name) - 2) region,
    CAST(MAX(a.start_date) OVER (PARTITION BY a.assortment_name, a.product_id) AS STRING) start_date,
    a.mch3_id,
    a.mch3_name,
    a.product_id,
    a.product_name,
    a.manufacturer_id,
    a.manufacturer_name,
    a.sub_manufacturer_id,
    a.sub_manufacturer_name sub_manufacturer,
    a.assortment
FROM assorment a
),
-- Vendor candidates: assortment rows matched to vendor-by-DC-SKU rows
-- (store_id = dc_id), enriched with the unit conversion factor and the
-- priority flags used for ranking (0 sorts before 1).
assorment_vendor AS (
    SELECT
        DISTINCT
        RIGHT(a.assortment_name, LENGTH(a.assortment_name) - 2) region, b.supply_region,
        CAST(MAX(a.start_date) OVER(PARTITION BY a.assortment_name, a.product_id) AS STRING) start_date,
        a.mch3_id, a.mch3_name, a.product_id, a.product_name, a.manufacturer_id, a.manufacturer_name, a.sub_manufacturer_id, a.sub_manufacturer_name sub_manufacturer, a.assortment,
        -- conversion falls back to 1 when the unit row is missing or the denominator is 0
        b.uom, COALESCE(d.numerator / NULLIF(d.denomintr, 0), 1) AS conversion,
        IF(d.barcode LIKE '205%', 1, 0) barcode_priority,
        IF(e.order_unit IS NOT NULL AND d.barcode NOT LIKE '205%', 0, 1) pir_priority,
        IF(f.order_unit IS NOT NULL AND d.barcode NOT LIKE '205%', 0, 1) order_unit_priority,
        LTRIM('0', b.vendor) vendor_id, c.vendor_name, b.vendor_subrange, b.valid_from, b.valid_to,
        b.max_grdate,
        b.max_grdate_sub_mnf,
        b.sell_price
    FROM assorment a
    INNER JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_vendor_by_dc_sku b
        ON a.store_id = b.dc_id AND a.product_id = b.product_id
    LEFT JOIN pvendor c
        ON LTRIM('0', b.vendor) = c.vendor_id
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product_unit d
        ON a.product_id = d.product_id AND b.uom = d.unit
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_pir_current e
        ON LTRIM('0', b.vendor) = LTRIM('0', e.vendor_id) AND a.product_id = e.product_id AND b.uom = e.order_unit
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product f
        ON a.product_id = f.product_id AND b.uom = f.order_unit
),
-- Rank the candidates of each region and product; rn = 1 is the preferred one.
assorment_vendor_rn AS (
    SELECT *,
        ROW_NUMBER() OVER(PARTITION BY region, product_id 
            ORDER BY 
                LEFT(supply_region, 1), 
                pir_priority, 
                order_unit_priority, 
                conversion, barcode_priority, 
                COALESCE(max_grdate, '2016-01-01') DESC, COALESCE(max_grdate_sub_mnf, '2016-01-01') DESC, COALESCE(valid_from, '2016-01-01') DESC,
                -- tie-breakers: keep the winner stable when all criteria above are equal
                vendor_id, vendor_subrange, uom) AS rn
    FROM assorment_vendor
),
filtered_assorment_vendor_rn AS (
    SELECT *
    FROM assorment_vendor_rn
    WHERE rn = 1
),
-- Derive the parent region (region name without the WMP / WMT / WCM token)
-- and a business unit code from the region name.
group_region AS (
    SELECT
        *,
        TRIM(REGEXP_REPLACE(UPPER(region), 'WMP |WMT |WCM |WMP|WMT|WCM', '')) parent_region,
        CASE
            WHEN UPPER(region) RLIKE 'WMT' THEN '1500'
            WHEN UPPER(region) RLIKE 'WMP' THEN '2000'
            ELSE '9999'
        END business_unit  
    FROM filtered_assorment_vendor_rn
),
-- Rank the per-region winners again inside each parent region and product.
group_region_vendor AS (
    SELECT 
        DISTINCT
        start_date,
        parent_region, 
        product_id, 
        uom,
        vendor_id,
        vendor_name,
        vendor_subrange,
        ROW_NUMBER() OVER(PARTITION BY parent_region, product_id 
            ORDER BY 
                business_unit, 
                pir_priority, 
                order_unit_priority, 
                conversion, barcode_priority, 
                COALESCE(max_grdate, '2016-01-01') DESC, COALESCE(max_grdate_sub_mnf, '2016-01-01') DESC, COALESCE(valid_from, '2016-01-01') DESC,
                -- tie-breakers: keep the winner stable when all criteria above are equal
                vendor_id, vendor_subrange, uom, region) AS rn
    FROM group_region
),
filtered_group_region_vendor AS (
    SELECT *
    FROM group_region_vendor
    WHERE rn = 1
),
-- Give every region row the vendor and unit chosen for its parent region.
final_vendor AS (
    SELECT
        DISTINCT
        a.region,
        a.mch3_id,
        a.mch3_name,
        a.product_id,
        a.product_name,
        a.manufacturer_id,
        a.manufacturer_name,
        a.sub_manufacturer_id,
        a.sub_manufacturer,
        b.uom,
        a.assortment,
        b.vendor_id,
        b.vendor_name,
        b.vendor_subrange,
        a.sell_price
    FROM group_region a
    LEFT JOIN filtered_group_region_vendor b
        ON a.parent_region = b.parent_region AND a.product_id = b.product_id
),
-- Roll-out date per region and product: the day before the start of the most
-- recent run of consecutive days, taken over the history days up to yesterday
-- plus the run date for products that have a vendor in final_vendor.
check_list_in AS (
    SELECT
    region,
    product_id,
    -- COLLECT_SET drops repeated days, which would otherwise split a run
    DATE_SUB(fn_MIN_CONTINUOUS_DATE(COLLECT_SET(calday)), 1) AS roll_out
FROM (
    SELECT
        calday,
        region,
        product_id
    FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_vendor_sku_his
    WHERE calday <= DATE_SUB(DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS), 1)

    UNION ALL

    SELECT
        DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) AS calday,
        region,
        product_id
    FROM final_vendor
    WHERE vendor_id iS NOT NULL
) AS combined
    GROUP BY 1,2
),
-- Final projection: every region / assortment / product of assorment_full,
-- with vendor columns and roll-out date left NULL when there is no match.
main AS (
    SELECT
        CAST(c.roll_out AS STRING) roll_out,
        a.region,
        a.mch3_id,
        a.mch3_name,
        a.product_id,
        a.product_name,
        a.manufacturer_id,
        a.manufacturer_name,
        a.sub_manufacturer_id,
        a.sub_manufacturer,
        b.uom,
        b.assortment,
        b.vendor_id,
        b.vendor_name,
        b.vendor_subrange,
        b.sell_price
    FROM assorment_full a
    LEFT JOIN final_vendor b
      USING(region, assortment, product_id)
    LEFT JOIN check_list_in c
    ON a.region = c.region AND a.product_id = c.product_id
)
SELECT *
FROM main
""")