In [0]:
%run ../utils/common

In [0]:

# Declare a notebook text widget named "environment" (empty default value,
# empty label) and read its current value. The value is used below as the key
# into `settings`, so it must be supplied by the caller/job; the empty default
# is not expected to be a valid key -- TODO confirm against `settings`.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Resolve the catalog that every table reference in this notebook is prefixed
# with. `settings` is not defined in this notebook (presumably it comes from
# the `%run ../utils/common` cell -- verify there).
#
# The "environment" widget defaults to an empty string, so a missing or
# mistyped value would otherwise fail with an opaque KeyError(''). Re-raise the
# same exception type with the offending value so the cause is obvious in the
# job log.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    raise KeyError(
        f"No 'catalog_name' configured for environment {environment!r}; "
        "set the 'environment' widget to a key defined in `settings`."
    ) from exc

In [0]:
# Rebuild <catalog>.udp_wcm_gold_db_winare_nonfresh_dc.d_leadtime_order_store:
# one row per (store_id, dc_id) with lead-time statistics (in days, from PO
# creation_date to gr_date) over stock-transport orders created in the six
# months up to yesterday. The table is fully replaced on every run.
#
# Query notes:
#   * "Today" is DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS); this presumably
#     shifts a UTC session clock to UTC+7 -- TODO confirm the session time zone.
#   * d_store is INNER JOINed: the filter on b.business_unit already discards
#     rows without a matching store, so the former LEFT JOIN never kept any.
#   * Quartiles are computed in the same GROUP BY as avg/min/max/stddev instead
#     of window functions + DISTINCT + a self-join, so po_item is only
#     referenced once. Restricting to go-live DCs before computing quartiles
#     does not change them, because the grouping key includes dc_id.
#   * `leadtime` is the published value: CEIL(avg) clamped to the range 1..14.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog_name}.udp_wcm_gold_db_winare_nonfresh_dc.d_leadtime_order_store
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
AS
WITH dc_golive AS (
    -- DCs listed in the go-live configuration, de-duplicated so the join below cannot fan out
    SELECT DISTINCT dc_id
    FROM {catalog_name}.udp_wcm_gold_db_winare_nonfresh_dc.d_cfg_winare_dc_golive
),
po_item AS (
    -- DISTINCT collapses rows that share the same PO, store, DC and dates
    SELECT DISTINCT
        a.creation_date,
        a.po_doc,
        a.store_id,
        a.dc_id,
        a.gr_date,
        DATEDIFF(a.gr_date, a.creation_date) AS leadtime
    FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_po_item a
    INNER JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_store b
        ON a.store_id = b.store_id
    WHERE a.creation_date BETWEEN ADD_MONTHS(DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS), -6)
                             AND DATE_SUB(DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS), 1)
      AND a.po_type = 'Stock Transp. Order'
      AND DATEDIFF(a.gr_date, a.creation_date) >= 1
      AND b.business_unit <> '1100'
),
leadtime_stats AS (
    SELECT
        a.store_id,
        a.dc_id,
        AVG(a.leadtime) AS avg_leadtime,
        MIN(a.leadtime) AS min_leadtime,
        MAX(a.leadtime) AS max_leadtime,
        -- STDDEV is NULL for a single observation, reported as 0
        COALESCE(STDDEV(a.leadtime), 0) AS stddev_leadtime,
        percentile_approx(a.leadtime, 0.25) AS q1_leadtime,
        percentile_approx(a.leadtime, 0.5) AS q2_leadtime,
        percentile_approx(a.leadtime, 0.75) AS q3_leadtime
    FROM po_item a
    INNER JOIN dc_golive b ON a.dc_id = b.dc_id
    GROUP BY a.store_id, a.dc_id
)
SELECT
    DATE_SUB(DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS), 1) AS calday,
    s.store_id,
    s.dc_id,
    LEAST(GREATEST(CEIL(s.avg_leadtime), 1), 14) AS leadtime,
    s.avg_leadtime,
    s.min_leadtime,
    s.max_leadtime,
    s.stddev_leadtime,
    s.q1_leadtime,
    s.q2_leadtime,
    s.q3_leadtime
FROM leadtime_stats s
""")