In [0]:
%run ../utils/common

In [0]:
# Declare a text widget named "environment" (the other two arguments are empty
# strings) and read its current value. The value is used in the next cell as the
# key into `settings`.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Resolve the catalog name for the selected environment; it is interpolated into
# every fully-qualified table name in the SQL cells that follow.
# NOTE(review): `settings` is not defined in this notebook — presumably it comes
# from the `%run ../utils/common` cell at the top; confirm.
catalog_name = settings[environment]['catalog_name']

In [0]:
# Create the target table d_vendor_sku_in_out_daily if it does not exist yet
# (IF NOT EXISTS makes this a no-op on later runs).
# Schema: `calday` is a DATE, every other column is a STRING, and `in_out_type`
# is declared last.
# The table properties set the auto-optimize optimize-write and auto-compact
# options to TRUE.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_gold_vcm_dwh.d_vendor_sku_in_out_daily
(
  calday DATE,
  roll_out STRING,
  region STRING,
  mch3_id STRING,
  mch3_name STRING,
  product_id STRING,
  product_name STRING,
  manufacturer_id STRING,
  manufacturer_name STRING,
  sub_manufacturer STRING,
  uom STRING,
  assortment STRING,
  vendor_id STRING,
  vendor_name STRING,
  vendor_subrange STRING,
  in_out_type STRING
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
);
""")

In [0]:
# Delete the rows whose calday equals "yesterday", where yesterday is computed
# from the current timestamp shifted by +7 hours. The same date expression is
# used as the calday of the rows inserted at the end of the notebook, so this
# clears that day's slice before it is loaded again.
# NOTE(review): the +7 hour shift presumably converts a UTC session clock to
# local time (UTC+7) — confirm against the cluster's session time zone.
spark.sql(f"""
DELETE FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_vendor_sku_in_out_daily
WHERE CALDAY = date_sub(current_timestamp() + INTERVAL 7 HOUR, 1);   
""")

In [0]:
# Build the temp view temp_d_vendor_sku_in_out_daily, which lists the
# (region, product_id) pairs that appeared in or disappeared from
# d_vendor_sku_his between yesterday and today (both dates derived from
# current_timestamp() + 7 hours).
#
# temp0: distinct (region, product_id, calday) from d_vendor_sku_his, restricted
#        to calday in [yesterday, today].
# temp1: per (region, product_id), ordered by calday DESC:
#          lead_calday = calday of the next row, i.e. the older day in the window
#          lag_calday  = calday of the previous row, i.e. the newer day
#
# Branch 1 ('IN'):  today's rows with lead_calday IS NULL, i.e. the pair has no
#                   row for yesterday.
# Branch 2 ('OUT'): yesterday's rows with lag_calday IS NULL, i.e. the pair has
#                   no row for today.
# Each branch joins back to d_vendor_sku_his on (region, product_id, calday) to
# pick up the descriptive columns. Both branches emit calday = yesterday.
#
# NOTE(review): the view's column order is
#   calday, region, product_id, in_out_type, roll_out, mch3_id, mch3_name,
#   product_name, manufacturer_id, manufacturer_name, sub_manufacturer, uom,
#   assortment, vendor_id, vendor_name, vendor_subrange
# which differs from the column order declared for d_vendor_sku_in_out_daily;
# consumers should select from this view by column name, not by position.
spark.sql(f"""
CREATE OR REPLACE TEMP VIEW temp_d_vendor_sku_in_out_daily AS
WITH 
temp0 AS (
    SELECT DISTINCT region, product_id, calday
    FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_vendor_sku_his
    WHERE calday BETWEEN date_sub(current_timestamp() + INTERVAL 7 HOUR, 1) 
        AND DATE(current_timestamp() + INTERVAL 7 HOUR)
),
temp1 AS (
    SELECT 
        *,
        LEAD(calday) OVER (PARTITION BY region, product_id ORDER BY calday DESC) AS lead_calday,
        LAG(calday) OVER (PARTITION BY region, product_id ORDER BY calday DESC) AS lag_calday
    FROM temp0
)
SELECT 
    date_sub(current_timestamp() + INTERVAL 7 HOUR, 1) AS calday,
    h.region,
    h.product_id,
    
    'IN' AS in_out_type, h.roll_out, h.mch3_id, h.mch3_name, h.product_name, h.manufacturer_id, h.manufacturer_name, h.sub_manufacturer, h.uom, h.assortment, h.vendor_id, h.vendor_name, h.vendor_subrange
FROM temp1 t
INNER JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_vendor_sku_his h 
    ON t.region = h.region 
    AND t.product_id = h.product_id
    AND t.calday = h.calday
WHERE t.calday = date(current_timestamp() + INTERVAL 7 HOUR)
    AND lead_calday IS NULL

UNION ALL

SELECT 
    date_sub(current_timestamp() + INTERVAL 7 HOUR, 1) AS calday,
    h.region,
    h.product_id,
    
    'OUT' AS in_out_type, h.roll_out, h.mch3_id, h.mch3_name, h.product_name, h.manufacturer_id, h.manufacturer_name, h.sub_manufacturer, h.uom, h.assortment, h.vendor_id, h.vendor_name, h.vendor_subrange
FROM temp1 t
INNER JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_vendor_sku_his h 
    ON t.region = h.region 
    AND t.product_id = h.product_id
    AND t.calday = h.calday
WHERE t.calday = date_sub(current_timestamp() + INTERVAL 7 HOUR, 1)
    AND lag_calday IS NULL
""")

In [0]:
# Load the IN/OUT rows from the temp view into the target table.
#
# Columns are matched explicitly by name on both sides. A bare
# `INSERT INTO ... SELECT *` assigns columns by position, and the view emits
# them in a different order than the table declares them (the view starts with
# calday, region, product_id, in_out_type; the table starts with calday,
# roll_out, region, mch3_id and ends with in_out_type). Because every non-date
# column is a STRING, a positional load would succeed while writing values into
# the wrong columns.
spark.sql(f"""
INSERT INTO {catalog_name}.udp_wcm_gold_vcm_dwh.d_vendor_sku_in_out_daily (
    calday,
    roll_out,
    region,
    mch3_id,
    mch3_name,
    product_id,
    product_name,
    manufacturer_id,
    manufacturer_name,
    sub_manufacturer,
    uom,
    assortment,
    vendor_id,
    vendor_name,
    vendor_subrange,
    in_out_type
)
SELECT
    calday,
    roll_out,
    region,
    mch3_id,
    mch3_name,
    product_id,
    product_name,
    manufacturer_id,
    manufacturer_name,
    sub_manufacturer,
    uom,
    assortment,
    vendor_id,
    vendor_name,
    vendor_subrange,
    in_out_type
FROM temp_d_vendor_sku_in_out_daily
""")