In [0]:
# Register the `proc_date` text widget (empty default) and read its current
# value; later cells pass it to the staging helper and interpolate it into SQL.
_proc_date_widget = "proc_date"
dbutils.widgets.text(_proc_date_widget, "")
proc_date = dbutils.widgets.get(_proc_date_widget)

In [0]:
%run ../../../utils/common

In [0]:
# Register the `environment` text widget (empty default, empty label) and read
# its current value; it is used as the key into `settings` in the next cell.
_environment_widget = "environment"
dbutils.widgets.text(_environment_widget, "", "")
environment = dbutils.widgets.get(_environment_widget)

In [0]:
# Look up the catalog name configured for the selected environment.
# `settings` is not defined in this notebook; presumably it is provided by the
# `%run ../../../utils/common` cell above — verify there.
_environment_settings = settings[environment]
catalog_name = _environment_settings['catalog_name']

In [0]:
# Stage the bronze SAP BW table for this run. The helper is defined outside
# this notebook (presumably in utils/common); the later view cell selects from
# `temp_sap_assortment_site`, so the last argument is taken to be the name of
# the temp view it creates — confirm against the helper's definition.
_bronze_schema = 'udp_wcm_bronze_sap_bw'
_bronze_table = 'sap_bw_wcm_vcdmdmm09'
_staging_view = 'temp_sap_assortment_site'

create_temp_view_with_clean_columns(
    spark, catalog_name, _bronze_schema, _bronze_table, proc_date, _staging_view
)

In [0]:
# Create the silver target table on first run (no-op when it already exists).
# The column list matches, by name, the columns produced by the
# `sap_assortment_site` temp view that is later merged in with INSERT *.
_create_silver_table_sql = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_assortment_site 
(	
	hash_id BIGINT
	,assortment STRING
	,lfdnr STRING
	,site STRING
	,date_from DATE
	,date_to DATE
	,aedat DATE
	,proc_date DATE
	,file_creation_ts TIMESTAMP
	,start_date DATE
	,end_date DATE
	,is_delete INT
	)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
 """
spark.sql(_create_silver_table_sql)

In [0]:
# Build the `sap_assortment_site` temp view: the versioned rows that the later
# UPDATE / MERGE cells apply to the silver table.
#
# CTE pipeline:
#   temp   - strips leading '0' characters from asort / lfdnr / locnr (LTRIM is
#            called in its (trimStr, str) argument order), parses datab / datbi /
#            aedat as yyyyMMdd dates, and keeps the first row (by proc_date) per
#            (assortment, site, date_from, date_to, DATE(proc_date)).
#   main   - adds hash_id = farm_fingerprint over the concatenation of
#            assortment, lfdnr, site, date_from, date_to, and
#            start_date = DATE(proc_date).
#            NOTE(review): farm_fingerprint is not defined in this notebook;
#            presumably it is registered by utils/common.
#            NOTE(review): the CONCAT has no delimiter, so different field
#            combinations can produce the same input string. Left unchanged
#            because altering it would change every hash_id already stored.
#   main_2 / main_3 - within each (assortment, site, date_from, date_to) key,
#            ordered by proc_date, drop rows whose hash_id equals the previous
#            row's hash_id (only changes are kept).
#   main_4 - end_date = DATE(proc_date) of the next kept row for the same key;
#            max_aedat = latest aedat per (assortment, site).
#   final  - end_date is the day before the next version when one exists;
#            otherwise the open-ended sentinel 2400-01-01 when the row carries
#            the latest aedat, else the row's own proc_date. is_delete is 0 for
#            rows carrying the latest aedat and 1 otherwise.
#
# Fix: the sentinel is now a typed DATE literal. The original mixed the string
# '2400-01-01' with a DATE branch inside CASE, which made the type of end_date
# depend on the session's implicit-coercion rules even though it is compared
# against DATE '2400-01-01' downstream. The string has no placeholders, so the
# unnecessary f-prefix is dropped as well.
spark.sql("""
    CREATE OR REPLACE TEMP VIEW sap_assortment_site
    AS
    WITH temp AS (
    SELECT * EXCEPT(rn) FROM (
        SELECT *, ROW_NUMBER() OVER (PARTITION BY assortment, site, date_from, date_to ,DATE(proc_date) ORDER BY proc_date) rn FROM (
            SELECT LTRIM('0',asort) AS assortment
            ,LTRIM('0',lfdnr) AS lfdnr
            ,LTRIM('0',locnr) AS site
            ,TO_DATE(datab, 'yyyyMMdd') AS date_from
            ,TO_DATE(datbi, 'yyyyMMdd') AS date_to
            ,TO_DATE(aedat, 'yyyyMMdd') AS aedat
            ,proc_date
            ,file_creation_ts
            FROM temp_sap_assortment_site ) a
        ) WHERE rn = 1
    ) 
    ,main AS (
    SELECT farm_fingerprint(CONCAT(
                COALESCE(CAST(assortment AS STRING), ""),
                COALESCE(CAST(lfdnr AS STRING), ""),
                COALESCE(CAST(site AS STRING), ""),
                COALESCE(CAST(date_from AS STRING), ""),
                COALESCE(CAST(date_to AS STRING), "")
            )) hash_id,*, DATE(proc_date) start_date FROM temp
    ),
    main_2 AS (
        SELECT *
        , LAG(hash_id,1) OVER (PARTITION BY assortment, site, date_from, date_to ORDER BY proc_date) hash_id_prev
            FROM main
    ),
    main_3 AS (
        SELECT * EXCEPT(hash_id_prev) FROM main_2 WHERE (hash_id != hash_id_prev OR hash_id_prev IS NULL)
    ),
    main_4 AS (
        SELECT *
        , LEAD(DATE(proc_date)) OVER (PARTITION BY assortment, site, date_from, date_to ORDER BY proc_date ASC) end_date
        , MAX(aedat) OVER (PARTITION BY assortment, site) max_aedat FROM main_3
    )
    SELECT 
    * EXCEPT(end_date, max_aedat)
    , COALESCE(DATE_ADD(end_date,-1)
    ,CASE WHEN max_aedat = aedat THEN DATE '2400-01-01' ELSE TO_DATE(proc_date) END) AS end_date
    ,CASE WHEN max_aedat = aedat THEN 0 ELSE 1 END AS is_delete FROM main_4
    """)

In [0]:
# Soft-delete pass: flag every still-active silver row (is_delete = 0) whose
# (assortment, site) key no longer appears in the `sap_assortment_site` view,
# stamping proc_date and end_date from the notebook's `proc_date` parameter.
#
# Fix: the NOT IN subquery now excludes NULL keys. `a || '_' || b` is NULL when
# either side is NULL, and a single NULL in a NOT IN list makes the predicate
# evaluate to NULL for every row, so the whole UPDATE silently matched nothing
# whenever the source contained one row with a NULL assortment or site.
#
# NOTE(review): DATE_FROM_UNIX_DATE expects an integer number of days since
# 1970-01-01. `proc_date` is interpolated unquoted, so a value such as
# 2024-01-15 would be evaluated as integer arithmetic. The widget's format is
# not visible in this notebook — confirm it really is a day count.
spark.sql(f"""
    UPDATE {catalog_name}.udp_wcm_silver_sap_bw.sap_assortment_site
    SET is_delete = 1,
        proc_date = DATE_FROM_UNIX_DATE({proc_date}),
        end_date = DATE_FROM_UNIX_DATE({proc_date})
    WHERE assortment || '_' || site NOT IN (
        SELECT assortment || '_' || site FROM sap_assortment_site
        WHERE assortment IS NOT NULL AND site IS NOT NULL
    ) AND is_delete = 0;
""")

In [0]:
# Close-out pass: for every open-ended incoming version (end_date = 2400-01-01),
# find the open silver row with the same (assortment, site, date_from, date_to)
# key but a different hash_id, and end it the day before the new version starts.
#
# Fix: the original ON clause also required inf_tbl.lfdnr = tmp_tbl.lfdnr.
# hash_id is derived from exactly assortment, lfdnr, site, date_from and
# date_to, so demanding equality on all five while also demanding
# hash_id != hash_id could never be satisfied and this MERGE never matched a
# row. Dropping the lfdnr equality lets a changed lfdnr (the only hashed column
# outside the key) close the previous version.
#
# The source has at most one open-ended row per key (only the last row of each
# key partition in the view can receive the 2400-01-01 sentinel), so a target
# row cannot be matched by more than one source row.
spark.sql(f"""
    MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_assortment_site inf_tbl
    USING (
        SELECT assortment , lfdnr, site, date_from, date_to, hash_id, start_date 
        FROM sap_assortment_site
        WHERE end_date = DATE '2400-01-01'
    ) tmp_tbl
    ON inf_tbl.assortment = tmp_tbl.assortment
    AND inf_tbl.site = tmp_tbl.site
    AND inf_tbl.date_from = tmp_tbl.date_from 
    AND inf_tbl.date_to = tmp_tbl.date_to 
    AND inf_tbl.hash_id != tmp_tbl.hash_id 
    AND inf_tbl.end_date = DATE '2400-01-01'
    WHEN MATCHED THEN 
    UPDATE SET inf_tbl.end_date = DATE_ADD(tmp_tbl.start_date, -1), is_delete = 0;
""")

In [0]:
# Insert pass: add every row of the `sap_assortment_site` view whose hash_id
# does not match an open-ended (end_date = 2400-01-01) row in the silver table.
# INSERT * maps the view's columns onto the target's columns by name.
_insert_new_versions_sql = f"""
    MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_assortment_site inf_tbl
    USING sap_assortment_site tmp_tbl
    ON inf_tbl.hash_id = tmp_tbl.hash_id
    AND inf_tbl.end_date = DATE '2400-01-01'
    WHEN NOT MATCHED THEN 
    INSERT *;
"""
spark.sql(_insert_new_versions_sql)