In [0]:
# Job parameter: declare the "proc_date" text widget (empty default) and read
# whatever value was supplied for this run. The value is passed on unchanged
# to create_temp_view_with_clean_columns further down.
proc_date_widget = "proc_date"
dbutils.widgets.text(proc_date_widget, "")
proc_date = dbutils.widgets.get(proc_date_widget)

In [0]:
%run ../../../utils/common

In [0]:
# Job parameter: declare the "environment" text widget (empty default, empty
# label) and read its value. It is used below as the key into `settings`.
environment_widget = "environment"
dbutils.widgets.text(environment_widget, "", "")
environment = dbutils.widgets.get(environment_widget)

In [0]:
# Resolve the catalog for the selected environment.
# `settings` is not defined in this notebook — presumably provided by the
# `%run ../../../utils/common` cell; verify there.
environment_settings = settings[environment]
catalog_name = environment_settings['catalog_name']

In [0]:
# Stage the bronze source for this run. The helper is defined outside this
# notebook; presumably it registers the temp view named by its last argument
# (the transformation cell below reads FROM temp_prpa_mean) — confirm in utils.
bronze_schema = 'udp_wcm_bronze_sap_bw'
bronze_table = 'sap_bw_wcm_prpa_mean'
staging_view = 'temp_prpa_mean'

create_temp_view_with_clean_columns(
    spark, catalog_name, bronze_schema, bronze_table, proc_date, staging_view
)

In [0]:
# Make sure the silver history table exists before anything is merged into it.
# Each row carries a validity window (start_date .. end_date) plus the hash_id
# of its attribute values; optimized writes and auto-compaction are turned on
# through table properties.
silver_table_ddl = f""" CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_prpa_mean
(
  hash_id BIGINT,
  rpa_lfnum STRING,
  rpa_disqu STRING,
  rpa_mean STRING,
  eanupc STRING,
  ean_numtyp STRING,
  rpa_hpean STRING,
  proc_date DATE,
  start_date DATE,
  end_date DATE,
  file_creation_ts TIMESTAMP
)
TBLPROPERTIES (
  'delta.autoOptimize.optimizeWrite' = 'true',
  'delta.autoOptimize.autoCompact' = 'true'
)"""
spark.sql(silver_table_ddl)

In [0]:
# Build the staged temp view `sap_prpa_mean` from `temp_prpa_mean`.
#
# Steps (one CTE each):
#   temp    - keep objvers = 'A' rows, turn blank strings into NULL, and keep
#             one row per (rpa_lfnum, rpa_disqu, rpa_mean, DATE(proc_date)),
#             the first by proc_date.
#   main    - add hash_id over the key and attribute columns, and
#             start_date = DATE(proc_date). `farm_fingerprint` is not defined
#             in this notebook — presumably a function registered by
#             utils/common; verify.
#   main_2  - fetch the previous hash_id per key, ordered by proc_date.
#   main_3  - keep only rows whose hash_id differs from the previous one
#             (or that have no previous row).
#   main_4  - end_date candidate = the next kept row's date for the same key.
#   final   - end_date = day before the next version, or the open-ended
#             sentinel 2400-01-01 for the latest version.
#
# The sentinel is written as DATE('2400-01-01') rather than a bare string so
# IFNULL has two DATE operands and the view's end_date column is DATE typed,
# matching the target table and the DATE('2400-01-01') comparisons in the
# MERGE statements, instead of depending on implicit string/date coercion.
#
# NOTE(review): the `IS NOT NULL` key filters in the innermost WHERE apply to
# the raw source columns, so blank ('') keys pass the filter and are then
# turned into NULL by the CASE expressions — confirm whether blank keys should
# be excluded as well.
#
# Plain string (no f-prefix): the statement has no placeholders.
spark.sql("""
CREATE OR REPLACE TEMP VIEW sap_prpa_mean
AS
WITH temp AS (
SELECT * EXCEPT(rn) FROM (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY rpa_lfnum, rpa_disqu, rpa_mean, DATE(proc_date) ORDER BY proc_date) rn FROM (
        SELECT
                CASE WHEN rpa_lfnum = '' THEN NULL ELSE rpa_lfnum END AS rpa_lfnum,
                CASE WHEN rpa_disqu = '' THEN NULL ELSE rpa_disqu END AS rpa_disqu,
                CASE WHEN rpa_mean = '' THEN NULL ELSE rpa_mean END AS rpa_mean,
                CASE WHEN eanupc = '' THEN NULL ELSE eanupc END AS eanupc,
                CASE WHEN ean_numtyp = '' THEN NULL ELSE ean_numtyp END AS ean_numtyp,
                CASE WHEN rpa_hpean = '' THEN NULL ELSE rpa_hpean END AS rpa_hpean,
                proc_date,
                file_creation_ts,
                objvers,
                changed
        FROM temp_prpa_mean
        WHERE objvers = 'A'
            AND rpa_lfnum IS NOT NULL
            AND rpa_disqu IS NOT NULL
            AND rpa_mean IS NOT NULL
        ) a
    ) a WHERE rn = 1
),
main AS (
    SELECT
        farm_fingerprint(
            CONCAT(
                    IFNULL(CAST(rpa_lfnum AS STRING), ""),
                    IFNULL(CAST(rpa_disqu AS STRING), ""),
                    IFNULL(CAST(rpa_mean AS STRING), ""),
                    IFNULL(CAST(eanupc AS STRING), ""),
                    IFNULL(CAST(ean_numtyp AS STRING), ""),
                    IFNULL(CAST(rpa_hpean AS STRING), "")
            )
        ) hash_id,
        *, DATE(proc_date) start_date
    FROM temp
),
main_2 AS (
    SELECT *,
        LAG(hash_id,1) OVER (PARTITION BY rpa_lfnum, rpa_disqu, rpa_mean ORDER BY proc_date) hash_id_prev
    FROM main
),
main_3 AS (
    SELECT * EXCEPT(hash_id_prev)
    FROM main_2
    WHERE (hash_id != hash_id_prev OR hash_id_prev IS NULL)
),
main_4 AS (
    SELECT *,
        LEAD(DATE(proc_date)) OVER (PARTITION BY rpa_lfnum, rpa_disqu, rpa_mean ORDER BY proc_date ASC) end_date
    FROM main_3
)
SELECT
* EXCEPT(end_date)
    ,IFNULL(DATE_ADD(end_date,-1), DATE('2400-01-01')) end_date

FROM main_4;
""")

In [0]:
# Step 1 of the history load: close superseded versions in the silver table.
# For every staged open row (end_date = 2400-01-01), find the open target row
# with the same (rpa_lfnum, rpa_disqu, rpa_mean) key but a different hash_id,
# and set its end_date to the day before the staged row's start_date.
close_open_rows_sql = f""" 
MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_prpa_mean inf_tbl
USING (
    SELECT 
        rpa_lfnum, 
        rpa_disqu, 
        rpa_mean,  
        hash_id,
        start_date 
    FROM sap_prpa_mean 
    WHERE end_date = DATE('2400-01-01') 
) tmp_tbl ON 
    inf_tbl.rpa_lfnum = tmp_tbl.rpa_lfnum 
    AND inf_tbl.rpa_disqu = tmp_tbl.rpa_disqu 
    AND inf_tbl.rpa_mean = tmp_tbl.rpa_mean 
    AND inf_tbl.hash_id != tmp_tbl.hash_id 
    AND inf_tbl.end_date = DATE('2400-01-01')
WHEN MATCHED THEN UPDATE 
SET inf_tbl.end_date = DATE_ADD(tmp_tbl.start_date,-1);
"""
spark.sql(close_open_rows_sql)

In [0]:
# Step 2 of the history load: insert staged rows that have no open target row
# (end_date = 2400-01-01) with the same hash_id. Nothing is updated here.
# NOTE(review): the staged view also carries `objvers` and `changed`, which
# the target DDL does not declare — confirm how INSERT * treats them under the
# workspace's schema-evolution settings.
insert_new_rows_sql = f"""
MERGE INTO  {catalog_name}.udp_wcm_silver_sap_bw.sap_prpa_mean inf_tbl
    USING sap_prpa_mean tmp_tbl
    ON inf_tbl.hash_id = tmp_tbl.hash_id AND inf_tbl.end_date = DATE('2400-01-01')
WHEN NOT MATCHED THEN INSERT *;
"""
spark.sql(insert_new_rows_sql)