In [0]:
# Declare the "proc_date" text widget with an empty default and read its
# current value. The value is passed unchanged to
# create_temp_view_with_clean_columns further down in this notebook.
dbutils.widgets.text("proc_date", "")
proc_date = dbutils.widgets.get("proc_date")

In [0]:
%run ../../../utils/common

In [0]:
# Declare the "environment" text widget (empty default, empty label) and read
# its current value. The value is used below as the key into `settings` to
# select the catalog.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Resolve the catalog name for the selected environment.
# `settings` is not defined in this notebook - presumably it is provided by
# the `%run ../../../utils/common` cell; verify.
# NOTE(review): the "environment" widget defaults to "", so running without a
# value fails on this lookup unless `settings` has an entry for "" - confirm
# that callers always pass the widget.
catalog_name = settings[environment]['catalog_name']

In [0]:
# Call the shared helper with the bronze schema/table, the proc_date widget
# value and the name 'temp_sap_paddr_numbr'.
# The helper is not defined in this notebook (presumably it comes from the
# `%run ../../../utils/common` cell). Judging by its name and by the later
# `FROM temp_sap_paddr_numbr`, it registers the bronze table
# `udp_wcm_bronze_sap_bw.sap_bw_wcm_paddr_numbr` as that temp view with
# cleaned column names - TODO confirm, including how proc_date (which may be
# the empty widget default) is applied.
create_temp_view_with_clean_columns(
    spark,
    catalog_name,
    'udp_wcm_bronze_sap_bw',
    'sap_bw_wcm_paddr_numbr',
    proc_date,
    'temp_sap_paddr_numbr'
)

In [0]:
# Create the silver history table `udp_wcm_silver_sap_bw.sap_paddr_numbr` in
# the selected catalog if it does not exist yet (no-op otherwise).
#   hash_id              - BIGINT fingerprint of the address attributes
#                          (computed in the `sap_paddr_numbr` temp view below)
#   addr_numbr .. city_2 - address attributes
#   start_date, end_date - validity interval of the row; the later cells use
#                          end_date = 2400-01-01 to mark the open (current) row
#   proc_date, file_creation_ts - carried over from the source view
# The table properties request Delta auto-optimize (optimized writes and
# auto compaction).
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_paddr_numbr
(
  hash_id BIGINT,
  addr_numbr STRING,
  email_addr STRING,
  name40 STRING,
  street60 STRING,
  str_suppl2 STRING,
  str_suppl3 STRING,
  str_suppl4 STRING,
  city_1 STRING,
  city_2 STRING,
  proc_date DATE,
  start_date DATE,
  end_date DATE,
  file_creation_ts TIMESTAMP
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'true',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'true'
)
""")

In [0]:
# Build the temp view `sap_paddr_numbr`: the change history (one row per
# distinct version) of every address number found in `temp_sap_paddr_numbr`.
#
# Pipeline (CTE by CTE):
#   temp   - normalise the source: strip leading '0' characters from
#            _bic_zc_adnumb (two-argument LTRIM takes the trim characters
#            first), turn empty strings into NULL, and keep one row per
#            (addr_numbr, calendar day of proc_date) - the first by proc_date.
#            NOTE(review): rows that tie on proc_date within a day are picked
#            arbitrarily; add a tie-breaker (e.g. file_creation_ts) once the
#            intended direction is confirmed.
#   main   - add hash_id, a fingerprint over all attributes (NULL -> ''), and
#            start_date = DATE(proc_date). FARM_FINGERPRINT is not defined in
#            this notebook - presumably registered by the `%run` common
#            notebook; verify.
#            NOTE(review): the attributes are concatenated without a
#            delimiter, so values shifting between adjacent columns can
#            produce the same hash. Left as-is because changing the hash input
#            would change every stored hash_id.
#   main_2 / main_3 - keep a row only when its hash differs from the previous
#            row of the same addr_numbr (or when there is no previous row).
#   main_4 - end_date candidate = start of the next version of the address.
#   final  - end_date = day before the next version starts, or the open-ended
#            sentinel 2400-01-01 for the latest version.
#
# The sentinel is cast to DATE explicitly. Mixing DATE with a bare string
# literal inside IFNULL leaves the result type to implicit coercion (it can
# resolve to STRING), whereas the target column and the MERGE predicates in
# the following cells are DATE.
# The statement has no placeholders, so a plain (non-f) string is used.
spark.sql("""
CREATE OR REPLACE TEMP VIEW sap_paddr_numbr
AS
WITH
temp AS (
    SELECT * EXCEPT(rn)
    FROM (
        SELECT
            *,
            ROW_NUMBER() OVER (PARTITION BY addr_numbr, DATE(proc_date) ORDER BY proc_date) rn
        FROM (
            SELECT
                LTRIM('0', _bic_zc_adnumb) addr_numbr,
                IF(email_addr = '', NULL, email_addr) email_addr,
                IF(name40 = '', NULL, name40) name40,
                IF(street60 = '', NULL, street60) street60,
                IF(str_suppl2 = '', NULL, str_suppl2) str_suppl2,
                IF(str_suppl3 = '', NULL, str_suppl3) str_suppl3,
                IF(str_suppl4 = '', NULL, str_suppl4) str_suppl4,
                IF(city_1 = '', NULL, city_1) city_1,
                IF(city_2 = '', NULL, city_2) city_2,
                proc_date,
                file_creation_ts
            FROM temp_sap_paddr_numbr
        ) a
    ) a
    WHERE rn = 1
),
main AS (
    SELECT
        FARM_FINGERPRINT(CONCAT(
            IFNULL(CAST(addr_numbr AS STRING), ""),
            IFNULL(CAST(email_addr AS STRING), ""),
            IFNULL(CAST(name40 AS STRING), ""),
            IFNULL(CAST(street60 AS STRING), ""),
            IFNULL(CAST(str_suppl2 AS STRING), ""),
            IFNULL(CAST(str_suppl3 AS STRING), ""),
            IFNULL(CAST(str_suppl4 AS STRING), ""),
            IFNULL(CAST(city_1 AS STRING), ""),
            IFNULL(CAST(city_2 AS STRING), "")
        )) hash_id,
        *,
        DATE(proc_date) start_date
    FROM temp
),
main_2 AS (
    SELECT
        *,
        LAG(hash_id, 1) OVER (PARTITION BY addr_numbr ORDER BY proc_date) hash_id_prev
    FROM main
),
main_3 AS (
    SELECT * EXCEPT(hash_id_prev)
    FROM main_2
    WHERE (hash_id != hash_id_prev OR hash_id_prev IS NULL)
),
main_4 AS (
    SELECT
        *,
        LEAD(DATE(proc_date)) OVER (PARTITION BY addr_numbr ORDER BY proc_date ASC) end_date
    FROM main_3
)
SELECT
    * EXCEPT(end_date),
    IFNULL(DATE_ADD(end_date, -1), CAST('2400-01-01' AS DATE)) end_date
FROM main_4
""")

In [0]:
# Close superseded rows in the silver table.
# Source: the open rows of the `sap_paddr_numbr` view (end_date = 2400-01-01),
# i.e. the latest version of each address in this batch.
# A target row matches when it belongs to the same addr_numbr, is still open
# (end_date = 2400-01-01) and carries a different hash_id; its end_date is then
# set to the day before the new version's start_date.
# Rows with an unchanged hash are not touched.
# NOTE(review): there is no check that the target row started before the
# incoming version, so reprocessing an older proc_date could set end_date
# earlier than the row's own start_date - confirm this cannot happen.
spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_paddr_numbr inf_tbl
USING (
    SELECT addr_numbr, hash_id, start_date 
    FROM sap_paddr_numbr
    WHERE end_date = CAST('2400-01-01' AS DATE)
) tmp_tbl 
ON inf_tbl.addr_numbr = tmp_tbl.addr_numbr 
    AND inf_tbl.hash_id != tmp_tbl.hash_id 
    AND inf_tbl.end_date = CAST('2400-01-01' AS DATE)
WHEN MATCHED THEN UPDATE SET inf_tbl.end_date = DATE_ADD(tmp_tbl.start_date,-1)""")

In [0]:
# Insert new versions into the silver table.
# A view row counts as already present when the target holds an open row
# (end_date = 2400-01-01) with the same hash_id; every other view row is
# inserted. There is no WHEN MATCHED clause, so existing rows are left as-is.
# INSERT * takes all columns from the view, so the view's column names must
# line up with the target table's columns.
spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_paddr_numbr inf_tbl
USING sap_paddr_numbr tmp_tbl 
ON inf_tbl.hash_id = tmp_tbl.hash_id  AND inf_tbl.end_date = DATE('2400-01-01')
WHEN NOT MATCHED THEN INSERT * """)