In [0]:
# Declare the "proc_date" text widget (empty default) and read its current
# value. The value is passed on to create_temp_view_with_clean_columns.
dbutils.widgets.text("proc_date", "")
proc_date = dbutils.widgets.get("proc_date")

In [0]:
%run ../../../utils/common

In [0]:
# Declare the "environment" text widget (empty default, empty label) and read
# its current value. It is used as the key into `settings` to pick the catalog.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Resolve the catalog name configured for the selected environment.
# `settings` is not defined in this notebook; presumably it is provided by the
# `%run ../../../utils/common` cell — verify.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    # The "environment" widget defaults to "", so an unset or unknown value
    # would otherwise surface as an opaque KeyError(''). Same exception type,
    # clearer message.
    raise KeyError(
        f"Cannot resolve catalog_name for environment {environment!r}: "
        "check the 'environment' widget value and the matching settings entry"
    ) from exc

In [0]:
# Register the temp view `temp_sap_zc_manftr`, which the SCD query reads from.
# create_temp_view_with_clean_columns is not defined in this notebook
# (presumably it comes from utils/common). Judging by its name and arguments it
# exposes {catalog_name}.udp_wcm_bronze_sap_bw.sap_bw_wcm_zc_manftr for
# `proc_date` with cleaned column names — TODO confirm against the helper.
create_temp_view_with_clean_columns(
    spark,
    catalog_name,
    'udp_wcm_bronze_sap_bw',
    'sap_bw_wcm_zc_manftr',
    proc_date,
    'temp_sap_zc_manftr'
)

In [0]:
# Create the silver target table on first run; IF NOT EXISTS makes this a no-op
# once the table is there.
# Columns: hash_id, the source attributes (_bic_zc_manftr, langu, txtmd,
# proc_date, file_creation_ts) and a start_date/end_date validity range that
# the later cells maintain (DATE '2400-01-01' is used there as the
# "still open" end_date).
# NOTE(review): the TBLPROPERTIES keys are written in upper case; the
# documented spellings are delta.autoOptimize.optimizeWrite and
# delta.autoOptimize.autoCompact — confirm they are recognised in this casing.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_zc_manftr
( 
  hash_id BIGINT,
  _bic_zc_manftr STRING,
  langu STRING,
  txtmd STRING,
  proc_date DATE,
  file_creation_ts TIMESTAMP,
  start_date DATE,
  end_date DATE
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'true',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'true'
)
""")

In [0]:
# Build the temp view `sap_zc_manftr`: a change history derived from
# temp_sap_zc_manftr, one row per key version with a validity range.
#
#   temp    - strip leading '0' characters from the key and keep one row per
#             (_bic_zc_manftr, DATE(proc_date)).
#   main    - hash_id = fingerprint of key + langu + txtmd (NULLs treated as
#             ''); start_date = DATE(proc_date).
#   main_2  - attach the previous hash_id of the same key, ordered by proc_date.
#   main_3  - keep only rows whose hash differs from the previous one, or that
#             have no previous row (i.e. the first row of each version).
#   main_4  - end_date candidate = DATE(proc_date) of the key's next version.
#   final   - end_date = day before the next version, or DATE '2400-01-01' for
#             the latest version of each key.
#
# The open-ended marker is a typed DATE literal (not a bare string) so that
# end_date is a DATE column regardless of how COALESCE coerces DATE vs STRING.
# Empty-string literals use single quotes so they are never parsed as
# identifiers. No Python interpolation is needed, hence a plain string.
#
# NOTE(review): the dedup in `temp` orders by proc_date inside a partition that
# already fixes DATE(proc_date); if proc_date is a DATE, the surviving row is
# arbitrary when several rows (e.g. several langu values) share a key — confirm.
# NOTE(review): FARM_FINGERPRINT is not defined in this notebook; presumably a
# function registered elsewhere — verify. The hash inputs are concatenated
# without a separator; changing that would alter already stored hash_id values.
spark.sql("""
CREATE OR REPLACE TEMP VIEW sap_zc_manftr AS
WITH temp AS (
    SELECT * EXCEPT(rn)
    FROM (
        SELECT *, 
               ROW_NUMBER() OVER (
                   PARTITION BY _bic_zc_manftr, DATE(proc_date) 
                   ORDER BY proc_date
               ) AS rn
        FROM (
            SELECT 
                LTRIM('0', _bic_zc_manftr) AS _bic_zc_manftr, 
                langu, 
                txtmd, 
                proc_date,
                file_creation_ts
            FROM temp_sap_zc_manftr
        ) a
    ) a
    WHERE rn = 1
),
main AS (
    SELECT 
        FARM_FINGERPRINT(CONCAT(
            COALESCE(CAST(_bic_zc_manftr AS STRING), ''), 
            COALESCE(CAST(langu AS STRING), ''),
            COALESCE(CAST(txtmd AS STRING), '')
        )) AS hash_id,
        *,
        DATE(proc_date) AS start_date
    FROM temp
),
main_2 AS (
    SELECT *,
           LAG(hash_id, 1) OVER (
               PARTITION BY _bic_zc_manftr 
               ORDER BY proc_date
           ) AS hash_id_prev
    FROM main
),
main_3 AS (
    SELECT * EXCEPT(hash_id_prev)
    FROM main_2
    WHERE hash_id != hash_id_prev OR hash_id_prev IS NULL
),
main_4 AS (
    SELECT *,
           LEAD(DATE(proc_date)) OVER (
               PARTITION BY _bic_zc_manftr 
               ORDER BY proc_date ASC
           ) AS end_date
    FROM main_3
)
SELECT 
    * EXCEPT(end_date),
    COALESCE(DATE_ADD(end_date, -1), DATE '2400-01-01') AS end_date
FROM main_4
""")


In [0]:
# Close out the open target rows whose content has changed.
#
# Source: the rows of the `sap_zc_manftr` view that carry the open-ended marker
# (end_date = DATE '2400-01-01'), i.e. the latest version per key.
# Match:  an open target row with the same key but a different hash_id.
# Action: set the target row's end_date to the day before the new version's
#         start_date.
#
# The key is compared with the null-safe operator `<=>`: with plain `=`, rows
# whose key is NULL could never match and would stay open forever, even though
# the view's window functions treat NULL as a regular key group.
spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_zc_manftr inf_tbl
USING (
    SELECT _bic_zc_manftr, hash_id, start_date
    FROM sap_zc_manftr
    WHERE end_date = DATE '2400-01-01'
) tmp_tbl
ON inf_tbl._bic_zc_manftr <=> tmp_tbl._bic_zc_manftr
   AND inf_tbl.hash_id != tmp_tbl.hash_id
   AND inf_tbl.end_date = DATE '2400-01-01'
WHEN MATCHED THEN UPDATE SET inf_tbl.end_date = DATE_ADD(tmp_tbl.start_date, -1)
""")

In [0]:
# Insert the new versions into the target table.
# A view row is inserted when no open target row (end_date = DATE '2400-01-01')
# has the same hash_id; rows whose hash already exists as the open version are
# left alone, so unchanged data is not duplicated.
# INSERT * copies columns by name, so the view must expose every target column.
# NOTE(review): view rows are only compared against *open* target rows, so
# versions that are already closed in the target would presumably be inserted
# again if the same data were reprocessed — confirm re-run behaviour.
spark.sql(f"""
	MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_zc_manftr inf_tbl
		USING sap_zc_manftr tmp_tbl
		ON inf_tbl.hash_id = tmp_tbl.hash_id  AND inf_tbl.end_date = DATE '2400-01-01'
		WHEN NOT MATCHED THEN INSERT * """)