In [0]:
# Notebook parameter: the processing date handed in by the caller.
# The widget is registered with an empty default and then read back.
proc_date_widget = "proc_date"
dbutils.widgets.text(proc_date_widget, "")
proc_date = dbutils.widgets.get(proc_date_widget)

In [0]:
%run ../../../utils/common

In [0]:
# Notebook parameter: the environment key used to look up settings below.
# Registered with an empty default value and an empty label, then read back.
environment_widget = "environment"
dbutils.widgets.text(environment_widget, "", "")
environment = dbutils.widgets.get(environment_widget)

In [0]:
# Resolve the catalog that every fully-qualified table name below is built on.
# `settings` is not defined in this notebook; presumably it is provided by the
# %run of utils/common above.
# The "environment" widget defaults to an empty string, so a missing or
# misspelled parameter would otherwise surface as an unhelpful KeyError('').
# The exception type stays KeyError; only the message is made actionable.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    raise KeyError(
        f"No 'catalog_name' configured for environment {environment!r}; "
        "check the value passed to the 'environment' widget"
    ) from exc

In [0]:
# Stage the bronze source as the temp view `temp_sap_zc_mchlv1`, which the
# history-building SQL further down reads from.
# NOTE(review): the helper is defined outside this notebook; judging by its name
# it also normalises column names, and proc_date presumably selects the load to
# process — confirm against utils/common.
source_schema = 'udp_wcm_bronze_sap_bw'
source_table = 'sap_bw_wcm_zc_mchlv1'
staging_view = 'temp_sap_zc_mchlv1'

create_temp_view_with_clean_columns(
    spark, catalog_name, source_schema, source_table, proc_date, staging_view
)

In [0]:
# Create the silver history table on first run; IF NOT EXISTS makes this a
# no-op on later runs (an existing table's schema/properties are not altered).
#   hash_id               - fingerprint of key + texts, computed in the view cell
#   _bic_zc_mchlv1        - the key the history is tracked per
#   txtsh / txtmd         - text attributes (presumably SAP short/medium text — confirm)
#   start_date / end_date - validity window of a version; end_date 2400-01-01
#                           is the sentinel the MERGE cells use for "still open"
# The table properties request Delta auto-optimize (optimized writes and
# auto compaction).
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_zc_mchlv1
( 
 hash_id BIGINT,
 _bic_zc_mchlv1 STRING,
 txtsh STRING,
 txtmd STRING,
 file_creation_ts TIMESTAMP,
 proc_date DATE,
 start_date DATE,
 end_date DATE
) 
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
""")

In [0]:
# Build the temp view `sap_zc_mchlv1` holding the version history of the staged
# rows; the two MERGE cells below consume it.
#   temp   - one row per (_bic_zc_mchlv1, calendar day): the first by proc_date
#   main   - adds hash_id over key + txtsh + txtmd (NULLs hashed as "") and
#            start_date = DATE(proc_date)
#   main_2 - fetches the previous hash_id of the same key, ordered by proc_date
#   main_3 - keeps a row only if it is the first for its key or its hash changed
#   main_4 - end_date candidate = date of the next kept version of the key
#   final  - end_date = day before the next version, or 2400-01-01 if none
#
# The open-ended sentinel is cast to DATE explicitly so that end_date is a DATE
# column instead of depending on how IFNULL coerces a DATE/STRING mix.
# A plain string is used because the statement interpolates nothing.
#
# NOTE(review): if proc_date carries no time component, every row in a `temp`
# partition ties on the ORDER BY and the surviving row is arbitrary; a
# tie-breaker (e.g. file_creation_ts) may be wanted — confirm the source schema.
# NOTE(review): the hash input is an undelimited CONCAT, so different splits of
# the same characters across key/texts hash identically. Left unchanged because
# altering it would change every hash_id already stored in the silver table.
# NOTE(review): farm_fingerprint is not defined in this notebook; presumably it
# is registered by utils/common — confirm.
spark.sql("""
    CREATE OR REPLACE TEMP VIEW sap_zc_mchlv1
    AS
    WITH temp AS (
        SELECT * EXCEPT(rn) FROM (
            SELECT
                *,
                ROW_NUMBER() OVER (
                    PARTITION BY _bic_zc_mchlv1, DATE(proc_date)
                    ORDER BY proc_date
                ) rn
            FROM temp_sap_zc_mchlv1
        ) a WHERE rn = 1
    ),
    main AS (
        SELECT farm_fingerprint(CONCAT(
                ifnull(CAST(_bic_zc_mchlv1 AS STRING), ""),
                ifnull(CAST(txtsh AS STRING), ""),
                ifnull(CAST(txtmd AS STRING), "")
            )) hash_id,
            *,
            DATE(proc_date) start_date
        FROM temp
    ),
    main_2 AS (
        SELECT *,
            LAG(hash_id, 1) OVER (PARTITION BY _bic_zc_mchlv1 ORDER BY proc_date) hash_id_prev
        FROM main
    ),
    main_3 AS (
        SELECT * EXCEPT(hash_id_prev)
        FROM main_2
        WHERE (hash_id != hash_id_prev OR hash_id_prev IS NULL)
    ),
    main_4 AS (
        SELECT *,
            LEAD(DATE(proc_date)) OVER (PARTITION BY _bic_zc_mchlv1 ORDER BY proc_date ASC) end_date
        FROM main_3
    )
    SELECT
        * EXCEPT(end_date),
        IFNULL(date_add(end_date, -1), CAST('2400-01-01' AS DATE)) end_date
    FROM main_4
""")

In [0]:

# Step 1 of the history load: close superseded versions in the silver table.
# Source = the open rows of the view (end_date = 2400-01-01).
# A silver row is updated when it has the same key, is itself still open, and
# carries a different hash_id than the incoming open row; its end_date becomes
# the day before the incoming row's start_date.
# This runs before the insert cell so that the replaced version is closed first.
spark.sql(f"""
	MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_zc_mchlv1 inf_tbl
	USING (
		SELECT _bic_zc_mchlv1,hash_id, start_date FROM sap_zc_mchlv1
			WHERE end_date =CAST('2400-01-01' as DATE) 
	) tmp_tbl
	ON inf_tbl._bic_zc_mchlv1 = tmp_tbl._bic_zc_mchlv1 AND inf_tbl.hash_id != tmp_tbl.hash_id AND inf_tbl.end_date = CAST('2400-01-01' as DATE)
	WHEN MATCHED THEN UPDATE SET inf_tbl.end_date = date_add(tmp_tbl.start_date,-1) """)

In [0]:
# Step 2 of the history load: insert every view row that has no open
# counterpart in the silver table.
# A view row counts as already present when an open silver row
# (end_date = 2400-01-01) has the same key and the same hash_id.
# The key is compared as well as the hash because hash_id is computed over an
# undelimited concatenation of key and texts: two different keys can produce the
# same hash, and matching on hash alone would then silently skip the insert.
# `<=>` is null-safe equality, so NULL keys still match each other exactly as
# they do through the hash (which treats NULL as "").
# INSERT * relies on the view's columns lining up with the table's columns.
spark.sql(f"""
    MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_zc_mchlv1 inf_tbl
    USING sap_zc_mchlv1 tmp_tbl
    ON inf_tbl._bic_zc_mchlv1 <=> tmp_tbl._bic_zc_mchlv1
        AND inf_tbl.hash_id = tmp_tbl.hash_id
        AND inf_tbl.end_date = CAST('2400-01-01' AS DATE)
    WHEN NOT MATCHED THEN INSERT *
""")