In [0]:
# Notebook parameter: declare the "proc_date" text widget (empty default) and read
# its current value. The value is passed unchanged to
# create_temp_view_with_clean_columns further down.
# NOTE(review): the expected date format is not visible in this notebook — confirm.
dbutils.widgets.text("proc_date", "")
proc_date = dbutils.widgets.get("proc_date")

In [0]:
%run ../../../utils/common

In [0]:
# Notebook parameter: declare the "environment" text widget (empty default, empty
# label) and read its current value. It is used as the key into `settings` when
# resolving the catalog name.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Resolve the catalog name configured for the selected environment.
# `settings` is not defined in this notebook; presumably it is provided by the
# `%run ../../../utils/common` cell — TODO confirm.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    # The "environment" widget defaults to "", so an unset or mistyped value would
    # otherwise surface as an opaque `KeyError: ''`. Re-raise the same exception
    # type with a message that says what to fix.
    raise KeyError(
        f"No 'catalog_name' configured for environment {environment!r}; "
        "set the 'environment' widget to a key that exists in settings."
    ) from exc

In [0]:
# Create the target table {catalog_name}.udp_wcm_silver_sap_bw.sap_vccmdm36 if it
# does not exist yet (no-op when it already exists).
# - hash_id is the column the MERGE at the end of this notebook joins on.
# - The remaining columns mirror the output of the `sap_vccmdm36` temp view.
# - The table properties set Delta's autoOptimize.optimizeWrite and
#   autoOptimize.autoCompact to TRUE.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_vccmdm36 (
	hash_id BIGINT,
  recordmode STRING,
  knumh STRING,
  kappl STRING,
  kschl STRING,
  lifnr STRING,
  matnr STRING,
  ekorg STRING,
  esokz STRING,
  bstme STRING,
  datbi DATE,
  datab DATE,
  proc_date DATE
	)
	TBLPROPERTIES 
	(
		DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE = TRUE,
		DELTA.AUTOOPTIMIZE.AUTOCOMPACT = TRUE
	)
""")

In [0]:
# Stage the bronze source for this run. The helper is not defined in this notebook
# (presumably it comes from utils/common — TODO confirm). It receives the Spark
# session, the catalog, the bronze schema and table names, the proc_date widget
# value, and the name of the temp view to produce. The next cell reads from
# `temp_sap_vccmdm36`, so the helper is expected to register that view.
# NOTE(review): what "clean columns" means and how proc_date filters the source are
# not visible here — check the helper.
create_temp_view_with_clean_columns(
    spark,
    catalog_name,
    'udp_wcm_bronze_sap_bw',
    'sap_bw_wcm_vccmdm36',
    proc_date,
    'temp_sap_vccmdm36'
)

In [0]:
# Build the temp view `sap_vccmdm36`, the source of the MERGE into the silver table.
# Steps, innermost first:
#   1. Innermost subquery: keep one row per (reqtsn, datapakid, record), preferring
#      the highest proc_date.
#   2. CTE `temp`: normalise values.
#        - Empty strings become NULL for every business column.
#        - Leading '0' characters are stripped from knumh, lifnr and matnr
#          (two-argument LTRIM(trimStr, str) form). A value made only of zeros
#          becomes '' rather than NULL, because the '' check runs before the trim.
#        - datbi / datab are parsed with the pattern yyyyMMdd; '' and the literal
#          '20183112' are mapped to NULL instead of being parsed.
#          NOTE(review): '20183112' is not a valid yyyyMMdd value (month 31);
#          presumably a known bad source value — confirm.
#   3. CTE `main`: hash_id = FARM_FINGERPRINT of kappl, kschl, lifnr, matnr, ekorg,
#      esokz, bstme and datbi (NULL replaced by ''), concatenated with an empty
#      separator. recordmode, knumh, datab and proc_date are not part of the key.
#      NOTE(review): FARM_FINGERPRINT is not defined in this notebook; presumably it
#      is registered elsewhere (e.g. by utils/common) — confirm.
#   4. CTE `main_2` + final SELECT: keep one row per hash_id (the one with the
#      greatest reqtsn, then datapakid, then record) and drop rn, reqtsn, datapakid
#      and record, leaving the same column set as the silver table.
# The SQL text contains no placeholders, so the f-string prefix has no effect here.
spark.sql(f"""
CREATE OR REPLACE TEMP VIEW sap_vccmdm36 AS
WITH temp AS (
    SELECT 
        reqtsn,
        datapakid,
        record,
        recordmode,
        CASE WHEN knumh = '' THEN NULL ELSE LTRIM('0',knumh) END AS knumh,
        CASE WHEN kappl = '' THEN NULL ELSE kappl END AS kappl,
        CASE WHEN kschl = '' THEN NULL ELSE kschl END AS kschl,
        CASE WHEN lifnr = '' THEN NULL ELSE LTRIM('0',lifnr) END AS lifnr,
        CASE WHEN matnr = '' THEN NULL ELSE LTRIM('0',matnr) END AS matnr,
        CASE WHEN ekorg = '' THEN NULL ELSE ekorg END AS ekorg,
        CASE WHEN esokz = '' THEN NULL ELSE esokz END AS esokz,
        CASE WHEN bstme = '' THEN NULL ELSE bstme END AS bstme,
        CASE 
            WHEN datbi = '' OR datbi = '20183112' THEN NULL 
            ELSE TO_DATE(datbi, 'yyyyMMdd') 
        END AS datbi,
        CASE 
            WHEN datab = '' OR datab = '20183112' THEN NULL 
            ELSE TO_DATE(datab, 'yyyyMMdd') 
        END AS datab,
        proc_date
    FROM (
        SELECT * EXCEPT(rn) FROM (
            SELECT *, ROW_NUMBER() OVER (PARTITION BY reqtsn, datapakid, record ORDER BY proc_date DESC) AS rn 
            FROM temp_sap_vccmdm36
        ) a WHERE rn = 1
    ) a
),
main AS (
    SELECT 
        FARM_FINGERPRINT(CONCAT_WS('',
            COALESCE(CAST(kappl AS STRING), ''),
            COALESCE(CAST(kschl AS STRING), ''),
            COALESCE(CAST(lifnr AS STRING), ''),
            COALESCE(CAST(matnr AS STRING), ''),
            COALESCE(CAST(ekorg AS STRING), ''),
            COALESCE(CAST(esokz AS STRING), ''),
            COALESCE(CAST(bstme AS STRING), ''),
            COALESCE(CAST(datbi AS STRING), '')
        )) AS hash_id,
        *
    FROM temp
),
main_2 AS (
    SELECT *, 
           ROW_NUMBER() OVER (PARTITION BY hash_id ORDER BY reqtsn DESC, datapakid DESC, record DESC) AS rn
    FROM main
)
SELECT * EXCEPT(rn, reqtsn, datapakid, record)
FROM main_2
WHERE rn = 1
""")


In [0]:
# Upsert the `sap_vccmdm36` temp view into the silver table, keyed on hash_id:
# rows whose hash_id already exists are overwritten with all source columns
# (UPDATE SET *), and rows with a new hash_id are inserted (INSERT *).
# The view keeps only rn = 1 per hash_id, so the source holds at most one row per
# key and a target row cannot match more than one source row.
spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_vccmdm36 main_tbl
USING sap_vccmdm36 temp_tbl
ON main_tbl.hash_id = temp_tbl.hash_id 
WHEN MATCHED THEN UPDATE SET *
WHEN NOT MATCHED THEN INSERT *;
""")