In [0]:
# Notebook parameter: register the "proc_date" text widget (empty default) and read
# its current value. The value is passed unchanged to
# create_temp_view_with_clean_columns in a later cell.
dbutils.widgets.text("proc_date", "")
proc_date = dbutils.widgets.get("proc_date")

In [0]:
%run ../../../utils/common

In [0]:
# Notebook parameter: register the "environment" text widget (empty default) and read
# its current value. It is used as the lookup key into `settings` to pick the catalog.
dbutils.widgets.text("environment", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Resolve the catalog for the selected environment.
# `settings` is not defined in this notebook -- presumably it is provided by the
# %run of utils/common; confirm there.
# NOTE(review): when the widget is left at its empty default this looks up
# settings[''] -- presumably a KeyError if `settings` is a plain dict.
catalog_name = settings[environment]['catalog_name']

In [0]:
# Stage the bronze source for this run as the temp view 'temp_sap_vccmdm37', which
# the view-building cell below reads from.
# The helper is not defined in this notebook (presumably from utils/common). Judging
# by its name and arguments it reads
# {catalog_name}.udp_wcm_bronze_sap_bw.sap_bw_wcm_vccmdm37 for proc_date and
# normalises the column names -- TODO confirm against the helper's definition.
create_temp_view_with_clean_columns(
    spark,
    catalog_name,
    'udp_wcm_bronze_sap_bw',
    'sap_bw_wcm_vccmdm37',
    proc_date,
    'temp_sap_vccmdm37'
)

In [0]:
# Create the silver target table on first run; IF NOT EXISTS makes this a no-op
# when the table is already there.
# Columns: hash_id (the key the final MERGE matches on), the SAP fields as STRING,
# the datbi/datab dates, and proc_date.
# The table properties set the Delta auto-optimize options named optimizeWrite and
# autoCompact to 'true'.
spark.sql(f'''
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_vccmdm37
(
  hash_id BIGINT,
  recordmode STRING,
  knumh STRING,
  kappl STRING,
  kschl STRING,
  lifnr STRING,
  matnr STRING,
  werks STRING,
  ekorg STRING,
  esokz STRING,
  bstme STRING,
  datbi DATE,
  datab DATE,
  proc_date DATE
) 
TBLPROPERTIES (
    'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'true',
    'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'true'
)''')

In [0]:
# Build the staged view that feeds the MERGE into the silver table.
#   temp   : keep one row per (reqtsn, datapakid, record) -- the one with the latest
#            proc_date -- then turn empty strings into NULL, strip leading '0'
#            characters from knumh/lifnr/matnr (two-argument LTRIM: trim string
#            first, source second) and parse datbi/datab as yyyyMMdd dates.
#   main   : add hash_id, a fingerprint over kappl, kschl, lifnr, matnr, werks,
#            ekorg, esokz, bstme and datbi (NULLs are hashed as empty strings).
#   main_2 : rank rows per hash_id, highest (reqtsn, datapakid, record) first.
# The final SELECT keeps the top-ranked row per hash_id and drops the helper
# columns rn, reqtsn, datapakid and record.
#
# '20183112' is not a valid yyyyMMdd value (month 31), so it is mapped to NULL
# before to_date. Each date column is guarded on its OWN value: the datab guard
# tests datab (it used to test datbi, which NULLed a valid datab whenever datbi
# held the bad value and let a bad datab through to to_date unguarded).
#
# NOTE(review): FARM_FINGERPRINT is not defined in this notebook -- presumably it
# is registered by utils/common; confirm.
# NOTE(review): the hash inputs are concatenated without a separator, so e.g.
# ('A', 'BC') and ('AB', 'C') produce the same input. Changing this would re-key
# rows already in the target table, so it is deliberately left as is.
spark.sql(f'''
CREATE OR REPLACE TEMP VIEW view_sap_vccmdm37 
AS
WITH temp AS (
SELECT  reqtsn
        ,datapakid
        ,record
        ,recordmode
        ,IF(knumh = '',NULL,LTRIM('0',knumh)) as knumh
        ,IF(kappl = '',NULL,kappl) as kappl
        ,IF(kschl = '',NULL,kschl) as kschl
        ,IF(lifnr = '',NULL,LTRIM('0',lifnr)) as lifnr
        ,IF(matnr = '',NULL,LTRIM('0',matnr) ) as matnr
        ,IF(werks = '',NULL,werks)  as werks
        ,IF(ekorg = '',NULL,ekorg ) as ekorg
        ,IF(esokz = '',NULL,esokz) as esokz
        ,IF(bstme = '',NULL,bstme) as bstme
        ,to_date(IF(datbi = '',NULL,IF(datbi = '20183112',NULL,datbi) ),'yyyyMMdd') as datbi
        ,to_date(IF(datab = '',NULL,IF(datab = '20183112',NULL,datab) ),'yyyyMMdd') as datab
        ,proc_date
         FROM (
                    SELECT * EXCEPT(rn) FROM (
                        SELECT *, ROW_NUMBER() OVER (PARTITION BY reqtsn, datapakid, record ORDER BY proc_date DESC) rn FROM  temp_sap_vccmdm37  
                    ) a WHERE rn = 1 
                ) a
),
main AS (
SELECT FARM_FINGERPRINT(CONCAT(
            IFNULL(CAST(kappl AS STRING), ""),
            IFNULL(CAST(kschl AS STRING), ""),
            IFNULL(CAST(lifnr AS STRING), ""),
            IFNULL(CAST(matnr AS STRING), ""),
            IFNULL(CAST(werks AS STRING), ""),
            IFNULL(CAST(ekorg AS STRING), ""),
            IFNULL(CAST(esokz AS STRING), ""),
            IFNULL(CAST(bstme AS STRING), ""),
            IFNULL(CAST(datbi AS STRING), "")
        )) as hash_id,
        * FROM temp
),
main_2 AS (
    SELECT * 
    , ROW_NUMBER() OVER (PARTITION BY hash_id ORDER BY reqtsn DESC,datapakid DESC,record  DESC) rn 
        FROM main
)
SELECT * EXCEPT(rn,reqtsn,datapakid,record) FROM main_2 WHERE rn = 1;
''')

In [0]:
# Derive the column lists used by the MERGE statement from the staged view's schema.
exclude_columns = ["hash_id"]  # Columns to exclude from update
# Lower-cased once so the exclusion check below is case-insensitive.
_excluded_lower = {name.lower() for name in exclude_columns}

source_columns = spark.table("view_sap_vccmdm37").columns

# INSERT clause: every view column, with values taken from the source alias.
insert_columns = ", ".join(source_columns)
insert_values = ", ".join("temp_tbl." + name for name in source_columns)

# UPDATE clause: every view column except the excluded ones.
update_columns = [
    f"main_tbl.{name} = temp_tbl.{name}"
    for name in source_columns
    if name.lower() not in _excluded_lower
]
update_set = ",\n    ".join(update_columns)

In [0]:
# Upsert the staged view into the silver table, matching rows on hash_id.
# Matched rows get the assignments in update_set applied; unmatched rows are
# inserted using insert_columns / insert_values. Those three strings are built in
# the previous cell from the view's column list.
spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_vccmdm37 main_tbl
USING view_sap_vccmdm37 temp_tbl
ON main_tbl.hash_id = temp_tbl.hash_id
WHEN MATCHED THEN UPDATE SET {update_set}
WHEN NOT MATCHED THEN INSERT ({insert_columns}) VALUES ({insert_values})

""")