In [0]:
# Notebook parameter: declare the "proc_date" text widget (empty default) and
# read back whatever value the caller/job supplied for this run.
proc_date_widget = "proc_date"
dbutils.widgets.text(proc_date_widget, "")
proc_date = dbutils.widgets.get(proc_date_widget)

In [0]:
%run ../../../utils/common

In [0]:
# Notebook parameter: declare the "environment" text widget (empty default,
# empty label) and read the value supplied for this run. The value is used
# below as a key into `settings`.
environment_widget = "environment"
dbutils.widgets.text(environment_widget, "", "")
environment = dbutils.widgets.get(environment_widget)

In [0]:
# Resolve the catalog name for the selected environment.
# `settings` is not defined in this notebook; presumably it is provided by the
# `%run ../../../utils/common` cell above - confirm.
# The "environment" widget defaults to an empty string, so a missing or
# mistyped value would otherwise surface as an opaque KeyError(''). Re-raise
# the same exception type with a message that names the offending value.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    raise KeyError(
        f"No 'catalog_name' configured for environment {environment!r}; "
        "check the 'environment' widget value and the shared settings."
    ) from exc

In [0]:
# Stage the bronze source for this run via the shared helper.
# NOTE(review): the helper is defined outside this notebook; the argument roles
# below (schema, table, processing date, view name) are inferred from the
# values passed - confirm against utils/common.
# A later cell reads FROM temp_sap_vccmdm40, so that view must exist after
# this call.
bronze_schema = 'udp_wcm_bronze_sap_bw'
bronze_table = 'sap_bw_wcm_vccmdm40'
staging_view = 'temp_sap_vccmdm40'

create_temp_view_with_clean_columns(
    spark, catalog_name, bronze_schema, bronze_table, proc_date, staging_view
)

In [0]:
# DDL for the silver target table. CREATE TABLE IF NOT EXISTS makes this cell
# safe to re-run: the table is only created on the first execution.
# The table properties switch on Delta optimized writes and auto compaction.
silver_table_ddl = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_vccmdm40
(
  hash_id BIGINT,
  reqtsn STRING,
  datapakid STRING,
  record INT,
  recordmode STRING,
  asort STRING,
  lfdnr STRING,
  locnr STRING,
  kunnr STRING,
  vkorg STRING,
  vtweg STRING,
  spart STRING,
  sonut STRING,
  datab DATE,
  datbi DATE,
  rangf STRING,
  ligrd STRING,
  ernam STRING,
  erzet STRING,
  aenam STRING,
  aedat DATE,
  aezet STRING,
  proc_date DATE,
  file_creation_ts TIMESTAMP
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
"""
spark.sql(silver_table_ddl)

In [0]:
# Build the temp view `sap_vccmdm40`: one row per (reqtsn, datapakid, record)
# from the staged view `temp_sap_vccmdm40`, with string columns passed through
# the catalog's fn_CLEAN_SAP_STRING function and date columns parsed from
# 'yyyyMMdd'.
#
# Changes versus the previous version of this cell:
#   * The inner query no longer re-selects proc_date next to `*`; the staged
#     view already exposes it through `*`, so the extra column only produced a
#     duplicate `proc_date` in the subquery output.
#   * ROW_NUMBER now breaks ties on file_creation_ts DESC, so the surviving row
#     is deterministic when several rows share the same proc_date.
#   * The two nested subqueries have distinct aliases instead of both being `a`.
#   * SQL string literals use single quotes, so they stay literals even when
#     double-quoted identifiers are enabled.
#
# hash_id is intentionally left as-is (computed from the raw, pre-clean
# recordmode/asort/lfdnr values) so it stays comparable with rows already
# stored in the target table.
#
# NOTE(review): erzet is parsed with TO_TIMESTAMP here, while the CREATE TABLE
# cell declares erzet as STRING and aezet is kept as a cleaned string - confirm
# which representation is intended before changing it.

# Fully qualified name of the SAP string-cleaning function in this catalog.
clean_fn = f"{catalog_name}.DEFAULT.fn_CLEAN_SAP_STRING"

spark.sql(f"""
    CREATE OR REPLACE TEMP VIEW sap_vccmdm40 AS
    SELECT
        farm_fingerprint(CONCAT(
            COALESCE(CAST(recordmode AS STRING), ''),
            COALESCE(CAST(asort AS STRING), ''),
            COALESCE(CAST(lfdnr AS STRING), '')
        )) AS hash_id,
        reqtsn,
        datapakid,
        record,
        {clean_fn}(recordmode) AS recordmode,
        {clean_fn}(asort) AS asort,
        {clean_fn}(lfdnr) AS lfdnr,
        {clean_fn}(locnr) AS locnr,
        {clean_fn}(kunnr) AS kunnr,
        {clean_fn}(vkorg) AS vkorg,
        {clean_fn}(vtweg) AS vtweg,
        {clean_fn}(spart) AS spart,
        {clean_fn}(sonut) AS sonut,
        TO_DATE({clean_fn}(datab), 'yyyyMMdd') AS datab,
        TO_DATE({clean_fn}(datbi), 'yyyyMMdd') AS datbi,
        {clean_fn}(rangf) AS rangf,
        {clean_fn}(ligrd) AS ligrd,
        {clean_fn}(ernam) AS ernam,
        TO_TIMESTAMP({clean_fn}(erzet), 'HHmmss') AS erzet,
        {clean_fn}(aenam) AS aenam,
        TO_DATE({clean_fn}(aedat), 'yyyyMMdd') AS aedat,
        {clean_fn}(aezet) AS aezet,
        proc_date,
        file_creation_ts
    FROM (
        SELECT * EXCEPT(rn)
        FROM (
            SELECT
                *,
                ROW_NUMBER() OVER (
                    PARTITION BY reqtsn, datapakid, record
                    ORDER BY proc_date DESC, file_creation_ts DESC
                ) AS rn
            FROM temp_sap_vccmdm40
        ) ranked
        WHERE rn = 1
    ) deduped
    """)

In [0]:
# Insert-only merge from the temp view into the silver table: a source row is
# inserted when no target row has the same hash_id, reqtsn, datapakid and
# record. Rows that already match are left untouched (there is no
# WHEN MATCHED clause).
merge_into_silver_sql = f"""
    MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_vccmdm40 AS main_tbl
    USING SAP_VCCMDM40 AS temp_tbl
    ON main_tbl.HASH_ID = temp_tbl.HASH_ID
    AND main_tbl.REQTSN = temp_tbl.REQTSN
    AND main_tbl.DATAPAKID = temp_tbl.DATAPAKID
    AND main_tbl.RECORD = temp_tbl.RECORD
    WHEN NOT MATCHED THEN 
    INSERT * """
spark.sql(merge_into_silver_sql)