In [0]:
# Declare a free-text notebook widget named "proc_date" (default: empty string)
# and read its current value. The value is passed unchanged to
# create_temp_view_with_clean_columns further down in this notebook.
dbutils.widgets.text("proc_date", "")
proc_date = dbutils.widgets.get("proc_date")

In [0]:
%run ../../../utils/common

In [0]:
# Declare the "environment" widget (empty default, empty third argument) and
# read its current value. It is used as the key into `settings` to resolve the
# catalog name.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Resolve the catalog for the selected environment; every table reference
# in the SQL below is prefixed with this value.
# NOTE(review): `settings` is not defined in this notebook — presumably it is
# provided by the `%run ../../../utils/common` cell; confirm. If it is a plain
# dict, an empty or unknown `environment` widget value fails here with KeyError.
catalog_name = settings[environment]['catalog_name']

In [0]:
# Expose the bronze source as the temp view `temp_sap_vcdposd13`, which the
# transformation query below reads from.
# NOTE(review): the helper is defined outside this notebook (presumably in
# utils/common); its filtering by `proc_date` and its column-cleaning rules are
# not visible here — confirm before relying on them.
# NOTE(review): the bronze table name is `sap_bw_wcm_vccposd50`, while the temp
# view and the silver table are all named `...vcdposd13`. Verify this is the
# intended source and not a copy-paste leftover from another notebook.
create_temp_view_with_clean_columns(
    spark,
    catalog_name,
    'udp_wcm_bronze_sap_bw',
    'sap_bw_wcm_vccposd50',
    proc_date,
    'temp_sap_vcdposd13'
)

In [0]:
# Create the silver table `udp_wcm_silver_sap_bw.sap_vcdposd13` in the resolved
# catalog on first run; `IF NOT EXISTS` makes this a no-op on later runs, so
# editing the column list here does not alter an already-created table.
#
# The column names mirror the output of the `sap_vcdposd13` temp view built in
# the next cell, which the final MERGE inserts with `INSERT *`.
#
# NOTE(review): `rt_daytcl` / `rt_salhour` are declared INT here, but the temp
# view casts them to BIGINT — confirm the implicit narrowing on insert is intended.
# NOTE(review): all measure columns are FLOAT, and the view multiplies several
# of them by 100; confirm single precision is acceptable for these amounts.
#
# The two table properties switch on optimized writes and auto compaction.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdposd13
( 
hash_id BIGINT,
reqtsn STRING,
datapakid STRING,
record INT,
recordmode STRING,
calday DATE,
weekday1 STRING,
calweek STRING,
calmonth STRING,
calyear STRING,
calmonth2 STRING,
calquart1 STRING,
halfyear1 STRING,
rt_locatio STRING,
consumer STRING,
rpa_rrc STRING,
rpa_rrg STRING,
rpa_trc STRING,
rpa_drc STRING,
rpa_drg STRING,
rpa_did STRING,
rpa_dqu STRING,
base_uom STRING,
sales_unit STRING,
doc_currcy STRING,
loc_currcy STRING,
rpa_tix STRING,
bic_zpos_tnr STRING,
material STRING,
segment STRING,
plant STRING,
rt_promo STRING,
rpa_tnr STRING,
rpa_ttc STRING,
rt_daytcl INT,
rt_salhour INT,
matl_group STRING,
rpa_pca STRING,
rpa_tcd STRING,
rpa_dep STRING,
rpa_wid STRING,
rt_offer STRING,
rpa_dtc STRING,
rpa_dtg STRING,
rpa_rqtyb FLOAT,
rpa_rqtyv FLOAT,
rpa_retsal FLOAT,
rpa_prmqty FLOAT,
base_qty FLOAT,
rpa_cnr FLOAT,
rpa_cst FLOAT,
rpa_prmsal FLOAT,
rpa_rlq FLOAT,
rpa_slc FLOAT,
rpa_psl FLOAT,
rpa_rsl FLOAT,
rpa_cco FLOAT,
rpa_rea FLOAT,
rpa_tam FLOAT,
rpa_tat FLOAT,
proc_date DATE,
file_creation_ts TIMESTAMP
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'true',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'true'
)
""")

In [0]:
# Build the `sap_vcdposd13` temp view that feeds the MERGE into the silver table.
#
# Steps performed by the query:
#   1. De-duplicate `temp_sap_vcdposd13` on (reqtsn, datapakid, record), keeping
#      the row with the latest proc_date. file_creation_ts is a secondary sort
#      key so the surviving row is deterministic when proc_date ties.
#   2. Compute `hash_id` from the raw (pre-normalization) dimension columns.
#      The hash expression is intentionally left exactly as it was: changing the
#      column list, order or NULL handling would change hash_id for rows that
#      are already stored and break the MERGE match.
#      NOTE(review): `farm_fingerprint` is not defined in this notebook —
#      presumably a UDF registered by utils/common; confirm.
#      NOTE(review): fields are concatenated without a delimiter, so different
#      field splits can produce the same input string.
#   3. Normalize dimensions: empty strings become NULL via NULLIF(col, ''),
#      `calday` '00000000' becomes NULL and is otherwise parsed as yyyyMMdd,
#      and leading '0' characters are stripped from material / segment.
#   4. Multiply the listed amount columns by 100.
#
# Fix: `rt_locatio` was previously nulled when `rpa_pca` was empty (copy-paste
# slip), so valid locations were dropped and empty ones were never normalized.
# It is now tested against its own value. NULLIF names each column once,
# which rules out that class of mismatch.
#
# Plain string (no f-prefix): the statement has no Python placeholders.
spark.sql("""
CREATE OR REPLACE TEMP VIEW sap_vcdposd13 AS
    SELECT
        farm_fingerprint(CONCAT(
            IFNULL(CAST(recordmode AS STRING), ""),
            IFNULL(CAST(calday AS STRING), ""),
            IFNULL(CAST(weekday1 AS STRING), ""),
            IFNULL(CAST(calweek AS STRING), ""),
            IFNULL(CAST(calmonth AS STRING), ""),
            IFNULL(CAST(calyear AS STRING), ""),
            IFNULL(CAST(calmonth2 AS STRING), ""),
            IFNULL(CAST(calquart1 AS STRING), ""),
            IFNULL(CAST(halfyear1 AS STRING), ""),
            IFNULL(CAST(base_uom AS STRING), ""),
            IFNULL(CAST(sales_unit AS STRING), ""),
            IFNULL(CAST(doc_currcy AS STRING), ""),
            IFNULL(CAST(loc_currcy AS STRING), ""),
            IFNULL(CAST(rpa_tix AS STRING), ""),
            IFNULL(CAST(_bic_zpos_tnr AS STRING), ""),
            IFNULL(CAST(material AS STRING), ""),
            IFNULL(CAST(segment AS STRING), ""),
            IFNULL(CAST(plant AS STRING), ""),
            IFNULL(CAST(rt_locatio AS STRING), ""),
            IFNULL(CAST(consumer AS STRING), ""),
            IFNULL(CAST(rt_promo AS STRING), ""),
            IFNULL(CAST(rpa_tnr AS STRING), ""),
            IFNULL(CAST(rpa_rrc AS STRING), ""),
            IFNULL(CAST(rpa_ttc AS STRING), ""),
            IFNULL(CAST(rpa_rrg AS STRING), ""),
            IFNULL(CAST(rpa_trc AS STRING), ""),
            IFNULL(CAST(rt_daytcl AS STRING), ""),
            IFNULL(CAST(rt_salhour AS STRING), ""),
            IFNULL(CAST(matl_group AS STRING), ""),
            IFNULL(CAST(rpa_pca AS STRING), ""),
            IFNULL(CAST(rpa_tcd AS STRING), ""),
            IFNULL(CAST(rpa_dep AS STRING), ""),
            IFNULL(CAST(rpa_wid AS STRING), ""),
            IFNULL(CAST(rt_offer AS STRING), ""),
            IFNULL(CAST(rpa_drc AS STRING), ""),
            IFNULL(CAST(rpa_drg AS STRING), ""),
            IFNULL(CAST(rpa_did AS STRING), ""),
            IFNULL(CAST(rpa_dtc AS STRING), ""),
            IFNULL(CAST(rpa_dtg AS STRING), ""),
            IFNULL(CAST(rpa_dqu AS STRING), "")
        )) AS hash_id,
        reqtsn,
        datapakid,
        record,
        recordmode,
        CASE WHEN calday = '00000000' THEN NULL ELSE TO_DATE(calday, 'yyyyMMdd') END AS calday,
        weekday1,
        calweek,
        calmonth,
        calyear,
        calmonth2,
        calquart1,
        halfyear1,
        NULLIF(rt_locatio, '') AS rt_locatio,
        NULLIF(consumer, '') AS consumer,
        NULLIF(rpa_rrc, '') AS rpa_rrc,
        NULLIF(rpa_rrg, '') AS rpa_rrg,
        NULLIF(rpa_trc, '') AS rpa_trc,
        NULLIF(rpa_drc, '') AS rpa_drc,
        NULLIF(rpa_drg, '') AS rpa_drg,
        NULLIF(rpa_did, '') AS rpa_did,
        NULLIF(rpa_dqu, '') AS rpa_dqu,
        NULLIF(base_uom, '') AS base_uom,
        NULLIF(sales_unit, '') AS sales_unit,
        NULLIF(doc_currcy, '') AS doc_currcy,
        NULLIF(loc_currcy, '') AS loc_currcy,
        NULLIF(rpa_tix, '') AS rpa_tix,
        NULLIF(_bic_zpos_tnr, '') AS bic_zpos_tnr,
        LTRIM('0', NULLIF(material, '')) AS material,
        LTRIM('0', NULLIF(segment, '')) AS segment,
        NULLIF(plant, '') AS plant,
        NULLIF(rt_promo, '') AS rt_promo,
        NULLIF(rpa_tnr, '') AS rpa_tnr,
        NULLIF(rpa_ttc, '') AS rpa_ttc,
        CAST(rt_daytcl AS BIGINT) AS rt_daytcl,
        CAST(rt_salhour AS BIGINT) AS rt_salhour,
        NULLIF(matl_group, '') AS matl_group,
        NULLIF(rpa_pca, '') AS rpa_pca,
        NULLIF(rpa_tcd, '') AS rpa_tcd,
        NULLIF(rpa_dep, '') AS rpa_dep,
        NULLIF(rpa_wid, '') AS rpa_wid,
        NULLIF(rt_offer, '') AS rt_offer,
        NULLIF(rpa_dtc, '') AS rpa_dtc,
        NULLIF(rpa_dtg, '') AS rpa_dtg,
        rpa_rqtyb,
        rpa_rqtyv,
        rpa_retsal * 100 AS rpa_retsal,
        rpa_prmqty,
        base_qty,
        rpa_cnr,
        rpa_cst,
        rpa_prmsal * 100 AS rpa_prmsal,
        rpa_rlq,
        rpa_slc * 100 AS rpa_slc,
        rpa_psl * 100 AS rpa_psl,
        rpa_rsl * 100 AS rpa_rsl,
        rpa_cco,
        rpa_rea * 100 AS rpa_rea,
        rpa_tam * 100 AS rpa_tam,
        rpa_tat * 100 AS rpa_tat,
        proc_date AS proc_date,
        file_creation_ts
    FROM (
        SELECT * EXCEPT(rn) FROM (
            SELECT *, ROW_NUMBER() OVER (
                PARTITION BY reqtsn, datapakid, record
                ORDER BY proc_date DESC, file_creation_ts DESC
            ) rn
            FROM temp_sap_vcdposd13
        ) a WHERE rn = 1
    ) a
""")


In [0]:
# Append-only load into the silver table: rows from the `sap_vcdposd13` temp
# view are inserted only when no existing row has the same
# (hash_id, reqtsn, datapakid, record). There is no WHEN MATCHED clause, so
# rows that already exist are never updated or deleted by this statement.
# NOTE(review): the match uses plain `=`, which never matches NULLs; a source
# row with a NULL reqtsn / datapakid / record would be re-inserted on every
# run. Confirm these keys are always populated upstream.
spark.sql(f"""
    MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdposd13 main_tbl 
    USING sap_vcdposd13 temp_tbl 
    ON main_tbl.hash_id = temp_tbl.hash_id 
    AND main_tbl.reqtsn = temp_tbl.reqtsn 
    AND main_tbl.datapakid = temp_tbl.datapakid
    AND main_tbl.record = temp_tbl.record
    WHEN NOT MATCHED THEN INSERT *;
""")