In [0]:
# Declare the "proc_date" text widget (default: empty string) and read its current value.
# proc_date is later passed to create_temp_view_with_clean_columns when the bronze data is staged.
dbutils.widgets.text("proc_date", "")
proc_date = dbutils.widgets.get("proc_date")

In [0]:
%run ../../../utils/common

In [0]:
# Declare the "environment" text widget (empty default, empty label) and read its current value.
# The value is used as the lookup key into `settings` when the catalog name is resolved.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Look up the catalog name configured for the selected environment.
# `settings` is not defined in this notebook; presumably it is provided by the
# `%run ../../../utils/common` cell -- TODO confirm.
# NOTE(review): the widget default is "", so assuming `settings` is a plain dict,
# running without an explicit environment fails here with a KeyError.
catalog_name = settings[environment]['catalog_name']

In [0]:
# Stage the bronze data as the temp view `temp_sap_vcdmmpo03`, which the
# CREATE OR REPLACE TEMP VIEW statement below reads from.
# The helper is not defined in this notebook (presumably it comes from utils/common);
# judging by its name and arguments it exposes
# {catalog_name}.udp_wcm_bronze_sap_bw.sap_bw_wcm_vcdmmpo53 for `proc_date` under the
# given view name with cleaned column names -- TODO confirm against the helper.
# NOTE(review): the source table name ends in "po53" while the temp view and the
# silver table end in "po03" -- confirm this is the intended source and not a typo.
create_temp_view_with_clean_columns(
    spark,
    catalog_name,
    'udp_wcm_bronze_sap_bw',
    'sap_bw_wcm_vcdmmpo53',
    proc_date,
    'temp_sap_vcdmmpo03'
)

In [0]:
# Create the silver target table {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdmmpo03
# if it does not exist yet. Because of IF NOT EXISTS an already existing table is
# left untouched, so edits to this column list are not applied to it automatically.
# The table properties enable Delta optimized writes and auto compaction.
# No PARTITIONED BY clause is declared.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdmmpo03
(
    hash_id BIGINT,
    recordmode STRING,
    rt_promo STRING,
    bic_zc_ebelp DOUBLE,
    bic_zc_ebeln STRING,
    refer_itm DOUBLE,
    comp_code STRING,
    oi_ebeln STRING,
    oi_ebelp BIGINT,
    sched_line BIGINT,
    doctype STRING,
    doc_cat STRING,
    itm_cat STRING,
    item_del STRING,
    ret_itm STRING,
    purch_org STRING,
    pur_group STRING,
    vendor STRING,
    matl_group STRING,
    material STRING,
    plant STRING,
    gr_re_ind STRING,
    final_inv STRING,
    compl_del STRING,
    gr_base_iv STRING,
    eval_r_s STRING,
    po_status STRING,
    pur_reason STRING,
    prod_descr STRING,
    contract STRING,
    cont_item DOUBLE,
    ct_flag STRING,
    requisit STRING,
    supp_plant STRING,
    stor_loc STRING,
    inv_party STRING,
    batch STRING,
    gn_r3_ssy STRING,
    log_sys_be STRING,
    calday DATE,
    calmonth BIGINT,
    calquarter BIGINT,
    calyear BIGINT,
    fiscvarnt STRING,
    fiscper BIGINT,
    fiscyear BIGINT,
    entry_date DATE,
    pstng_date DATE,
    sched_date DATE,
    scl_deldat DATE,
    stat_date DATE,
    val_end DATE,
    val_start DATE,
    upd_date DATE,
    order_curr STRING,
    loc_currcy STRING,
    po_unit STRING,
    base_uom STRING,
    bic_zk_cretim STRING,
    bic_zk_credat DATE,
    po_itemlns DOUBLE,
    sr_povoc DOUBLE,
    sr_povlc DOUBLE,
    sr_poqbu DOUBLE,
    sr_grvoc DOUBLE,
    sr_grvlc DOUBLE,
    sr_grqbu DOUBLE,
    sr_ivvlc DOUBLE,
    sr_ivvoc DOUBLE,
    sr_ivqbu DOUBLE,
    proc_date DATE,
    segment STRING,
    file_creation_ts TIMESTAMP
)
TBLPROPERTIES (
    'delta.autoOptimize.optimizeWrite' = 'true',
    'delta.autoOptimize.autoCompact' = 'true'
)
""")

In [0]:
# Build the staging temp view `sap_vcdmmpo03` from `temp_sap_vcdmmpo03`.
#
# Pipeline inside the statement:
#   1. Innermost sub-query: keep one row per (reqtsn, datapakid, record), the one
#      with the latest proc_date.
#   2. CTE `temp`: type cleaning -- empty strings become NULL, '00000000' dates and
#      '000000' times become NULL, numeric fields are cast, leading zeros are
#      stripped from `segment` and from all-digit `material` values.
#   3. CTE `main`: derive hash_id from oi_ebeln, oi_ebelp and sched_line.
#   4. CTE `main_2` + final SELECT: keep one row per hash_id, the one with the
#      highest (reqtsn, datapakid, record), and drop those technical columns.
#
# Fix: the creation time was parsed with the pattern 'HHmmSS'. In Spark datetime
# patterns 'S' is fraction-of-second and 's' is second-of-minute, so the seconds
# digits were stored as a fraction and the seconds field was always 0. The pattern
# is now 'HHmmss'.
#
# NOTE(review): farm_fingerprint is not a Spark built-in; presumably it is a UDF
#   registered by utils/common -- confirm.
# NOTE(review): the hash input is concatenated without a delimiter, so different
#   key splits can produce the same string (e.g. '12'+'3' vs '1'+'23'). Left as is
#   because changing it would change every existing hash_id.
# NOTE(review): cont_item is cast to BIGINT here while the target column is
#   declared DOUBLE; the INSERT relies on an implicit widening cast.
# NOTE(review): the sr_*voc / sr_*vlc amounts are multiplied by 100 -- presumably a
#   currency-decimal correction for the source system; confirm the business rule.
spark.sql(f"""
CREATE OR REPLACE TEMP VIEW sap_vcdmmpo03 AS
WITH temp AS (
SELECT
        reqtsn,
        datapakid,
        record,
        recordmode,
        rt_promo,
        CAST(_bic_zc_ebelp AS DOUBLE) bic_zc_ebelp,
        IF(_bic_zc_ebeln = '', NULL,_bic_zc_ebeln) bic_zc_ebeln,
        CAST(refer_itm AS DOUBLE) refer_itm,
        IF(segment = '', NULL,LTRIM('0',segment)) segment,
        IF(comp_code = '', NULL,comp_code) comp_code,
        IF(oi_ebeln = '', NULL,oi_ebeln) oi_ebeln,
        CAST(oi_ebelp AS BIGINT) oi_ebelp,
        CAST(sched_line AS BIGINT) sched_line,
        IF(doctype = '', NULL,doctype) doctype,
        IF(doc_cat = '', NULL,doc_cat) doc_cat,
        IF(itm_cat = '', NULL,itm_cat) itm_cat,
        IF(item_del = '', NULL,item_del) item_del,
        IF(ret_itm = '', NULL,ret_itm) ret_itm,
        IF(purch_org = '', NULL,purch_org) purch_org,
        IF(pur_group = '', NULL,pur_group) pur_group,
        IF(vendor = '', NULL,vendor) vendor,
        IF(matl_group = '', NULL,matl_group) matl_group,
        IF(REGEXP_LIKE(material,'^[0-9]+$') ,CAST(CAST(material AS BIGINT) AS STRING),material) material,
        IF(plant = '', NULL,plant) plant,
        IF(gr_re_ind = '', NULL,gr_re_ind) gr_re_ind,
        IF(final_inv = '', NULL,final_inv) final_inv,
        IF(compl_del = '', NULL,compl_del) compl_del,
        IF(gr_base_iv = '', NULL,gr_base_iv) gr_base_iv,
        IF(eval_r_s = '', NULL,eval_r_s) eval_r_s,
        IF(po_status = '', NULL,po_status) po_status,
        IF(pur_reason = '', NULL,pur_reason) pur_reason,
        IF(prod_descr = '', NULL,prod_descr) prod_descr,
        IF(contract = '', NULL,contract) contract,
        CAST(cont_item AS BIGINT) cont_item,
        IF(ct_flag = '', NULL,ct_flag) ct_flag,
        IF(requisit = '', NULL,requisit) requisit,
        IF(supp_plant = '', NULL,supp_plant) supp_plant,
        IF(stor_loc = '', NULL,stor_loc) stor_loc,
        IF(inv_party = '', NULL,inv_party) inv_party,
        IF(batch = '', NULL,batch) batch,
        IF(gn_r3_ssy = '', NULL,gn_r3_ssy) gn_r3_ssy,
        IF(log_sys_be = '', NULL,log_sys_be) log_sys_be,
        IF(calday = '00000000',NULL,TO_DATE(calday,'yyyyMMdd')) AS calday,
        CAST(calmonth AS BIGINT) calmonth,
        CAST(calquarter AS BIGINT) calquarter,
        CAST(calyear AS BIGINT) calyear,
        fiscvarnt,
        CAST(fiscper AS BIGINT) fiscper,
        CAST(fiscyear AS BIGINT) fiscyear,
        IF(entry_date = '00000000',NULL,TO_DATE(entry_date,'yyyyMMdd')) entry_date,
        IF(pstng_date = '00000000',NULL,TO_DATE(pstng_date,'yyyyMMdd')) pstng_date,
        IF(sched_date = '00000000',NULL,TO_DATE(sched_date,'yyyyMMdd')) sched_date,
        IF(scl_deldat = '00000000',NULL,TO_DATE(scl_deldat,'yyyyMMdd')) scl_deldat,
        IF(stat_date = '00000000',NULL,TO_DATE(stat_date,'yyyyMMdd')) stat_date,
        IF(val_end = '00000000',NULL,TO_DATE(val_end,'yyyyMMdd')) val_end,
        IF(val_start = '00000000',NULL,TO_DATE(val_start,'yyyyMMdd')) val_start,
        IF(upd_date = '00000000',NULL,TO_DATE(upd_date,'yyyyMMdd')) upd_date,
        IF(order_curr = '', NULL,order_curr) order_curr,
        IF(loc_currcy = '', NULL,loc_currcy) loc_currcy,
        IF(po_unit = '', NULL,po_unit) po_unit,
        IF(base_uom = '', NULL,base_uom) base_uom,
        IF(_bic_zk_cretim = '000000',NULL,TO_TIMESTAMP(_bic_zk_cretim,'HHmmss')) bic_zk_cretim,
        IF(_bic_zk_credat = '00000000',NULL,TO_DATE(_bic_zk_credat,'yyyyMMdd')) bic_zk_credat,
        po_itemlns,
        sr_povoc * 100 sr_povoc,
        sr_povlc * 100 sr_povlc,
        sr_poqbu,
        sr_grvoc * 100 sr_grvoc,
        sr_grvlc * 100 sr_grvlc,
        sr_grqbu,
        sr_ivvlc * 100 sr_ivvlc,
        sr_ivvoc * 100 sr_ivvoc,
        sr_ivqbu,
        proc_date,
        file_creation_ts
         FROM (
                    SELECT * EXCEPT(rn) FROM (
                        SELECT *, ROW_NUMBER() OVER (PARTITION BY reqtsn, datapakid, record ORDER BY proc_date DESC) rn
                        FROM  temp_sap_vcdmmpo03
                    ) a WHERE rn = 1
                )
),
  main AS (
  SELECT farm_fingerprint(CONCAT(
            IFNULL(CAST(oi_ebeln AS STRING), ""),
            IFNULL(CAST(oi_ebelp AS STRING), ""),
            IFNULL(CAST(sched_line AS STRING), "")
        )) hash_id,
          * FROM temp
  ),
  main_2 AS (
    SELECT *
    , ROW_NUMBER() OVER (PARTITION BY hash_id ORDER BY reqtsn DESC,datapakid DESC,record  DESC) rn
        FROM main
  )
  SELECT
  * EXCEPT(rn,reqtsn,datapakid,record) FROM main_2 WHERE rn = 1;
  """)

In [0]:
# Collect the distinct calday values of the staging view `sap_vcdmmpo03` into one
# string of single-quoted, comma-separated dates (e.g. 'd1','d2'); it is
# interpolated into the IN (...) list of the DELETE statement below.
# The query is a global aggregate, so it returns exactly one row and
# collect()[0][0] is that single string.
# NOTE(review): COLLECT_SET skips NULLs, so staged rows with a NULL calday are not
#   represented in the list; if the view has no non-NULL calday at all the result
#   is the two-character string '' (an empty quoted literal).
calday_query = f"""
SELECT
    CONCAT("'", CONCAT_WS("','", COLLECT_SET(CAST(TO_DATE(calday, 'yyyy-MM-dd') AS STRING))), "'") AS calday_in_str
FROM sap_vcdmmpo03
"""
calday_df = spark.sql(calday_query)
calday_in_str = calday_df.collect()[0][0]
# Printed so the affected dates are visible in the notebook/job output.
print(calday_in_str)

In [0]:
# Take the column names of the staging view `sap_vcdmmpo03` and join them into a
# comma-separated list. The INSERT below uses this list for both its target column
# list and its SELECT list, so columns are matched by name rather than by position.
source_columns = spark.table("sap_vcdmmpo03").columns
insert_columns = ", ".join(source_columns)

In [0]:
# Remove from the silver table every row that is about to be re-loaded: rows whose
# calday is one of the staged dates AND whose hash_id is present in the staging view.
# NOTE(review): a target row with a staged hash_id but a calday outside the list
#   (including NULL) is not matched by this predicate and stays in the table, so the
#   following INSERT would add a second row for that hash_id.
# NOTE(review): this DELETE and the following INSERT are separate statements, so the
#   pair is not atomic.
spark.sql(f"""
DELETE FROM {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdmmpo03 
WHERE calday IN ({calday_in_str}) AND hash_id IN (select  hash_id from sap_vcdmmpo03)
""")

In [0]:
# Append all rows of the staging view `sap_vcdmmpo03` to the silver table.
# The same explicit column list is used on both sides, so values are matched to
# target columns by name.
spark.sql(f"""
INSERT INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdmmpo03 ({insert_columns})  
SELECT {insert_columns} FROM sap_vcdmmpo03;
""")