In [0]:
# Declare the "proc_date" text widget (default: empty string) and read its current
# value. proc_date is later passed to create_temp_view_with_clean_columns.
# NOTE(review): the expected format of proc_date is not visible in this notebook.
dbutils.widgets.text("proc_date", "")
proc_date = dbutils.widgets.get("proc_date")

In [0]:
%run ../../../utils/common

In [0]:
# Declare the "environment" text widget (empty default, empty label) and read its
# current value. The value is used below as the key into `settings`.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Look up the catalog name configured for the selected environment.
# `settings` is not defined in this notebook; presumably it is provided by the
# %run of utils/common above - confirm.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    # The "environment" widget defaults to "", so an unset or misspelled value
    # would otherwise surface as a bare KeyError with no hint about the cause.
    raise KeyError(
        f"No 'catalog_name' configured for environment {environment!r}; "
        "check the 'environment' widget value"
    ) from exc

In [0]:
# Stage the source data as the temp view `temp_sap_vcdsdbl02`, which the cells
# below read from.
# NOTE(review): the helper is not defined in this notebook (presumably it comes
# from utils/common). Judging by the argument names it reads
# <catalog>.udp_wcm_bronze_sap_bw.sap_bw_wcm_vccsdbl52 for the given proc_date -
# confirm against the helper's definition.
create_temp_view_with_clean_columns(
    spark, catalog_name,
    'udp_wcm_bronze_sap_bw', 'sap_bw_wcm_vccsdbl52',
    proc_date, 'temp_sap_vcdsdbl02',
)

In [0]:
# Create the target table <catalog_name>.udp_wcm_silver_sap_bw.sap_vcdsdbl02 if it
# does not exist yet (no-op when the table is already there, so editing this DDL
# does not alter an existing table).
#
# hash_id is the key used by the MERGE in the last cell; it is computed in the
# staging-view cell from bill_num, bill_item and calday.
#
# The two TBLPROPERTIES set the DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE and
# DELTA.AUTOOPTIMIZE.AUTOCOMPACT properties to 'TRUE'.
#
# NOTE(review): salesemply, s_ord_item and refer_itm are INT here, but the staging
# view casts them to BIGINT, so the MERGE has to narrow the values - confirm this
# is intended.
# NOTE(review): the amount/quantity columns are FLOAT (single precision); confirm
# that precision is sufficient for the values stored here.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdsdbl02
( 
  hash_id BIGINT,
  recordmode STRING,
  supp_plant STRING,
  bill_type STRING,
  segment STRING,
  profit_ctr STRING,
  rt_promo STRING,
  co_area STRING,
  bill_num STRING,
  bill_item BIGINT,
  sold_to STRING,
  ship_to STRING,
  billtoprty STRING,
  payer STRING,
  bp_grp STRING,
  comp_code STRING,
  cp_categ STRING,
  distr_chan STRING,
  division STRING,
  salesemply INT,
  salesorg STRING,
  sales_grp STRING,
  sales_off STRING,
  material STRING,
  crm_prod STRING,
  prod_categ STRING,
  matl_group STRING,
  plant STRING,
  bic_zc_stype STRING,
  ship_point STRING,
  deb_cred STRING,
  imodoccat STRING,
  doc_class STRING,
  doc_number STRING,
  s_ord_item INT,
  alloc_nmbr STRING,
  refer_itm INT,
  refer_doc STRING,
  doc_date DATE,
  apo_deldat DATE,
  gn_r3_ssy STRING,
  calday DATE,
  fiscvarnt STRING,
  upd_date DATE,
  stat_curr STRING,
  doc_currcy STRING,
  base_uom STRING,
  tax_amount FLOAT,
  bil_i_cnt FLOAT,
  net_val_s FLOAT,
  netval_inv FLOAT,
  quant_b FLOAT,
  cost_val_s FLOAT,
  cost FLOAT,
  gr_wt_kg FLOAT,
  nt_wt_kg FLOAT,
  volume_cdm FLOAT,
  subtot_1s FLOAT,
  subtot_2s FLOAT,
  subtot_3s FLOAT,
  subtot_4s FLOAT,
  subtot_5s FLOAT,
  subtot_6s FLOAT,
  proc_date DATE,
  file_creation_ts TIMESTAMP
)
TBLPROPERTIES (
 'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
"""
)

In [0]:
# Build the temp view `sap_vcdsdbl02` (the MERGE source) from `temp_sap_vcdsdbl02`.
#
# Steps, in CTE order:
#   1. temp   - keep one row per (reqtsn, datapakid, record), the one with the
#               latest proc_date, then clean the columns:
#                 * '' -> NULL for most string columns (recordmode, bill_type,
#                   segment and material are not nulled);
#                 * all-digit segment / material values are round-tripped through
#                   BIGINT, which drops leading zeros;
#                 * '00000000' dates -> NULL, otherwise parsed as yyyyMMdd;
#                 * tax_amount, net_val_s, netval_inv, cost_val_s, cost and the
#                   subtot_* columns are multiplied by 100 (the reason for the
#                   factor is not visible here - presumably a currency-decimal
#                   adjustment, TODO confirm);
#                 * _bic_zc_stype -> bic_zc_stype, subtot_N_s -> subtot_Ns renames.
#   2. main   - add hash_id = farm_fingerprint(bill_num || bill_item || calday),
#               with NULLs replaced by "".
#   3. main_2 - keep one row per hash_id: the one with the highest
#               (reqtsn, datapakid, record).
# The final SELECT drops the helper columns rn, reqtsn, datapakid and record.
#
# NOTE(review): the hash input is concatenated without a delimiter, so different
# (bill_num, bill_item) pairs can produce the same string (e.g. '12'+'34' and
# '123'+'4'). Left unchanged because altering the key would re-key rows already
# merged into the target table.
# NOTE(review): farm_fingerprint is not defined in this notebook; presumably it is
# registered by utils/common - confirm.
#
# This is deliberately a plain string, not an f-string: the SQL contains no
# placeholders, and an f-prefix would make any literal `{...}` added later (for
# example a regex quantifier) be evaluated as a Python expression.
spark.sql("""
CREATE OR REPLACE TEMP VIEW sap_vcdsdbl02 AS
WITH temp AS (
SELECT 
        reqtsn,
        datapakid,
        record,
        recordmode,
        CASE WHEN supp_plant = '' THEN NULL ELSE supp_plant END AS supp_plant,
        bill_type,
        CASE WHEN REGEXP(segment,'^[0-9]+$') THEN CAST(CAST(segment AS BIGINT) AS STRING) ELSE segment END AS segment,
        CASE WHEN profit_ctr = '' THEN NULL ELSE profit_ctr END AS profit_ctr,
        CASE WHEN rt_promo = '' THEN NULL ELSE rt_promo END AS rt_promo,
        CASE WHEN co_area = ''  THEN NULL ELSE co_area END AS co_area,
        CASE WHEN bill_num = ''  THEN NULL ELSE bill_num END AS bill_num,
        CAST(bill_item AS BIGINT) bill_item,
        CASE WHEN sold_to = ''  THEN NULL ELSE sold_to END AS sold_to,
        CASE WHEN ship_to = ''  THEN NULL ELSE ship_to END AS ship_to,
        CASE WHEN billtoprty = ''  THEN NULL ELSE billtoprty END AS billtoprty,
        CASE WHEN payer = ''  THEN NULL ELSE payer END AS payer,
        CASE WHEN bp_grp = ''  THEN NULL ELSE bp_grp END AS bp_grp,
        CASE WHEN comp_code = ''  THEN NULL ELSE comp_code END AS comp_code,
        CASE WHEN cp_categ = ''  THEN NULL ELSE cp_categ END AS cp_categ,
        CASE WHEN distr_chan = ''  THEN NULL ELSE distr_chan END AS distr_chan,
        CASE WHEN division = ''  THEN NULL ELSE division END AS division,
        CAST(salesemply AS BIGINT) salesemply,
        CASE WHEN salesorg = ''  THEN NULL ELSE salesorg END AS salesorg,
        CASE WHEN sales_grp = ''  THEN NULL ELSE sales_grp END AS sales_grp,
        CASE WHEN sales_off = ''  THEN NULL ELSE sales_off END AS sales_off,
        CASE WHEN REGEXP(material,'^[0-9]+$') THEN CAST(CAST(material AS BIGINT) AS STRING) ELSE material END AS material,
        CASE WHEN crm_prod = ''  THEN NULL ELSE crm_prod END AS crm_prod,
        CASE WHEN prod_categ = ''  THEN NULL ELSE prod_categ END AS prod_categ,
        CASE WHEN matl_group = ''  THEN NULL ELSE matl_group END AS matl_group,
        CASE WHEN plant = ''  THEN NULL ELSE plant END AS plant,
        CASE WHEN _bic_zc_stype = ''  THEN NULL ELSE _bic_zc_stype END AS bic_zc_stype,
        CASE WHEN ship_point = ''  THEN NULL ELSE ship_point END AS ship_point,
        CASE WHEN deb_cred = ''  THEN NULL ELSE deb_cred END AS deb_cred,
        CASE WHEN imodoccat = ''  THEN NULL ELSE imodoccat END AS imodoccat,
        CASE WHEN doc_class = ''  THEN NULL ELSE doc_class END AS doc_class,
        CASE WHEN doc_number = ''  THEN NULL ELSE doc_number END AS doc_number,
        CAST(s_ord_item AS BIGINT) s_ord_item,
        CASE WHEN alloc_nmbr = ''  THEN NULL ELSE alloc_nmbr END AS alloc_nmbr,
        CAST(refer_itm AS BIGINT) refer_itm,
        CASE WHEN refer_doc = ''  THEN NULL ELSE refer_doc END AS refer_doc,
        CASE WHEN doc_date = '00000000'  THEN NULL ELSE TO_DATE(doc_date,'yyyyMMdd') END AS doc_date,
        CASE WHEN apo_deldat = '00000000'  THEN NULL ELSE TO_DATE(apo_deldat,'yyyyMMdd') END AS apo_deldat,
        CASE WHEN gn_r3_ssy = ''  THEN NULL ELSE gn_r3_ssy END AS gn_r3_ssy,
        CASE WHEN calday = '00000000'  THEN NULL ELSE TO_DATE(calday,'yyyyMMdd') END AS calday,
        CASE WHEN fiscvarnt = ''  THEN NULL ELSE fiscvarnt END AS fiscvarnt,
        CASE WHEN upd_date = '00000000'  THEN NULL ELSE TO_DATE(upd_date, 'yyyyMMdd') END AS upd_date,
        CASE WHEN stat_curr = ''  THEN NULL ELSE stat_curr END AS stat_curr,
        CASE WHEN doc_currcy = ''  THEN NULL ELSE doc_currcy END AS doc_currcy,
        CASE WHEN base_uom = ''  THEN NULL ELSE base_uom END AS base_uom,
        tax_amount * 100 tax_amount,
        bil_i_cnt,
        net_val_s * 100 net_val_s,
        netval_inv * 100 netval_inv,
        quant_b,
        cost_val_s * 100 cost_val_s,
        cost * 100 cost,
        gr_wt_kg,
        nt_wt_kg,
        volume_cdm,
        subtot_1_s * 100 subtot_1s,
        subtot_2_s * 100 subtot_2s,
        subtot_3_s * 100 subtot_3s,
        subtot_4_s * 100 subtot_4s,
        subtot_5_s * 100 subtot_5s,
        subtot_6_s * 100 subtot_6s,
        proc_date,
        file_creation_ts
         FROM (
            SELECT * EXCEPT(rn) FROM (
                SELECT * EXCEPT(proc_date),proc_date AS proc_date, ROW_NUMBER() OVER (PARTITION BY reqtsn, datapakid, record ORDER BY proc_date DESC) rn FROM  temp_sap_vcdsdbl02
            ) a WHERE rn = 1 
        ) a
),
main AS (
SELECT farm_fingerprint(CONCAT(
            IFNULL(CAST(bill_num AS STRING), ""),
            IFNULL(CAST(bill_item AS STRING), ""),
            IFNULL(CAST(calday AS STRING), "")
        )) hash_id,
        * FROM temp
),
main_2 AS (
    SELECT * 
    , ROW_NUMBER() OVER (PARTITION BY hash_id ORDER BY reqtsn DESC,datapakid DESC,record  DESC) rn 
        FROM main
)
SELECT 
* EXCEPT(rn,reqtsn,datapakid,record) FROM main_2 WHERE rn = 1""")

In [0]:
# Upsert the staging view `sap_vcdsdbl02` into
# <catalog_name>.udp_wcm_silver_sap_bw.sap_vcdsdbl02, matching rows on hash_id:
#   * matched     -> every non-key column of the target row is overwritten with
#                    the staging value (unconditionally);
#   * not matched -> the staging row is inserted.
# There is no DELETE clause: recordmode is copied like any other column and is not
# used to filter or remove rows.
# The column lists below must stay in sync with the CREATE TABLE cell and with the
# staging view's output columns.
spark.sql(
    f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdsdbl02 main_tbl
USING sap_vcdsdbl02 temp_tbl
ON main_tbl.hash_id = temp_tbl.hash_id  
WHEN MATCHED THEN 
UPDATE SET 
    main_tbl.recordmode = temp_tbl.recordmode,
    main_tbl.supp_plant = temp_tbl.supp_plant,
    main_tbl.bill_type = temp_tbl.bill_type,
    main_tbl.segment = temp_tbl.segment,
    main_tbl.profit_ctr = temp_tbl.profit_ctr,
    main_tbl.rt_promo = temp_tbl.rt_promo,
    main_tbl.co_area = temp_tbl.co_area,
    main_tbl.bill_num = temp_tbl.bill_num,
    main_tbl.bill_item = temp_tbl.bill_item,
    main_tbl.sold_to = temp_tbl.sold_to,
    main_tbl.ship_to = temp_tbl.ship_to,
    main_tbl.billtoprty = temp_tbl.billtoprty,
    main_tbl.payer = temp_tbl.payer,
    main_tbl.bp_grp = temp_tbl.bp_grp,
    main_tbl.comp_code = temp_tbl.comp_code,
    main_tbl.cp_categ = temp_tbl.cp_categ,
    main_tbl.distr_chan = temp_tbl.distr_chan,
    main_tbl.division = temp_tbl.division,
    main_tbl.salesemply = temp_tbl.salesemply,
    main_tbl.salesorg = temp_tbl.salesorg,
    main_tbl.sales_grp = temp_tbl.sales_grp,
    main_tbl.sales_off = temp_tbl.sales_off,
    main_tbl.material = temp_tbl.material,
    main_tbl.crm_prod = temp_tbl.crm_prod,
    main_tbl.prod_categ = temp_tbl.prod_categ,
    main_tbl.matl_group = temp_tbl.matl_group,
    main_tbl.plant = temp_tbl.plant,
    main_tbl.bic_zc_stype = temp_tbl.bic_zc_stype,
    main_tbl.ship_point = temp_tbl.ship_point,
    main_tbl.deb_cred = temp_tbl.deb_cred,
    main_tbl.imodoccat = temp_tbl.imodoccat,
    main_tbl.doc_class = temp_tbl.doc_class,
    main_tbl.doc_number = temp_tbl.doc_number,
    main_tbl.s_ord_item = temp_tbl.s_ord_item,
    main_tbl.alloc_nmbr = temp_tbl.alloc_nmbr,
    main_tbl.refer_itm = temp_tbl.refer_itm,
    main_tbl.refer_doc = temp_tbl.refer_doc,
    main_tbl.doc_date = temp_tbl.doc_date,
    main_tbl.apo_deldat = temp_tbl.apo_deldat,
    main_tbl.gn_r3_ssy = temp_tbl.gn_r3_ssy,
    main_tbl.calday = temp_tbl.calday,
    main_tbl.fiscvarnt = temp_tbl.fiscvarnt,
    main_tbl.upd_date = temp_tbl.upd_date,
    main_tbl.stat_curr = temp_tbl.stat_curr,
    main_tbl.doc_currcy = temp_tbl.doc_currcy,
    main_tbl.base_uom = temp_tbl.base_uom,
    main_tbl.tax_amount = temp_tbl.tax_amount,
    main_tbl.bil_i_cnt = temp_tbl.bil_i_cnt,
    main_tbl.net_val_s = temp_tbl.net_val_s,
    main_tbl.netval_inv = temp_tbl.netval_inv,
    main_tbl.quant_b = temp_tbl.quant_b,
    main_tbl.cost_val_s = temp_tbl.cost_val_s,
    main_tbl.cost = temp_tbl.cost,
    main_tbl.gr_wt_kg = temp_tbl.gr_wt_kg,
    main_tbl.nt_wt_kg = temp_tbl.nt_wt_kg,
    main_tbl.volume_cdm = temp_tbl.volume_cdm,
    main_tbl.subtot_1s = temp_tbl.subtot_1s,
    main_tbl.subtot_2s = temp_tbl.subtot_2s,
    main_tbl.subtot_3s = temp_tbl.subtot_3s,
    main_tbl.subtot_4s = temp_tbl.subtot_4s,
    main_tbl.subtot_5s = temp_tbl.subtot_5s,
    main_tbl.subtot_6s = temp_tbl.subtot_6s,
    main_tbl.proc_date = temp_tbl.proc_date,
    main_tbl.file_creation_ts = temp_tbl.file_creation_ts
WHEN NOT MATCHED THEN 
INSERT (
    hash_id, recordmode, supp_plant, bill_type, segment, profit_ctr, rt_promo, co_area, bill_num, bill_item, sold_to, ship_to, billtoprty, payer, bp_grp, comp_code, cp_categ, distr_chan, division, salesemply, salesorg, sales_grp, sales_off, material, crm_prod, prod_categ, matl_group, plant, bic_zc_stype, ship_point, deb_cred, imodoccat, doc_class, doc_number, s_ord_item, alloc_nmbr, refer_itm, refer_doc, doc_date, apo_deldat, gn_r3_ssy, calday, fiscvarnt, upd_date, stat_curr, doc_currcy, base_uom, tax_amount, bil_i_cnt, net_val_s, netval_inv, quant_b, cost_val_s, cost, gr_wt_kg, nt_wt_kg, volume_cdm, subtot_1s, subtot_2s, subtot_3s, subtot_4s, subtot_5s, subtot_6s, proc_date, file_creation_ts
) VALUES (
    temp_tbl.hash_id, temp_tbl.recordmode, temp_tbl.supp_plant, temp_tbl.bill_type, temp_tbl.segment, temp_tbl.profit_ctr, temp_tbl.rt_promo, temp_tbl.co_area, temp_tbl.bill_num, temp_tbl.bill_item, temp_tbl.sold_to, temp_tbl.ship_to, temp_tbl.billtoprty, temp_tbl.payer, temp_tbl.bp_grp, temp_tbl.comp_code, temp_tbl.cp_categ, temp_tbl.distr_chan, temp_tbl.division, temp_tbl.salesemply, temp_tbl.salesorg, temp_tbl.sales_grp, temp_tbl.sales_off, temp_tbl.material, temp_tbl.crm_prod, temp_tbl.prod_categ, temp_tbl.matl_group, temp_tbl.plant, temp_tbl.bic_zc_stype, temp_tbl.ship_point, temp_tbl.deb_cred, temp_tbl.imodoccat, temp_tbl.doc_class, temp_tbl.doc_number, temp_tbl.s_ord_item, temp_tbl.alloc_nmbr, temp_tbl.refer_itm, temp_tbl.refer_doc, temp_tbl.doc_date, temp_tbl.apo_deldat, temp_tbl.gn_r3_ssy, temp_tbl.calday, temp_tbl.fiscvarnt, temp_tbl.upd_date, temp_tbl.stat_curr, temp_tbl.doc_currcy, temp_tbl.base_uom, temp_tbl.tax_amount, temp_tbl.bil_i_cnt, temp_tbl.net_val_s, temp_tbl.netval_inv, temp_tbl.quant_b, temp_tbl.cost_val_s, temp_tbl.cost, temp_tbl.gr_wt_kg, temp_tbl.nt_wt_kg, temp_tbl.volume_cdm, temp_tbl.subtot_1s, temp_tbl.subtot_2s, temp_tbl.subtot_3s, temp_tbl.subtot_4s, temp_tbl.subtot_5s, temp_tbl.subtot_6s, temp_tbl.proc_date, temp_tbl.file_creation_ts
)
"""
)