In [0]:
# Notebook parameter: register the "proc_date" text widget (default: empty string)
# and read its current value. The value is handed to the helper that builds the
# bronze temp view further down in this notebook.
dbutils.widgets.text("proc_date", "")
proc_date = dbutils.widgets.get("proc_date")

In [0]:
%run ../../../utils/common

In [0]:
# Notebook parameter: register the "environment" text widget (second and third
# arguments are both empty strings) and read its current value. The value is used
# as the key into `settings` when the catalog name is resolved.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Resolve the catalog name configured for the selected environment.
# `settings` is not defined in this notebook; presumably it is provided by the
# `%run` of utils/common — verify.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    # The "environment" widget defaults to "", so an unset widget would otherwise
    # surface as a bare KeyError('') with no hint about what to fix. Re-raise the
    # same exception type with the offending value spelled out.
    raise KeyError(
        f"Cannot resolve catalog_name for environment {environment!r} "
        f"(missing key: {exc}). Set the 'environment' widget to a configured environment."
    ) from exc

In [0]:
# Stage the bronze data for this run as the temp view `temp_sap_vcdmmim03`,
# which the transformation query below reads from.
# The helper is defined outside this notebook; presumably it reads
# <catalog>.udp_wcm_bronze_sap_bw.sap_bw_wcm_vccmmim53 for `proc_date` and
# normalises the column names — confirm against utils/common.
# NOTE(review): the bronze table is named ...vccmmim53 while the view and the
# silver table are named ...vcdmmim03 — confirm this mapping is intentional.
create_temp_view_with_clean_columns(
    spark, catalog_name,
    "udp_wcm_bronze_sap_bw", "sap_bw_wcm_vccmmim53",
    proc_date,
    "temp_sap_vcdmmim03",
)

In [0]:
# Create the silver table {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdmmim03 if
# it does not exist yet (no-op otherwise). The column list below is the target
# schema that the MERGE at the end of this notebook writes into; `hash_id` is
# the key that MERGE matches on.
# Both delta.autoOptimize properties (optimizeWrite and autoCompact) are enabled.
# NOTE(review): `calyear` is declared here, but the staging view built later in
# this notebook does not select a `calyear` column, so this job appears to
# leave it NULL on insert — confirm whether that is intended.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdmmim03
(
  hash_id BIGINT,
  recordmode STRING,
  segment STRING,
  zc_ltstgr DATE,
  doc_year BIGINT,
  bwcounter BIGINT,
  mat_doc STRING,
  mat_item BIGINT,
  plant STRING,
  stor_loc STRING,
  strge_bin STRING,
  whse_num STRING,
  strge_type STRING,
  move_plant STRING,
  material STRING,
  vendor STRING,
  customer STRING,
  batch STRING,
  stocktype STRING,
  stockcat STRING,
  stockrelev STRING,
  val_type STRING,
  val_class STRING,
  bwapplnm STRING,
  processkey BIGINT,
  movetype STRING,
  indspecstk STRING,
  valspecst STRING,
  matmrea BIGINT,
  dcindic STRING,
  storno STRING,
  comp_code STRING,
  bus_area STRING,
  co_area STRING,
  profit_ctr STRING,
  oi_ebeln STRING,
  oi_ebelp BIGINT,
  doc_number STRING,
  s_ord_item BIGINT,
  sched_line BIGINT,
  coorder STRING,
  wbs_elemt STRING,
  chrt_accts STRING,
  account STRING,
  rt_promo STRING,
  gn_r3_ssy STRING,
  calday DATE,
  calyear BIGINT,
  fiscvarnt STRING,
  doc_date DATE,
  upd_date DATE,
  base_uom STRING,
  loc_currcy STRING,
  zk_tp_qty DOUBLE,
  zk_tp_val DOUBLE,
  value_lc DOUBLE,
  zk_ltstgr BIGINT,
  cpquabu DOUBLE,
  cppvlc DOUBLE,
  cpsvlc DOUBLE,
  cpstlc DOUBLE,
  cpnoitems STRING,
  proc_date DATE,
  costcenter STRING,
  file_creation_ts TIMESTAMP
)
TBLPROPERTIES (
  'delta.autoOptimize.optimizeWrite' = 'TRUE',
  'delta.autoOptimize.autoCompact' = 'TRUE'
)""")


In [0]:
# Build the staging temp view `sap_vcdmmim03` from `temp_sap_vcdmmim03`.
#
# Stages of the query:
#   1. Innermost subquery: keep one row per (reqtsn, datapakid, record), the one
#      with the latest proc_date.
#   2. `temp`: normalise the raw values —
#        * empty strings become NULL for most text columns;
#        * '00000000' dates become NULL, other dates are parsed as yyyyMMdd;
#        * counters / item numbers are cast to BIGINT;
#        * leading zeros are stripped from plant and costcenter (LTRIM('0', col))
#          and from all-digit segment / material / profit_ctr values (BIGINT
#          round-trip);
#        * zk_tp_val, value_lc, cppvlc, cpsvlc and cpstlc are multiplied by 100
#          (presumably a currency-decimal adjustment — confirm).
#   3. `main`: add hash_id = farm_fingerprint over the concatenation of
#      fiscvarnt, doc_year, mat_doc, mat_item and bwcounter (NULLs become '').
#   4. `main_2` + final SELECT: keep one row per hash_id — the one with the
#      highest reqtsn, datapakid, record — and drop the helper columns
#      rn, reqtsn, datapakid and record.
#
# NOTE(review): `farm_fingerprint` is not defined in this notebook; presumably
# it is a function registered by utils/common — verify.
# NOTE(review): the hash key parts are concatenated without a delimiter, so
# different tuples could in principle produce the same string. Changing this
# would alter the hash_id of rows already stored in the silver table.
# NOTE(review): unlike most text columns, `material` and `recordmode` are not
# mapped from '' to NULL — confirm this is intended.
spark.sql(f"""
CREATE OR REPLACE TEMP VIEW sap_vcdmmim03 AS
WITH temp AS (
  SELECT  reqtsn,
          datapakid,
          record,
          recordmode,
          IF(segment = '', NULL, CAST(CAST(segment AS BIGINT) AS STRING)) segment,
          IF(_bic_zc_ltstgr = '00000000', NULL, TO_DATE(_bic_zc_ltstgr, 'yyyyMMdd')) zc_ltstgr,
          CAST(doc_year AS BIGINT) doc_year,
          CAST(bwcounter AS BIGINT) bwcounter,
          IF(mat_doc = '', NULL, mat_doc) mat_doc,
          CAST(mat_item AS BIGINT) mat_item,
          IF(plant = '', NULL, LTRIM('0', plant)) plant,
          IF(stor_loc = '', NULL, stor_loc) stor_loc,
          IF(strge_bin = '', NULL, strge_bin) strge_bin,
          IF(whse_num = '', NULL, whse_num) whse_num,
          IF(strge_type = '', NULL, strge_type) strge_type,
          IF(move_plant = '', NULL, move_plant) move_plant,
          IF(REGEXP_LIKE(material, '^[0-9]+$'), CAST(CAST(material AS BIGINT) AS STRING), material) material,
          IF(vendor = '', NULL, vendor) vendor,
          IF(customer = '', NULL, customer) customer,
          IF(batch = '', NULL, batch) batch,
          IF(stocktype = '', NULL, stocktype) stocktype,
          IF(stockcat = '', NULL, stockcat) stockcat,
          IF(stockrelev = '', NULL, stockrelev) stockrelev,
          IF(val_type = '', NULL, val_type) val_type,
          IF(val_class = '', NULL, val_class) val_class,
          IF(bwapplnm = '', NULL, bwapplnm) bwapplnm,
          CAST(processkey AS BIGINT) processkey,
          IF(movetype = '', NULL, movetype) movetype,
          IF(indspecstk = '', NULL, indspecstk) indspecstk,
          IF(valspecst = '', NULL, valspecst) valspecst,
          CAST(matmrea AS BIGINT) matmrea,
          IF(dcindic = '', NULL, dcindic) dcindic,
          IF(storno = '', NULL, storno) storno,
          IF(comp_code = '', NULL, comp_code) comp_code,
          IF(bus_area = '', NULL, bus_area) bus_area,
          IF(co_area = '', NULL, co_area) co_area,
          IF(costcenter = '', NULL, LTRIM('0', costcenter)) costcenter,
          IF(REGEXP_LIKE(profit_ctr, '^[0-9]+$'), CAST(CAST(profit_ctr AS BIGINT) AS STRING),IF(profit_ctr = '', NULL, profit_ctr)) profit_ctr,
          IF(oi_ebeln = '', NULL, oi_ebeln) oi_ebeln,
          CAST(oi_ebelp AS BIGINT) oi_ebelp,
          IF(doc_number = '', NULL, doc_number) doc_number,
          CAST(s_ord_item AS BIGINT) s_ord_item,
          CAST(sched_line AS BIGINT) sched_line,
          IF(coorder = '', NULL, coorder) coorder,
          IF(wbs_elemt = '', NULL, wbs_elemt) wbs_elemt,
          IF(chrt_accts = '', NULL, chrt_accts) chrt_accts,
          IF(account = '', NULL, account) account,
          IF(rt_promo = '', NULL, rt_promo) rt_promo,
          IF(gn_r3_ssy = '', NULL, gn_r3_ssy) gn_r3_ssy,
          IF(calday = '00000000', NULL, TO_DATE(calday, 'yyyyMMdd')) calday,
          IF(fiscvarnt = '', NULL, fiscvarnt) fiscvarnt,
          IF(doc_date = '00000000', NULL, TO_DATE(doc_date, 'yyyyMMdd')) doc_date,
          IF(upd_date = '00000000', NULL, TO_DATE(upd_date, 'yyyyMMdd')) upd_date,
          IF(base_uom = '', NULL, base_uom) base_uom,
          IF(loc_currcy = '', NULL, loc_currcy) loc_currcy,
          _bic_zk_tp_qty zk_tp_qty,
          _bic_zk_tp_val * 100 zk_tp_val,
          value_lc * 100 value_lc,
          CAST(_bic_zk_ltstgr AS BIGINT) zk_ltstgr,
          cpquabu,
          cppvlc * 100 cppvlc,
          cpsvlc * 100 cpsvlc,
          cpstlc * 100 cpstlc,
          CAST(cpnoitems AS STRING) cpnoitems,
          proc_date,
          file_creation_ts
  FROM (
    SELECT * EXCEPT(rn) FROM (
      SELECT *, ROW_NUMBER() OVER (PARTITION BY reqtsn, datapakid, record ORDER BY proc_date DESC) rn
      FROM temp_sap_vcdmmim03
    ) a
    WHERE rn = 1
  ) a
),
main AS (
  SELECT
    farm_fingerprint(CONCAT(
      IFNULL(CAST(fiscvarnt AS STRING), ''),
      IFNULL(CAST(doc_year AS STRING), ''),
      IFNULL(CAST(mat_doc AS STRING), ''),
      IFNULL(CAST(mat_item AS STRING), ''),
      IFNULL(CAST(bwcounter AS STRING), '')
    )) AS hash_id,
    *
  FROM temp
),
main_2 AS (
  SELECT *,
         ROW_NUMBER() OVER (PARTITION BY hash_id ORDER BY reqtsn DESC, datapakid DESC, record DESC) rn
  FROM main
)
SELECT
  * EXCEPT(rn, reqtsn, datapakid, record)
FROM main_2
WHERE rn = 1
""")

In [0]:
# Collect the distinct calday values present in the staged batch as a quoted,
# comma-separated list (e.g. '2024-01-01','2024-01-02'). The list is spliced
# into the MERGE's `main_tbl.calday IN (...)` predicate further down.
# (calday is already a DATE in the staging view, so the TO_DATE wrapper is a
# no-op kept only to leave the query text unchanged.)
calday_query = f"""
SELECT
    CONCAT("'", CONCAT_WS("','", COLLECT_SET(CAST(TO_DATE(calday, 'yyyy-MM-dd') AS STRING))), "'") AS calday_in_str
FROM sap_vcdmmim03
"""
calday_df = spark.sql(calday_query)
calday_in_str = calday_df.collect()[0][0]

# COLLECT_SET ignores NULLs, so an empty batch (or one whose calday values are
# all NULL) produces the literal '' here. `calday IN ('')` compares a DATE with
# an empty string, which is an invalid cast under ANSI type coercion. A typed
# NULL keeps the same "matches nothing" semantics and is always valid SQL.
if calday_in_str in (None, "''"):
    calday_in_str = "CAST(NULL AS DATE)"
print(calday_in_str)

In [0]:
# Build the column lists spliced into the MERGE statement from the staging
# view's actual schema, so the statement follows the view's column set.
exclude_columns = ["calday", "hash_id"]  # Columns to exclude from update
source_columns = spark.table("sap_vcdmmim03").columns

# INSERT (<insert_columns>) VALUES (<insert_values>)
insert_columns = ", ".join(source_columns)
insert_values = ", ".join(f"temp_tbl.{name}" for name in source_columns)

# Lower-case the exclusion list once instead of rebuilding it for every column.
_excluded_lower = {name.lower() for name in exclude_columns}

# UPDATE SET assignments for every column except the excluded ones
# (case-insensitive match on the column name).
update_columns = [
    f"main_tbl.{name} = temp_tbl.{name}"
    for name in source_columns
    if name.lower() not in _excluded_lower
]
update_set = ",\n    ".join(update_columns)

In [0]:
# Upsert the staged batch (`sap_vcdmmim03` temp view) into the silver table.
# Rows are matched on hash_id; the additional `main_tbl.calday IN (...)`
# predicate restricts matching to target rows whose calday is one of the
# values present in this batch (presumably to limit the files scanned — confirm).
# Matched rows get every column updated except those listed in exclude_columns;
# unmatched rows are inserted with all staging columns.
# NOTE(review): a target row whose calday is NULL, or whose calday is not in
# this batch's list, can never satisfy the ON clause, so a source row with the
# same hash_id would be inserted again rather than updated — confirm calday is
# always populated and stable per hash_id.
spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdmmim03 main_tbl
USING sap_vcdmmim03 temp_tbl
ON main_tbl.hash_id = temp_tbl.hash_id
AND main_tbl.calday IN ({calday_in_str})
WHEN MATCHED THEN UPDATE SET {update_set}
WHEN NOT MATCHED THEN INSERT ({insert_columns}) VALUES ({insert_values})
""")