In [0]:
# Notebook parameter: register the "proc_date" text widget (empty default) and
# read back whatever value the caller supplied. The value is later passed to
# create_temp_view_with_clean_columns when the staging view is built.
PROC_DATE_WIDGET = "proc_date"
dbutils.widgets.text(PROC_DATE_WIDGET, "")
proc_date = dbutils.widgets.get(PROC_DATE_WIDGET)

In [0]:
%run ../../../utils/common

In [0]:
# Notebook parameter: register the "environment" text widget (empty default,
# empty label) and read its value. It is used as the key into `settings`
# when resolving the catalog name.
ENVIRONMENT_WIDGET = "environment"
dbutils.widgets.text(ENVIRONMENT_WIDGET, "", "")
environment = dbutils.widgets.get(ENVIRONMENT_WIDGET)

In [0]:
# Resolve the catalog for the selected environment.
# `settings` is not defined in this notebook; presumably it comes from the
# `%run ../../../utils/common` cell -- confirm.
environment_settings = settings[environment]
catalog_name = environment_settings['catalog_name']

In [0]:
# Expose the bronze SAP BW extract as the temp view that the transformation
# query below reads from (temp_sap_vccmmim51).
# NOTE(review): the helper is defined outside this notebook; the argument
# meanings (schema, table, processing date, view name) are inferred from the
# values passed -- confirm against utils/common.
bronze_source = ('udp_wcm_bronze_sap_bw', 'sap_bw_wcm_vccmmim51')
staging_view_name = 'temp_sap_vccmmim51'

create_temp_view_with_clean_columns(
    spark, catalog_name, *bronze_source, proc_date, staging_view_name
)

In [0]:
# Create the silver Delta table on first run (no-op when it already exists).
#
# The six stock measure columns are named exactly like the aliases produced by
# the `sap_vcdmmim01` temp view (rectotstck, isstotstck, recvalstck,
# issvalstck, reccnsstck, isscnsstck). The load uses `MERGE ... INSERT *`,
# which maps source columns to target columns by name, so the DDL and the view
# must agree; the previous names (rec_tots_tck, iss_tots_tck, ...) had no
# counterpart in the view.
#
# NOTE(review): the view also emits reqtsn, datapakid and record, which are not
# declared here. Their types are not visible in this notebook, so they are not
# added -- confirm whether they are meant to be persisted.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdmmim01
(
  hash_id BIGINT,
  material STRING,
  stor_loc STRING,
  stocktype STRING,
  stockcat STRING,
  comp_code STRING,
  zc_ltstgr DATE,
  vendor STRING,
  customer STRING,
  calday DATE,
  upd_date DATE,
  segment STRING,
  plant STRING,
  base_uom STRING,
  zk_tp_qty DOUBLE,
  zk_ltstgr DOUBLE,
  rectotstck DOUBLE,
  isstotstck DOUBLE,
  recvalstck DOUBLE,
  issvalstck DOUBLE,
  reccnsstck DOUBLE,
  isscnsstck DOUBLE,
  proc_date DATE,
  file_creation_ts TIMESTAMP
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
""")

In [0]:
# Build the temp view `sap_vcdmmim01` (the MERGE source) from the staging view
# `temp_sap_vccmmim51`.
#
# Steps performed by the query:
#   1. Deduplicate: keep one row per (reqtsn, datapakid, record), preferring
#      the highest proc_date. Rows tied on proc_date are picked arbitrarily,
#      because the window has no further ordering column.
#   2. Normalise keys in the `main` CTE:
#        - empty strings become NULL (stor_loc, stocktype, stockcat, comp_code,
#          vendor, customer, plant, base_uom, segment);
#        - '00000000' dates become NULL, other values are parsed as yyyyMMdd
#          (_bic_zc_ltstgr, calday, upd_date);
#        - all-digit material values are round-tripped through BIGINT, which
#          drops leading zeros; other material values pass through unchanged;
#        - non-empty segment values are round-tripped through BIGINT;
#        - plant goes through LTRIM('0', plant) (trim-string-first form, i.e.
#          strip leading '0' characters).
#   3. SUM the measure columns and take MAX of proc_date / file_creation_ts
#      per group of key columns.
#   4. Compute hash_id with FARM_FINGERPRINT over the concatenated key columns
#      (NULLs replaced by ""), and move reqtsn, datapakid, record to the end of
#      the column list.
#
# NOTE(review): GROUP BY lists identifiers that are both input columns and
#   SELECT aliases; presumably they resolve to the raw input columns -- confirm.
# NOTE(review): FARM_FINGERPRINT is not defined in this notebook; presumably it
#   is registered by utils/common -- confirm.
# NOTE(review): the CONCAT has no delimiter between fields, so different key
#   tuples can produce the same concatenated string (and hence the same
#   hash_id). Changing this would alter existing hash values, so it is left
#   as is.
spark.sql(f"""
    CREATE OR REPLACE TEMP VIEW sap_vcdmmim01 AS
    WITH main AS (
                SELECT 
                    reqtsn, datapakid, record,
                    CASE WHEN material RLIKE '^[0-9]+$' THEN CAST(CAST(material AS BIGINT) AS STRING) ELSE material END AS material,
                    CASE WHEN stor_loc = '' THEN NULL ELSE stor_loc END AS stor_loc,
                    CASE WHEN stocktype = '' THEN NULL ELSE stocktype END AS stocktype,
                    CASE WHEN stockcat = '' THEN NULL ELSE stockcat END AS stockcat,
                    CASE WHEN comp_code = '' THEN NULL ELSE comp_code END AS comp_code,
                    CASE WHEN _bic_zc_ltstgr = '00000000' THEN NULL ELSE TO_DATE(_bic_zc_ltstgr, 'yyyyMMdd') END AS zc_ltstgr,
                    CASE WHEN vendor = '' THEN NULL ELSE vendor END AS vendor,
                    CASE WHEN customer = '' THEN NULL ELSE customer END AS customer,
                    CASE WHEN calday = '00000000' THEN NULL ELSE TO_DATE(calday, 'yyyyMMdd') END AS calday,
                    CASE WHEN upd_date = '00000000' THEN NULL ELSE TO_DATE(upd_date, 'yyyyMMdd') END AS upd_date,
                    CASE WHEN segment = '' THEN NULL ELSE CAST(CAST(segment AS BIGINT) AS STRING) END AS segment,
                    CASE WHEN plant = '' THEN NULL ELSE LTRIM('0',plant) END AS plant,
                    CASE WHEN base_uom = '' THEN NULL ELSE base_uom END AS base_uom,
                    SUM(_bic_zk_tp_qty) AS zk_tp_qty,
                    SUM(_bic_zk_ltstgr) AS zk_ltstgr,
                    SUM(rectotstck) AS rectotstck,
                    SUM(isstotstck) AS isstotstck,
                    SUM(recvalstck) AS recvalstck,
                    SUM(issvalstck) AS issvalstck,
                    SUM(reccnsstck) AS reccnsstck,
                    SUM(isscnsstck) AS isscnsstck,
                    MAX(proc_date) AS proc_date,
                    MAX(file_creation_ts) AS file_creation_ts
                FROM (
                    SELECT * except (rn) FROM (
                        SELECT *, ROW_NUMBER() OVER (PARTITION BY reqtsn, datapakid, record ORDER BY proc_date DESC) AS rn 
                        FROM temp_sap_vccmmim51
                    ) a WHERE rn = 1 
                ) a
                GROUP BY reqtsn, datapakid, record, material, stor_loc, stocktype, stockcat, comp_code, 
                        _bic_zc_ltstgr, vendor, customer, calday, upd_date, segment, plant, base_uom
            )
            SELECT
            FARM_FINGERPRINT(CONCAT(
                COALESCE(CAST(reqtsn AS STRING), ""),
                COALESCE(CAST(datapakid AS STRING), ""),
                COALESCE(CAST(record AS STRING), ""),
                COALESCE(CAST(material AS STRING), ""),
                COALESCE(CAST(stor_loc AS STRING), ""),
                COALESCE(CAST(stocktype AS STRING), ""),
                COALESCE(CAST(stockcat AS STRING), ""),
                COALESCE(CAST(comp_code AS STRING), ""),
                COALESCE(CAST(zc_ltstgr AS STRING), ""),
                COALESCE(CAST(vendor AS STRING), ""),
                COALESCE(CAST(customer AS STRING), ""),
                COALESCE(CAST(calday AS STRING), ""),
                COALESCE(CAST(upd_date AS STRING), ""),
                COALESCE(CAST(segment AS STRING), ""),
                COALESCE(CAST(plant AS STRING), ""),
                COALESCE(CAST(base_uom AS STRING), "")
            )) AS hash_id,
            * EXCEPT (reqtsn, datapakid, record), reqtsn, datapakid, record
            FROM main""")

In [0]:
# Collect the distinct calendar days present in the `sap_vcdmmim01` view and
# render them as a comma-separated list of quoted literals, e.g.
# '2024-01-01','2024-01-02'. The list is interpolated into the MERGE condition
# to restrict which target rows are considered.
#
# `calday` is already a DATE in the view, so it is cast straight to STRING
# (default yyyy-MM-dd rendering) instead of being re-parsed with TO_DATE.
# COLLECT_SET skips NULLs; SORT_ARRAY makes the printed list deterministic.
calday_query = """
SELECT
    SORT_ARRAY(COLLECT_SET(CAST(calday AS STRING))) AS calday_values
FROM sap_vcdmmim01
"""
calday_df = spark.sql(calday_query)
calday_values = calday_df.collect()[0][0] or []

# When the view is empty (or every calday is NULL) fall back to NULL so the
# consumer gets `IN (NULL)` -- which matches nothing -- rather than `IN ('')`,
# an empty string that is not a valid date literal.
calday_in_str = ",".join(f"'{calday}'" for calday in calday_values) or "NULL"
print(calday_in_str)

In [0]:
# Insert-only load into the silver table: a source row is inserted when no
# target row has the same hash_id. Matched rows are left untouched.
#
# The calday predicate only narrows the target rows that are scanned. calday
# is one of the hash inputs, so rows with equal hash_id are expected to carry
# the same calday.
# `OR main_tbl.calday IS NULL` is required for idempotent re-runs: the IN list
# never contains NULL, so without it a target row whose calday is NULL could
# never match and the same source row would be inserted again on every run.
spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdmmim01 main_tbl
USING sap_vcdmmim01 temp_tbl
ON main_tbl.hash_id = temp_tbl.hash_id
AND (main_tbl.calday IN ({calday_in_str}) OR main_tbl.calday IS NULL)
WHEN NOT MATCHED THEN INSERT *
""")