In [0]:
# Notebook parameter: declare a text widget named "proc_date" with an empty
# default, then read whatever value was supplied for this run.
# `proc_date` is later passed to create_temp_view_with_clean_columns.
_proc_date_widget = "proc_date"
dbutils.widgets.text(_proc_date_widget, "")
proc_date = dbutils.widgets.get(_proc_date_widget)

In [0]:
%run ../../../utils/common

In [0]:
# Notebook parameter: declare a text widget named "environment" (empty default,
# empty third argument) and read the value supplied for this run.
# The value is used as the key into `settings` to pick the catalog.
_environment_widget = "environment"
dbutils.widgets.text(_environment_widget, "", "")
environment = dbutils.widgets.get(_environment_widget)

In [0]:
# Resolve the catalog used by every fully-qualified table name below.
# `settings` is not defined in this notebook; presumably it comes from the
# `%run ../../../utils/common` cell -- confirm.
environment_settings = settings[environment]
catalog_name = environment_settings['catalog_name']

In [0]:
# Stage the source data for this run. The helper is defined outside this
# notebook, so the argument meanings below are inferred from the values
# (presumably schema name, table name and output view name -- confirm against
# the helper's signature). The last value, 'temp_sap_vccmmpo09', is the relation
# queried by the transformation cell further down.
source_schema = 'udp_wcm_bronze_sap_bw'
source_table = 'sap_bw_wcm_vccmmpo09'
staging_view = 'temp_sap_vccmmpo09'

create_temp_view_with_clean_columns(
    spark, catalog_name, source_schema, source_table, proc_date, staging_view
)

In [0]:
# DDL for the target table {catalog_name}.udp_wcm_silver_sap_bw.sap_vccmmpo09.
# The statement is a no-op when the table already exists (IF NOT EXISTS).
# Columns: two hash columns (hash_id, hash_value), the business attributes,
# a start_date/end_date validity pair, and load metadata (proc_date,
# file_creation_ts). The two Delta auto-optimize table properties are set to 'true'.
create_target_table_sql = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_vccmmpo09
( 
  hash_id BIGINT,
  hash_value BIGINT,
  pir STRING,
  articleno STRING,
  category STRING,
  vendor STRING,
  subrange STRING,
  availbleto DATE,
  delete_indicator STRING,
  start_date DATE,
  end_date DATE,
  created_on STRING,
  created_by STRING,
  search_term STRING,
  order_unit STRING,
  numerator DOUBLE,
  denominator DOUBLE,
  vendor_article_no STRING,
  salesperson STRING,
  telephone_1 STRING,
  first_reminder_exped DOUBLE,
  second_reminder_exped DOUBLE,
  third_reminder_exped DOUBLE,
  certificate_number STRING,
  country_of_origin STRING,
  certificate_category STRING,
  number STRING,
  base_unit_of_measure STRING,
  region STRING,
  variable_purchase_order_unit STRING,
  vsr_sort_seq_number STRING,
  vendor_mdse_catgry STRING,
  return_agreement STRING,
  available_from STRING,
  prior_vendor STRING,
  points DOUBLE,
  points_unit STRING,
  regular_vendor STRING,
  proc_date DATE,
  file_creation_ts TIMESTAMP
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'true',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'true'
)
"""
spark.sql(create_target_table_sql)

In [0]:
# Build the `sap_vccmmpo09` temp view consumed by the two MERGE statements below.
#
# Pipeline (innermost query first):
#   1. De-duplicate the staged rows on (reqtsn, datapakid, record).
#   2. Rename the source fields to business names and clean them:
#        - empty strings become NULL for infnr, matkl, ltsnr, loekz;
#        - leading '0' characters are stripped from matnr and lifnr;
#        - lifbi '00000000' becomes NULL, otherwise it is parsed as yyyyMMdd.
#   3. Keep one row per business key (pir, articleno, vendor, category).
#   4. Add hash_id (hash of the business key), hash_value (hash of key plus all
#      tracked attributes), start_date = DATE(proc_date) and the open-ended
#      end_date 2400-01-01.
#
# Changes versus the previous version of this cell:
#   * Numeric fields are cast to DOUBLE instead of INT. The target table declares
#     these columns as DOUBLE and hash_value already has a branch for
#     non-integral values; the INT cast silently truncated fractions.
#     Integral values produce exactly the same stored value and hash as before.
#     NOTE(review): rows whose fractions were previously truncated will get a
#     new version on the next load.
#   * Both ROW_NUMBER() windows add `file_creation_ts DESC` as a tiebreaker so the
#     surviving row is no longer arbitrary when several candidates share the
#     same proc_date. NOTE(review): assumes a later file_creation_ts means a
#     newer extract -- confirm.
#   * The redundant `proc_date` after `SELECT *` (duplicate column) is removed.
#   * The string is no longer an f-string (it has no placeholders) and the
#     empty-string literals consistently use single quotes.
#
# `farm_fingerprint` is not defined in this notebook; presumably it is registered
# by `%run ../../../utils/common` -- confirm.
# NOTE(review): the hash inputs are concatenated without a separator, so distinct
# value combinations can produce the same input string. Left unchanged because
# altering it would change every stored hash.
spark.sql("""
CREATE OR REPLACE TEMP VIEW sap_vccmmpo09 AS
WITH temp AS (
    SELECT * EXCEPT(rn) FROM (
        SELECT *,
            ROW_NUMBER() OVER (
                PARTITION BY pir, articleno, vendor, category
                ORDER BY proc_date DESC, file_creation_ts DESC
            ) rn
        FROM (
            SELECT
                IF(infnr = '', NULL, infnr) AS pir,
                LTRIM('0', matnr) AS articleno,
                IF(matkl = '', NULL, matkl) AS category,
                LTRIM('0', lifnr) AS vendor,
                IF(ltsnr = '', NULL, ltsnr) AS subrange,
                CASE
                    WHEN lifbi = '00000000' THEN NULL
                    ELSE TO_DATE(lifbi, 'yyyyMMdd')
                END AS availbleto,
                IF(loekz = '', NULL, loekz) AS delete_indicator,
                erdat AS created_on,
                ernam AS created_by,
                sortl AS search_term,
                meins AS order_unit,
                CAST(umrez AS DOUBLE) AS numerator,
                CAST(umren AS DOUBLE) AS denominator,
                idnlf AS vendor_article_no,
                verkf AS salesperson,
                telf1 AS telephone_1,
                CAST(mahn1 AS DOUBLE) AS first_reminder_exped,
                CAST(mahn2 AS DOUBLE) AS second_reminder_exped,
                CAST(mahn3 AS DOUBLE) AS third_reminder_exped,
                urznr AS certificate_number,
                urzla AS country_of_origin,
                urztp AS certificate_category,
                urzzt AS number,
                lmein AS base_unit_of_measure,
                regio AS region,
                vabme AS variable_purchase_order_unit,
                ltssf AS vsr_sort_seq_number,
                wglif AS vendor_mdse_catgry,
                rueck AS return_agreement,
                lifab AS available_from,
                kolif AS prior_vendor,
                CAST(anzpu AS DOUBLE) AS points,
                punei AS points_unit,
                relif AS regular_vendor,
                proc_date,
                file_creation_ts
            FROM (
                SELECT * EXCEPT(rn) FROM (
                    SELECT *,
                        ROW_NUMBER() OVER (
                            PARTITION BY reqtsn, datapakid, record
                            ORDER BY proc_date DESC, file_creation_ts DESC
                        ) rn
                    FROM temp_sap_vccmmpo09
                ) a WHERE rn = 1
            )
        ) a
    ) a WHERE rn = 1
),
main AS (
    SELECT
        farm_fingerprint(CONCAT(
            IFNULL(CAST(pir AS STRING), ''),
            IFNULL(CAST(articleno AS STRING), ''),
            IFNULL(CAST(vendor AS STRING), ''),
            IFNULL(CAST(category AS STRING), '')
        )) hash_id,
        farm_fingerprint(CONCAT(
            IFNULL(CAST(pir AS STRING), ''),
            IFNULL(CAST(articleno AS STRING), ''),
            IFNULL(CAST(vendor AS STRING), ''),
            IFNULL(CAST(category AS STRING), ''),
            IFNULL(CAST(subrange AS STRING), ''),
            IFNULL(CAST(availbleto AS STRING), ''),
            IFNULL(CAST(delete_indicator AS STRING), ''),
            IFNULL(CAST(created_on AS STRING), ''),
            IFNULL(CAST(created_by AS STRING), ''),
            IFNULL(CAST(search_term AS STRING), ''),
            IFNULL(CAST(order_unit AS STRING), ''),
            IFNULL(CASE WHEN CAST(numerator AS DOUBLE) = CAST(CAST(numerator AS DOUBLE) AS BIGINT) THEN CAST(CAST(numerator AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(numerator AS DECIMAL(38,10)))) END, ''),
            IFNULL(CASE WHEN CAST(denominator AS DOUBLE) = CAST(CAST(denominator AS DOUBLE) AS BIGINT) THEN CAST(CAST(denominator AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(denominator AS DECIMAL(38,10)))) END, ''),
            IFNULL(CAST(vendor_article_no AS STRING), ''),
            IFNULL(CAST(salesperson AS STRING), ''),
            IFNULL(CAST(telephone_1 AS STRING), ''),
            IFNULL(CASE WHEN CAST(first_reminder_exped AS DOUBLE) = CAST(CAST(first_reminder_exped AS DOUBLE) AS BIGINT) THEN CAST(CAST(first_reminder_exped AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(first_reminder_exped AS DECIMAL(38,10)))) END, ''),
            IFNULL(CASE WHEN CAST(second_reminder_exped AS DOUBLE) = CAST(CAST(second_reminder_exped AS DOUBLE) AS BIGINT) THEN CAST(CAST(second_reminder_exped AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(second_reminder_exped AS DECIMAL(38,10)))) END, ''),
            IFNULL(CASE WHEN CAST(third_reminder_exped AS DOUBLE) = CAST(CAST(third_reminder_exped AS DOUBLE) AS BIGINT) THEN CAST(CAST(third_reminder_exped AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(third_reminder_exped AS DECIMAL(38,10)))) END, ''),
            IFNULL(CAST(certificate_number AS STRING), ''),
            IFNULL(CAST(country_of_origin AS STRING), ''),
            IFNULL(CAST(certificate_category AS STRING), ''),
            IFNULL(CAST(number AS STRING), ''),
            IFNULL(CAST(base_unit_of_measure AS STRING), ''),
            IFNULL(CAST(region AS STRING), ''),
            IFNULL(CAST(variable_purchase_order_unit AS STRING), ''),
            IFNULL(CAST(vsr_sort_seq_number AS STRING), ''),
            IFNULL(CAST(vendor_mdse_catgry AS STRING), ''),
            IFNULL(CAST(return_agreement AS STRING), ''),
            IFNULL(CAST(available_from AS STRING), ''),
            IFNULL(CAST(prior_vendor AS STRING), ''),
            IFNULL(CASE WHEN CAST(points AS DOUBLE) = CAST(CAST(points AS DOUBLE) AS BIGINT) THEN CAST(CAST(points AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(points AS DECIMAL(38,10)))) END, ''),
            IFNULL(CAST(points_unit AS STRING), ''),
            IFNULL(CAST(regular_vendor AS STRING), '')
        )) hash_value,
        pir,
        articleno,
        category,
        vendor,
        subrange,
        availbleto,
        delete_indicator,
        DATE(proc_date) AS start_date,
        DATE('2400-01-01') AS end_date,
        created_on,
        created_by,
        search_term,
        order_unit,
        numerator,
        denominator,
        vendor_article_no,
        salesperson,
        telephone_1,
        first_reminder_exped,
        second_reminder_exped,
        third_reminder_exped,
        certificate_number,
        country_of_origin,
        certificate_category,
        number,
        base_unit_of_measure,
        region,
        variable_purchase_order_unit,
        vsr_sort_seq_number,
        vendor_mdse_catgry,
        return_agreement,
        available_from,
        prior_vendor,
        points,
        points_unit,
        regular_vendor,
        proc_date,
        file_creation_ts
    FROM temp
)
SELECT * FROM main;
""")

In [0]:
# Step 1 of the versioned load: end-date target rows whose content changed.
# A target row matches when it has the same hash_id as an incoming row, a
# different hash_value, and an end_date that is today or later. Matched rows
# get end_date set to the day before the incoming row's start_date.
# No rows are inserted by this statement.
close_changed_versions_sql = f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_vccmmpo09 AS main_tbl
USING sap_vccmmpo09 AS temp_tbl
ON main_tbl.hash_value != temp_tbl.hash_value
   AND main_tbl.hash_id = temp_tbl.hash_id
   AND main_tbl.end_date >= CURRENT_DATE
WHEN MATCHED THEN
UPDATE SET main_tbl.end_date = DATE_ADD(temp_tbl.start_date, -1);

"""
spark.sql(close_changed_versions_sql)

In [0]:
# Step 2 of the versioned load: insert incoming rows that have no counterpart
# in the target with the same hash_id, the same hash_value and an end_date of
# today or later. This covers both new keys and keys whose previous version
# was end-dated by the preceding MERGE. Existing rows are never updated here.
insert_new_versions_sql = f"""
        MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_vccmmpo09 AS main_tbl
        USING sap_vccmmpo09 AS temp_tbl
        ON main_tbl.hash_id = temp_tbl.hash_id
        AND main_tbl.hash_value = temp_tbl.hash_value
        AND main_tbl.end_date >= CURRENT_DATE
        WHEN NOT MATCHED THEN INSERT *;
"""
spark.sql(insert_new_versions_sql)