In [0]:
# Register the "proc_date" text widget (empty default) and read its current
# value; the value is passed on to the staging helper further down.
proc_date_widget = "proc_date"
dbutils.widgets.text(proc_date_widget, "")
proc_date = dbutils.widgets.get(proc_date_widget)

In [0]:
%run ../../../utils/common

In [0]:
# Register the "environment" text widget (empty default, empty label) and read
# its current value; it is the key used to look up the per-environment settings.
environment_widget = "environment"
dbutils.widgets.text(environment_widget, "", "")
environment = dbutils.widgets.get(environment_widget)

In [0]:
# Resolve the catalog name for the selected environment.
# `settings` is not defined in this notebook; presumably it comes from the
# %run of utils/common above - confirm.
# The "environment" widget defaults to an empty string, so a missing or
# mistyped job parameter would otherwise surface as an opaque `KeyError: ''`.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    raise KeyError(
        f"No 'catalog_name' configured for environment {environment!r}; "
        "check the 'environment' widget/job parameter and the shared settings."
    ) from exc

In [0]:
# Stage the source rows for this run through the shared helper.
# NOTE(review): the helper is defined outside this notebook; the argument roles
# below (schema, table, resulting view name) are inferred from its name and from
# the later `FROM temp_sap_pmat_unit` query - confirm against utils/common.
source_schema = 'udp_wcm_bronze_sap_bw'
source_table = 'sap_bw_wcm_pmat_unit'
staging_view = 'temp_sap_pmat_unit'

create_temp_view_with_clean_columns(
    spark, catalog_name, source_schema, source_table, proc_date, staging_view
)

In [0]:
# DDL for the silver table. IF NOT EXISTS makes this cell safe to re-run: an
# existing table is left untouched. The column list mirrors the columns produced
# by the `sap_pmat_unit` temp view (hash_id, business columns, proc_date,
# file_creation_ts, start_date, end_date).
create_table_sql = f""" 
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_pmat_unit
(
  hash_id BIGINT,
  material STRING,
  mat_unit STRING,
  denomintr FLOAT,
  eanupc STRING,
  ean_numtyp STRING,
  gross_wt FLOAT,
  height FLOAT,
  len FLOAT,
  numerator FLOAT,
  unit STRING,
  unit_dim STRING,
  unit_of_wt STRING,
  volume FLOAT,
  volumeunit STRING,
  width FLOAT,
  proc_date DATE,
  file_creation_ts TIMESTAMP,
  start_date DATE,
  end_date DATE
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'true',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'true'
)
"""

spark.sql(create_table_sql)

In [0]:
# Build the `sap_pmat_unit` temp view that feeds the two MERGE statements below.
#
# Stages (CTEs):
#   temp   - normalises the staged rows (leading '0' characters stripped from
#            material, empty strings turned into NULL) and keeps one row per
#            (material, mat_unit): the one with the latest proc_date, ties broken
#            by the latest file_creation_ts so the choice does not depend on
#            arbitrary row order.
#   main   - adds hash_id (farm_fingerprint over the concatenated business
#            columns) and start_date = DATE(proc_date). Numeric columns are
#            rendered as integer text when whole, otherwise as DECIMAL(38,10)
#            text with trailing zeros trimmed.
#   main_2..main_4 - drop rows whose hash equals the previous row of the same key
#            and derive end_date from the next row. Because `temp` already keeps
#            a single row per key, hash_id_prev and the LEAD value are always
#            NULL here, so every output row gets the open-ended end_date.
#
# NOTE(review): farm_fingerprint is not a name defined in this notebook;
# presumably it is registered by utils/common - confirm.
# NOTE(review): the hash input is concatenated without a field separator. The
# expression is kept exactly as it was so stored hash_id values stay comparable.
#
# The open-ended marker is the typed literal DATE'2400-01-01' so that end_date is
# a DATE column in the view rather than relying on string/date coercion.
spark.sql("""
CREATE OR REPLACE TEMP VIEW sap_pmat_unit
AS
WITH temp AS (
SELECT * EXCEPT(rn) FROM (
    SELECT *,
        ROW_NUMBER() OVER (
            PARTITION BY material, mat_unit
            ORDER BY proc_date DESC, file_creation_ts DESC
        ) rn
    FROM (
        SELECT 
            LTRIM('0', material) material,
            CASE WHEN mat_unit = '' THEN NULL ELSE mat_unit END AS mat_unit,
            denomintr,
            CASE WHEN eanupc = '' THEN NULL ELSE eanupc END AS eanupc,
            CASE WHEN ean_numtyp = '' THEN NULL ELSE ean_numtyp END AS ean_numtyp,
            gross_wt,
            height,
            len,
            numerator,
            CASE WHEN unit = '' THEN NULL ELSE unit END AS unit,
            CASE WHEN unit_dim = '' THEN NULL ELSE unit_dim END AS unit_dim,
            CASE WHEN unit_of_wt = '' THEN NULL ELSE unit_of_wt END AS unit_of_wt,
            volume,
            CASE WHEN volumeunit = '' THEN NULL ELSE volumeunit END AS volumeunit,
            width,
            proc_date,
            file_creation_ts
        FROM temp_sap_pmat_unit 
        ) a
    ) a WHERE rn = 1
),
main AS (
    SELECT 
        farm_fingerprint(
            CONCAT(
                    IFNULL(CAST(material AS STRING), ""), 
                    IFNULL(CAST(mat_unit AS STRING), ""), 
                    IFNULL(CASE WHEN CAST(denomintr AS DOUBLE) = CAST(CAST(denomintr AS DOUBLE) AS BIGINT) THEN CAST(CAST(denomintr AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(denomintr AS DECIMAL(38,10)))) END, ""),
                    IFNULL(CAST(eanupc AS STRING), ""), 
                    IFNULL(CAST(ean_numtyp AS STRING), ""), 
                    IFNULL(CASE WHEN CAST(gross_wt AS DOUBLE) = CAST(CAST(gross_wt AS DOUBLE) AS BIGINT) THEN CAST(CAST(gross_wt AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(gross_wt AS DECIMAL(38,10)))) END, ""),
                    IFNULL(CASE WHEN CAST(height AS DOUBLE) = CAST(CAST(height AS DOUBLE) AS BIGINT) THEN CAST(CAST(height AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(height AS DECIMAL(38,10)))) END, ""),
                    IFNULL(CASE WHEN CAST(len AS DOUBLE) = CAST(CAST(len AS DOUBLE) AS BIGINT) THEN CAST(CAST(len AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(len AS DECIMAL(38,10)))) END, ""),
                    IFNULL(CASE WHEN CAST(numerator AS DOUBLE) = CAST(CAST(numerator AS DOUBLE) AS BIGINT) THEN CAST(CAST(numerator AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(numerator AS DECIMAL(38,10)))) END, ""),
                    IFNULL(CAST(unit AS STRING), ""), 
                    IFNULL(CAST(unit_dim AS STRING), ""), 
                    IFNULL(CAST(unit_of_wt AS STRING), ""), 
                    IFNULL(CASE WHEN CAST(volume AS DOUBLE) = CAST(CAST(volume AS DOUBLE) AS BIGINT) THEN CAST(CAST(volume AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(volume AS DECIMAL(38,10)))) END, ""),
                    IFNULL(CAST(volumeunit AS STRING), ""), 
                    IFNULL(CASE WHEN CAST(width AS DOUBLE) = CAST(CAST(width AS DOUBLE) AS BIGINT) THEN CAST(CAST(width AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(width AS DECIMAL(38,10)))) END, "")
            )
        ) hash_id,
        *, DATE(proc_date) start_date 
    FROM temp
),
main_2 AS (
    SELECT *, 
        LAG(hash_id) OVER (PARTITION BY material, mat_unit ORDER BY proc_date) hash_id_prev 
    FROM main
),
main_3 AS (
    SELECT * EXCEPT(hash_id_prev) 
    FROM main_2 
    WHERE (hash_id != hash_id_prev OR hash_id_prev IS NULL)
),
main_4 AS (
    SELECT *, 
        LEAD(DATE(proc_date)) OVER (PARTITION BY material, mat_unit ORDER BY proc_date) end_date 
    FROM main_3
)
SELECT 
* EXCEPT(end_date), 
    IFNULL(DATE_ADD(end_date, -1), DATE'2400-01-01') end_date 
FROM main_4;
""")

In [0]:
# Step 1 of the history update: close the currently open version of a key when
# the incoming row for the same key carries a different hash_id.
#
# - Source: the open-ended rows (end_date = '2400-01-01') of the `sap_pmat_unit`
#   temp view.
# - Match: same (material, mat_unit), different hash_id, and the target row is
#   still open. The key columns are compared with the null-safe operator `<=>`
#   because the view turns empty mat_unit values into NULL; with plain `=` a
#   NULL key never matches, so such rows would never be closed and the insert
#   step would keep adding further open versions for them.
# - Action: set the old row's end_date to the day before the new start_date.
spark.sql(f""" MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_pmat_unit inf_tbl
USING (
    SELECT material, mat_unit, hash_id, start_date FROM sap_pmat_unit
        WHERE end_date = '2400-01-01' 
) tmp_tbl
ON inf_tbl.material <=> tmp_tbl.material AND inf_tbl.mat_unit <=> tmp_tbl.mat_unit AND inf_tbl.hash_id != tmp_tbl.hash_id AND inf_tbl.end_date = '2400-01-01'
WHEN MATCHED THEN UPDATE SET inf_tbl.end_date = DATE_ADD(tmp_tbl.start_date, -1);
""")

In [0]:
# Step 2 of the history update: insert every row of the `sap_pmat_unit` temp
# view that has no open target row (end_date = '2400-01-01') with the same
# hash_id. Rows whose hash already exists as an open version are left alone, so
# re-running the cell with unchanged data inserts nothing.
insert_new_versions_sql = f""" MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_pmat_unit inf_tbl
USING sap_pmat_unit tmp_tbl
ON inf_tbl.hash_id = tmp_tbl.hash_id AND inf_tbl.end_date = '2400-01-01'
WHEN NOT MATCHED THEN INSERT *;
"""

spark.sql(insert_new_versions_sql)