In [0]:
# Declare the "proc_date" text widget (empty default) and read its current value.
# The value is handed to create_temp_view_with_clean_columns further down.
dbutils.widgets.text("proc_date", "")
proc_date = dbutils.widgets.get("proc_date")

In [0]:
%run ../../../utils/common

In [0]:
# Declare the "environment" text widget (empty default, empty label) and read its value.
# The value is used below as the key into `settings` to select the catalog.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Catalog that prefixes every table reference in this notebook.
# `settings` is not defined in this notebook — presumably provided by the
# `%run ../../../utils/common` cell (TODO confirm).
# NOTE(review): if `settings` is a plain dict, leaving the "environment" widget
# at its empty default makes this lookup fail with KeyError.
catalog_name = settings[environment]['catalog_name']

In [0]:
# Create the silver target table {catalog_name}.udp_wcm_silver_sap_bw.sap_avccmdm332
# if it does not exist yet (no-op otherwise).
# Columns: hash_id, the source attributes (bbynr ... stgrp), proc_date, the
# start_date/end_date validity range and file_creation_ts.
# The table properties turn on Delta optimized writes and auto compaction.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_avccmdm332 (
	hash_id BIGINT
	,bbynr STRING
	,prqnr STRING
	,matposnr STRING
	,recordmode STRING
	,prqtype STRING
	,grpgnr STRING
	,matnr STRING
	,matquan DOUBLE
	,matunit STRING
	,sumflag STRING
	,disctype STRING
	,prqval_min DOUBLE
	,prqcur STRING
	,stgrp STRING
	,proc_date DATE
	,start_date DATE
	,end_date DATE
	,file_creation_ts TIMESTAMP
	)
 TBLPROPERTIES 
(
	DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE = TRUE,
	DELTA.AUTOOPTIMIZE.AUTOCOMPACT = TRUE
)
 """)

In [0]:
# Prepare the input view `temp_sap_avccmdm332`, which the next cell reads from.
# create_temp_view_with_clean_columns is not defined in this notebook —
# presumably it comes from the `%run ../../../utils/common` cell and exposes the
# bronze table udp_wcm_bronze_sap_bw.sap_bw_wcm_avccmdm332 for `proc_date`
# under the given temp view name (TODO confirm against the helper).
create_temp_view_with_clean_columns(
    spark,
    catalog_name,
    'udp_wcm_bronze_sap_bw',
    'sap_bw_wcm_avccmdm332',
    proc_date,
    'temp_sap_avccmdm332'
)

In [0]:
# Build the temp view `sap_avccmdm332`: one row per detected version of a
# (bbynr, prqnr, matposnr) record, with a start_date/end_date validity range.
#
# Stages of the query:
#   temp   - reads temp_sap_avccmdm332; turns empty strings into NULL, strips
#            leading zeros from bbynr and matnr, renders matquan / prqval_min as
#            strings ('0' when |value| < 1e-10, integer text when the value is
#            whole, plain cast otherwise, '' when NULL) and keeps one row per
#            (bbynr, prqnr, matposnr, DATE(proc_date)) - the first by proc_date.
#   main   - adds hash_id = farm_fingerprint(concatenation of all attributes)
#            and start_date = DATE(proc_date). farm_fingerprint is not a Spark
#            built-in; presumably a UDF registered by utils/common (TODO confirm).
#   main_2 / main_3 - per key, drop rows whose hash_id equals the previous
#            row's hash_id (i.e. nothing changed between loads).
#   main_4 - end_date candidate = date of the next version of the SAME key.
#   final  - end_date = next version's date minus one day, or 2400-01-01 for
#            the currently open version.
#
# Fix: main_4 used to partition LEAD() by bbynr only, while deduplication,
# change detection and the downstream MERGE all key on
# (bbynr, prqnr, matposnr). A row's end_date was therefore taken from the next
# row of *any* prqnr/matposnr under the same bbynr, which closed unchanged
# versions, could yield end_date < start_date for same-day rows, and left only
# one open row per bbynr. LEAD() now uses the full key.
spark.sql(f"""
CREATE OR REPLACE TEMP VIEW sap_avccmdm332 AS
WITH temp AS (
  SELECT * EXCEPT(rn) FROM (
    SELECT *, ROW_NUMBER() OVER (
      PARTITION BY bbynr, prqnr, matposnr, DATE(proc_date)
      ORDER BY proc_date
    ) rn FROM (
      SELECT
        IF(bbynr = '', NULL, LTRIM('0',bbynr)) AS bbynr,
        IF(prqnr = '', NULL, prqnr) AS prqnr,
        IF(matposnr = '', NULL, matposnr) AS matposnr,
        IF(recordmode = '', NULL, recordmode) AS recordmode,
        IF(prqtype = '', NULL, prqtype) AS prqtype,
        IF(grpgnr = '', NULL, grpgnr) AS grpgnr,
        IF(matnr = '', NULL, LTRIM('0',matnr)) AS matnr,
        COALESCE(NULLIF(CASE
            WHEN ABS(matquan) < 1E-10 THEN '0'
            WHEN matquan = CAST(matquan AS DECIMAL(38,0)) THEN CAST(CAST(matquan AS DECIMAL(38,0)) AS STRING)
            ELSE CAST(matquan AS STRING)
        END, 'None'), '') AS matquan,
        IF(matunit = '', NULL, matunit) AS matunit,
        IF(sumflag = '', NULL, sumflag) AS sumflag,
        IF(disctype = '', NULL, disctype) AS disctype,
        COALESCE(NULLIF(CASE
            WHEN ABS(prqval_min) < 1E-10 THEN '0'
            WHEN prqval_min = CAST(prqval_min AS DECIMAL(38,0)) THEN CAST(CAST(prqval_min AS DECIMAL(38,0)) AS STRING)
            ELSE CAST(prqval_min AS STRING)
        END, 'None'), '') AS prqval_min,
        IF(prqcur = '', NULL, prqcur) AS prqcur,
        IF(stgrp = '', NULL, stgrp) AS stgrp,
        proc_date,
        file_creation_ts
      FROM temp_sap_avccmdm332
    ) a
  ) a WHERE rn = 1
),
main AS (
  SELECT
    farm_fingerprint(CONCAT(
        COALESCE(NULLIF(REPLACE(LTRIM('0', CAST(bbynr AS STRING)), 'None', ''), ''), ''),
        COALESCE(NULLIF(prqnr, 'None'), ''),
        COALESCE(NULLIF(matposnr, 'None'), ''),
        COALESCE(NULLIF(recordmode, 'None'), ''),
        COALESCE(NULLIF(prqtype, 'None'), ''),
        COALESCE(NULLIF(grpgnr, 'None'), ''),
        COALESCE(NULLIF(LTRIM('0', CAST(matnr AS STRING)), 'None'), ''),
        IFNULL(CASE WHEN CAST(matquan AS DOUBLE) = CAST(CAST(matquan AS DOUBLE) AS BIGINT) THEN CAST(CAST(matquan AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(matquan AS DECIMAL(38,10)))) END, ""),
        COALESCE(NULLIF(matunit, 'None'), ''),
        COALESCE(NULLIF(sumflag, 'None'), ''),
        COALESCE(NULLIF(disctype, 'None'), ''),
        IFNULL(CASE WHEN CAST(prqval_min AS DOUBLE) = CAST(CAST(prqval_min AS DOUBLE) AS BIGINT) THEN CAST(CAST(prqval_min AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(prqval_min AS DECIMAL(38,10)))) END, ""),
        COALESCE(NULLIF(prqcur, 'None'), ''),
        COALESCE(NULLIF(stgrp, 'None'), '')
    )) AS hash_id,
    *,
    DATE(proc_date) AS start_date
  FROM temp
),
main_2 AS (
  SELECT *,
    LAG(hash_id, 1) OVER (
      PARTITION BY bbynr, prqnr, matposnr
      ORDER BY proc_date
    ) AS hash_id_prev
  FROM main
),
main_3 AS (
  SELECT * EXCEPT(hash_id_prev)
  FROM main_2
  WHERE (hash_id != hash_id_prev OR hash_id_prev IS NULL)
),
main_4 AS (
  SELECT *,
    LEAD(DATE(proc_date)) OVER (
      PARTITION BY bbynr, prqnr, matposnr
      ORDER BY proc_date ASC
    ) AS end_date
  FROM main_3
)
SELECT * EXCEPT(end_date),
  IFNULL(DATE_ADD(end_date, -1), DATE('2400-01-01')) AS end_date
FROM main_4
""")

In [0]:
# Step 1 of the load: close out superseded versions in the target table.
# Source = the open rows (end_date = 2400-01-01) of the `sap_avccmdm332` view.
# A target row is updated when it is still open, has the same
# (bbynr, matposnr, prqnr) as a source row, and a different hash_id; its
# end_date is set to the day before the new version's start_date.
# NOTE(review): the key columns are compared with `=`, so rows where bbynr,
# matposnr or prqnr is NULL never match here — confirm keys are never NULL.
spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_avccmdm332 inf_tbl
USING (
    select bbynr, prqnr, matposnr, hash_id, start_date from sap_avccmdm332
        where end_date = DATE '2400-01-01' 
) tmp_tbl
ON inf_tbl.bbynr = tmp_tbl.bbynr 
AND inf_tbl.matposnr = tmp_tbl.matposnr 
AND inf_tbl.prqnr = tmp_tbl.prqnr 
AND inf_tbl.hash_id != tmp_tbl.hash_id 
AND inf_tbl.end_date = DATE '2400-01-01'
WHEN MATCHED THEN UPDATE SET inf_tbl.end_date = date_add(tmp_tbl.start_date,-1);
""")

In [0]:
# Step 2 of the load: insert every view row that has no open counterpart
# (same hash_id, matposnr and prqnr with end_date = 2400-01-01) in the target.
# The INSERT column list is taken from the target table's schema so the
# statement follows the table definition.
target_schema = spark.table(f"{catalog_name}.udp_wcm_silver_sap_bw.sap_avccmdm332").schema
columns = [field.name for field in target_schema]

# Pre-render the two comma-separated lists used by the INSERT clause.
insert_column_list = ', '.join(columns)
insert_value_list = ', '.join(f'tmp_tbl.{name}' for name in columns)

spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_avccmdm332 inf_tbl
USING sap_avccmdm332 tmp_tbl
ON inf_tbl.hash_id = tmp_tbl.hash_id 
   AND inf_tbl.matposnr = tmp_tbl.matposnr 
   AND inf_tbl.prqnr = tmp_tbl.prqnr 
   AND inf_tbl.end_date = DATE '2400-01-01'
WHEN NOT MATCHED THEN INSERT ({insert_column_list})
VALUES ({insert_value_list});
""")
