In [0]:
# Notebook parameter: declare a text widget named "proc_date" (default: empty
# string) and read its current value. The value is later passed unchanged to
# create_temp_view_with_clean_columns.
dbutils.widgets.text("proc_date", "")
proc_date = dbutils.widgets.get("proc_date")

In [0]:
%run ../../../utils/common

In [0]:
# Notebook parameter: declare a text widget named "environment" (empty default
# and empty label) and read its current value. It is used as the key into
# `settings` when resolving the catalog name.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Resolve the catalog name for the selected environment.
# `settings` is not defined in this notebook; presumably it is provided by the
# `%run` of utils/common - confirm.
# The "environment" widget defaults to an empty string, so an unset or
# misspelled value would otherwise surface as an opaque `KeyError: ''`.
# The same exception type is re-raised with an actionable message.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    raise KeyError(
        f"No 'catalog_name' configured for environment {environment!r}; "
        "check the value of the 'environment' widget"
    ) from exc

In [0]:
# Stage the bronze source for this run.
# create_temp_view_with_clean_columns is not defined in this notebook
# (presumably it comes from utils/common - confirm). Judging by the arguments
# and by the later query `FROM temp_sap_tmaterial`, it is expected to expose
# <catalog>.udp_wcm_bronze_sap_bw.sap_bw_wcm_tmaterial for `proc_date` as the
# temp view `temp_sap_tmaterial`; how `proc_date` is applied and what "clean
# columns" means are not visible here.
create_temp_view_with_clean_columns(
    spark,
    catalog_name,
    'udp_wcm_bronze_sap_bw',
    'sap_bw_wcm_tmaterial',
    proc_date,
    'temp_sap_tmaterial'
)

In [0]:
# Make sure the silver history table exists before anything is merged into it.
# One row per version of a material text: `hash_id` identifies the content,
# `start_date` / `end_date` bound the validity period. The statement is a
# no-op when the table is already present (CREATE TABLE IF NOT EXISTS).
silver_table_ddl = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_tmaterial
(
  hash_id BIGINT,
  material STRING,
  langu STRING,
  txtmd STRING,
  proc_date DATE,
  file_creation_ts TIMESTAMP,
  start_date DATE,
  end_date DATE
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'true',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'true'
)
 """
spark.sql(silver_table_ddl)

In [0]:
# Build the temp view `sap_tmaterial`: the staged rows from
# `temp_sap_tmaterial` collapsed into validity periods per material.
#
# CTE pipeline:
#   temp   - strips leading '0' characters from `material` and keeps one row
#            per (material, calendar day of proc_date).
#   main   - adds `hash_id` (farm_fingerprint over material/langu/txtmd with
#            NULLs replaced by '') and `start_date` = DATE(proc_date).
#   main_2 - attaches the previous row's hash per material, ordered by proc_date.
#   main_3 - keeps only rows whose hash differs from the previous one
#            (or that have no previous row), i.e. actual content changes.
#   main_4 - `end_date` = date of the next kept row for the same material.
# Final SELECT: `end_date` becomes (next start - 1 day); the latest version of
# each material gets the open-ended sentinel 2400-01-01.
#
# The sentinel is written as a typed DATE literal so the view's `end_date`
# column is always DATE. With a bare string literal, IFNULL(DATE, STRING)
# resolves to STRING or DATE depending on the session's type-coercion mode.
# The SQL contains no placeholders, so a plain (non-f) string is used.
#
# NOTE(review): `farm_fingerprint` is not defined in this notebook; presumably
#   it is registered elsewhere (e.g. by utils/common) - confirm.
# NOTE(review): in `temp`, ROW_NUMBER orders only by proc_date inside a
#   partition that is already keyed by DATE(proc_date). If proc_date is a DATE,
#   all rows tie and the surviving row is arbitrary. Consider a tie-breaker
#   such as file_creation_ts once the intended winner is confirmed.
# NOTE(review): partitions use `material` only; if the source can hold several
#   `langu` values per material, rows for other languages are dropped - confirm.
spark.sql("""
  CREATE OR REPLACE TEMP VIEW sap_tmaterial AS
  WITH temp (material, langu, txtmd, proc_date, file_creation_ts) AS (
    SELECT material, langu, txtmd, proc_date, file_creation_ts
    FROM (
      SELECT *, ROW_NUMBER() OVER (PARTITION BY material, DATE(proc_date) ORDER BY proc_date) AS rn 
      FROM (
        SELECT 
          LTRIM('0', material) AS material, 
          langu, 
          txtmd, 
          proc_date,
          file_creation_ts
        FROM temp_sap_tmaterial
      ) a
    ) a 
    WHERE rn = 1
  ),
  main (hash_id, material, langu, txtmd, proc_date, file_creation_ts, start_date) AS (
    SELECT 
      farm_fingerprint(CONCAT(
        IFNULL(CAST(material AS STRING), ""), 
        IFNULL(CAST(langu AS STRING), ""), 
        IFNULL(CAST(txtmd AS STRING), "")
      )) AS hash_id,
      material, langu, txtmd, proc_date, file_creation_ts, DATE(proc_date) AS start_date 
    FROM temp
  ),
  main_2 (hash_id, material, langu, txtmd, proc_date, file_creation_ts, start_date, hash_id_prev) AS (
    SELECT 
      hash_id, material, langu, txtmd, proc_date, file_creation_ts, start_date, 
      LAG(hash_id, 1) OVER (PARTITION BY material ORDER BY proc_date) AS hash_id_prev 
    FROM main
  ),
  main_3 (hash_id, material, langu, txtmd, proc_date, file_creation_ts, start_date) AS (
    SELECT hash_id, material, langu, txtmd, proc_date, file_creation_ts, start_date 
    FROM main_2 
    WHERE (hash_id != hash_id_prev OR hash_id_prev IS NULL)
  ),
  main_4 (hash_id, material, langu, txtmd, proc_date, file_creation_ts, start_date, end_date) AS (
    SELECT 
      hash_id, material, langu, txtmd, proc_date, file_creation_ts, start_date, 
      LEAD(DATE(proc_date)) OVER (PARTITION BY material ORDER BY proc_date ASC) AS end_date 
    FROM main_3
  )
  SELECT 
    hash_id, material, langu, txtmd, proc_date, start_date, 
    IFNULL(DATE_SUB(end_date, 1), DATE'2400-01-01') AS end_date,
    file_creation_ts
  FROM main_4
""")

In [0]:
# Step 1 of the history upsert: close the currently open target rows that have
# been superseded.
# Source: the open-ended (end_date = 2400-01-01) row of each material in the
# `sap_tmaterial` view. A target row matches when it is for the same material,
# is still open, and carries a different hash_id. Its end_date is then set to
# the day before the new version's start_date.
# NOTE(review): there is no guard that the incoming start_date is later than
#   the target row's start_date; re-running with an older proc_date may need
#   checking.
close_superseded_sql = f"""
  MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_tmaterial AS inf_tbl
  USING (
    SELECT material, hash_id, start_date 
    FROM sap_tmaterial 
    WHERE end_date = '2400-01-01'
  ) AS tmp_tbl
  ON inf_tbl.material = tmp_tbl.material 
    AND inf_tbl.hash_id != tmp_tbl.hash_id 
    AND inf_tbl.end_date = '2400-01-01'
  WHEN MATCHED THEN 
    UPDATE SET inf_tbl.end_date = date_Add(tmp_tbl.start_date,-1)"""
spark.sql(close_superseded_sql)

In [0]:
# Step 2 of the history upsert: insert new versions.
# Every row of the `sap_tmaterial` view whose hash_id has no open
# (end_date = 2400-01-01) counterpart in the target table is inserted as-is.
# Rows whose content is already the open version in the target are matched
# and therefore left untouched (there is no WHEN MATCHED clause).
insert_new_versions_sql = f"""
  MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_tmaterial AS inf_tbl
  USING sap_tmaterial AS tmp_tbl
  ON inf_tbl.hash_id = tmp_tbl.hash_id 
    AND inf_tbl.end_date = '2400-01-01'
  WHEN NOT MATCHED THEN 
    INSERT (
      hash_id, material, langu, txtmd, proc_date, start_date, end_date, file_creation_ts
    ) 
    VALUES (
      tmp_tbl.hash_id, tmp_tbl.material, tmp_tbl.langu, tmp_tbl.txtmd, tmp_tbl.proc_date,
      tmp_tbl.start_date, tmp_tbl.end_date, tmp_tbl.file_creation_ts
    )
"""
spark.sql(insert_new_versions_sql)