In [0]:
# Declare the "proc_date" text widget (default: empty string) and read its current value.
# The value is not validated here; it is passed as-is to
# create_temp_view_with_clean_columns further down in this notebook.
dbutils.widgets.text("proc_date", "")
proc_date = dbutils.widgets.get("proc_date")

In [0]:
%run ../../../utils/common

In [0]:
# Declare the "environment" text widget (empty default, empty label) and read its value.
# The value is used below as the key into `settings` that selects the catalog name.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Resolve the catalog name that qualifies the silver table in the SQL cells below.
# NOTE(review): `settings` is not defined in this notebook; presumably it is provided by
# the `%run ../../../utils/common` cell — confirm.
# The "environment" widget defaults to "", so a blank or misspelled value would otherwise
# surface as an opaque KeyError(''). Re-raise the same exception type with a message that
# says which widget to fix.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    raise KeyError(
        f"No 'catalog_name' found in settings for environment {environment!r}; "
        "set the 'environment' widget to a configured environment."
    ) from exc
print(catalog_name)

In [0]:
# Prepare the temp view `temp_sap_0plant`, which the transformation cell below selects from.
# NOTE(review): create_temp_view_with_clean_columns is not defined in this notebook
# (presumably it comes from utils/common). Judging only by the values passed, the
# arguments look like: Spark session, catalog, source schema, source table, processing
# date, and the name of the temp view to create — confirm against the helper's signature.
create_temp_view_with_clean_columns(
    spark,
    catalog_name,
    'udp_wcm_bronze_sap_bw',
    'sap_bw_wcm_0plant',
    proc_date,
    'temp_sap_0plant'
)

In [0]:
# Create the target table {catalog_name}.udp_wcm_silver_sap_bw.sap_0plant if it does not
# exist yet (no-op otherwise).
# The column list corresponds to the output of the `sap_0plant` temp view built in the
# next cell: hash_id, the plant attributes, proc_date / file_creation_ts, the parsed
# DATE columns, and the start_date / end_date validity bounds.
# The TBLPROPERTIES clause sets the DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE and
# DELTA.AUTOOPTIMIZE.AUTOCOMPACT table properties to 'true'.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_0plant
(
  hash_id BIGINT,
  plant STRING,
  country STRING,
  distr_chan STRING,
  factcal_id STRING,
  latitude FLOAT,
  loc_currcy STRING,
  longitude FLOAT,
  plantcat STRING,
  postal_cd STRING,
  postcd_gis STRING,
  purch_org STRING,
  region STRING,
  rt_custpl STRING,
  salesorg STRING,
  sales_dist STRING,
  comp_code STRING,
  _bic_zc_sup_rg STRING,
  _bic_zc_region STRING,
  _bic_zc_saunit STRING,
  segment STRING,
  _bic_zk_saqty FLOAT,
  _bic_zk_reaqty FLOAT,
  _bic_zk_numflr FLOAT,
  currency STRING,
  _bic_zk_renamt FLOAT,
  _bic_zc_stoloc STRING,
  crm_delblk STRING,
  _bic_zc_catsiz STRING,
  _bic_zc_posout STRING,
  _bic_zc_posin STRING,
  _bic_zc_rptsgm STRING,
  _bic_zc_catage STRING,
  _bic_zc_stosts STRING,
  _bic_zc_kpists STRING,
  street STRING,
  name STRING,
  name2 STRING,
  city STRING,
  email_addr STRING,
  addr_numbr STRING,
  fax_number STRING,
  tel_number STRING,
  _bic_zc_sitel2 STRING,
  _bic_zc_sitel3 STRING,
  _bic_zc_gplant STRING,
  _bic_zc_dcgr01 STRING,
  _bic_zc_subrg1 STRING,
  proc_date STRING,
  file_creation_ts TIMESTAMP,
  rt_lbldatf DATE,
  rt_lbldatt DATE,
  rt_lcldat DATE,
  rt_lopdat DATE,
  aedat DATE,
  start_date DATE,
  end_date DATE
) 
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'true',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'true'
)
""")

In [0]:
# Build the temp view `sap_0plant` (versioned plant rows) from `temp_sap_0plant`.
#
# Pipeline inside the query:
#   temp    - cleans the source columns: empty strings become NULL, leading '0'
#             characters are stripped from rt_custpl / salesorg / comp_code / segment /
#             _bic_zc_rptsgm, 'yyyyMMdd' strings are parsed to DATE with '00000000'
#             treated as NULL, and factcal_id / plantcat / postal_cd are emitted as NULL.
#             Then keeps one row per (plant, calendar day of proc_date).
#             The dedup orders by proc_date and then by file_creation_ts DESC, so the
#             surviving row is deterministic when several source rows share the same
#             proc_date (the newest file wins) instead of being picked arbitrarily,
#             which could make hash_id differ between reruns.
#   main    - computes hash_id over the concatenated attribute values and derives
#             start_date from proc_date.
#             NOTE(review): farm_fingerprint is not defined in this notebook; presumably
#             it is a function registered by utils/common — confirm.
#             NOTE(review): rt_lbldatf, rt_lbldatt, rt_lcldat and rt_lopdat appear twice
#             in the CONCAT and the values are joined without a delimiter. Both are left
#             unchanged on purpose: altering the hash input would change every hash_id
#             and make the MERGE cells treat all plants as changed relative to rows
#             already stored in the target table.
#   main_2 / main_3 - per plant, ordered by proc_date, keep only rows whose hash_id
#             differs from the previous row (or that have no previous row).
#   main_4 / final  - end_date is the day before the next kept row's proc_date for the
#             same plant; rows without a successor get the open-ended date 2400-01-01.
spark.sql("""
CREATE OR REPLACE TEMP VIEW sap_0plant AS
WITH temp AS (
    SELECT * EXCEPT(rn) FROM (
        SELECT *, ROW_NUMBER() OVER (PARTITION BY plant, DATE(proc_date) ORDER BY proc_date, file_creation_ts DESC) AS rn
        FROM (
            SELECT
                plant,
                IF(country = '', NULL, country) AS country,
                IF(distr_chan = '', NULL, distr_chan) AS distr_chan,
                CAST(NULL AS STRING) AS factcal_id,
                latitude,
                IF(loc_currcy = '', NULL, loc_currcy) AS loc_currcy,
                longitude,
                CAST(NULL AS STRING) AS plantcat,
                CAST(NULL AS STRING) AS postal_cd,
                IF(postcd_gis = '', NULL, postcd_gis) AS postcd_gis,
                IF(purch_org = '', NULL, purch_org) AS purch_org,
                IF(region = '', NULL, region) AS region,
                IF(rt_custpl = '', NULL, LTRIM('0', rt_custpl)) AS rt_custpl,
                IF(salesorg = '', NULL, LTRIM('0', salesorg)) AS salesorg,
                IF(sales_dist = '', NULL, sales_dist) AS sales_dist,
                IF(comp_code = '', NULL, LTRIM('0', comp_code)) AS comp_code,
                IF(_bic_zc_sup_rg = '', NULL, _bic_zc_sup_rg) AS _bic_zc_sup_rg,
                IF(_bic_zc_region = '', NULL, _bic_zc_region) AS _bic_zc_region,
                IF(_bic_zc_saunit = '', NULL, _bic_zc_saunit) AS _bic_zc_saunit,
                IF(segment = '', NULL, LTRIM('0', segment)) AS segment,
                _bic_zk_saqty as _bic_zk_saqty,
                _bic_zk_reaqty as _bic_zk_reaqty,
                _bic_zk_numflr as _bic_zk_numflr,
                IF(currency = '', NULL, currency) AS currency,
                _bic_zk_renamt as _bic_zk_renamt,
                IF(_bic_zc_stoloc = '', NULL, _bic_zc_stoloc) AS _bic_zc_stoloc,
                IF(crm_delblk = '', NULL, crm_delblk) AS crm_delblk,
                IF(_bic_zc_catsiz = '', NULL, _bic_zc_catsiz) AS _bic_zc_catsiz,
                IF(_bic_zc_posout = '', NULL, _bic_zc_posout) AS _bic_zc_posout,
                IF(_bic_zc_posin = '', NULL, _bic_zc_posin) AS _bic_zc_posin,
                IF(_bic_zc_rptsgm = '', NULL, LTRIM('0', _bic_zc_rptsgm)) AS _bic_zc_rptsgm,
                IF(_bic_zc_catage = '', NULL, _bic_zc_catage) AS _bic_zc_catage,
                IF(_bic_zc_stosts = '', NULL, _bic_zc_stosts) AS _bic_zc_stosts,
                IF(_bic_zc_kpists = '', NULL, _bic_zc_kpists) AS _bic_zc_kpists,
                IF(street = '', NULL, street) AS street,
                IF(name = '', NULL, name) AS name,
                IF(name2 = '', NULL, name2) AS name2,
                IF(city = '', NULL, city) AS city,
                IF(email_addr = '', NULL, email_addr) AS email_addr,
                IF(addr_numbr = '', NULL, addr_numbr) AS addr_numbr,
                IF(fax_number = '', NULL, fax_number) AS fax_number,
                IF(tel_number = '', NULL, tel_number) AS tel_number,
                IF(_bic_zc_sitel2 = '', NULL, _bic_zc_sitel2) AS _bic_zc_sitel2,
                IF(_bic_zc_sitel3 = '', NULL, _bic_zc_sitel3) AS _bic_zc_sitel3,
                IF(_bic_zc_gplant = '', NULL, _bic_zc_gplant) AS _bic_zc_gplant,
                IF(_bic_zc_dcgr01 = '', NULL, _bic_zc_dcgr01) AS _bic_zc_dcgr01,
                IF(_bic_zc_subrg1 = '', NULL, _bic_zc_subrg1) AS _bic_zc_subrg1,
                proc_date,
                file_creation_ts,
                TO_DATE(IF(rt_lbldatf='00000000', NULL, rt_lbldatf), 'yyyyMMdd') AS rt_lbldatf,
                TO_DATE(IF(rt_lbldatt='00000000', NULL, rt_lbldatt), 'yyyyMMdd') AS rt_lbldatt,
                TO_DATE(IF(rt_lcldat='00000000', NULL, rt_lcldat), 'yyyyMMdd') AS rt_lcldat,
                TO_DATE(IF(rt_lopdat= '00000000', NULL, rt_lopdat), 'yyyyMMdd') AS rt_lopdat,
                TO_DATE(IF(aedat= '00000000', NULL, aedat), 'yyyyMMdd') AS aedat
            FROM temp_sap_0plant
        ) a
    ) a WHERE rn = 1
),
main AS (
    SELECT
        farm_fingerprint(CONCAT(
            IFNULL(CAST(plant AS STRING), ""), 
            IFNULL(CAST(country AS STRING), ""), 
            IFNULL(CAST(distr_chan AS STRING), ""), 
            IFNULL(CAST(factcal_id AS STRING), ""), 
            IFNULL(CASE WHEN CAST(latitude AS DOUBLE) = CAST(CAST(latitude AS DOUBLE) AS BIGINT) THEN CAST(CAST(latitude AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(latitude AS DECIMAL(38,10)))) END, ""), 
            IFNULL(CAST(loc_currcy AS STRING), ""), 
           IFNULL(CASE WHEN CAST(longitude AS DOUBLE) = CAST(CAST(longitude AS DOUBLE) AS BIGINT) THEN CAST(CAST(longitude AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(longitude AS DECIMAL(38,10)))) END, ""), 
            IFNULL(CAST(plantcat AS STRING), ""), 
            IFNULL(CAST(postal_cd AS STRING), ""), 
            IFNULL(CAST(postcd_gis AS STRING), ""), 
            IFNULL(CAST(purch_org AS STRING), ""), 
            IFNULL(CAST(region AS STRING), ""), 
            IFNULL(CAST(rt_custpl AS STRING), ""), 
            IFNULL(CAST(rt_lbldatf AS STRING), ""), 
            IFNULL(CAST(rt_lbldatt AS STRING), ""), 
            IFNULL(CAST(rt_lcldat AS STRING), ""), 
            IFNULL(CAST(rt_lopdat AS STRING), ""), 
            IFNULL(CAST(salesorg AS STRING), ""), 
            IFNULL(CAST(sales_dist AS STRING), ""), 
            IFNULL(CAST(comp_code AS STRING), ""), 
            IFNULL(CAST(_bic_zc_sup_rg AS STRING), ""), 
            IFNULL(CAST(_bic_zc_region AS STRING), ""), 
            IFNULL(CAST(_bic_zc_saunit AS STRING), ""), 
            IFNULL(CAST(segment AS STRING), ""), 
            IFNULL(CASE WHEN CAST(_bic_zk_saqty AS DOUBLE) = CAST(CAST(_bic_zk_saqty AS DOUBLE) AS BIGINT) THEN CAST(CAST(_bic_zk_saqty AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(_bic_zk_saqty AS DECIMAL(38,10)))) END, ""), 
            IFNULL(CASE WHEN CAST(_bic_zk_reaqty AS DOUBLE) = CAST(CAST(_bic_zk_reaqty AS DOUBLE) AS BIGINT) THEN CAST(CAST(_bic_zk_reaqty AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(_bic_zk_reaqty AS DECIMAL(38,10)))) END, ""), 
            IFNULL(CASE WHEN CAST(_bic_zk_numflr AS DOUBLE) = CAST(CAST(_bic_zk_numflr AS DOUBLE) AS BIGINT) THEN CAST(CAST(_bic_zk_numflr AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(_bic_zk_numflr AS DECIMAL(38,10)))) END, ""), 
            IFNULL(CAST(currency AS STRING), ""), 
            IFNULL(CASE WHEN CAST(_bic_zk_renamt AS DOUBLE) = CAST(CAST(_bic_zk_renamt AS DOUBLE) AS BIGINT) THEN CAST(CAST(_bic_zk_renamt AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(_bic_zk_renamt AS DECIMAL(38,10)))) END, ""), 
            IFNULL(CAST(_bic_zc_stoloc AS STRING), ""), 
            IFNULL(CAST(crm_delblk AS STRING), ""), 
            IFNULL(CAST(_bic_zc_catsiz AS STRING), ""), 
            IFNULL(CAST(_bic_zc_posout AS STRING), ""), 
            IFNULL(CAST(_bic_zc_posin AS STRING), ""), 
            IFNULL(CAST(_bic_zc_rptsgm AS STRING), ""), 
            IFNULL(CAST(_bic_zc_catage AS STRING), ""), 
            IFNULL(CAST(_bic_zc_stosts AS STRING), ""), 
            IFNULL(CAST(_bic_zc_kpists AS STRING), ""), 
            IFNULL(CAST(street AS STRING), ""), 
            IFNULL(CAST(name AS STRING), ""), 
            IFNULL(CAST(name2 AS STRING), ""), 
            IFNULL(CAST(city AS STRING), ""), 
            IFNULL(CAST(email_addr AS STRING), ""), 
            IFNULL(CAST(addr_numbr AS STRING), ""), 
            IFNULL(CAST(fax_number AS STRING), ""), 
            IFNULL(CAST(tel_number AS STRING), ""), 
            IFNULL(CAST(aedat AS STRING), ""), 
            IFNULL(CAST(_bic_zc_sitel2 AS STRING), ""), 
            IFNULL(CAST(_bic_zc_sitel3 AS STRING), ""), 
            IFNULL(CAST(_bic_zc_gplant AS STRING), ""), 
            IFNULL(CAST(_bic_zc_dcgr01 AS STRING), ""), 
            IFNULL(CAST(_bic_zc_subrg1 AS STRING), ""), 
            IFNULL(CAST(rt_lbldatf AS STRING), ""), 
            IFNULL(CAST(rt_lbldatt AS STRING), ""), 
            IFNULL(CAST(rt_lcldat AS STRING), ""), 
            IFNULL(CAST(rt_lopdat AS STRING), "")
        )) AS hash_id,
        *,
        DATE(proc_date) AS start_date
    FROM temp
),
main_2 AS (
    SELECT *, LAG(hash_id, 1) OVER (PARTITION BY plant ORDER BY proc_date) AS hash_id_prev
    FROM main
),
main_3 AS (
    SELECT * EXCEPT(hash_id_prev)
    FROM main_2
    WHERE hash_id != hash_id_prev OR hash_id_prev IS NULL
),
main_4 AS (
    SELECT *,
        LEAD(DATE(proc_date)) OVER (PARTITION BY plant ORDER BY proc_date ASC) AS end_date
    FROM main_3
)
SELECT * EXCEPT(end_date),
    IFNULL(DATE_ADD(end_date, -1), DATE('2400-01-01')) AS end_date
FROM main_4
""")

In [0]:
# Close the currently open target row of every plant whose attributes changed.
# Source: the open-ended rows (end_date = '2400-01-01') of the `sap_0plant` temp view,
# reduced to plant, hash_id and start_date.
# A target row matches when it belongs to the same plant, is itself open-ended and has a
# different hash_id; its end_date is then set to the day before the incoming start_date.
# NOTE(review): if the view ever contains several versions of one plant, the target row
# is closed relative to the newest version's start_date rather than the oldest one,
# which would overlap the intermediate versions — confirm that only a single proc_date
# is loaded per run.
spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_0plant inf_tbl
USING (
    SELECT plant, hash_id, start_date 
    FROM sap_0plant
    WHERE end_date = '2400-01-01'
) tmp_tbl
ON inf_tbl.plant = tmp_tbl.plant AND inf_tbl.hash_id != tmp_tbl.hash_id AND inf_tbl.end_date = '2400-01-01'
WHEN MATCHED THEN UPDATE SET inf_tbl.end_date = DATE_SUB(tmp_tbl.start_date, 1) """)

In [0]:
# Insert every row of the `sap_0plant` temp view for which the target table has no
# open-ended row (end_date = '2400-01-01') with the same hash_id.
# There is no WHEN MATCHED clause, so rows that already have such a counterpart are left
# untouched; INSERT * writes all columns of the source row.
spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_0plant inf_tbl
USING sap_0plant tmp_tbl
ON inf_tbl.hash_id = tmp_tbl.hash_id AND inf_tbl.end_date = '2400-01-01'
WHEN NOT MATCHED THEN INSERT * """)