In [0]:
# Notebook parameter: declare a free-text widget named "proc_date" with an
# empty default, then read whatever value the caller/job supplied.
# The value is later truncated to 10 characters and used as a date filter.
dbutils.widgets.text("proc_date", "")
proc_date = dbutils.widgets.get("proc_date")

In [0]:
%run ../../../utils/common

In [0]:
# Notebook parameter: declare a free-text widget named "environment" (empty
# default, empty label) and read its value. It is used below as the key into
# `settings` to pick the target catalog.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Resolve the catalog for the selected environment.
# NOTE(review): `settings` is not defined in this notebook's visible cells;
# presumably it is provided by the `%run ../../../utils/common` cell — confirm.
# A blank or unknown `environment` raises KeyError here.
catalog_name = settings[environment]['catalog_name']

In [0]:
# Keep only the first 10 characters of the parameter
# (presumably the `yyyy-MM-dd` part of a date/timestamp string — TODO confirm
# the format callers pass).
# NOTE(review): this value is interpolated unvalidated into a SQL string
# literal further down; consider validating it as a date before use.
proc_date_str = str(proc_date)[:10]

In [0]:
# Create the silver table `udp_wcm_silver_sap_erp.purchasing_price` in the
# selected catalog if it does not exist yet (no-op when it already exists, so
# an existing table with a different schema is NOT altered).
#
# Columns, as consumed by the later cells of this notebook:
#   * hash_value  - fingerprint of the identifying attributes of a price record
#   * hash_id     - fingerprint of the identifying attributes plus price/validity
#                   fields; a change in hash_id for the same hash_value is
#                   treated as a new version
#   * start_date / end_date - validity window of a version; the open-ended
#                   (current) version carries end_date = 2400-01-01
#   * proc_date   - timestamp derived from the source file name
# The table properties turn on optimized writes and auto compaction.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_erp.purchasing_price
(
  hash_value BIGINT,
  hash_id BIGINT,
  warehouse_code STRING,
  merchant_sku STRING,
  sell_price Double,
  price_from DATE,
  price_to DATE,
  delete_indicator STRING,
  old_merchant_sku STRING,
  barcode STRING,
  condition_type STRING,
  promo_md_ind STRING,
  purchasing_group STRING,
  supply_region STRING,
  cond_table STRING,
  vendor STRING,
  vendor_subrange STRING,
  site_region STRING,
  info_type STRING,
  purchasing_org STRING,
  purchasing_unit STRING,
  customer STRING,
  customer_group STRING,
  promotion STRING,
  proc_date TIMESTAMP,
  start_date DATE,
  end_date DATE,
  file_creation_ts TIMESTAMP
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
""")

In [0]:
# Build the temp view `purchasing_price`: the versioned (start_date/end_date)
# form of the bronze records selected by `proc_date_str`.
#
# Stages (CTEs), in order:
#   temp     - explodes the `selling_price` array of the bronze table and
#              normalises each element: casts to STRING/DOUBLE, turns empty
#              strings into NULL, maps '00000000'/'' dates to NULL and parses
#              the rest as yyyyMMdd, upper-cases delete_indicator and
#              purchasing_unit. LTRIM('0', x) uses the two-argument
#              (trimStr, str) form, i.e. it is meant to strip leading zeros.
#              `proc_date` is rebuilt from the file name: the 5th '_'-separated
#              token, with '.xml' removed, is split on '-' and its first two
#              parts are concatenated and parsed as yyyyMMddHHmmss, then
#              shifted with FROM_UTC_TIMESTAMP(..., 'Asia/Bangkok').
#              NOTE(review): the WHERE clause filters on `proc_date`; this
#              presumably resolves to the bronze table's own column rather than
#              the alias computed in the SELECT list — confirm.
#   deduped  - keeps one row (rn = 1) per combination of all attributes,
#              calendar day of proc_date, price_from, price_to, filename and
#              file_creation_ts.
#   main     - adds hash_value (identifying attributes only) and hash_id
#              (identifying attributes + sell_price, price_from, price_to,
#              delete_indicator), plus start_date = DATE(proc_date).
#              NOTE(review): `farm_fingerprint` is not defined in this cell;
#              presumably a UDF registered by utils/common — confirm.
#              NOTE(review): the fields are concatenated without a delimiter,
#              so different field splits can yield the same hash input
#              (e.g. 'ab' + 'c' vs 'a' + 'bc'). Changing this would change all
#              stored hashes, so it is only flagged here.
#   main_2/3 - per hash_value ordered by proc_date, drops rows whose hash_id
#              equals the immediately preceding row's hash_id (no change).
#   main_4   - end_date candidate = calendar day of the next remaining row.
# Final select: end_date = next version's day minus one, or 2400-01-01 for the
# last (open) version. The helper columns filename, rn and hash_id_prev are
# still present in the view's output.
spark.sql(f"""
          CREATE OR REPLACE TEMP VIEW purchasing_price AS
    WITH temp AS (
        SELECT
            CAST(selling_price_tbl.warehousecode AS STRING) AS warehouse_code,
            LTRIM('0', CAST(selling_price_tbl.merchantsku AS STRING)) AS merchant_sku,
            CAST(selling_price_tbl.sellprice AS DOUBLE) AS sell_price,
            CASE
                WHEN selling_price_tbl.pricefrom IN ('00000000', '') THEN NULL
                ELSE TO_DATE(selling_price_tbl.pricefrom, 'yyyyMMdd')
            END AS price_from,
            CASE
                WHEN selling_price_tbl.priceto IN ('00000000', '') THEN NULL
                ELSE TO_DATE(selling_price_tbl.priceto, 'yyyyMMdd')
            END AS price_to,
            UPPER(NULLIF(selling_price_tbl.delete_indicator, '')) AS delete_indicator,
            CAST(selling_price_tbl.oldmerchantsku AS STRING) AS old_merchant_sku,
            NULLIF(selling_price_tbl.barcode, '') AS barcode,
            NULLIF(selling_price_tbl.conditiontype, '') AS condition_type,
            NULLIF(selling_price_tbl.promo_md_ind, '') AS promo_md_ind,
            NULLIF(selling_price_tbl.purchasinggroup, '') AS purchasing_group,
            NULLIF(selling_price_tbl.supplyregion, '') AS supply_region,
            LTRIM('0', CAST(selling_price_tbl.condtable AS STRING)) AS cond_table,
            NULLIF(selling_price_tbl.vendor, '') AS vendor,
            NULLIF(selling_price_tbl.vendorsubrange, '') AS vendor_subrange,
            NULLIF(selling_price_tbl.siteregion, '') AS site_region,
            CAST(selling_price_tbl.infotype AS STRING) AS info_type,
            CAST(selling_price_tbl.purchasingorg AS STRING) AS purchasing_org,
            UPPER(NULLIF(selling_price_tbl.purchaseunit, '')) AS purchasing_unit,
            NULLIF(selling_price_tbl.customer, '') AS customer,
            NULLIF(selling_price_tbl.customergroup, '') AS customer_group,
            LTRIM('0', CAST(selling_price_tbl.promotion AS STRING)) AS promotion,
            FROM_UTC_TIMESTAMP(
            TO_TIMESTAMP(
              CONCAT(
                SPLIT(REPLACE(SPLIT(filename, '_')[4], '.xml', ''), '-')[0],
                SPLIT(REPLACE(SPLIT(filename, '_')[4], '.xml', ''), '-')[1]
              ),
              'yyyyMMddHHmmss'
            ),
            'Asia/Bangkok'
            ) AS proc_date,
            filename,
            file_creation_ts
        FROM {catalog_name}.udp_wcm_bronze_sap_erp.purchasing_price 
        LATERAL VIEW EXPLODE(selling_price) AS selling_price_tbl
        WHERE DATE(proc_date) = '{proc_date_str}'
    ),
    deduped AS (
        SELECT * FROM (
            SELECT *, 
                   ROW_NUMBER() OVER (
                       PARTITION BY warehouse_code, merchant_sku, old_merchant_sku, barcode, condition_type,
                                    promo_md_ind, purchasing_group, supply_region, cond_table, vendor,
                                    vendor_subrange, site_region, info_type, purchasing_org, purchasing_unit,
                                    customer, customer_group, promotion, DATE(proc_date), price_from, price_to,filename,file_creation_ts
                       ORDER BY proc_date DESC
                   ) AS rn
            FROM temp
        ) t
        WHERE rn = 1
    ),
    main AS (
        SELECT
            farm_fingerprint(
                CONCAT(
                    COALESCE(warehouse_code, ''),
                    COALESCE(merchant_sku, ''),
                    COALESCE(old_merchant_sku, ''),
                    COALESCE(barcode, ''),
                    COALESCE(condition_type, ''),
                    COALESCE(promo_md_ind, ''),
                    COALESCE(purchasing_group, ''),
                    COALESCE(supply_region, ''),
                    COALESCE(cond_table, ''),
                    COALESCE(vendor, ''),
                    COALESCE(vendor_subrange, ''),
                    COALESCE(site_region, ''),
                    COALESCE(info_type, ''),
                    COALESCE(purchasing_org, ''),
                    COALESCE(purchasing_unit, ''),
                    COALESCE(customer, ''),
                    COALESCE(customer_group, ''),
                    COALESCE(promotion, '')
                )
            ) AS hash_value,
            farm_fingerprint(
                CONCAT(
                    COALESCE(warehouse_code, ''),
                    COALESCE(merchant_sku, ''),
                    COALESCE(CAST(sell_price AS STRING), ''),
                    COALESCE(CAST(price_from AS STRING), ''),
                    COALESCE(CAST(price_to AS STRING), ''),
                    COALESCE(delete_indicator, ''),
                    COALESCE(old_merchant_sku, ''),
                    COALESCE(barcode, ''),
                    COALESCE(condition_type, ''),
                    COALESCE(promo_md_ind, ''),
                    COALESCE(purchasing_group, ''),
                    COALESCE(supply_region, ''),
                    COALESCE(cond_table, ''),
                    COALESCE(vendor, ''),
                    COALESCE(vendor_subrange, ''),
                    COALESCE(site_region, ''),
                    COALESCE(info_type, ''),
                    COALESCE(purchasing_org, ''),
                    COALESCE(purchasing_unit, ''),
                    COALESCE(customer, ''),
                    COALESCE(customer_group, ''),
                    COALESCE(promotion, '')
                )
            ) AS hash_id,
            *,
            DATE(proc_date) AS start_date
        FROM deduped
    ),
    main_2 AS (
        SELECT *,
               LAG(hash_id, 1) OVER (PARTITION BY hash_value ORDER BY proc_date) AS hash_id_prev
        FROM main
    ),
    main_3 AS (
        SELECT * 
        FROM main_2 
        WHERE hash_id != hash_id_prev OR hash_id_prev IS NULL
    ),
    main_4 AS (
        SELECT *,
               LEAD(DATE(proc_date), 1) OVER (PARTITION BY hash_value ORDER BY proc_date ASC) AS end_date
        FROM main_3
    )
    SELECT 
        * EXCEPT(end_date),
        COALESCE(DATE_SUB(end_date, 1), DATE('2400-01-01')) AS end_date
    FROM main_4
""")

In [0]:
# Step 1 of the versioned load: close the currently open silver rows that are
# superseded by this batch.
#
# Source: the open-ended rows of the batch view (end_date = 2400-01-01).
# A silver row is updated when it is itself open (end_date = 2400-01-01),
# shares the same hash_value, and has a different hash_id, i.e. the record's
# tracked content changed. Its end_date becomes the day before the new
# version's start_date.
# Silver rows whose hash_id equals the batch row's hash_id are left untouched.
spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_erp.purchasing_price inf_tbl
    USING (
        SELECT hash_value, hash_id, start_date 
        FROM purchasing_price
        WHERE end_date = DATE '2400-01-01' 
    ) tmp_tbl
    ON inf_tbl.hash_value = tmp_tbl.hash_value 
        AND inf_tbl.hash_id != tmp_tbl.hash_id 
        AND inf_tbl.end_date = DATE '2400-01-01'
    WHEN MATCHED THEN UPDATE SET inf_tbl.end_date = DATE_SUB(tmp_tbl.start_date, 1)
    """)

In [0]:
# Step 2 of the versioned load: insert the batch rows that are not yet present
# in the silver table. A batch row counts as "already present" when a silver
# row has the same hash_id AND the same proc_date, so re-running the same
# batch does not insert duplicates.
#
# The insert lists the silver columns explicitly instead of using `INSERT *`:
# the `purchasing_price` temp view also carries helper columns (filename, rn,
# hash_id_prev) that are not part of the silver schema. With `INSERT *` those
# would be added to the silver table whenever Delta schema evolution is
# enabled; the explicit list keeps the write independent of that setting.
# The list mirrors the CREATE TABLE statement of this notebook, in order.
purchasing_price_columns = [
    "hash_value",
    "hash_id",
    "warehouse_code",
    "merchant_sku",
    "sell_price",
    "price_from",
    "price_to",
    "delete_indicator",
    "old_merchant_sku",
    "barcode",
    "condition_type",
    "promo_md_ind",
    "purchasing_group",
    "supply_region",
    "cond_table",
    "vendor",
    "vendor_subrange",
    "site_region",
    "info_type",
    "purchasing_org",
    "purchasing_unit",
    "customer",
    "customer_group",
    "promotion",
    "proc_date",
    "start_date",
    "end_date",
    "file_creation_ts",
]
purchasing_price_insert_cols = ", ".join(purchasing_price_columns)
purchasing_price_insert_vals = ", ".join(
    f"tmp_tbl.{column}" for column in purchasing_price_columns
)

spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_erp.purchasing_price inf_tbl
    USING purchasing_price tmp_tbl
    ON inf_tbl.hash_id = tmp_tbl.hash_id
        AND inf_tbl.proc_date = tmp_tbl.proc_date
    WHEN NOT MATCHED THEN INSERT ({purchasing_price_insert_cols})
        VALUES ({purchasing_price_insert_vals})
""")