In [0]:
# Declare the `proc_date` notebook widget (empty default) and read its current value.
# Later cells truncate this value to its first 10 characters and use it as the
# date filter on the bronze table.
dbutils.widgets.text("proc_date", "")
proc_date = dbutils.widgets.get("proc_date")

In [0]:
%run ../../../utils/common

In [0]:
# Declare the `environment` notebook widget (empty default, empty label) and read
# its current value; the value is used as the key into `settings` to pick the
# per-environment configuration.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Catalog that prefixes every table name used in this notebook.
# `settings` is not defined in this notebook — presumably it is provided by the
# `%run ../../../utils/common` cell (verify).
# NOTE(review): if `settings` is a plain dict, leaving the `environment` widget
# at its empty default fails here with a KeyError.
catalog_name = settings[environment]['catalog_name']

In [0]:
# Reduce the widget value to its leading `YYYY-MM-DD` part. The result is
# interpolated verbatim into the bronze filter (`DATE(proc_date) = '...'`), so it
# is validated here: an empty or malformed value would otherwise turn into a
# filter that matches nothing, and arbitrary text would end up inside the SQL.
from datetime import datetime as _datetime

proc_date_str = str(proc_date)[:10]
try:
    # Parsing only succeeds for digits and hyphens in year-month-day order.
    _datetime.strptime(proc_date_str, "%Y-%m-%d")
except ValueError as exc:
    raise ValueError(
        f"proc_date must start with a YYYY-MM-DD date, got {proc_date!r}"
    ) from exc


In [0]:
# Create the silver `master_bom` table if it does not exist yet. Because of
# IF NOT EXISTS this statement does nothing once the table is present, so edits
# to the column list or properties below do not alter an already-created table.
# The two table properties request Delta optimized writes and auto compaction.
#
# NOTE(review): base_quantity, bom_component, component_quantity and priority are
# declared FLOAT here, while the `temp_master_bom` view casts them to DOUBLE
# before hashing and merging. Values are therefore narrowed to single precision
# on insert — confirm this is intended, especially for bom_component.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_car.master_bom
(
   sapi_doc_no BIGINT,
   material_no BIGINT,
   bom_usage BIGINT,
   base_uom STRING,
   base_quantity FLOAT,
   bom_component FLOAT,
   component_quantity FLOAT,
   component_uom STRING,
   proc_date TIMESTAMP,
   alt_item_grp STRING,
   priority FLOAT,
   usage BIGINT,
   filename STRING,
   hash_id BIGINT,
   file_creation_ts TIMESTAMP
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
""")

In [0]:
# Build the `temp_master_bom` temp view that the MERGE step reads.
#
# Innermost query: selects the bronze rows whose DATE(proc_date) equals
#   `proc_date_str`, expanding the `line_item` struct and keeping filename and
#   file_creation_ts.
# LATERAL VIEW OUTER EXPLODE: emits one row per element of `bomitem`; OUTER keeps
#   the header row (with NULL item fields) when `bomitem` is empty or NULL.
# Middle query: renames the fields to snake_case, casts the numeric item fields
#   to DOUBLE, turns a blank `altitemgrp` into NULL, and derives `proc_date`
#   from the filename (this replaces the bronze `proc_date` in the output).
#   The 5th underscore-separated token of the filename, minus '.xml', is split
#   on '-' and its first two parts are parsed as yyyyMMddHHmmss.
#   NOTE(review): FROM_UTC_TIMESTAMP treats that timestamp as UTC and shifts it
#   to Asia/Bangkok — confirm the filename stamp really is UTC.
# Outer query: adds `hash_id`, a fingerprint over the business columns. Numeric
#   columns are rendered canonically first: whole numbers as BIGINT text,
#   everything else as DECIMAL(38,10) with trailing zeros trimmed; NULLs become ''.
#   NOTE(review): `farm_fingerprint` is presumably registered by utils/common
#   (verify). CONCAT uses no delimiter, so different column values can yield the
#   same hash input; adding one would change every existing hash_id, so it is
#   left as is.
spark.sql(f"""
CREATE OR REPLACE TEMP VIEW temp_master_bom AS
SELECT 
    *,
    farm_fingerprint(CONCAT(
        COALESCE(CAST(sapi_doc_no AS STRING), ''),
        COALESCE(CAST(material_no AS STRING), ''),
        COALESCE(CAST(bom_usage AS STRING), ''),
        COALESCE(CAST(base_uom AS STRING), ''),
        IFNULL(CASE WHEN CAST(base_quantity AS DOUBLE) = CAST(CAST(base_quantity AS DOUBLE) AS BIGINT) THEN CAST(CAST(base_quantity AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(base_quantity AS DECIMAL(38,10)))) END, ''),
        IFNULL(CASE WHEN CAST(bom_component AS DOUBLE) = CAST(CAST(bom_component AS DOUBLE) AS BIGINT) THEN CAST(CAST(bom_component AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(bom_component AS DECIMAL(38,10)))) END, ''),
        IFNULL(CASE WHEN CAST(component_quantity AS DOUBLE) = CAST(CAST(component_quantity AS DOUBLE) AS BIGINT) THEN CAST(CAST(component_quantity AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(component_quantity AS DECIMAL(38,10)))) END, ''),
        COALESCE(CAST(component_uom AS STRING), ''),
        COALESCE(CAST(alt_item_grp AS STRING), ''),
        IFNULL(CASE WHEN CAST(priority AS DOUBLE) = CAST(CAST(priority AS DOUBLE) AS BIGINT) THEN CAST(CAST(priority AS BIGINT) AS STRING) ELSE TRIM(TRAILING '0' FROM(CAST(priority AS DECIMAL(38,10)))) END, ''),
        COALESCE(CAST(usage AS STRING), '')
    )) AS hash_id
FROM (
    SELECT
        sapidocno AS sapi_doc_no,
        materialno AS material_no,
        bomusage AS bom_usage,
        baseuom AS base_uom,
        CAST(basequantity AS DOUBLE) AS base_quantity,
        CAST(bom_item.bomcomponent AS DOUBLE) AS bom_component,
        CAST(bom_item.componentquantity AS DOUBLE) AS component_quantity,
        bom_item.componentuom AS component_uom,
        FROM_UTC_TIMESTAMP(
            TO_TIMESTAMP(
              CONCAT(
                SPLIT(REPLACE(SPLIT(filename, '_')[4], '.xml', ''), '-')[0],
                SPLIT(REPLACE(SPLIT(filename, '_')[4], '.xml', ''), '-')[1]
              ),
              'yyyyMMddHHmmss'
            ),
            'Asia/Bangkok'
        ) AS proc_date,
        NULLIF(TRIM(bom_item.altitemgrp), '') AS alt_item_grp,
        CAST(bom_item.priority AS DOUBLE) AS priority,
        TRY_CAST(bom_item.usage AS BIGINT) AS usage,
        filename,
        file_creation_ts
    FROM (
        SELECT  
            line_item.*, proc_date, filename,file_creation_ts
        FROM {catalog_name}.udp_wcm_bronze_sap_car.master_bom
        WHERE DATE(proc_date) = '{proc_date_str}'
    ) line_item
    LATERAL VIEW OUTER EXPLODE(bomitem) AS bom_item
) subquery
""")

In [0]:
# Insert-only load of `temp_master_bom` into the silver table: a source row is
# inserted when no target row has the same (hash_id, proc_date); matching rows
# are left untouched, so re-running the notebook for the same date adds nothing.
#
# `proc_date` is compared with the null-safe operator `<=>`. The view derives
# proc_date from the filename, so it is NULL when the filename does not parse;
# with a plain `=` such rows never match and would be inserted again on every run.
#
# NOTE(review): MERGE inserts every unmatched source row, so rows that share the
# same (hash_id, proc_date) inside one batch are all inserted — confirm whether
# the source should be de-duplicated first.
spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_car.master_bom AS target
USING temp_master_bom AS source
ON target.hash_id = source.hash_id AND target.proc_date <=> source.proc_date
WHEN NOT MATCHED THEN
    INSERT *
""")