In [0]:
# Notebook parameter "layer" (text widget, default ""), read into `layer`.
# NOTE(review): `layer` is not referenced by any other cell visible in this
# notebook; it may be consumed by the notebook pulled in via %run below
# (which shares this namespace) -- confirm before removing.
dbutils.widgets.text("layer","")
layer = dbutils.widgets.get("layer")

In [0]:
%run ../../../utils/common

In [0]:
# Notebook parameter "environment" (text widget, default "", empty label).
# Its value is used as the key into `settings` when resolving the catalog name.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Resolve the catalog name for the selected environment from the `settings`
# mapping (not defined in this notebook; presumably provided by utils/common).
# The "environment" widget defaults to "", so a run that omits the parameter
# would otherwise die with a bare `KeyError: ''`. Re-raise the same exception
# type with a message that says what to fix.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    raise KeyError(
        f"No 'catalog_name' configured for environment {environment!r}; "
        "set the 'environment' widget to a key present in settings"
    ) from exc

In [0]:
# First 10 characters of proc_date's string form.
# NOTE(review): `proc_date` is not defined in this notebook (presumably set by
# utils/common); if it is a date/timestamp this yields 'YYYY-MM-DD' -- confirm.
# `proc_date_str` is not referenced by any later cell visible here.
proc_date_str = str(proc_date)[:10]

In [0]:
# Create the silver customers_master table in the resolved catalog on first run;
# IF NOT EXISTS makes this a no-op when the table is already there.
# All customer attributes are STRING; proc_date, filename, hash_id and
# file_creation_ts are populated by the staging view / MERGE further down.
# NOTE(review): the table-property keys are written in upper case; the documented
# spelling is 'delta.autoOptimize.optimizeWrite' / 'delta.autoOptimize.autoCompact'.
# Confirm with SHOW TBLPROPERTIES that the upper-case keys are honoured.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_erp.customers_master
(
  id STRING,
  name STRING,
  type STRING,
  phone_no STRING,
  mobile STRING,
  province_id STRING,
  district_id STRING,
  ward_id STRING,
  street_id STRING,
  house_no STRING,
  address STRING,
  contact_name STRING,
  region STRING,
  channel STRING,
  sales_district STRING,
  search_term1 STRING,
  search_term2 STRING,
  sales_organization STRING,
  distribution_channel STRING,
  division STRING,
  price_list STRING,
  price_group STRING,
  customer_group STRING,
  partner_function STRING,
  customer_partner STRING,
  address_number STRING,
  proc_date TIMESTAMP,
  filename STRING,
  hash_id BIGINT,
  file_creation_ts TIMESTAMP
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
"""
)

In [0]:
# Stage the bronze table udp_wcm_bronze_sap_erp.sap_customers_master as the temp
# view temp_sap_customers_master, which the next cell selects from.
# NOTE(review): create_temp_view_with_clean_columns is not defined in this
# notebook (presumably from utils/common). Judging by its name and arguments it
# restricts the bronze data to `proc_date` and normalises column names -- confirm.
create_temp_view_with_clean_columns(
    spark,
    catalog_name,
    'udp_wcm_bronze_sap_erp',
    'sap_customers_master',
    proc_date,
    'temp_sap_customers_master'
)

In [0]:
# Build temp_customer_master, the flattened customer rows that the final MERGE
# cell loads into the silver table. Reading the query inside-out:
#   * Innermost SELECT: expands customer_master.* and carries proc_date, filename
#     and file_creation_ts from temp_sap_customers_master.
#   * Middle SELECT: explodes the salesdata and partnerroles arrays (one output
#     row per salesdata x partnerroles combination), casts the attributes to
#     STRING under snake_case names, strips leading zeros from customer_partner
#     and address_number, and derives proc_date from the file name: the 4th
#     '_'-separated token, minus '.xml', with its first two '-'-separated parts
#     concatenated and parsed as yyyyMMddHHmmss.
#   * Outer SELECT: adds hash_id = farm_fingerprint over the concatenation of the
#     26 attribute columns, with NULLs replaced by ''.
# NOTE(review): the CONCAT has no separator between fields, so different field
#   splits can produce the same hash input (e.g. phone_no='1', mobile=NULL vs
#   phone_no=NULL, mobile='1'). Adding a delimiter would change every stored
#   hash_id, so it needs a coordinated backfill rather than an in-place edit.
# NOTE(review): LATERAL VIEW EXPLODE (without OUTER) emits no row for a customer
#   whose salesdata or partnerroles array is NULL or empty -- confirm that
#   dropping such customers is intended.
# NOTE(review): `housen0` is spelled with a digit zero, unlike the neighbouring
#   `phoneno`-style names -- confirm it matches the cleaned bronze column name.
# NOTE(review): farm_fingerprint is not defined in this notebook; presumably a
#   function registered by utils/common -- confirm.
# NOTE(review): the string has no {placeholders}, so the f prefix is not needed.
spark.sql(f"""
CREATE OR REPLACE TEMP VIEW temp_customer_master AS
        SELECT *,
        farm_fingerprint(CONCAT(
            COALESCE(CAST(id AS STRING), ''),
            COALESCE(CAST(name AS STRING), ''),
            COALESCE(CAST(type AS STRING), ''),
            COALESCE(CAST(phone_no AS STRING), ''),
            COALESCE(CAST(mobile AS STRING), ''),
            COALESCE(CAST(province_id AS STRING), ''),
            COALESCE(CAST(district_id AS STRING), ''),
            COALESCE(CAST(ward_id AS STRING), ''),
            COALESCE(CAST(street_id AS STRING), ''),
            COALESCE(CAST(house_no AS STRING), ''),
            COALESCE(CAST(address AS STRING), ''),
            COALESCE(CAST(contact_name AS STRING), ''),
            COALESCE(CAST(region AS STRING), ''),
            COALESCE(CAST(channel AS STRING), ''),
            COALESCE(CAST(sales_district AS STRING), ''),
            COALESCE(CAST(search_term1 AS STRING), ''),
            COALESCE(CAST(search_term2 AS STRING), ''),
            COALESCE(CAST(sales_organization AS STRING), ''),
            COALESCE(CAST(distribution_channel AS STRING), ''),
            COALESCE(CAST(division AS STRING), ''),
            COALESCE(CAST(price_list AS STRING), ''),
            COALESCE(CAST(price_group AS STRING), ''),
            COALESCE(CAST(customer_group AS STRING), ''),
            COALESCE(CAST(partner_function AS STRING), ''),
            COALESCE(CAST(customer_partner AS STRING), ''),
            COALESCE(CAST(address_number AS STRING), '')
    )) AS hash_id 
    FROM (
            SELECT 
                id,
                CAST(name AS STRING) AS name,
                CAST(type AS STRING) AS type,
                CAST(phoneno AS STRING) AS phone_no,
                CAST(mobile AS STRING) AS mobile,
                CAST(provinceid AS STRING) AS province_id,
                CAST(districtid AS STRING) AS district_id,
                CAST(wardid AS STRING) AS ward_id,
                CAST(streetid AS STRING) AS street_id,
                CAST(housen0 AS STRING) AS house_no,
                CAST(address AS STRING) AS address,
                CAST(contactname AS STRING) AS contact_name,
                CAST(region AS STRING) AS region,
                CAST(channel AS STRING) AS channel,
                CAST(salesdistrict AS STRING) AS sales_district,
                CAST(searchterm1 AS STRING) AS search_term1,
                CAST(searchterm2 AS STRING) AS search_term2,
                CAST(salesdatatable.salesdata.salesorganization AS STRING) AS sales_organization,
                CAST(salesdatatable.salesdata.distributionchannel AS STRING) AS distribution_channel,
                CAST(salesdatatable.salesdata.division AS STRING) AS division,
                CAST(salesdatatable.salesdata.pricelist AS STRING) AS price_list,
                CAST(salesdatatable.salesdata.pricegroup AS STRING) AS price_group,
                CAST(salesdatatable.salesdata.customergroup AS STRING) AS customer_group,
                CAST(partnerrolestable.partnerroles.partnerfunction AS STRING) AS partner_function,
                REGEXP_REPLACE(partnerrolestable.partnerroles.customerpartner, '^0+', '') AS customer_partner,
                REGEXP_REPLACE(addressnumber, '^0+', '') AS address_number,
                TO_TIMESTAMP(
                CONCAT(
                SPLIT(REPLACE(SPLIT(filename, '_')[3], '.xml', ''), '-')[0],
                SPLIT(REPLACE(SPLIT(filename, '_')[3], '.xml', ''), '-')[1]
              ),
              'yyyyMMddHHmmss'
            ) AS proc_date,
            filename,
            file_creation_ts
            FROM (
                SELECT  
                    customer_master.*, proc_date,filename,file_creation_ts
                FROM temp_sap_customers_master
            )
            LATERAL VIEW EXPLODE(salesdata) salesdatatable AS salesdata
            LATERAL VIEW EXPLODE(partnerroles) partnerrolestable AS partnerroles 
        )
    """)

In [0]:
# Append-only load of the staged rows into the silver customers_master table.
# A source row is inserted only when the target has no row with the same
# (hash_id, proc_date) pair; matched rows are left untouched, so re-running the
# notebook for an already-loaded file inserts nothing.
# proc_date is compared with `<=>` (null-safe equality) rather than `=`:
# the staging view parses proc_date out of the file name, and a file name that
# does not fit the expected pattern can yield NULL. With plain `=`, NULL never
# compares equal, so such rows would never match their previously loaded copy
# and would be inserted again on every run.
spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_erp.customers_master AS target
USING temp_customer_master AS source
ON target.hash_id = source.hash_id AND target.proc_date <=> source.proc_date
WHEN NOT MATCHED THEN
    INSERT *
""")