In [0]:
# Notebook parameter: declare the "proc_date" text widget (empty default) and
# read its current value into `proc_date` for use by the cells below.
_proc_date_widget = "proc_date"
dbutils.widgets.text(_proc_date_widget, "")
proc_date = dbutils.widgets.get(_proc_date_widget)

In [0]:
%run ../../../utils/common

In [0]:
# Notebook parameter: declare the "environment" text widget (empty default) and
# read its current value; it is used as the key into `settings` further down.
_environment_widget = "environment"
dbutils.widgets.text(_environment_widget, "")
environment = dbutils.widgets.get(_environment_widget)

In [0]:
# Resolve the catalog used by every table reference below from the per-environment
# `settings` mapping (presumably defined by the %run of utils/common -- confirm).
# The "environment" widget defaults to an empty string, so a run without parameters
# would otherwise fail with a bare `KeyError: ''`. The same exception type is kept,
# but re-raised with a message that names the offending value.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    raise KeyError(
        f"No 'catalog_name' configured for environment {environment!r}; "
        "check the 'environment' widget value and the settings mapping"
    ) from exc

In [0]:
# Stage the source data for this run under the name `temp_sap_pcustomer`, which the
# transformation cell below selects from.
# NOTE(review): the helper is defined outside this notebook; the argument roles
# (schema, table, processing date, view name) are inferred from the literal values
# passed here -- confirm against its definition in utils/common.
bronze_schema = 'udp_wcm_bronze_sap_bw'
bronze_table = 'sap_bw_wcm_pcustomer'
staging_view = 'temp_sap_pcustomer'

create_temp_view_with_clean_columns(
    spark, catalog_name, bronze_schema, bronze_table, proc_date, staging_view
)

In [0]:
# DDL for the silver target table. IF NOT EXISTS makes this a no-op when the table
# is already present, so the cell can be re-run on every execution of the notebook.
# All attributes are STRING except hash_id (BIGINT), the three coordinate columns
# (FLOAT), proc_date (DATE) and file_creation_ts (TIMESTAMP).
create_table_sql = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_pcustomer
( hash_id BIGINT,
  customer STRING,
  objvers STRING,
  changed STRING,
  accnt_grp STRING,
  addr_numbr STRING,
  af_custdc STRING,
  af_custid STRING,
  altitude FLOAT,
  apo_locno STRING,
  bpartner STRING,
  city STRING,
  city_2 STRING,
  country STRING,
  cust_class STRING,
  cust_mkt STRING,
  cus_f_cons STRING,
  db87_sic1 STRING,
  dbansales STRING,
  dbareacod STRING,
  dbctrycod STRING,
  dbctynam STRING,
  dbcustat STRING,
  dbdistrcod STRING,
  dbduns_num STRING,
  dbempbnd STRING,
  dbemptot STRING,
  dbguduns STRING,
  dbhqduns STRING,
  dblglstat STRING,
  dblocacod STRING,
  dbregcod STRING,
  dbsalbnd STRING,
  dbsectcod STRING,
  dbyrsbnd STRING,
  dbyrstd STRING,
  db_maktcod STRING,
  db_matcds STRING,
  fax_num STRING,
  fiscvarnt STRING,
  id_txnumb3 STRING,
  id_xcpd STRING,
  industry STRING,
  ind_code_1 STRING,
  ind_code_2 STRING,
  ind_code_3 STRING,
  ind_code_4 STRING,
  ind_code_5 STRING,
  keyaccount STRING,
  langu STRING,
  latitude FLOAT,
  logsys STRING,
  longitude FLOAT,
  name STRING,
  name2 STRING,
  name3 STRING,
  nielsen_id STRING,
  outl_type STRING,
  pcompany STRING,
  phone STRING,
  plant STRING,
  pobox STRING,
  pobox_loc STRING,
  postal_cd STRING,
  postcd_box STRING,
  postcd_gis STRING,
  precisid STRING,
  region STRING,
  sortl STRING,
  srcid STRING,
  street STRING,
  tax_numb STRING,
  tax_numb2 STRING,
  usage_ind STRING,
  vendor STRING,
  visit_ryt STRING,
  dbtrdstyl STRING,
  dbaddres1 STRING,
  dbaddres2 STRING,
  dbctryna STRING,
  dbstprcod STRING,
  dbpostcod STRING,
  dbnatid STRING,
  dbnatidco STRING,
  dbtelphon STRING,
  dbceoname STRING,
  dblnofbus STRING,
  dbanslus STRING,
  dbcurncco STRING,
  dbemphere STRING,
  dbsubcode STRING,
  dbhqname STRING,
  dbhqctynm STRING,
  dbhqstpr STRING,
  dbhqctryn STRING,
  dbulduns STRING,
  dbulname STRING,
  dbulctynm STRING,
  dbguctryn STRING,
  dbguctryc STRING,
  dbguname STRING,
  dbnumfam STRING,
  dbhierco STRING,
  dbfamupdt STRING,
  dbbusname STRING,
  db_match STRING,
  vat_reg_no STRING,
  proc_date DATE,
  file_creation_ts TIMESTAMP
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
 
"""
spark.sql(create_table_sql)

In [0]:
# Build the deduplicated staging view `sap_pcustomer` from `temp_sap_pcustomer`.
#   * hash_id is farm_fingerprint over the concatenated raw (uncleaned) customer and
#     objvers values, with NULLs coalesced to ''.
#   * String attributes are passed through {catalog_name}.default.fn_CLEAN_SAP_STRING;
#     altitude, latitude, longitude, proc_date and file_creation_ts are selected as-is.
#   * One row is kept per (customer, objvers): the row with the latest proc_date and,
#     among rows sharing that proc_date, the latest file_creation_ts. The second sort
#     key is the fix here: with proc_date alone, the surviving row among same-date
#     duplicates was arbitrary and could differ between runs. Rows that tie on both
#     keys are still picked arbitrarily.
# NOTE(review): the hash input concatenates customer and objvers without a separator,
# so ('AB', 'C') and ('A', 'BC') produce the same hash_id. Left unchanged because a
# different hash input would no longer match hash_id values already in the target
# table -- confirm whether such collisions are possible for this data.
spark.sql(f"""
    CREATE OR REPLACE TEMP VIEW sap_pcustomer AS
    WITH dat AS (
        SELECT 
            farm_fingerprint(CONCAT(
                COALESCE(CAST(customer AS STRING), ''), 
                COALESCE(CAST(objvers AS STRING), '')
            )) AS hash_id,
            ROW_NUMBER() OVER (
                PARTITION BY customer, objvers
                ORDER BY proc_date DESC, file_creation_ts DESC
            ) AS rn,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(customer) AS customer,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(objvers) AS objvers,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(changed) AS changed,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(accnt_grp) AS accnt_grp,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(addr_numbr) AS addr_numbr,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(af_custdc) AS af_custdc,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(af_custid) AS af_custid,
            altitude AS altitude,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(apo_locno) AS apo_locno,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(bpartner) AS bpartner,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(city) AS city,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(city_2) AS city_2,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(country) AS country,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(cust_class) AS cust_class,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(cust_mkt) AS cust_mkt,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(cus_f_cons) AS cus_f_cons,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(db87_sic1) AS db87_sic1,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbansales) AS dbansales,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbareacod) AS dbareacod,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbctrycod) AS dbctrycod,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbctynam) AS dbctynam,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbcustat) AS dbcustat,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbdistrcod) AS dbdistrcod,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbduns_num) AS dbduns_num,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbempbnd) AS dbempbnd,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbemptot) AS dbemptot,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbguduns) AS dbguduns,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbhqduns) AS dbhqduns,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dblglstat) AS dblglstat,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dblocacod) AS dblocacod,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbregcod) AS dbregcod,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbsalbnd) AS dbsalbnd,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbsectcod) AS dbsectcod,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbyrsbnd) AS dbyrsbnd,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbyrstd) AS dbyrstd,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(db_maktcod) AS db_maktcod,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(db_matcds) AS db_matcds,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(fax_num) AS fax_num,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(fiscvarnt) AS fiscvarnt,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(id_txnumb3) AS id_txnumb3,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(id_xcpd) AS id_xcpd,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(industry) AS industry,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(ind_code_1) AS ind_code_1,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(ind_code_2) AS ind_code_2,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(ind_code_3) AS ind_code_3,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(ind_code_4) AS ind_code_4,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(ind_code_5) AS ind_code_5,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(keyaccount) AS keyaccount,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(langu) AS langu,
            latitude AS latitude,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(logsys) AS logsys,
            longitude AS longitude,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(name) AS name,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(name2) AS name2,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(name3) AS name3,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(nielsen_id) AS nielsen_id,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(outl_type) AS outl_type,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(pcompany) AS pcompany,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(phone) AS phone,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(plant) AS plant,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(pobox) AS pobox,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(pobox_loc) AS pobox_loc,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(postal_cd) AS postal_cd,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(postcd_box) AS postcd_box,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(postcd_gis) AS postcd_gis,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(precisid) AS precisid,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(region) AS region,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(sortl) AS sortl,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(srcid) AS srcid,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(street) AS street,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(tax_numb) AS tax_numb,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(tax_numb2) AS tax_numb2,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(usage_ind) AS usage_ind,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(vendor) AS vendor,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(visit_ryt) AS visit_ryt,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbtrdstyl) AS dbtrdstyl,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbaddres1) AS dbaddres1,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbaddres2) AS dbaddres2,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbctryna) AS dbctryna,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbstprcod) AS dbstprcod,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbpostcod) AS dbpostcod,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbnatid) AS dbnatid,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbnatidco) AS dbnatidco,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbtelphon) AS dbtelphon,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbceoname) AS dbceoname,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dblnofbus) AS dblnofbus,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbanslus) AS dbanslus,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbcurncco) AS dbcurncco,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbemphere) AS dbemphere,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbsubcode) AS dbsubcode,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbhqname) AS dbhqname,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbhqctynm) AS dbhqctynm,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbhqstpr) AS dbhqstpr,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbhqctryn) AS dbhqctryn,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbulduns) AS dbulduns,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbulname) AS dbulname,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbulctynm) AS dbulctynm,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbguctryn) AS dbguctryn,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbguctryc) AS dbguctryc,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbguname) AS dbguname,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbnumfam) AS dbnumfam,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbhierco) AS dbhierco,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbfamupdt) AS dbfamupdt,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(dbbusname) AS dbbusname,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(db_match) AS db_match,
            {catalog_name}.default.fn_CLEAN_SAP_STRING(vat_reg_no) AS vat_reg_no,
            proc_date,
            file_creation_ts
        FROM temp_sap_pcustomer
    )
    SELECT
    * EXCEPT(RN) FROM dat WHERE rn = 1
""")

In [0]:
# Load the staged rows into the silver table, keyed on hash_id. The statement is
# insert-only: a staged row is inserted when its hash_id is absent from the target,
# and rows whose hash_id already exists are left untouched (no WHEN MATCHED clause).
# NOTE(review): because matched rows are never updated, later attribute changes for an
# existing (customer, objvers) key do not reach the target -- confirm this is intended.
merge_sql = f"""
MERGE INTO {catalog_name}.udp_wcm_silver_sap_bw.sap_pcustomer main_tbl
USING sap_pcustomer temp_tbl 
ON main_tbl.hash_id = temp_tbl.hash_id
WHEN NOT MATCHED THEN INSERT *
"""
spark.sql(merge_sql)