In [0]:
# Notebook parameter "proc_date": free-text widget with an empty default.
# The value is read back as a string and forwarded to
# create_temp_view_with_clean_columns further down.
# NOTE(review): presumably a 'yyyy-MM-dd' date string -- confirm with the caller.
PROC_DATE_WIDGET = "proc_date"
dbutils.widgets.text(PROC_DATE_WIDGET, "")
proc_date = dbutils.widgets.get(PROC_DATE_WIDGET)

In [0]:
%run ../../../utils/common

In [0]:
# Notebook parameter "environment": free-text widget with an empty default and
# an empty label. The value is used as the key into `settings` to pick the
# catalog for this run.
ENVIRONMENT_WIDGET = "environment"
dbutils.widgets.text(ENVIRONMENT_WIDGET, "", "")
environment = dbutils.widgets.get(ENVIRONMENT_WIDGET)

In [0]:
# Resolve the catalog for the selected environment.
# `settings` is not defined in this notebook -- presumably provided by the
# `%run ../../../utils/common` cell (TODO confirm). An unknown or empty
# `environment` fails here on the lookup.
environment_settings = settings[environment]
catalog_name = environment_settings['catalog_name']

In [0]:
# Expose the bronze SAP BW table to the SQL cells below.
# The helper is not defined in this notebook (presumably comes from the
# `%run ../../../utils/common` cell -- TODO confirm its exact contract).
# The later view definition reads FROM temp_sap_vccmmim52, so this call is
# expected to register a temp view under that name.
bronze_schema = 'udp_wcm_bronze_sap_bw'
bronze_table = 'sap_bw_wcm_vccmmim52'
bronze_temp_view = 'temp_sap_vccmmim52'

create_temp_view_with_clean_columns(
    spark, catalog_name, bronze_schema, bronze_table, proc_date, bronze_temp_view
)

In [0]:
# Create the silver target table on first run; a no-op when it already exists
# (CREATE TABLE IF NOT EXISTS), so schema changes made here do not reach an
# existing table.
# NOTE(review): the source view built later in this notebook also emits
# reqtsn, datapakid and record, which are not declared here, and it produces
# zk_ltstgr as SUM(CAST(... AS BIGINT)) while this DDL declares INT --
# confirm both are intended.
silver_table_ddl = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdmmim02 
(
   hash_id BIGINT,
   stor_loc STRING,
   comp_code STRING,
   zc_ltstgr DATE,
   material STRING,
   plant STRING,
   stockcat STRING,
   vendor STRING,
   customer STRING,
   dataorigin STRING,
   movetype STRING,
   calday DATE,
   upd_date DATE,
   loc_currcy STRING,
   zk_tp_val DOUBLE,
   zk_ltstgr INT,
   issvs_val DOUBLE,
   recvs_val DOUBLE,
   proc_date DATE,
   file_creation_ts TIMESTAMP
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
"""
spark.sql(silver_table_ddl)

In [0]:
# Build the temp view `sap_vcdmmim02` that the MERGE cell uses as its source.
#
# Inner-most subquery: keeps one row per (reqtsn, datapakid, record) from
#   temp_sap_vccmmim52, preferring the highest proc_date (ROW_NUMBER ... rn = 1).
# CTE `main`:
#   * empty strings in the character columns become NULL;
#   * '00000000' in the date columns becomes NULL, other values are parsed
#     with the yyyyMMdd pattern;
#   * an all-digit material is round-tripped through BIGINT (drops leading
#     zeros); leading '0' characters are trimmed from plant;
#   * the three value measures are multiplied by 100 and summed, zk_ltstgr is
#     summed as BIGINT, proc_date / file_creation_ts take the MAX per group.
# Outer SELECT: adds hash_id = farm_fingerprint(CONCAT(<key columns, NULL -> ''>)).
#   NOTE(review): farm_fingerprint is not defined in this notebook --
#   presumably a function registered by the common utilities; confirm.
#
# The statement has no placeholders, so it is kept as a plain string.
silver_view_sql = """
CREATE OR REPLACE TEMP VIEW sap_vcdmmim02 AS
WITH main AS (
SELECT 
reqtsn,
datapakid,
record,
CASE WHEN stor_loc = '' THEN null ELSE stor_loc END AS stor_loc,
CASE WHEN comp_code = '' THEN null ELSE comp_code END AS comp_code,
CASE WHEN _bic_zc_ltstgr = '00000000' THEN null ELSE TO_DATE(_bic_zc_ltstgr,"yyyyMMdd") END AS zc_ltstgr, --
CASE WHEN REGEXP(material,'^[0-9]+$') THEN CAST(CAST(material AS BIGINT) AS STRING) ELSE material END AS material,
CASE WHEN plant = '' THEN null ELSE LTRIM('0',plant) END AS plant,
CASE WHEN stockcat = '' THEN null ELSE stockcat END AS stockcat,
CASE WHEN vendor = '' THEN null ELSE vendor END AS vendor,
CASE WHEN customer = '' THEN null ELSE customer END AS customer,
CASE WHEN dataorigin = '' THEN null ELSE dataorigin END AS dataorigin,
CASE WHEN movetype = '' THEN null ELSE movetype END AS movetype,
CASE WHEN calday = '00000000' THEN null ELSE TO_DATE(calday,"yyyyMMdd") END AS calday,
CASE WHEN upd_date = '00000000' THEN null ELSE TO_DATE(upd_date,"yyyyMMdd") END AS upd_date,
CASE WHEN loc_currcy = '' THEN null ELSE loc_currcy END AS loc_currcy,
SUM(_bic_zk_tp_val * 100) AS zk_tp_val,
SUM(CAST(_bic_zk_ltstgr AS BIGINT)) AS zk_ltstgr,
SUM(issvs_val * 100) AS issvs_val,
SUM(recvs_val * 100) AS recvs_val,
MAX(proc_date) AS proc_date,
MAX(file_creation_ts) as file_creation_ts
FROM (
    SELECT * EXCEPT(rn) FROM (
        SELECT *, ROW_NUMBER() OVER (PARTITION BY reqtsn, datapakid, record ORDER BY proc_date DESC) rn 
        FROM temp_sap_vccmmim52 
    ) a WHERE rn = 1 
) a
GROUP BY
    reqtsn,
    datapakid,
    record,
    stor_loc,
    comp_code,
    _bic_zc_ltstgr,
    material,
    plant,
    stockcat,
    vendor,
    customer,
    dataorigin,
    movetype,
    calday,
    upd_date,
    loc_currcy
)
SELECT 
farm_fingerprint(CONCAT(
    IFNULL(CAST(reqtsn AS STRING), ""),
    IFNULL(CAST(datapakid AS STRING), ""),
    IFNULL(CAST(record AS STRING), ""),
    IFNULL(CAST(stor_loc AS STRING), ""),
    IFNULL(CAST(comp_code AS STRING), ""),
    IFNULL(CAST(zc_ltstgr AS STRING), ""),
    IFNULL(CAST(material AS STRING), ""),
    IFNULL(CAST(plant AS STRING), ""),
    IFNULL(CAST(stockcat AS STRING), ""),
    IFNULL(CAST(vendor AS STRING), ""),
    IFNULL(CAST(customer AS STRING), ""),
    IFNULL(CAST(dataorigin AS STRING), ""),
    IFNULL(CAST(movetype AS STRING), ""),
    IFNULL(CAST(calday AS STRING), ""),
    IFNULL(CAST(upd_date AS STRING), ""),
    IFNULL(CAST(loc_currcy AS STRING), "")
)) AS hash_id,
reqtsn,
datapakid,
record,
stor_loc,
comp_code,
zc_ltstgr,
material,
plant,
stockcat,
vendor,
customer,
dataorigin,
movetype,
calday,
upd_date,
loc_currcy,
zk_tp_val,
zk_ltstgr,
issvs_val,
recvs_val,
proc_date,
file_creation_ts
FROM main
"""
spark.sql(silver_view_sql)

In [0]:
# Build the quoted, comma-separated list of calday values ('2024-01-01','2024-01-02', ...)
# that the MERGE cell uses to restrict which target rows are considered for matching.
#
# The list is taken from the incoming batch (the temp view `sap_vcdmmim02`), not
# from rows already stored in the silver table. Because hash_id is computed from
# calday, a target row can only match a source row that has the same calday, so
# a list covering every calday in the batch makes the IN (...) predicate a pure
# pruning filter: it never hides a target row that a source row should match.
# Sourcing the list from the target table's previous-day load instead would
#   * yield an empty list (and a skipped load) whenever nothing was stored with
#     that proc_date, and
#   * let batch rows whose calday is outside the list be inserted again on a re-run.
#
# Result contract (relied on by the MERGE cell):
#   * always a string; "''" when the batch has no non-NULL calday
#     (COLLECT_SET ignores NULLs, CONCAT_WS of an empty array is '').
calday_query = """
SELECT
    CONCAT("'", CONCAT_WS("','", COLLECT_SET(CAST(calday AS STRING))), "'") AS calday_in_str
FROM sap_vcdmmim02
WHERE calday IS NOT NULL
"""
calday_df = spark.sql(calday_query)
# A global aggregate always returns exactly one row, so [0][0] is safe.
calday_in_str = calday_df.collect()[0][0]

print(calday_in_str)

In [0]:
# Insert-only load of the temp view `sap_vcdmmim02` into the silver table:
# rows whose hash_id is not matched in the target are inserted, matched rows
# are left untouched (there is no WHEN MATCHED clause).
silver_table = f"{catalog_name}.udp_wcm_silver_sap_bw.sap_vcdmmim02"


def _merge_new_rows(target_table, calday_filter=None):
    """Run the insert-only MERGE from the temp view into ``target_table``.

    Args:
        target_table: Fully qualified name of the Delta table to merge into.
        calday_filter: Optional SQL list body (e.g. "'2024-01-01','2024-01-02'").
            When given, only target rows whose calday is in the list are
            considered for matching.

    Returns:
        The DataFrame returned by ``spark.sql`` for the MERGE statement.
    """
    match_condition = "main_tbl.hash_id = temp_tbl.hash_id"
    if calday_filter:
        match_condition += f" AND main_tbl.calday IN ({calday_filter})"

    merge_sql = f"""
    MERGE INTO {target_table} main_tbl
    USING sap_vcdmmim02 temp_tbl
    ON {match_condition}
    WHEN NOT MATCHED THEN INSERT *
    """
    return spark.sql(merge_sql)


silver_count = spark.sql(f"SELECT COUNT(*) FROM {silver_table}").collect()[0][0]
# "''" is what the calday cell produces when it found no dates at all.
has_calday_values = len(calday_in_str) > 0 and calday_in_str != "''"

if silver_count == 0:
    # Empty target: nothing to restrict, merge on hash_id alone.
    _merge_new_rows(silver_table).display()
elif has_calday_values:
    _merge_new_rows(silver_table, calday_in_str).display()
else:
    # No calday values available: the load is skipped and only reported.
    print("calday_in_str is empty. Please provide valid date values.")