In [0]:
%run ../utils/common

In [0]:
# Job parameters exposed as Databricks text widgets. Each tuple holds the
# positional arguments passed to dbutils.widgets.text: the widget name, its
# default value and, only where one is given, the label.
_JOB_WIDGETS = (
    ("environment", "", ""),
    ("target_dataset", "VCM_DWH_PRD"),  # udp_wcm_gold_vcm_dwh
    ("target_table", "f_sale_prod"),
    ("metadata_schema", "udp_wcm_metadata_dev"),
    (
        "dependency_table",
        # udp_wcm_silver_sap_bw.SAP_VCDPOSD13,udp_wcm_silver_sap_bw.SAP_VCDPOSD07
        "VCM_STG_INF.SAP_VCDPOSD13, VCM_STG_INF.SAP_VCDPOSD07",
    ),
)

for _widget_args in _JOB_WIDGETS:
    dbutils.widgets.text(*_widget_args)

In [0]:
# Resolve the catalog for the requested environment.
# `environment` is read from its widget right here because this cell runs
# before the cell that assigns it further down; without this line the lookup
# depends on leftover kernel state (or on ../utils/common happening to define
# the name) and breaks on a fresh "Run All".
environment = dbutils.widgets.get("environment")
# `settings` is not defined in this notebook — presumably provided by
# ../utils/common (TODO confirm).
catalog_name = settings[environment]['catalog_name']

In [0]:
# Text widgets for the date and key column names, with their defaults.
for _field_widget, _field_default in (
    ("field_calday", "CALDAY"),
    ("field_id", "HASH_ID"),
):
    dbutils.widgets.text(_field_widget, _field_default)

In [0]:
# Read the job parameters back from their widgets (same order as before).
environment, target_dataset, target_table, metadata_schema = (
    dbutils.widgets.get(widget_name)
    for widget_name in (
        "environment",
        "target_dataset",
        "target_table",
        "metadata_schema",
    )
)

# Turn "a, b" into "'A','B'": split on commas, trim and upper-case every
# entry, then wrap each one in single quotes.
dependency_table = ",".join(
    f"'{entry.strip().upper()}'"
    for entry in dbutils.widgets.get("dependency_table").split(",")
)

# Echo the effective parameters, one per line.
print(
    f"environment: {environment}",
    f"target_dataset: {target_dataset}",
    f"target_table: {target_table}",
    f"catalog_name: {catalog_name}",
    f"metadata_schema: {metadata_schema}",
    f"dependency_table: {dependency_table}",
    sep="\n",
)

In [0]:
# Read the date/key column names from their widgets and echo them.
field_calday, field_id = (
    dbutils.widgets.get(widget_name) for widget_name in ("field_calday", "field_id")
)

print(f"field_calday: {field_calday}", f"field_id: {field_id}", sep="\n")

In [0]:
%run ../common/common_etl_load

In [0]:
# Create the target table {catalog_name}.udp_wcm_gold_vcm_dwh.f_sale_prod if it
# does not exist yet; an existing table is left untouched.
# The schema and table name are hard-coded here rather than taken from the
# target_dataset / target_table widgets.
# All quantity and amount measures are declared as FLOAT.
# NOTE(review): FLOAT is single precision — confirm that is acceptable for the
# summed monetary values loaded into this table.
# The table properties switch on Delta auto-optimize (optimized writes and
# auto-compaction).
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_gold_vcm_dwh.f_sale_prod
(
hash_id BIGINT,
calday DATE,
base_uom STRING,
sales_unit STRING,
transaction_index STRING,
transaction_id STRING,
product_id STRING,
business_unit STRING,
store_id STRING,
promotion STRING,
transaction_no STRING,
transaction_type STRING,
time_of_day BIGINT,
sale_time BIGINT,
credit_card_id STRING,
payment_method STRING,
department STRING,
pos_number STRING,
offer STRING,
discount_type STRING,
discount_type_group STRING,
recordmode STRING,
return_qty1 FLOAT,
return_qty2 FLOAT,
return_value FLOAT,
promotion_qty FLOAT,
base_sale_qty FLOAT,
receipt_count FLOAT,
sale_in_cost_value FLOAT,
promotion_sale FLOAT,
sale_qty FLOAT,
sale_amount FLOAT,
promotion_sale_l FLOAT,
return_sale_l FLOAT,
discount_item_no FLOAT,
discount_value FLOAT,
tax_amount FLOAT,
tender_value FLOAT,
vinid_card_no STRING,
is_sll BOOLEAN,
proc_date DATE
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
""")

In [0]:
# Build the staging view temp_f_sale_prod for the CALDAYs listed in
# CALDAY_IN_STR. Nothing is created when CALDAY_IN_STR is empty.
# CALDAY_IN_STR is not assigned in any cell of this notebook — presumably it is
# set by ../common/common_etl_load (TODO confirm).
if CALDAY_IN_STR != '':
    # Structure of the query below:
    #   vcdposd13_decimal - one row per hash_id from sap_vcdposd13 with every
    #                       measure summed; the quantity columns are rounded to
    #                       3 decimals before summing.
    #   vcdposd13_char    - descriptive columns from sap_vcdposd13, ranked per
    #                       hash_id by reqtsn, datapakid, record (all DESC); the
    #                       final select keeps only rn = 1.
    #   vcdposd13         - inner join of the two CTEs above on hash_id.
    #   vcdposd07         - sap_vcdposd07 rows ranked the same way; only rn = 1
    #                       rows with bic_zk_loypnt > 0 and rpa_slc > 0 are
    #                       joined (on calday, plant, rpa_tnr, rpa_wid) to
    #                       supply bic_zc_vinid as vinid_card_no.
    #   transaction_sll   - distinct calday / plant / rpa_tnr from sap_vcdext01
    #                       filtered on _bic_zc_fgroup = 'OGID',
    #                       _bic_zc_fname = 'B2B', _bic_zc_fvalue LIKE 'B2B%';
    #                       is_sll is true when a row matches on calday, store
    #                       and the LTRIM'ed transaction number.
    #
    # NOTE(review): LTRIM('0', x) is the two-argument form with the trim
    #   characters first, i.e. it should strip leading zeros — verify on the
    #   target runtime.
    # NOTE(review): proc_date in the final select is unqualified. Neither
    #   vcdposd13 CTE nor transaction_sll selects it, so it presumably resolves
    #   to the LEFT-JOINed vcdposd07 row and would be NULL when no such row
    #   matches — confirm this is intended.
    # NOTE(review): vcdposd07 is de-duplicated per hash_id but joined on
    #   calday/plant/rpa_tnr/rpa_wid, and transaction_sll is distinct on the raw
    #   rpa_tnr but joined on the zero-trimmed value; either join could return
    #   more than one match per row — confirm uniqueness of those keys.
    spark.sql(f"""
    CREATE OR REPLACE TEMP VIEW temp_f_sale_prod
    AS
    WITH vcdposd13_decimal AS (
        SELECT
            CAST((hash_id) AS BIGINT) AS hash_id,
            SUM(ROUND(rpa_rqtyb,3)) AS return_qty1,
            SUM(ROUND(rpa_rqtyv,3)) AS return_qty2,
            SUM(rpa_retsal) AS return_value,
            SUM(ROUND(rpa_prmqty,3)) AS promotion_qty,
            SUM(ROUND(base_qty,3)) AS base_sale_qty,
            SUM(rpa_cnr) AS receipt_count,
            SUM(rpa_cst) AS sale_in_cost_value,
            SUM(rpa_prmsal) AS promotion_sale,
            SUM(ROUND(rpa_rlq,3)) AS sale_qty,
            SUM(rpa_slc) AS sale_amount,
            SUM(rpa_psl) AS promotion_sale_l,
            SUM(rpa_rsl) AS return_sale_l,
            SUM(rpa_cco) AS discount_item_no,
            SUM(rpa_rea) AS discount_value,
            SUM(rpa_tam) AS tax_amount,
            SUM(rpa_tat) AS tender_value
        FROM {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdposd13
        WHERE calday IN ({CALDAY_IN_STR})
        GROUP BY hash_id  -- calday function
    ),

    vcdposd13_char AS (
        SELECT
            ROW_NUMBER() OVER (PARTITION BY hash_id ORDER BY reqtsn DESC, datapakid DESC, record DESC) AS rn,
            CAST((hash_id) AS BIGINT) AS hash_id,
            calday,
            base_uom,
            sales_unit,
            rpa_tix AS transaction_index,
            LPAD(CONCAT(DATE_FORMAT(calday,'yyyyMMdd'), plant, rpa_tix), 50, '0') AS transaction_id,
            material AS product_id,
            segment AS business_unit,
            plant AS store_id,
            rt_promo AS promotion,
            rpa_tnr AS transaction_no,
            rpa_ttc AS transaction_type,
            rt_daytcl AS time_of_day,
            rt_salhour AS sale_time,
            rpa_pca AS credit_card_id,
            rpa_tcd AS payment_method,
            rpa_dep AS department,
            rpa_wid AS pos_number,
            rt_offer AS offer,
            rpa_dtc AS discount_type,
            rpa_dtg AS discount_type_group,
            recordmode
        FROM {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdposd13  
        WHERE calday IN ({CALDAY_IN_STR})-- calday function
    ),

    vcdposd13 AS (
        SELECT a.*, b.* EXCEPT (hash_id) 
        FROM vcdposd13_char a 
        INNER JOIN vcdposd13_decimal b on a.hash_id=b.hash_id 
    ),

    vcdposd07 AS (
        SELECT
            ROW_NUMBER() OVER (PARTITION BY hash_id ORDER BY reqtsn DESC, datapakid DESC, record DESC) AS rn,
            *
        FROM {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdposd07
        WHERE calday IN ({CALDAY_IN_STR})
    ),

    transaction_sll AS (
        SELECT DISTINCT
            calday,
            plant AS store_id,
            rpa_tnr AS transaction_no
        FROM {catalog_name}.udp_wcm_silver_sap_bw.sap_vcdext01
        WHERE _bic_zc_fgroup = 'OGID'
        AND _bic_zc_fname = 'B2B'
        AND _bic_zc_fvalue LIKE 'B2B%'
        AND calday IN ({CALDAY_IN_STR})
    )
    SELECT 
        a.*,
        a1.bic_zc_vinid AS vinid_card_no,
        CAST(IF(b.transaction_no IS NOT NULL, 'True', 'False') AS BOOLEAN) AS is_sll,
        proc_date
    FROM (
        SELECT * EXCEPT (rn) 
        FROM vcdposd13 
        WHERE rn = 1
    ) a
    LEFT JOIN (
        SELECT * EXCEPT (rn) 
        FROM vcdposd07 
        WHERE rn = 1 AND bic_zk_loypnt > 0 AND rpa_slc > 0
    ) a1 
    ON a.calday = a1.calday 
    AND a.store_id = a1.plant 
    AND a.transaction_no = a1.rpa_tnr 
    AND a.pos_number = a1.rpa_wid
    LEFT JOIN transaction_sll b
    ON a.calday = b.calday 
    AND a.store_id = b.store_id
    AND LTRIM('0',a.transaction_no) = LTRIM('0',b.transaction_no);
    """)

In [0]:
# Remove from the target table every CALDAY that is about to be re-inserted
# from the staging view, so the following insert does not duplicate those days.
if CALDAY_IN_STR == '':
    print(f"No valid calday's found In common-etl to create the view: temp_{target_table}")
else:
    # One-row aggregate: the distinct CALDAYs present in the staging view,
    # rendered as a single-quoted, comma-separated list ready for an IN (...).
    calday_in_str = spark.sql("""
    SELECT
        CONCAT("'", CONCAT_WS("','", COLLECT_SET(CAST(TO_DATE(calday, 'yyyy-MM-dd') AS STRING))), "'") AS calday_in_str
    FROM temp_f_sale_prod
    """).collect()[0][0]

    # Stripping the quotes leaves an empty string when the view held no dates;
    # in that case there is nothing to delete.
    if not calday_in_str.strip("'"):
        print("No valid CALDAYs found to delete.")
    else:
        spark.sql(f"""
        DELETE FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_sale_prod
        WHERE calday IN ({calday_in_str})
        """)

In [0]:
# Append the staged rows to the target table, selecting the columns by name in
# the target table's own column order.
if CALDAY_IN_STR != "":
    # Column list taken from the target table so the INSERT and SELECT sides
    # always line up, whatever the column order of the staging view is.
    INSERT_ROW = ", ".join(
        spark.table(f"{catalog_name}.udp_wcm_gold_vcm_dwh.f_sale_prod").columns
    )
    # The staging view is always created under the fixed name temp_f_sale_prod,
    # so it is referenced by that name here rather than via temp_{target_table}:
    # with any other widget value the view would not be found, and the insert
    # would fail after the matching CALDAYs had already been deleted.
    spark.sql(
        f"""
        INSERT INTO {catalog_name}.udp_wcm_gold_vcm_dwh.f_sale_prod ({INSERT_ROW})
        SELECT {INSERT_ROW} FROM temp_f_sale_prod
    """
    ).display()
else:
    print(
        f"temp_f_sale_prod could't be created as no valid calday's found In common-etl so {catalog_name}.udp_wcm_gold_vcm_dwh.f_sale_prod can't be inserted using calday's from temp_f_sale_prod"
    )

In [0]:
# Stop the notebook here when there were no CALDAYs to process; the message is
# handed to dbutils.notebook.exit as the notebook's exit value.
_NO_CALDAY_EXIT_MESSAGE = "No valid calday's found In common-etl to insert data into Table: ETL_LOG_DEPENDENCIES and ETL_DELTA_TABLE and "
if CALDAY_IN_STR == '':
    dbutils.notebook.exit(_NO_CALDAY_EXIT_MESSAGE)

In [0]:
# Clean up: drop the temporary staging view once the load is finished.
_DROP_STAGING_VIEW_SQL = "DROP VIEW IF EXISTS temp_f_sale_prod"
spark.sql(_DROP_STAGING_VIEW_SQL)