In [0]:
%run ../utils/common

In [0]:
# Declare the notebook's text widgets together with their default values.
# The (name, default) pairs are registered in the order listed.
_widget_defaults = (
    ("environment", "DEV"),
    ("target_dataset", "VCM_DWH_PRD"),
    ("target_table", "f_sale_prod_kdol_wmp"),
    ("metadata_schema", "udp_wcm_metadata_dev"),
    (
        "dependency_table",
        "VCM_DWH_PRD.F_SALE_PROD, VCM_STG_INF.POS_WCM_SALEOUT_BILL_HEADER, VCM_STG_INF.POS_WCM_SALEOUT_TRANLINE",
    ),
)
for _widget_name, _widget_default in _widget_defaults:
    dbutils.widgets.text(_widget_name, _widget_default)

In [0]:
# Declare the field_calday and field_id text widgets with their defaults.
for _field_widget, _field_default in (("field_calday", "CALDAY"), ("field_id", "HASH_ID")):
    dbutils.widgets.text(_field_widget, _field_default)

In [0]:
# The "environment" widget is already declared at the top of this notebook
# with the default "DEV". It used to be re-declared here with an empty default
# and an empty label, which conflicts with that first declaration and could
# leave `environment` empty for the settings lookup that follows. Only read
# the current widget value here.
environment = dbutils.widgets.get("environment")

In [0]:
# Look up the catalog name configured for the selected environment.
# NOTE(review): `settings` is not defined in this notebook's own cells;
# presumably it is provided by the `%run ../utils/common` cell - confirm.
catalog_name = settings[environment]["catalog_name"]

In [0]:
# Read the current widget values into notebook-level variables.
environment = dbutils.widgets.get("environment")
target_dataset = dbutils.widgets.get("target_dataset")
target_table = dbutils.widgets.get("target_table")
metadata_schema = dbutils.widgets.get("metadata_schema")

# The dependency widget holds a comma-separated list of table names. Trim each
# entry and re-join them as a single-quoted, comma-separated string:
#   "A, B"  ->  "'A','B'"
dependency_table = (
    "'"
    + "','".join(
        table_name.strip()
        for table_name in dbutils.widgets.get("dependency_table").split(",")
    )
    + "'"
)

# Echo the resolved parameters to the cell output.
print(f"environment: {environment}")
print(f"target_dataset: {target_dataset}")
print(f"catalog_name: {catalog_name}")
print(f"metadata_schema: {metadata_schema}")
print(f"dependency_table: {dependency_table}")

In [0]:
# Read the field_calday / field_id widget values and echo them.
field_calday, field_id = (
    dbutils.widgets.get(widget_name) for widget_name in ("field_calday", "field_id")
)

print(f"field_calday: {field_calday}")
print(f"field_id: {field_id}")

In [0]:
%run "../common/common_etl_load"

In [0]:
# DDL for the target table. CREATE TABLE IF NOT EXISTS makes this cell safe to
# re-run: an existing table is left untouched. The two table properties turn
# on Delta auto-optimize (optimized writes and auto-compaction).
create_target_table_ddl = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_gold_vcm_dwh.f_sale_prod_kdol_wmp
(
  hash_id BIGINT,
  calday DATE,
  base_uom STRING,
  sales_unit STRING,
  transaction_index STRING,
  transaction_id STRING,
  product_id STRING,
  business_unit STRING,
  store_id STRING,
  promotion STRING,
  transaction_no STRING,
  transaction_type STRING,
  time_of_day INT,
  sale_time INT,
  credit_card_id STRING,
  payment_method STRING,
  department STRING,
  pos_number STRING,
  offer STRING,
  discount_type STRING,
  discount_type_group STRING,
  recordmode STRING,
  return_qty1 FLOAT,
  return_qty2 FLOAT,
  return_value FLOAT,
  promotion_qty FLOAT,
  base_sale_qty FLOAT,
  receipt_count FLOAT,
  sale_in_cost_value FLOAT,
  promotion_sale FLOAT,
  sale_qty FLOAT,
  sale_amount FLOAT,
  promotion_sale_l FLOAT,
  return_sale_l FLOAT,
  discount_item_no FLOAT,
  discount_value FLOAT,
  tax_amount FLOAT,
  tender_value FLOAT,
  vinid_card_no STRING,
  brand STRING,
  vinid_csn STRING,
  original_transaction_id STRING,
  channel STRING,
  cx_transaction_id STRING
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
) 
"""
spark.sql(create_target_table_ddl)

In [0]:
# Stage the rows to load in the temp view temp_f_sale_prod_kdol_wmp.
# NOTE(review): CALDAY_IN_STR is not defined in this notebook's own cells;
# presumably it comes from the `%run "../common/common_etl_load"` cell - confirm.
#
# Shape of the query:
#   * order_online - one row per receipt_no with the array of its distinct brands,
#                    taken from the saleout tranline table.
#   * f_sale_prod  - f_sale_prod rows (minus is_sll) for the caldays being loaded.
#   * final SELECT - every f_sale_prod column plus the comma-joined brand list and
#                    the bill-header reference fields. The WHERE clause filters on
#                    c.calday, so receipts without a matching f_sale_prod row are
#                    dropped even though the joins are LEFT JOINs; only receipts
#                    from the listed channels, or carrying brand 'VMP', are kept.
if CALDAY_IN_STR != "":
    temp_view_sql = f"""
    CREATE OR REPLACE TEMP VIEW temp_f_sale_prod_kdol_wmp AS
    WITH
    order_online AS (
        SELECT
            receipt_no,
            ARRAY_AGG(DISTINCT brand) brand
        FROM {catalog_name}.udp_wcm_silver_realtime.pos_wcm_saleout_tranline 
        GROUP BY 1
    ),
    f_sale_prod AS (
    SELECT * EXCEPT (is_sll) 
    FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_sale_prod
    WHERE calday in ({CALDAY_IN_STR})
    )

    SELECT
        c.*,
        ARRAY_JOIN(a.brand, ',') brand,
        b.vinid_csn,
        b.header_ref_01 original_transaction_id,
        b.header_ref_02 channel,
        b.header_ref_03 cx_transaction_id
    FROM
        order_online a
        LEFT JOIN {catalog_name}.udp_wcm_silver_realtime.pos_wcm_saleout_bill_header b ON a.receipt_no = b.receipt_no
        LEFT JOIN f_sale_prod c ON a.receipt_no = LTRIM('0',c.transaction_no)
    WHERE
        c.calday IN ({CALDAY_IN_STR}) AND
        (b.header_ref_02 IN ('Hotline','Shopee','Zalo','Winmart-Website','FaceBook') OR array_contains(a.brand, 'VMP'))
    """
    spark.sql(temp_view_sql)

In [0]:
if CALDAY_IN_STR != '':
    # Refresh the gold table from the staged temp view: delete every calday
    # that is present in the view, then insert the view's rows.
    #
    # The view is created under a fixed name by the previous cell, so the same
    # name is used for BOTH the calday lookup and the INSERT. Previously the
    # INSERT read from temp_{target_table}; with a non-default `target_table`
    # widget the DELETE would succeed and the INSERT would then fail on a
    # missing view, leaving the deleted days empty.
    temp_view_name = "temp_f_sale_prod_kdol_wmp"
    gold_table_name = f"{catalog_name}.udp_wcm_gold_vcm_dwh.f_sale_prod_kdol_wmp"

    # Build a quoted, comma-separated list ('d1','d2',...) of the distinct
    # caldays actually present in the staged view.
    calday_query = f"""
    SELECT
        CONCAT("'", CONCAT_WS("','", COLLECT_SET(CAST(TO_DATE(calday, 'yyyy-MM-dd') AS STRING))), "'") AS calday_in_str
    FROM {temp_view_name}
    """

    calday_in_str_new = spark.sql(calday_query).collect()[0][0]

    if calday_in_str_new != '':
        calday_in_str = calday_in_str_new

        # With no rows in the view the query returns only the two wrapping
        # quotes, so stripping quotes leaves an empty string and nothing is
        # deleted or inserted.
        if calday_in_str.strip("'"):
            # Resolve the target column list BEFORE the destructive DELETE so
            # that a failure here cannot leave the table with days removed.
            INSERT_ROW = ", ".join(spark.table(gold_table_name).columns)

            # NOTE: DELETE and INSERT are two separate statements, not one
            # atomic operation; a failure between them leaves the days empty
            # until the notebook is re-run.
            spark.sql(f"""
            DELETE FROM {gold_table_name}
            WHERE calday IN ({calday_in_str})
            """)
            # Insert by explicit column name so the view's column order does
            # not have to match the table's.
            spark.sql(f"""
                INSERT INTO {gold_table_name} ({INSERT_ROW})
                SELECT {INSERT_ROW} FROM {temp_view_name}
            """).display()
        else:
            print("No valid CALDAYs found to delete.")
            print(f"temp_{target_table} could't be created as no valid calday's found In common-etl so {catalog_name}.udp_wcm_gold_vcm_dwh.f_sale_prod_kdol_wmp can't be inserted using calday's from temp_{target_table}")