In [0]:
%run ../utils/common

In [0]:
# Declare the notebook's text widgets. Each tuple holds the positional
# arguments passed to dbutils.widgets.text: (name, default value[, label]).
for _widget_args in (
    ("environment", "", ""),
    ("target_dataset", "VCM_DWH_PRD"),
    ("target_table", "f_po_item"),
    ("metadata_schema", "udp_wcm_metadata_dev"),
    (
        "dependency_table",
        "VCM_DWH_PRD.F_ORDER_REPORT, VCM_DWH_PRD.F_PO_DOCTYPE, VCM_DWH_PRD.F_MOVEMENT_PO, VCM_DWH_PRD.F_DISPATCH_PO",
    ),
):
    dbutils.widgets.text(*_widget_args)

In [0]:
# Widgets naming the date column and the row-id column; they are read back
# further down as `field_calday` and `field_id`.
for _widget_name, _widget_default in (
    ("field_calday", "CALDAY"),
    ("field_id", "HASH_ID"),
):
    dbutils.widgets.text(_widget_name, _widget_default)

In [0]:
# Read the environment widget in this cell so the lookup below does not depend
# on `environment` being assigned by a later cell (it would otherwise raise
# NameError on a fresh "Run All" unless an earlier %run happens to define it).
environment = dbutils.widgets.get("environment")

# `settings` is not defined in this notebook — presumably provided by the
# `%run ../utils/common` cell above; verify there.
catalog_name = settings[environment]['catalog_name']

In [0]:
# Pull the plain string parameters from their widgets, in declaration order.
environment, target_dataset, target_table, metadata_schema = (
    dbutils.widgets.get(widget_name)
    for widget_name in (
        "environment",
        "target_dataset",
        "target_table",
        "metadata_schema",
    )
)

# Turn the comma-separated widget value into a quoted, upper-cased list
# literal, e.g. "a.b, c.d" -> "'A.B','C.D'".
dependency_table = (
    "'"
    + "','".join(
        entry.strip().upper()
        for entry in dbutils.widgets.get("dependency_table").split(",")
    )
    + "'"
)

# Echo the resolved parameters, one per line.
print(
    f"environment: {environment}",
    f"target_dataset: {target_dataset}",
    f"target_table: {target_table}",
    f"catalog_name: {catalog_name}",
    f"metadata_schema: {metadata_schema}",
    f"dependency_table: {dependency_table}",
    sep="\n",
)

In [0]:
# Read the column-name widgets and echo them, one per line.
field_calday, field_id = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("field_calday", "field_id")
)

print(
    f"field_calday: {field_calday}",
    f"field_id: {field_id}",
    sep="\n",
)

In [0]:
%run "../common/common_etl_load"

In [0]:
# Create the target Delta table {catalog_name}.udp_wcm_gold_vcm_dwh.f_po_item
# if it does not exist yet. The statement is a no-op when the table is already
# present, so editing the column list here does not alter an existing table.
# The table properties request Delta optimized writes and auto compaction.
# NOTE(review): po_value and po_gr_value are FLOAT while the other measures
# are DOUBLE — confirm this is intentional.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_gold_vcm_dwh.f_po_item (
  po_id STRING,
  po_doc STRING,
  po_item BIGINT,
  product_id STRING,
  product_name STRING,
  doctype STRING,
  store_id STRING,
  dc_site STRING,
  dc_id STRING,
  bu_id STRING,
  region STRING,
  city STRING,
  brand STRING,
  manufacturer STRING,
  vendor STRING,
  merchant_name STRING,
  mch3_name STRING,
  po_type_id STRING,
  po_type STRING,
  po_status STRING,
  po_unit STRING,
  calday DATE,
  creation_date DATE,
  pur_reason STRING,
  purch_org STRING,
  pur_group STRING,
  leadtime INT,
  compl_del STRING,
  base_unit STRING,
  promotion_code STRING,
  po_qty DOUBLE,
  po_value FLOAT,
  po_gr_date DATE,
  gr_date DATE,
  po_gr_qty DOUBLE,
  gr_qty DOUBLE,
  po_gr_value FLOAT,
  gr_value DOUBLE,
  po_gr_re_ind STRING,
  po_gi_qty DOUBLE,
  gi_value DOUBLE,
  gi_from_date DATE,
  gi_to_date DATE,
  order_date_request DATE,
  delivery_confirmation_date DATE,
  webportal_status STRING,
  delivery_method STRING,
  sub_range_id STRING,
  cluster STRING
)
USING delta
TBLPROPERTIES (
    'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
    'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
  ) """)

In [0]:
# Build the staging temp view that the delete and insert cells below read.
#
# CALDAY_IN_STR is not defined in this notebook — presumably a quoted,
# comma-separated list of dates produced by `%run "../common/common_etl_load"`;
# verify there. An empty string means there is nothing to load.
#
# The view is named temp_{target_table} so that it always matches the name the
# downstream cells query (they read temp_{target_table}); with the default
# widget value this is temp_f_po_item.
#
# Query outline:
#   orpt  - order-report rows whose ORDER_DATE is in CALDAY_IN_STR
#   fme   - f_movement_po aggregated per (po_doc, po_item): latest gr_date,
#           summed gr_qty / gr_value
#   gi    - f_dispatch_po aggregated per (po_doc, po_item): first/last calday,
#           summed gi_qty / gi_value
#   temp  - f_po_doctype rows whose CALDAY is in CALDAY_IN_STR, left-joined to orpt
#   main  - temp enriched with d_product and d_store attributes; leadtime is
#           the constant 7
#   final - main left-joined to vendor master, assortment cluster, PO type,
#           unit conversions (d_unit_conv and sap_pmat_unit), fme, d_pir_current
#           and gi; quantities are converted and rounded to 3 decimals
if CALDAY_IN_STR != '':
    spark.sql(f"""
    CREATE OR REPLACE TEMP VIEW temp_{target_table}
    AS
    WITH
    orpt AS (
    SELECT
        delivery_date,
        order_date_request,
        delivery_confirmation_date,
        webportal_status,
        po_doc,
        po_item
    FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_order_report
    WHERE ORDER_DATE IN ({CALDAY_IN_STR})
    ),
    fme AS (
    SELECT
        po_doc,
        po_item,
        MAX(gr_date) AS gr_date,
        SUM(gr_qty) AS gr_qty,
        SUM(gr_value) AS gr_value
    FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_movement_po
    GROUP BY po_doc, po_item
    ),
    gi AS (
    SELECT
        po_doc,
        po_item,
        MIN(calday) AS gi_from_date,
        MAX(calday) AS gi_to_date,
        SUM(gi_qty) AS gi_qty,
        SUM(gi_value) AS gi_value
    FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_dispatch_po
    GROUP BY po_doc, po_item
    ),
    temp AS (
    SELECT
        po.po_id,
        po.po_doc,
        po.po_item,
        po.product_id,
        po.store_id,
        po.dc_id,
        po.bu_id,
        po.vendor,
        po.doctype,
        po.po_status,
        po.calday,
        po.po_unit,
        po.creation_date,
        po.pur_reason,
        po.purch_org,
        po.pur_group,
        orpt.delivery_date AS gr_date,
        po.gr_re_ind,
        po.compl_del,
        po.base_unit,
        po.promotion_code,
        po.delivery_method,
        orpt.order_date_request,
        orpt.delivery_confirmation_date,
        orpt.webportal_status,
        po.po_value,
        po.gr_value,
        po.po_qty,
        po.gr_qty
    FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_po_doctype po
        LEFT JOIN orpt ON (po.po_doc = orpt.po_doc AND po.po_item = orpt.po_item)
    WHERE CALDAY IN ({CALDAY_IN_STR})
    ),
    main AS (
    SELECT
        temp.po_id,
        temp.po_doc,
        temp.po_item,
        temp.doctype,
        temp.product_id,
        prod.product_name,
        temp.store_id,
        store.store_name AS dc_site,
        temp.dc_id,
        temp.bu_id,
        store.region,
        store.city,
        prod.brand,
        prod.manufacturer,
        temp.vendor,
        prod.mch3_name,
        temp.doctype AS po_type,
        temp.po_status,
        temp.calday,
        temp.po_unit,
        temp.creation_date,
        temp.pur_reason,
        temp.purch_org,
        temp.pur_group,
        7 AS leadtime,
        temp.gr_date,
        temp.gr_re_ind,
        temp.compl_del,
        temp.base_unit,
        temp.promotion_code,
        temp.delivery_method,
        temp.po_value,
        temp.gr_value,
        temp.po_qty,
        temp.gr_qty,
        store.region_domain,
        temp.order_date_request,
        temp.delivery_confirmation_date,
        temp.webportal_status
    FROM temp
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product prod ON (temp.product_id = prod.product_id)
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_store store ON (temp.store_id = store.store_id)
    )
    SELECT
    main.po_id,
    main.po_doc,
    main.po_item,
    main.product_id,
    main.product_name,
    main.doctype,
    main.store_id,
    main.dc_site,
    main.dc_id,
    main.bu_id,
    main.region,
    main.city,
    main.brand,
    main.manufacturer,
    main.vendor,
    vd.company_name AS merchant_name,
    main.mch3_name,
    main.po_type AS po_type_id,
    potype.po_type_name AS po_type,
    main.po_status,
    main.po_unit,
    main.calday,
    main.creation_date,
    main.pur_reason,
    main.purch_org,
    main.pur_group,
    main.leadtime,
    main.compl_del,
    main.base_unit,
    main.promotion_code,
    ROUND(main.po_qty * IFNULL(uc.denomintr, 1) / IFNULL(uc.numerator, 1), 3) AS po_qty,
    main.po_value,
    main.gr_date AS po_gr_date,
    fme.gr_date,
    ROUND(main.gr_qty * IFNULL(uc.denomintr, 1) / IFNULL(uc.numerator, 1), 3) AS po_gr_qty,
    ROUND(fme.gr_qty * IFNULL(uc.denomintr, 1) / IFNULL(uc.numerator, 1), 3) AS gr_qty,
    main.gr_value AS po_gr_value,
    fme.gr_value,
    main.gr_re_ind AS po_gr_re_ind,
    ROUND(gi.gi_qty * (IFNULL(pu.denomintr, 1) / IFNULL(pu.numerator, 1)), 3) AS po_gi_qty,
    gi.gi_value,
    gi.gi_from_date,
    gi.gi_to_date,
    main.order_date_request,
    main.delivery_confirmation_date,
    main.webportal_status,
    main.delivery_method,
    pir.subrange_id AS sub_range_id,
    ac.cluster
    FROM main
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_vendor_master vd ON main.vendor = vd.vendor_id
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_assortment_cluster ac
        ON main.store_id = ac.store_id
        AND main.product_id = ac.product_id
        AND main.creation_date >= ac.start_date
        AND main.creation_date <= ac.end_date
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_po_type potype ON main.po_type = potype.po_type
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_unit_conv uc
        ON main.product_id = uc.material AND main.po_unit = uc.mat_unit
    LEFT JOIN (
        SELECT * FROM {catalog_name}.udp_wcm_silver_sap_bw.sap_pmat_unit
        WHERE start_date <> end_date
    ) pu
        ON main.product_id = pu.material
        AND main.po_unit = pu.mat_unit
        AND IFNULL(main.gr_date, main.calday) >= pu.start_date
        AND IFNULL(main.gr_date, main.calday) <= pu.end_date
    LEFT JOIN fme ON main.po_doc = fme.po_doc AND main.po_item = fme.po_item
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_pir_current pir
        ON LTRIM(main.vendor,'0') = pir.vendor_id
        AND main.product_id = pir.product_id
    LEFT JOIN gi ON main.po_doc = gi.po_doc AND main.po_item = gi.po_item
    """)
else:
    print(
        f"temp_{target_table} could't be created as no valid calday's found In common-etl so {catalog_name}.udp_wcm_gold_vcm_dwh.f_po_item can't be deleted using calday's from temp_f_po_item"
    )

In [0]:
# Remove from the target table every calday that is about to be re-inserted
# from the staging view, so the following insert does not duplicate rows.
if CALDAY_IN_STR == "":
    # Nothing was staged: report and leave the target table untouched.
    print(
        f"temp_{target_table} could't be created as no valid calday's found In common-etl so {catalog_name}.udp_wcm_gold_vcm_dwh.f_po_item can't be deleted using calday's from temp_f_po_item"
    )
else:
    # Collapse the distinct caldays in the staging view into one quoted,
    # comma-separated string usable inside an IN (...) clause.
    calday_query = f"""
    SELECT
        CONCAT("'", CONCAT_WS("','", COLLECT_SET(CAST(TO_DATE(calday, 'yyyy-MM-dd') AS STRING))), "'") AS calday_in_str
    FROM temp_{target_table}
    """

    calday_df = spark.sql(calday_query)
    calday_in_str = calday_df.collect()[0][0]

    # With the surrounding quotes removed, an empty remainder means the view
    # held no caldays, so there is nothing to delete.
    if not calday_in_str.strip("'"):
        print("no valid calday's found to delete.")
    else:
        spark.sql(
            f"""
        DELETE FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_po_item
        WHERE calday IN ({calday_in_str})
        """
        ).display()

In [0]:
# Append the staged rows to the target table.
if CALDAY_IN_STR == "":
    # Nothing was staged: report and skip the insert.
    print(
        f"temp_{target_table} could't be created as no valid calday's found In common-etl so {catalog_name}.udp_wcm_gold_vcm_dwh.f_po_item can't be inserted using calday's from temp_{target_table}"
    )
else:
    # Use the target table's own column list for both the INSERT and the
    # SELECT, so columns are matched by name rather than by view order.
    _target_columns = spark.table(
        f"{catalog_name}.udp_wcm_gold_vcm_dwh.f_po_item"
    ).columns
    INSERT_ROW = ", ".join(_target_columns)
    spark.sql(
        f"""
        INSERT INTO {catalog_name}.udp_wcm_gold_vcm_dwh.f_po_item ({INSERT_ROW})
        SELECT {INSERT_ROW} FROM temp_{target_table}
    """
    ).display()

In [0]:
# Stop the notebook early when there were no caldays to process, so the cells
# after this one are not executed.
# NOTE(review): the exit message ends with a dangling "and " — kept verbatim
# in case a caller matches on the exit value.
if CALDAY_IN_STR == "":
    dbutils.notebook.exit(
        "No valid calday's found In common-etl to insert data into "
        "Table: ETL_LOG_DEPENDENCIES and ETL_DELTA_TABLE and "
    )

In [0]:
%run "../common/common_etl_update"