In [0]:
%run ../utils/common

In [0]:
# Notebook parameters, exposed as text widgets with their default values.
# `environment` is declared separately because it also passes an explicit
# (empty) label argument.
dbutils.widgets.text("environment", "", "")
for _widget_name, _widget_default in (
    ("target_dataset", "VCM_DWH_PRD"),
    ("target_table", "f_order_report"),
    ("metadata_schema", "udp_wcm_metadata_dev"),
    ("dependency_table", "VCM_STG_INF.order_report"),
):
    dbutils.widgets.text(_widget_name, _widget_default)

In [0]:
# Resolve the catalog to use for the selected environment.
# The widget is read here so that this cell works on a fresh top-to-bottom run
# instead of relying on `environment` having been assigned by some other cell.
# `settings` is not defined in this notebook section; presumably it comes from
# ../utils/common — confirm.
environment = dbutils.widgets.get("environment")
catalog_name = settings[environment]['catalog_name']

In [0]:
# Declare the `field_calday` and `field_id` text widgets with their defaults.
for _widget_name, _widget_default in (
    ("field_calday", "CALDAY"),
    ("field_id", "HASH_ID"),
):
    dbutils.widgets.text(_widget_name, _widget_default)

In [0]:
# Read the two field-name widgets (in declaration order) and echo their values
# to the cell output, one per line.
field_calday, field_id = (
    dbutils.widgets.get(widget) for widget in ("field_calday", "field_id")
)

print(f"field_calday: {field_calday}", f"field_id: {field_id}", sep="\n")

In [0]:
# Pull the job parameters out of their widgets.
environment, target_dataset, target_table, metadata_schema = (
    dbutils.widgets.get(widget)
    for widget in ("environment", "target_dataset", "target_table", "metadata_schema")
)

# Turn the comma-separated dependency list into a SQL-style quoted list:
# each entry is trimmed, upper-cased and wrapped in single quotes,
# e.g. "a.b, c.d" -> "'A.B','C.D'".
dependency_table = ",".join(
    f"'{entry.strip().upper()}'"
    for entry in dbutils.widgets.get("dependency_table").split(",")
)

# Echo the effective parameters, one per line.
print(
    f"environment: {environment}",
    f"target_dataset: {target_dataset}",
    f"target_table: {target_table}",
    f"catalog_name: {catalog_name}",
    f"metadata_schema: {metadata_schema}",
    f"dependency_table: {dependency_table}",
    sep="\n",
)

In [0]:
%run "../common/common_etl_load"

In [0]:
# Create the target table {catalog_name}.udp_wcm_gold_vcm_dwh.f_order_report
# if it does not exist yet (no-op otherwise because of IF NOT EXISTS).
# The four date columns are typed DATE and po_item is INT; every other column,
# including the quantity columns (po_qty, supplier_qty, gr_qty), is STRING.
# The table properties request optimized writes and auto compaction.
# The schema and table name are hard-coded here rather than taken from the
# target_dataset / target_table widgets.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_gold_vcm_dwh.f_order_report(
    po_doc STRING,
    product_id STRING,
    po_item INT,
    store_id STRING,
    purchase_name STRING,
    max_order_user STRING,
    order_date DATE,
    order_date_request DATE,
    order_date_expected_delivery DATE,
    delivery_confirmation_date DATE,
    delivery_date DATE,
    webportal_status STRING,
    po_qty STRING,
    supplier_qty STRING,
    gr_qty STRING,
    time_frame_for_delivery_confirmation STRING,
    vendor_id STRING,
    vendor_name STRING
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
""")

In [0]:
# Build the temp view temp_{target_table} with the rows to load for the order
# dates listed in CALDAY_IN_STR. CALDAY_IN_STR is not defined in this notebook
# section; presumably it is set by ../common/common_etl_load — confirm.
#
# Stages of the query:
#   tmp  - reads udp_wcm_silver_rop.order_report for the requested order dates,
#          renames/casts the columns, and takes MIN(delivery_date) over each
#          group of identical values in all the other selected columns.
#   main - keeps a single row per (po_doc, po_item): the one with the greatest
#          delivery_date, ties broken by the greatest upd_date.
#   final SELECT - adds MAX_ORDER_USER = MAX(purchase_name) per (po_doc, po_item).
#
# NOTE(review): `main` already holds exactly one row per (po_doc, po_item), so
# MAX_ORDER_USER looks like it always equals purchase_name — confirm whether the
# MAX was meant to be computed before the deduplication.
if CALDAY_IN_STR != "":
    spark.sql(f"""
    CREATE OR REPLACE TEMP VIEW temp_{target_table} AS
    WITH tmp AS (
        SELECT
            ord.order_id PO_DOC,
            ord.product_id,
            CAST(ord.product_stt AS INT) PO_ITEM,
            ord.store_id,
            TO_DATE(ord.order_date, 'yyyy-MM-dd') AS order_date,
            TO_DATE(ord.order_date_request, 'yyyy-MM-dd') AS order_date_request,
            DATE(TRY_TO_TIMESTAMP(ord.order_date_expected_delivery, 'yyyy-MM-dd HH:mm:ss')) AS order_date_expected_delivery,
            DATE(TRY_TO_TIMESTAMP(ord.delivery_confirmation_date, 'yyyy-MM-dd HH:mm:ss')) AS delivery_confirmation_date,
            ord.upd_date,
            MIN(CASE WHEN ord.delivery_date IS NOT NULL THEN DATE(TRY_TO_TIMESTAMP(ord.delivery_date, 'yyyy-MM-dd HH:mm:ss')) END) AS delivery_date,
            ord.status WEBPORTAL_STATUS,
            ord.order_user purchase_name,
            ord.purchase_order_quantity po_qty,
            ord.purchase_order_quantity_delivery supplier_qty,
            ord.purchase_order_quantity_delivered_actual gr_qty,
            ord.time_frame_for_delivery_confirmation,
            ord.vendor_id,
            ord.vendor_name
        FROM {catalog_name}.udp_wcm_silver_rop.order_report ord
        WHERE order_date IN ({CALDAY_IN_STR})
        GROUP BY
            ord.order_id,
            ord.product_id,
            ord.product_stt,
            ord.store_id,
            ord.order_date,
            ord.order_date_request,
            ord.order_date_expected_delivery,
            ord.delivery_confirmation_date,
            ord.upd_date,
            ord.status,
            ord.order_user,
            ord.purchase_order_quantity,
            ord.purchase_order_quantity_delivery,
            ord.purchase_order_quantity_delivered_actual,
            ord.time_frame_for_delivery_confirmation,
            ord.vendor_id,
            ord.vendor_name
    ),
    main AS (
        SELECT
            po_doc,
            product_id,
            po_item,
            store_id,
            purchase_name,
            order_date,
            order_date_request,
            order_date_expected_delivery,
            delivery_confirmation_date,
            delivery_date,
            webportal_status,
            po_qty,
            supplier_qty,
            gr_qty,
            time_frame_for_delivery_confirmation,
            vendor_id,
            vendor_name
        FROM tmp
        QUALIFY ROW_NUMBER() OVER (PARTITION BY po_doc, po_item ORDER BY delivery_date DESC, upd_date DESC) = 1
    )
    SELECT
        m.po_doc,
        m.product_id,
        m.po_item,
        m.store_id,
        m.purchase_name,
        MAX(m.purchase_name) OVER (PARTITION BY m.po_doc, m.po_item) MAX_ORDER_USER,
        m.order_date,
        m.order_date_request,
        m.order_date_expected_delivery,
        m.delivery_confirmation_date,
        m.delivery_date,
        m.webportal_status,
        m.po_qty,
        m.supplier_qty,
        m.gr_qty,
        m.time_frame_for_delivery_confirmation,
        m.vendor_id,
        m.vendor_name
    FROM main m
    """)
else:
    # Nothing to load: the view is not created, and the later cells skip their
    # work on the same CALDAY_IN_STR check.
    print(f"No valid calday's found In common-etl to create the view: temp_{target_table}")

In [0]:
# Delete step of the delete-then-insert load: remove from the target table every
# order_date that is present in temp_{target_table}, so the insert that follows
# does not duplicate those days.
if CALDAY_IN_STR != "":
    # Collect the distinct order dates in the temp view as one quoted,
    # comma-separated string (e.g. '2024-01-01','2024-01-02') for use in IN (...).
    calday_query = f"""
    SELECT
        CONCAT("'", CONCAT_WS("','", COLLECT_SET(CAST(TO_DATE(order_date, 'yyyy-MM-dd') AS STRING))), "'") AS calday_in_str
    FROM temp_{target_table}
    """

    calday_df = spark.sql(calday_query)
    # The query is a single-row aggregate; fall back to "" if the value comes
    # back NULL so the emptiness check below cannot fail on None.
    calday_in_str = calday_df.collect()[0][0] or ""

    # Only run DELETE if the string is non-empty: an empty view yields just the
    # two surrounding quotes, which strip() reduces to "".
    if calday_in_str.strip("'"):
        spark.sql(
            f"""
        DELETE FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_order_report
        WHERE order_date IN ({calday_in_str})
        """
        ).display()
    else:
        print("no valid calday's found to delete.")
else:
    # The message names this notebook's own temp view (previously it mentioned
    # an unrelated view name carried over from another notebook).
    print(
        f"temp_{target_table} could't be created as no valid calday's found In common-etl so {catalog_name}.udp_wcm_gold_vcm_dwh.f_order_report can't be deleted using calday's from temp_{target_table}"
    )

In [0]:
# Insert step: copy the rows of temp_{target_table} into the target table.
if CALDAY_IN_STR == "":
    # No temp view was built, so there is nothing to insert.
    print(
        f"temp_{target_table} could't be created as no valid calday's found In common-etl so {catalog_name}.udp_wcm_gold_vcm_dwh.f_order_report can't be inserted using calday's from temp_{target_table}"
    )
else:
    # The column list is read from the target table itself and used on both
    # sides of the INSERT, so columns are matched by name in the target's order.
    INSERT_ROW = ", ".join(
        spark.table(f"{catalog_name}.udp_wcm_gold_vcm_dwh.f_order_report").columns
    )
    spark.sql(
        f"""
        INSERT INTO {catalog_name}.udp_wcm_gold_vcm_dwh.f_order_report ({INSERT_ROW})
        SELECT {INSERT_ROW} FROM temp_{target_table}
    """
    ).display()

In [0]:
# Clean up: drop the temp view if it exists (it is absent when no caldays were found).
spark.sql(f"DROP VIEW IF EXISTS temp_{target_table}")

In [0]:
# With no caldays to process, end the notebook here with an explanatory exit
# value instead of continuing to the cells below.
if CALDAY_IN_STR == "":
    dbutils.notebook.exit(
        "No valid calday's found In common-etl to insert data into Table: ETL_LOG_DEPENDENCIES and ETL_DELTA_TABLE and "
    )

In [0]:
%run "../common/common_etl_update"