In [0]:
%run ../utils/common

In [0]:
# Declare the notebook's text widgets (run parameters) with their defaults.
# Each tuple is forwarded positionally to dbutils.widgets.text, so the
# "environment" widget keeps its explicit empty third argument.
_widget_specs = (
    ("environment", "", ""),
    ("target_dataset", "VCM_DMT_PRD"),
    ("target_table", "f_corrected_sellrate"),
    ("metadata_schema", "udp_wcm_metadata_dev"),
    (
        "dependency_table",
        "VCM_DMT_PRD.F_CORRECTED_SALE, VCM_DWH_PRD.D_FRANCHISE_CONFIG, VCM_DMT_PRD.F_SEASONALITY_INDEX_BY_WEEK, VCM_DB_INVENTORY.D_MDQ_STORE_DC_SKU_DAILY",
    ),
)
for _widget_spec in _widget_specs:
    dbutils.widgets.text(*_widget_spec)


In [0]:
# Declare the two field-name widgets with their defaults (CALDAY / HASH_ID).
# They are read back and printed further down; presumably the shared ETL
# notebooks use them as column names — TODO confirm.
for _field_widget, _field_default in (("field_calday", "CALDAY"), ("field_id", "HASH_ID")):
    dbutils.widgets.text(_field_widget, _field_default)

In [0]:
# Declare the "proc_date" text widget with an empty default.
# NOTE(review): it is not read in the visible cells of this notebook;
# presumably the shared ETL notebooks invoked via %run consume it — confirm.
dbutils.widgets.text("proc_date", "")

In [0]:
# Resolve the environment selected for this run.
# The "environment" widget is already declared (with the same empty default)
# in the first parameter cell of this notebook, so it is only read here
# instead of being declared a second time.
environment = dbutils.widgets.get("environment")


In [0]:
# Look up the catalog for the selected environment in the `settings` mapping
# (not defined in this notebook; presumably provided by the %run of
# ../utils/common).
# The "environment" widget defaults to an empty string, so a missing or
# mistyped value would otherwise surface as a bare `KeyError: ''`. Re-raise
# the same exception type with a message that says what to fix.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    raise KeyError(
        f"No 'catalog_name' configured for environment {environment!r}; "
        "check the 'environment' widget value and the settings mapping."
    ) from exc

In [0]:
# Read the run parameters back from the notebook widgets.
environment = dbutils.widgets.get("environment")
target_dataset = dbutils.widgets.get("target_dataset")
target_table = dbutils.widgets.get("target_table")
metadata_schema = dbutils.widgets.get("metadata_schema")

# Convert the comma-separated dependency list into a quoted, upper-cased list
# of the form 'A','B',... .
# Blank entries (e.g. from a trailing or doubled comma) are dropped so they do
# not show up as empty '' items in the list. A completely empty widget value
# still yields '' exactly as before.
dependency_names = [
    name.strip().upper()
    for name in dbutils.widgets.get("dependency_table").split(",")
    if name.strip()
]
dependency_table = "'" + "','".join(dependency_names) + "'"

# Echo the effective parameters so they appear in the run output.
print(f"environment: {environment}")
print(f"target_dataset: {target_dataset}")
print(f"target_table: {target_table}")
print(f"catalog_name: {catalog_name}")
print(f"metadata_schema: {metadata_schema}")
print(f"dependency_table: {dependency_table}")

In [0]:
# Fetch the two field-name parameters in one pass, then echo them so they
# appear in the run output.
field_calday, field_id = (
    dbutils.widgets.get(_widget_name) for _widget_name in ("field_calday", "field_id")
)

print(f"field_calday: {field_calday}")
print(f"field_id: {field_id}")

In [0]:
%run "../common/common_etl_load"

In [0]:
# Ensure the target table exists in the selected catalog; the statement is a
# no-op when the table is already there (CREATE TABLE IF NOT EXISTS).
create_sellrate_table_sql = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_gold_vcm_dmt.f_corrected_sellrate (
  calday STRING,
  store_id STRING,
  product_id STRING,
  note STRING,
  ma30_corrected_base_sale_qty FLOAT,
  ma60_corrected_base_sale_qty FLOAT,
  ma07_corrected_base_sale_qty FLOAT,
  sell_rate FLOAT,
  sale_trend FLOAT,
  core_range STRING,
  is_adhoc STRING,
  seasonality_index FLOAT
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
"""
spark.sql(create_sellrate_table_sql)

In [0]:
# Build the temp view `f_corrected_sellrate` with the sell-rate rows for the
# previous day. The whole step is skipped when CALDAY_IN_STR is empty
# (CALDAY_IN_STR is not defined in this notebook; presumably set by the
# common_etl_load notebook run above — confirm).
#
# "Today" is always DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS), so the
# result depends on the wall clock at run time, not on a notebook parameter.
# NOTE(review): the +7 hours looks like a UTC -> UTC+7 shift — confirm.
#
# CTEs inside the query:
#   sellrate_30day / sellrate_60day / sellrate_7day
#       Average corrected_base_sale_qty (NULL counted as 0) per store/product
#       over calday in [today - N, today - 1] for N = 30, 60, 7.
#   main
#       f_corrected_sale rows with calday = today - 1, joined to the three
#       averages and to the store, core-range, product and franchise tables,
#       plus the seasonality index for the current year/week key.
#       - store_id is replaced by d_franchise_config.store_id when the row
#         matches on ref_fc_store.
#       - sell_rate is the 30-day average, forced to 0 when note equals
#         "TEMPORARY INNACTIVE" (spelling as stored in the data).
#       - sale_trend is the 7-day / 30-day ratio; 1 is used whenever that
#         ratio is NULL or 0 (including a 30-day average of 0).
#       - seasonality_index defaults to 1 when no index row matches.
#   adhoc
#       When today falls between 2025-08-01 and 2025-08-31 and the product's
#       mch4_id / mch5_id is in the listed sets, is_adhoc becomes "YES" and
#       seasonality_index is capped at 1; all other rows pass through.
#
# NOTE(review): the year/week join key concatenates the calendar year with
# EXTRACT(WEEK ...); verify this matches how year_week_apply is built for
# dates around a year boundary.
# NOTE(review): the view has the same unqualified name as the target table
# udp_wcm_gold_vcm_dmt.f_corrected_sellrate; later cells reference the view
# by that unqualified name.
if CALDAY_IN_STR != '':
    spark.sql(
        f"""
CREATE OR REPLACE TEMP VIEW f_corrected_sellrate AS
WITH sellrate_30day AS (
    SELECT 
        a.store_id,
        a.product_id,
        AVG(IFNULL(a.corrected_base_sale_qty, 0)) AS ma30_corrected_base_sale_qty
    FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_corrected_sale a
    WHERE a.calday >= DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) -30
      AND a.calday <= DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) -1
    GROUP BY 1,2
),
sellrate_60day AS (
    SELECT 
        a.store_id,
        a.product_id,
        AVG(IFNULL(a.corrected_base_sale_qty, 0)) AS ma60_corrected_base_sale_qty
    FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_corrected_sale a
    WHERE a.calday >= DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) - 60
      AND a.calday <= DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) -1
    GROUP BY 1,2
),
sellrate_7day AS (
    SELECT 
        a.store_id,
        a.product_id,
        AVG(IFNULL(a.corrected_base_sale_qty, 0)) AS ma07_corrected_base_sale_qty
    FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_corrected_sale a
    WHERE a.calday >=DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) - 7 
      AND a.calday <= DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) -1
    GROUP BY 1,2
),
main AS (
    SELECT      
        a.calday,
        IFNULL(a3.store_id, a.store_id) AS store_id,
        a.product_id,
        a.note,
        a1.ma30_corrected_base_sale_qty,
        a9.ma60_corrected_base_sale_qty,
        a2.ma07_corrected_base_sale_qty,
        CASE WHEN a.note = "TEMPORARY INNACTIVE" THEN 0 ELSE a1.ma30_corrected_base_sale_qty END AS sell_rate,
        CASE    
            WHEN IFNULL(CASE WHEN ma30_corrected_base_sale_qty = 0 THEN 0 ELSE ma07_corrected_base_sale_qty / ma30_corrected_base_sale_qty END, 0) = 0 THEN 1
            ELSE CASE WHEN ma30_corrected_base_sale_qty = 0 THEN 0 ELSE ma07_corrected_base_sale_qty / ma30_corrected_base_sale_qty END
        END AS sale_trend,
        a5.core_range,
        'NO' is_adhoc,
        IFNULL(a10.seasonality_index,1) AS seasonality_index
    FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_corrected_sale a
    LEFT JOIN sellrate_30day a1 ON a.store_id = a1.store_id AND a.product_id = a1.product_id
    LEFT JOIN sellrate_7day a2 ON a.store_id = a2.store_id AND a.product_id = a2.product_id
    LEFT JOIN sellrate_60day a9 ON a.store_id = a9.store_id AND a.product_id = a9.product_id
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_store a4 ON a.store_id = a4.store_id
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_core_range_assortment a5 ON a.store_id = a5.store_id AND a.product_id = a5.product_id
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product a6 ON a.product_id = a6.product_id
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_franchise_config a3 ON a.store_id = a3.ref_fc_store

    LEFT JOIN (SELECT BUSINESS_UNIT, REGION, MCH5_ID, YEAR_WEEK_APPLY, SEASONALITY_INDEX
            FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_seasonality_index_by_week
            QUALIFY ROW_NUMBER() OVER(PARTITION BY BUSINESS_UNIT, REGION, MCH5_ID, YEAR_WEEK_APPLY ORDER BY RN) = 1
        ) a10
        ON CONCAT(DATE_FORMAT(DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS), 'yyyy'), '.', LPAD(CAST(EXTRACT(WEEK FROM DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS)) AS STRING), 2, '0')) = a10.year_week_apply

        AND a10.BUSINESS_UNIT = a4.BUSINESS_UNIT
        AND a10.REGION = a4.REGION_DOMAIN_VN
        AND a6.MCH5_ID = a10.MCH5_ID
    WHERE a.calday = DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) - 1
),
adhoc AS (
        SELECT
            a.calday,
            a.store_id,
            a.product_id,
            a.note,
            a.ma30_corrected_base_sale_qty,
            a.ma60_corrected_base_sale_qty,
            a.ma07_corrected_base_sale_qty,
            a.sell_rate,
            a.sale_trend,
            a.core_range,
            IF(
                DATE(current_timestamp() + INTERVAL 7 HOURS) BETWEEN "2025-08-01" AND "2025-08-31"
                AND (b.mch4_id IN ("1020609", "1020612", "2020101") OR b.mch5_id IN ("102010109", "102060207", "102060511", "102060401", "102030403", "202010205", "102050404", "102050403")),
                "YES",
                a.is_adhoc
            ) is_adhoc,
            IF(
                DATE(current_timestamp() + INTERVAL 7 HOURS) BETWEEN "2025-08-01" AND "2025-08-31"
                AND (b.mch4_id IN ("1020609", "1020612", "2020101") OR b.mch5_id IN ("102010109", "102060207", "102060511", "102060401", "102030403", "202010205", "102050404", "102050403")),
                LEAST(a.seasonality_index, 1),
                a.seasonality_index
            ) seasonality_index
        FROM main a
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product b
            USING (product_id)
    )
    SELECT  
        calday,
        store_id,
        product_id,
        note,
        ma30_corrected_base_sale_qty,
        ma60_corrected_base_sale_qty,
        ma07_corrected_base_sale_qty,
        sell_rate,
        sale_trend,
        core_range,
        is_adhoc,
        seasonality_index
    FROM adhoc
 
    """
    )

In [0]:
# Refresh the sale_trend percentile bands in f_sale_trend_range.
# Both statements filter on dayofweek = 2 for the +7h date (Monday under
# Spark's 1 = Sunday numbering), so on any other day the DELETE matches no
# rows and the INSERT selects none. The percentiles are taken over the
# sale_trend values of the f_corrected_sellrate temp view (NULL treated as 1)
# for the listed mch2_id values with a non-NULL replenishment_mode.
if CALDAY_IN_STR != '':
    delete_trend_range_sql = f"""
    DELETE FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_sale_trend_range 
    WHERE calday = DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) - 1
    AND EXTRACT(dayofweek FROM DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS)) = 2
    """
    insert_trend_range_sql = f"""
        INSERT INTO {catalog_name}.udp_wcm_gold_vcm_dmt.f_sale_trend_range
        SELECT DISTINCT
            DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) - 1 calday,
            PERCENTILE_CONT(0.05) WITHIN GROUP (ORDER BY IFNULL(c.sale_trend, 1)) OVER() pct_05,
            PERCENTILE_CONT(0.10) WITHIN GROUP (ORDER BY IFNULL(c.sale_trend, 1)) OVER() pct_10,
            PERCENTILE_CONT(0.15) WITHIN GROUP (ORDER BY IFNULL(c.sale_trend, 1)) OVER() pct_15,
            PERCENTILE_CONT(0.20) WITHIN GROUP (ORDER BY IFNULL(c.sale_trend, 1)) OVER() pct_20,
            PERCENTILE_CONT(0.80) WITHIN GROUP (ORDER BY IFNULL(c.sale_trend, 1)) OVER() pct_80,
            PERCENTILE_CONT(0.85) WITHIN GROUP (ORDER BY IFNULL(c.sale_trend, 1)) OVER() pct_85,
            PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY IFNULL(c.sale_trend, 1)) OVER() pct_90,
            PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY IFNULL(c.sale_trend, 1)) OVER() pct_95
        FROM {catalog_name}.udp_wcm_gold_vcm_db_inventory.d_mdq_store_dc_sku_daily
         a
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product b
            USING(product_id)
        LEFT JOIN f_corrected_sellrate c
            USING(store_id, product_id)
        WHERE b.mch2_id IN ('101','102','201','202','203')
            AND a.replenishment_mode IS NOT NULL
            AND EXTRACT(dayofweek FROM DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS)) = 2;
    """

    # Clear yesterday's row first, then insert the recomputed one.
    for _statement in (delete_trend_range_sql, insert_trend_range_sql):
        spark.sql(_statement)

In [0]:
# Reload yesterday's rows in the f_corrected_sellrate table from the temp view
# of the same name: delete calday = today - 1, then insert the view's rows.
# For the listed mch2_id values, sale_trend is clamped to the
# [lower_value, upper_value] band (pct_05 / pct_80) taken from the most recent
# f_sale_trend_range row; when no band row is joined the bounds fall back to
# 0 and 1. The band is only picked up when the +7h date is >= 2024-08-05.
if CALDAY_IN_STR != '':
    delete_sellrate_sql = f"""
    DELETE FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_corrected_sellrate 
    WHERE calday = DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) - 1;
    """
    insert_sellrate_sql = f"""
    INSERT INTO {catalog_name}.udp_wcm_gold_vcm_dmt.f_corrected_sellrate
    SELECT  
        a.calday,
        a.store_id,
        a.product_id,
        a.note,
        a.ma30_corrected_base_sale_qty,
        a.ma60_corrected_base_sale_qty,
        a.ma07_corrected_base_sale_qty,
        a.sell_rate,
        CASE
            WHEN b.mch2_id IN ('101','102','201','202','203') AND a.sale_trend > IFNULL(c.upper_value, 1) THEN IFNULL(c.upper_value, 1)
            WHEN b.mch2_id IN ('101','102','201','202','203') AND a.sale_trend < IFNULL(c.lower_value, 0) THEN IFNULL(c.lower_value, 0)
            ELSE a.sale_trend
        END sale_trend,
        a.core_range,
        a.is_adhoc,
        a.seasonality_index
    FROM f_corrected_sellrate a
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product b
        USING(product_id)
    LEFT JOIN (
        SELECT 
            pct_05 lower_value,
            pct_80 upper_value
        FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_sale_trend_range
        WHERE calday = (SELECT MAX(calday) FROM {catalog_name}.udp_wcm_gold_vcm_dmt.f_sale_trend_range) AND DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) >= '2024-08-05'
    ) c ON 1=1
    ;
    """

    # Delete must run before the insert so the day's rows are replaced.
    for _statement in (delete_sellrate_sql, insert_sellrate_sql):
        spark.sql(_statement)

In [0]:
# Stop the notebook here when CALDAY_IN_STR is empty, so the cells after this
# one are not executed; the message is returned as the notebook's exit value.
if CALDAY_IN_STR == '':
    _exit_message = "No valid calday's found In common-etl to insert data into Table: ETL_LOG_DEPENDENCIES and ETL_DELTA_TABLE and "
    dbutils.notebook.exit(_exit_message)

In [0]:
%run "../common/common_etl_update"