In [0]:
%run ../utils/common

In [0]:
# Switch Spark's datetime formatter/parser to the legacy implementation.
# The SQL statement further down formats dates with the week-based pattern
# letters 'YYYY' and 'ww'; presumably this setting is what allows those
# patterns to be used -- TODO confirm before removing it.
spark.conf.set("spark.sql.legacy.timeParserPolicy", "LEGACY")

In [0]:
# Notebook parameters, declared as Databricks text widgets.
# "environment" is the only widget created with an explicit (empty) label,
# so it is declared on its own; the rest only need a name and a default.
dbutils.widgets.text("environment", "", "")

_widget_defaults = {
    "target_dataset": "VCM_DMT_PRD",
    "target_table": "f_seasonality_index_by_week",
    "metadata_schema": "udp_wcm_metadata_dev",
    "dependency_table": "VCM_DMT_PRD.A_STORE_SKU_DAILY",
}
for _widget_name, _widget_default in _widget_defaults.items():
    dbutils.widgets.text(_widget_name, _widget_default)

In [0]:
# Column-name parameters (calendar-day column and row-id column) as text widgets.
for _widget_name, _widget_default in (
    ("field_calday", "CALDAY"),
    ("field_id", "HASH_ID"),
):
    dbutils.widgets.text(_widget_name, _widget_default)

In [0]:
# Catalog name for the selected environment; it prefixes every table
# referenced by the SQL statement below.
# NOTE(review): neither `settings` nor `environment` is defined earlier in this
# notebook -- `environment` is only read from its widget in the NEXT cell.
# Presumably both are provided by `%run ../utils/common`; confirm, otherwise
# this cell raises NameError on a fresh run.
catalog_name = settings[environment]['catalog_name']

In [0]:
# Pull the ETL parameters out of the widgets.
environment = dbutils.widgets.get("environment")
target_dataset = dbutils.widgets.get("target_dataset")
target_table = dbutils.widgets.get("target_table")
metadata_schema = dbutils.widgets.get("metadata_schema")

# The dependency widget holds a comma-separated list of table names; turn it
# into an upper-cased, single-quoted, comma-separated list: 'A','B',...
dependency_table = dbutils.widgets.get("dependency_table")
dependency_table = ",".join(
    f"'{table_name.strip().upper()}'" for table_name in dependency_table.split(",")
)

# Echo the effective parameters into the cell output.
for _param_name, _param_value in (
    ("environment", environment),
    ("target_dataset", target_dataset),
    ("target_table", target_table),
    ("catalog_name", catalog_name),
    ("metadata_schema", metadata_schema),
    ("dependency_table", dependency_table),
):
    print(f"{_param_name}: {_param_value}")

In [0]:
# Read the column-name parameters and echo them into the cell output.
field_calday, field_id = (
    dbutils.widgets.get(_widget_name) for _widget_name in ("field_calday", "field_id")
)

print(f"field_calday: {field_calday}")
print(f"field_id: {field_id}")

In [0]:
%run "../common/common_etl_load"

In [0]:
# Rebuild <catalog>.udp_wcm_gold_vcm_dmt.f_seasonality_index_by_week from scratch.
#
# CTE pipeline:
#   cnt_weeks_per_year  number of distinct d_time.isoweek values per calendar year.
#   table_date          every d_time day up to "yesterday" (current timestamp shifted
#                       by +7 hours), tagged with calendar year, isoweek and week count.
#   bu_region / mch5    dimension values to report on (business units 1500 and 2000
#                       with a region; MCH5 nodes not marked '[DO NOT USE]'/'[Block]').
#   calweek_bu_region_mch5
#                       full grid week x business unit x region x MCH5, so weeks
#                       without sales still get a row.
#   revenue             weekly normal-sale revenue:
#                       SUM(GREATEST(rev_amt_pos - rev_promo_amt, 0)).
#   seasonality         average of the current and the 3 following weeks divided by the
#                       average of the whole calendar year; 1 when that ratio is NULL.
#   pct                 5th / 95th percentile of the positive indexes of recent weeks.
#   main                index clamped to [pct_05, pct_95] plus the week the index is
#                       applied to (two weeks earlier, one year later; weeks 1-2 wrap
#                       to the end of the same year).
#   adhoc               manual overrides of year_week -> year_week_apply.
#
# Changes versus the previous version of this statement:
#   * rn: the old ordering relied on a join whose predicate required
#     year_week = year_week_apply, which never holds, so rn was arbitrary.
#     Rows whose target week comes from `adhoc` now get rn = 1, with year_week as a
#     deterministic tie-breaker.
#     NOTE(review): "adhoc mapping wins" is the inferred intent -- confirm with the
#     consumers that filter on rn.
#   * revenue no longer groups by mch5_desc (unused downstream); a second description
#     for the same mch5_id would have split the revenue and duplicated grid rows.
#   * the year filter in cnt_weeks_per_year uses the calendar-year pattern 'yyyy',
#     matching the GROUP BY, instead of the week-based 'YYYY'.
#   * weeks are de-duplicated before the cross join instead of after it.
spark.sql(f"""
CREATE OR REPLACE TABLE {catalog_name}.udp_wcm_gold_vcm_dmt.f_seasonality_index_by_week
WITH cnt_weeks_per_year AS (
    SELECT
        DATE_FORMAT(calday, 'yyyy') AS calyear,
        COUNT(DISTINCT isoweek) AS cnt_isoweek
    FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_time
    WHERE calday >= TRUNC(add_months(current_date(), -24), 'YEAR')
      AND calday <= add_months(date_sub(add_months(trunc(current_date(), 'YEAR'), 12), 1), 24)
      AND date_format(calday, 'yyyy') >= CAST(year(current_date()) - 2 AS STRING)
    GROUP BY DATE_FORMAT(calday, 'yyyy')
),
table_date AS (
    SELECT
        a.calday,
        DATE_FORMAT(a.calday, 'yyyy') AS calyear,
        a.isoweek AS calweek,
        b.cnt_isoweek
    FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_time a
    JOIN cnt_weeks_per_year b
        ON DATE_FORMAT(a.calday, 'yyyy') = b.calyear
    WHERE a.calday <= date_sub(to_date(current_timestamp() + INTERVAL 7 HOURS), 1)
),
bu_region AS (
    SELECT DISTINCT business_unit, region_domain_vn region
    FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_store
    WHERE region_domain_vn IS NOT NULL AND business_unit IN ('1500', '2000')
),
mch5 AS (
    SELECT mch5_id
    FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_mch5
    WHERE mch5_desc <> '[DO NOT USE]' AND mch5_desc NOT LIKE '[Block]%'
),
calweek_bu_region_mch5 AS (
    -- Collapse the daily rows to one row per week before building the grid.
    SELECT DISTINCT business_unit, region, mch5_id, calyear, calweek, cnt_isoweek
    FROM (SELECT DISTINCT calyear, calweek, cnt_isoweek FROM table_date) w
    CROSS JOIN bu_region
    CROSS JOIN mch5
),
revenue AS (
    SELECT
        d.calyear, d.calweek, b.business_unit, b.region_domain_vn region, c.mch5_id,
        SUM(GREATEST(IFNULL(a.revenue.rev_amt_pos, 0) - IFNULL(a.revenue.rev_promo_amt, 0), 0)) rev_normal_sale
    FROM {catalog_name}.udp_wcm_gold_vcm_dmt.a_store_sku_daily a
    INNER JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_store b
        USING(store_id)
    INNER JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product c
        USING(product_id)
    INNER JOIN table_date d
        USING(calday)
    GROUP BY 1,2,3,4,5
),
calweek_bu_region_mch5_revenue AS (
    SELECT
        business_unit, region, mch5_id, calyear, calweek, cnt_isoweek,
        IFNULL(rev_normal_sale, 0) rev_normal_sale
    FROM calweek_bu_region_mch5
    LEFT JOIN revenue USING (calyear, calweek, business_unit, region, mch5_id)
),
seasonality AS (
    SELECT
        business_unit, region, mch5_id, mch5_desc, calyear, calweek, cnt_isoweek,
        CONCAT(calyear, '.', LPAD(CAST(calweek AS STRING), 2, '0')) year_week,
        rev_normal_sale AS seasonality_index_origin,
        IFNULL(
            AVG(rev_normal_sale) OVER(PARTITION BY calyear, business_unit, region, mch5_id ORDER BY calweek ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING) /
            NULLIF(AVG(rev_normal_sale) OVER(PARTITION BY calyear, business_unit, region, mch5_id), 0),
            1
        ) AS seasonality_index
    FROM calweek_bu_region_mch5_revenue
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_mch5 USING(mch5_id)
),
pct AS (
    SELECT DISTINCT
        business_unit, region, mch5_id,
        PERCENTILE_CONT(0.05) WITHIN GROUP (ORDER BY seasonality_index)
            OVER (PARTITION BY business_unit, region, mch5_id) AS pct_05,
        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY seasonality_index)
            OVER (PARTITION BY business_unit, region, mch5_id) AS pct_95
    FROM seasonality
    WHERE seasonality_index > 0
    -- NOTE(review): the cutoff below is formatted with the week-based legacy pattern
    -- 'YYYY.ww', while year_week is built from the calendar year plus d_time.isoweek.
    -- The two may disagree around year boundaries; the alternative kept below uses the
    -- calendar year plus weekofyear() on the +7h shifted date. Confirm which is intended.
    AND YEAR_WEEK >= date_format(date_sub(current_date(), 51 * 7), 'YYYY.ww')
    --   AND year_week >= CONCAT(
    --         date_format(date_sub(to_date(current_timestamp() + INTERVAL 7 HOURS), 51 * 7), 'yyyy'),
    --         '.',
    --         lpad(weekofyear(date_sub(to_date(current_timestamp() + INTERVAL 7 HOURS), 51 * 7)), 2, '0')
    --   )
),
adhoc AS (
    SELECT * FROM VALUES
        ('2024.51', '2023.52'),
        ('2024.52', '2024.02'),
        ('2025.01', '2024.03'),
        ('2025.02', '2024.04'),
        ('2025.03', '2024.05'),
        ('2025.04', '2024.06'),
        ('2025.05', '2024.07'),
        ('2025.06', '2024.08'),
        ('2025.07', '2024.09'),
        ('2025.08', '2024.10')
    AS adhoc(year_week_apply, year_week)
),
main AS (
    SELECT
        a.business_unit, a.region, a.mch5_id, a.mch5_desc, a.calyear, a.calweek, a.cnt_isoweek, a.year_week,
        -- Weeks 1-2 wrap to the last weeks of the same year; every other week maps to
        -- (week - 2) of the following year.
        CONCAT(
            IF(a.calweek - 2 <= 0, CAST(a.calyear AS INT), CAST(a.calyear AS INT) + 1), '.',
            LPAD(CAST(IF(a.calweek - 2 <= 0, a.calweek - 2 + a.cnt_isoweek, a.calweek - 2) AS STRING), 2, '0')
        ) AS year_week_apply,
        b.pct_05 AS lower_value, b.pct_95 AS upper_value,
        a.seasonality_index_origin,
        CASE
            WHEN a.seasonality_index < b.pct_05 THEN b.pct_05
            WHEN a.seasonality_index > b.pct_95 THEN b.pct_95
            ELSE a.seasonality_index
        END AS seasonality_index
    FROM seasonality a
    LEFT JOIN pct b USING(business_unit, region, mch5_id)
)
SELECT
    a.business_unit, a.region, a.mch5_id, a.mch5_desc,
    a.calyear, a.calweek, a.cnt_isoweek,
    a.year_week,
    IFNULL(b.year_week_apply, a.year_week_apply) AS year_week_apply,
    -- Several source weeks can land on the same target week once the adhoc overrides
    -- are applied; the adhoc-mapped source week is ranked first.
    ROW_NUMBER() OVER(PARTITION BY a.business_unit, a.region, a.mch5_id, IFNULL(b.year_week_apply, a.year_week_apply)
                      ORDER BY IF(b.year_week IS NULL, 1e9, 1), a.year_week) AS rn,
    a.lower_value, a.upper_value,
    a.seasonality_index_origin, a.seasonality_index
FROM main a
LEFT JOIN adhoc b ON a.year_week = b.year_week
""")


In [0]:
%run "../common/common_etl_update"