In [0]:
%run ../utils/common

In [0]:
# Job parameters exposed as notebook widgets, each registered with its default value.
# "environment" is the only widget that also passes an explicit (empty) third argument,
# so it is declared on its own rather than in the loop below.
dbutils.widgets.text("environment", "", "")
for widget_name, widget_default in (
    ("target_dataset", "VCM_DWH_PRD"),
    ("target_table", "f_cogs_360d"),
    ("metadata_schema", "udp_wcm_metadata_dev"),
    ("dependency_table", "VCM_DWH_PRD.F_COGS"),
):
    dbutils.widgets.text(widget_name, widget_default)

In [0]:
# Resolve the catalog for the selected environment.
# `environment` is read from its widget here because the cell that loads the other
# widget values runs *after* this one; without this line a fresh "Run All" would hit
# a NameError on `environment` (unless a %run'd notebook happened to define it).
# `settings` is not defined in this notebook - presumably provided by the %run'd
# ../utils/common notebook; verify.
environment = dbutils.widgets.get("environment")
catalog_name = settings[environment]['catalog_name']

In [0]:
# Widgets holding the names of the date column and the row-key column, with defaults.
for widget_name, widget_default in (("field_calday", "CALDAY"), ("field_id", "HASH_ID")):
    dbutils.widgets.text(widget_name, widget_default)

In [0]:
# Read both column-name widgets (in declaration order), then echo them to the cell output.
field_calday, field_id = (
    dbutils.widgets.get(widget_name) for widget_name in ("field_calday", "field_id")
)

print(f"field_calday: {field_calday}")
print(f"field_id: {field_id}")

In [0]:
# Load the scalar job parameters from their widgets, in the same order they are printed.
environment, target_dataset, target_table, metadata_schema = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("environment", "target_dataset", "target_table", "metadata_schema")
)

# The dependency widget is a comma-separated list; each entry is trimmed, upper-cased
# and wrapped in single quotes, then re-joined with commas: 'A','B',...
dependency_table = ",".join(
    f"'{entry.strip().upper()}'"
    for entry in dbutils.widgets.get("dependency_table").split(",")
)

# Echo the effective parameters to the cell output.
print(f"environment: {environment}")
print(f"target_dataset: {target_dataset}")
print(f"target_table: {target_table}")
print(f"catalog_name: {catalog_name}")
print(f"metadata_schema: {metadata_schema}")
print(f"dependency_table: {dependency_table}")

In [0]:
%run "../common/common_etl_load"

In [0]:
# DDL for the target table. IF NOT EXISTS means an already-existing table is left as is.
# Columns: the row key and grain (hash_id, calday, product_id, store_id) followed by the
# 360-day and 180-day cost / quantity totals. The table properties set the
# DELTA.AUTOOPTIMIZE optimize-write and auto-compact flags.
create_table_sql = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_gold_vcm_dwh.f_cogs_360d
(
  hash_id BIGINT,
  calday DATE,
  product_id STRING,
  store_id STRING,
  cogs_360d FLOAT,
  cogs_b2b_360d FLOAT,
  cogs_qty_360d FLOAT,
  cogs_qty_b2b_360d FLOAT,
  cogs_180d FLOAT,
  cogs_b2b_180d FLOAT,
  cogs_qty_180d FLOAT,
  cogs_qty_b2b_180d FLOAT
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
"""
spark.sql(create_table_sql)

In [0]:
# Build the temp view `temp_f_cogs_360d`: trailing-window COGS totals per
# (calday, product_id, store_id).
#
#   table_date      - one row per day from START_DATE to END_DATE (sequence + explode).
#   calculate_table - f_cogs summed per calday / product_id / store_id, restricted to
#                     calday between (START_DATE - 360 days) and END_DATE, and excluding
#                     product_id 'DUMMY'.
#   main            - joins every generated day to the source days in the window
#                     (day - 360, day] (360 days including the day itself) and sums them.
#                     The *_180d columns only count source days in (day - 180, day].
#
# Because the join is an INNER JOIN, a generated day only yields rows for
# product/store pairs that have at least one source row inside its 360-day window.
#
# START_DATE / END_DATE are not defined in the visible cells - presumably supplied by
# the %run'd common_etl_load notebook. They are interpolated verbatim inside
# to_date(...), so they must already be valid SQL expressions (e.g. quoted date
# literals) - TODO confirm.
# FARM_FINGERPRINT is not defined in the visible cells either - presumably a function
# registered by one of the %run'd notebooks; verify.
# NOTE(review): hash_id hashes the date, store_id and product_id concatenated without a
# separator, so different (store_id, product_id) pairs can produce the same input
# string - confirm this is acceptable.
spark.sql(f"""
CREATE OR REPLACE TEMP VIEW temp_f_cogs_360d AS
WITH
table_date AS (
    SELECT explode(sequence(
        to_date({START_DATE}), 
        to_date({END_DATE})
    )) AS date_gen_c
),
calculate_table AS (
    SELECT
        calday,
        product_id,
        store_id,
        SUM(cost) cogs,
        SUM(cost_b2b) cogs_b2b,
        SUM(quantity) quantity,
        SUM(quantity_b2b) quantity_b2b
    FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_cogs
    WHERE calday >= date_sub(to_date({START_DATE}), 360)
        AND calday <= to_date({END_DATE})
        AND product_id != 'DUMMY'
    GROUP BY 
        calday,
        product_id,
        store_id
),
main AS (
    SELECT
        FARM_FINGERPRINT(CONCAT(
            IFNULL(CAST(d.date_gen_c AS STRING), ""),
            IFNULL(CAST(c.store_id AS STRING), ""),
            IFNULL(CAST(c.product_id AS STRING), "")
        )) hash_id,
        d.date_gen_c calday,
        c.product_id,
        c.store_id,
        SUM(c.cogs) cogs_360d,
        SUM(c.cogs_b2b) cogs_b2b_360d,
        SUM(c.quantity) cogs_qty_360d,
        SUM(c.quantity_b2b) cogs_qty_b2b_360d,
        SUM(IF(c.calday > date_sub(d.date_gen_c, 180), c.cogs, 0)) cogs_180d,
        SUM(IF(c.calday > date_sub(d.date_gen_c, 180), c.cogs_b2b, 0)) cogs_b2b_180d,
        SUM(IF(c.calday > date_sub(d.date_gen_c, 180), c.quantity, 0)) cogs_qty_180d,
        SUM(IF(c.calday > date_sub(d.date_gen_c, 180), c.quantity_b2b, 0)) cogs_qty_b2b_180d
    FROM table_date d
    INNER JOIN calculate_table c
        ON c.calday > date_sub(d.date_gen_c, 360)
            AND d.date_gen_c >= c.calday
    GROUP BY
        d.date_gen_c,
        c.product_id,
        c.store_id
)
SELECT
    *
FROM main
""")

In [0]:
# Gather every distinct calday in the staged view as one string of single-quoted,
# comma-separated dates, ready to drop into an IN (...) list.
calday_query = """
SELECT
    CONCAT("'", CONCAT_WS("','", COLLECT_SET(CAST(TO_DATE(calday, 'yyyy-MM-dd') AS STRING))), "'") AS calday_in_str
FROM temp_f_cogs_360d
"""

calday_df = spark.sql(calday_query)
calday_in_str = calday_df.collect()[0][0]

# Stripping the quote characters from both ends leaves an empty string when no
# date was collected; in that case there is nothing to delete.
if not calday_in_str.strip("'"):
    print("No valid CALDAYs found to delete.")
else:
    # Remove the target rows for exactly the staged caldays before they are re-inserted.
    spark.sql(f"""
    DELETE FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_cogs_360d
    WHERE calday IN ({calday_in_str})
    """)

In [0]:
# Append every staged row from temp_f_cogs_360d to the target table.
# Columns are listed explicitly on both sides so the mapping does not depend on
# the column order of either relation.
insert_sql = f"""
INSERT INTO {catalog_name}.udp_wcm_gold_vcm_dwh.f_cogs_360d
(
  hash_id,
  calday,
  product_id ,
  store_id,
  cogs_360d ,
  cogs_b2b_360d,
  cogs_qty_360d,
  cogs_qty_b2b_360d,
  cogs_180d ,
  cogs_b2b_180d ,
  cogs_qty_180d ,
  cogs_qty_b2b_180d )
SELECT 
  hash_id,
  calday,
  product_id ,
  store_id,
  cogs_360d ,
  cogs_b2b_360d,
  cogs_qty_360d,
  cogs_qty_b2b_360d,
  cogs_180d ,
  cogs_b2b_180d ,
  cogs_qty_180d ,
  cogs_qty_b2b_180d
FROM temp_f_cogs_360d
"""
spark.sql(insert_sql)