In [0]:
%run ../utils/common

In [0]:
# Declare the free-text `environment` notebook widget (empty default value,
# empty label) and read whatever value the caller/job supplied for it.
# The widget name is kept in one place so create and read cannot drift apart.
ENVIRONMENT_WIDGET = "environment"
dbutils.widgets.text(ENVIRONMENT_WIDGET, "", "")
environment = dbutils.widgets.get(ENVIRONMENT_WIDGET)

In [0]:
# Resolve the catalog that every table reference in this notebook is built on.
# `settings` is not defined in this notebook; presumably it is provided by the
# `%run ../utils/common` cell — verify there.
# The widget default is an empty string, so an unset widget would otherwise
# surface as a bare `KeyError: ''`. Re-raise the same exception type with a
# message that says which environment / setting is missing.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    raise KeyError(
        f"settings has no 'catalog_name' entry for environment {environment!r}; "
        "check the value passed to the 'environment' widget"
    ) from exc

In [0]:
# Make sure the target table exists before anything is deleted from or inserted
# into it. Columns: the (calday, store_id, product_id) key, six FLOAT cost
# columns (cogs, cogs_30d ... cogs_360d) and cost_per_unit, which a later cell
# derives as the smallest positive value among those six.
f_cogs_min_table = f"{catalog_name}.udp_wcm_gold_vcm_dwh.f_cogs_min"

create_f_cogs_min_ddl = f"""
CREATE TABLE IF NOT EXISTS {f_cogs_min_table}
(
  calday DATE,
  store_id STRING,
  product_id STRING,
  cogs FLOAT,
  cogs_30d FLOAT,
  cogs_60d FLOAT,
  cogs_90d FLOAT,
  cogs_180d FLOAT,
  cogs_360d FLOAT,
  cost_per_unit FLOAT
)
"""

spark.sql(create_f_cogs_min_ddl)

In [0]:
# Load window: the trailing 365 days up to and including today, rendered as
# 'YYYY-MM-DD' strings because they are interpolated into SQL further down.
#
# The clock is read exactly once so both bounds derive from the same instant;
# two separate `datetime.now()` calls could straddle midnight and produce a
# 366-day window.
# NOTE(review): `datetime.now()` uses the cluster's local clock/timezone —
# confirm that matches the calendar used by `calday`.
WINDOW_DAYS = 365

_today = datetime.now().date()
start_date = (_today - timedelta(days=WINDOW_DAYS)).strftime('%Y-%m-%d')
end_date = _today.strftime('%Y-%m-%d')
print(start_date, end_date)

In [0]:
# Build the `temp_f_cogs_min` temp view: one row per (calday, store_id,
# product_id) found in any of f_cogs, f_cogs_90d or f_cogs_360d inside the
# load window, carrying six per-unit cost figures plus `cost_per_unit`.
#
#   _90d : f_cogs FULL JOIN f_cogs_90d  -> cogs, cogs_30d, cogs_60d, cogs_90d
#   main : _90d   FULL JOIN f_cogs_360d -> adds cogs_180d, cogs_360d
#   final: cost_per_unit = smallest strictly positive value among the six
#          columns, NULL when none of them is positive.
#
# NOTE(review): cogs .. cogs_90d are wrapped in IFNULL(.., 0) in `main`, but
# cogs_180d / cogs_360d are not, so those two can be NULL when the amount
# column is NULL while the quantity is non-zero — confirm that is intended.


def _unit_cost_sql(amount_col: str, qty_col: str) -> str:
    """Return a SQL expression for `amount_col / ROUND(qty_col, 3)`.

    The expression evaluates to 0 when the quantity is NULL or rounds to zero
    at 3 decimals, so the division never sees a zero denominator.
    """
    return (
        f"CASE WHEN ROUND({qty_col}, 3) = 0 OR {qty_col} IS NULL THEN 0 "
        f"ELSE {amount_col} / ROUND({qty_col}, 3) END"
    )


def _windowed_sql(table_name: str) -> str:
    """Return a parenthesised subquery over `table_name` limited to the load window.

    Reads the notebook-level `catalog_name`, `start_date` and `end_date`.
    """
    return (
        f"(SELECT * FROM {catalog_name}.udp_wcm_gold_vcm_dwh.{table_name} "
        f"WHERE calday BETWEEN DATE('{start_date}') AND DATE('{end_date}'))"
    )


# ARRAY_MIN(FILTER(...)) replaces a per-row correlated scalar subquery over
# EXPLODE: same result (minimum of the positive entries, NULL if there are
# none) without needing subquery decorrelation.
spark.sql(f"""
CREATE OR REPLACE TEMP VIEW temp_f_cogs_min AS
WITH _90d AS (
    SELECT
        IFNULL(a.calday, b.calday) AS calday,
        IFNULL(a.store_id, b.store_id) AS store_id,
        IFNULL(a.product_id, b.product_id) AS product_id,
        {_unit_cost_sql('b.cogs_30d', 'b.cogs_qty_30d')} AS cogs_30d,
        {_unit_cost_sql('b.cogs_60d', 'b.cogs_qty_60d')} AS cogs_60d,
        {_unit_cost_sql('b.cogs_90d', 'b.cogs_qty_90d')} AS cogs_90d,
        {_unit_cost_sql('a.cost', 'a.quantity')} AS cogs
    FROM {_windowed_sql('f_cogs')} a
    FULL JOIN {_windowed_sql('f_cogs_90d')} b
        USING (calday, store_id, product_id)
),
main AS (
    SELECT
        IFNULL(a.calday, b.calday) AS calday,
        IFNULL(a.store_id, b.store_id) AS store_id,
        IFNULL(a.product_id, b.product_id) AS product_id,
        IFNULL(a.cogs, 0) AS cogs,
        IFNULL(a.cogs_30d, 0) AS cogs_30d,
        IFNULL(a.cogs_60d, 0) AS cogs_60d,
        IFNULL(a.cogs_90d, 0) AS cogs_90d,
        {_unit_cost_sql('b.cogs_180d', 'b.cogs_qty_180d')} AS cogs_180d,
        {_unit_cost_sql('b.cogs_360d', 'b.cogs_qty_360d')} AS cogs_360d
    FROM _90d a
    FULL JOIN {_windowed_sql('f_cogs_360d')} b
        USING (calday, store_id, product_id)
)
SELECT
    calday, store_id, product_id,
    cogs, cogs_30d, cogs_60d, cogs_90d, cogs_180d, cogs_360d,
    ARRAY_MIN(
        FILTER(
            ARRAY(cogs, cogs_30d, cogs_60d, cogs_90d, cogs_180d, cogs_360d),
            v -> v > 0
        )
    ) AS cost_per_unit
FROM main
""")

In [0]:
# Remove from the target table every calday that is about to be re-inserted
# from the temp view, so reloaded days are not duplicated.
#
# The query renders the distinct caldays of the view as one SQL-ready list:
# each date cast to a string, joined with "','" and wrapped in single quotes.
calday_query = """
SELECT
    CONCAT("'", CONCAT_WS("','", COLLECT_SET(CAST(TO_DATE(calday, 'yyyy-MM-dd') AS STRING))), "'") AS calday_in_str
FROM temp_f_cogs_min
"""

# Aggregate without GROUP BY: the single value of the single result row.
calday_in_str = spark.sql(calday_query).first()[0]

# Stripping the wrapping quotes leaves nothing when no date was collected;
# in that case there is nothing to delete.
has_caldays = calday_in_str.strip("'") != ""
if not has_caldays:
    print("No valid CALDAYs found to delete.")
else:
    spark.sql(f"""
    DELETE FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_cogs_min
    WHERE calday IN ({calday_in_str})
    """)

In [0]:
# Append every row of the temp view to the target table.
# The column list is declared once and used for both the INSERT target list
# and the SELECT list, so the two always line up position by position.
f_cogs_min_columns = [
    "calday",
    "store_id",
    "product_id",
    "cogs",
    "cogs_30d",
    "cogs_60d",
    "cogs_90d",
    "cogs_180d",
    "cogs_360d",
    "cost_per_unit",
]
insert_column_list = ",\n".join(f_cogs_min_columns)
select_column_list = ",\n".join(f"    {column}" for column in f_cogs_min_columns)

insert_f_cogs_min_sql = "\n".join([
    "",
    f"INSERT INTO {catalog_name}.udp_wcm_gold_vcm_dwh.f_cogs_min ",
    f"({insert_column_list})",
    "SELECT ",
    select_column_list,
    "FROM temp_f_cogs_min",
    "",
])

spark.sql(insert_f_cogs_min_sql)

In [0]:
# Clean up: drop the staging view now that its rows have been inserted.
# Plain string literal — there is nothing to interpolate, so no f-prefix.
spark.sql("DROP VIEW IF EXISTS temp_f_cogs_min")