In [0]:
%run ../utils/common

In [0]:
# Register the job-level text widgets. Each entry is the positional argument
# list passed to dbutils.widgets.text: (name, default value[, label]).
_job_widget_specs = (
    ("environment", "", ""),
    ("target_dataset", "VCM_DWH_PRD"),
    ("target_table", "f_cogs_90d"),
    ("metadata_schema", "udp_wcm_metadata_dev"),
    ("dependency_table", "VCM_DWH_PRD.F_COGS"),
)
for _widget_args in _job_widget_specs:
    dbutils.widgets.text(*_widget_args)

In [0]:
# Look up the catalog name configured for the selected environment.
# `settings` is not defined in this notebook; presumably it comes from
# `%run ../utils/common` -- TODO confirm.
#
# `environment` is only read from its widget in a later cell, so on a fresh
# run this cell would raise NameError unless an included notebook already
# defined it. Keep any existing value, and otherwise fall back to the
# "environment" widget registered in the previous cell.
try:
    environment
except NameError:
    environment = dbutils.widgets.get("environment")

catalog_name = settings[environment]['catalog_name']

In [0]:
# Register the `field_calday` and `field_id` text widgets with their defaults.
for _widget_name, _widget_default in (
    ("field_calday", "CALDAY"),
    ("field_id", "HASH_ID"),
):
    dbutils.widgets.text(_widget_name, _widget_default)

In [0]:
# Pull the two field-name widgets into notebook variables, then echo them so
# the values used by this run are visible in the cell output.
field_calday, field_id = (
    dbutils.widgets.get(_widget_name)
    for _widget_name in ("field_calday", "field_id")
)

for _label, _value in (("field_calday", field_calday), ("field_id", field_id)):
    print(f"{_label}: {_value}")

In [0]:
# Read the job-level widgets into notebook variables (same order as before).
environment, target_dataset, target_table, metadata_schema = (
    dbutils.widgets.get(_widget_name)
    for _widget_name in (
        "environment",
        "target_dataset",
        "target_table",
        "metadata_schema",
    )
)

# Turn the comma-separated dependency list into trimmed, upper-cased names,
# each wrapped in single quotes and joined by commas: 'A.B','C.D'.
_dependency_names = [
    _name.strip().upper()
    for _name in dbutils.widgets.get("dependency_table").split(",")
]
dependency_table = ",".join(f"'{_name}'" for _name in _dependency_names)

# Echo every resolved parameter so the run output records what was used.
for _label, _value in (
    ("environment", environment),
    ("target_dataset", target_dataset),
    ("target_table", target_table),
    ("catalog_name", catalog_name),
    ("metadata_schema", metadata_schema),
    ("dependency_table", dependency_table),
):
    print(f"{_label}: {_value}")

In [0]:
%run "../common/common_etl_load"

In [0]:
# Make sure the target table exists before the DELETE / INSERT cells run.
# The DDL is a no-op when the table is already present (IF NOT EXISTS).
# Columns: the (hash_id, calday, product_id, store_id) key columns, the
# 90/60/30-day COGS and quantity figures (total and b2b), and unit_soh_value.
_create_f_cogs_90d_ddl = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_gold_vcm_dwh.f_cogs_90d
(
    hash_id BIGINT,
    calday DATE,
    product_id STRING,
    store_id STRING,
    cogs_90d FLOAT,
    cogs_b2b_90d FLOAT,
    cogs_qty_90d FLOAT,
    cogs_qty_b2b_90d FLOAT,
    cogs_60d FLOAT,
    cogs_b2b_60d FLOAT,
    cogs_qty_60d FLOAT,
    cogs_qty_b2b_60d FLOAT,
    cogs_30d FLOAT,
    cogs_b2b_30d FLOAT,
    cogs_qty_30d FLOAT,
    cogs_qty_b2b_30d FLOAT,
    unit_soh_value FLOAT
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
"""
spark.sql(_create_f_cogs_90d_ddl)

In [0]:
# Build the staging view `temp_f_cogs_90d` holding rolling 90/60/30-day COGS
# per (calday, product_id, store_id). Skipped when CALDAY_IN_STR is empty.
# NOTE(review): CALDAY_IN_STR, START_DATE and END_DATE are not defined in this
# notebook; presumably they are set by `%run "../common/common_etl_load"` --
# confirm. START_DATE / END_DATE are interpolated verbatim inside date(...),
# so they are assumed to already be valid SQL date expressions (e.g. quoted).
if CALDAY_IN_STR != '':
    # Structure of the query below:
    #   table_date      - one row per day from START_DATE to END_DATE inclusive.
    #   calculate_table - f_cogs summed per (calday, product_id, store_id),
    #                     from 90 days before START_DATE through END_DATE,
    #                     excluding product_id 'DUMMY'.
    #   main            - joins each generated day to the source days that
    #                     satisfy day-90 < c.calday <= day, then sums them.
    #                     The *_90d columns sum every joined row; the *_60d
    #                     and *_30d columns only add rows with
    #                     c.calday > day-60 and c.calday > day-30 respectively.
    #                     hash_id is farm_fingerprint over the concatenated
    #                     day, store_id and product_id (NULLs become "").
    #                     NOTE(review): farm_fingerprint is not defined here;
    #                     presumably a UDF registered by an included
    #                     notebook -- confirm.
    #   final SELECT    - adds unit_soh_value: the first non-NULL of
    #                     cogs / ROUND(qty, 3) for the 30-, 60- and 90-day
    #                     windows, in that order (try_divide is used for the
    #                     division).
    spark.sql(f"""
    CREATE OR REPLACE TEMP VIEW temp_f_cogs_90d AS
    WITH table_date AS (
        SELECT EXPLODE(SEQUENCE(date({START_DATE}), date({END_DATE}), INTERVAL 1 DAY)) AS date_gen_c
    ),
    calculate_table AS (
        SELECT
            calday,
            product_id,
            store_id,
            SUM(cost) AS cogs,
            SUM(cost_b2b) AS cogs_b2b,
            SUM(quantity) AS quantity,
            SUM(quantity_b2b) AS quantity_b2b
        FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_cogs
        WHERE calday >= DATE_SUB(DATE({START_DATE}), 90)
            AND calday <= DATE({END_DATE})
            AND product_id != 'DUMMY'
        GROUP BY 
            calday,
            product_id,
            store_id
    ),
    main AS (
        SELECT
            farm_fingerprint(CONCAT(
                IFNULL(CAST(d.date_gen_c AS STRING), ""),
                IFNULL(CAST(c.store_id AS STRING), ""),
                IFNULL(CAST(c.product_id AS STRING), "")
            )) AS hash_id,
            d.date_gen_c AS calday,
            c.product_id,
            c.store_id,
            SUM(c.cogs) AS cogs_90d,
            SUM(c.cogs_b2b) AS cogs_b2b_90d,
            SUM(c.quantity) AS cogs_qty_90d,
            SUM(c.quantity_b2b) AS cogs_qty_b2b_90d,
            SUM(IF(c.calday > DATE_SUB(DATE(d.date_gen_c), 60), c.cogs, 0)) AS cogs_60d,
            SUM(IF(c.calday > DATE_SUB(DATE(d.date_gen_c), 60), c.cogs_b2b, 0)) AS cogs_b2b_60d,
            SUM(IF(c.calday > DATE_SUB(DATE(d.date_gen_c), 60), c.quantity, 0)) AS cogs_qty_60d,
            SUM(IF(c.calday > DATE_SUB(DATE(d.date_gen_c), 60), c.quantity_b2b, 0)) AS cogs_qty_b2b_60d,
            SUM(IF(c.calday > DATE_SUB(DATE(d.date_gen_c), 30), c.cogs, 0)) AS cogs_30d,
            SUM(IF(c.calday > DATE_SUB(DATE(d.date_gen_c), 30), c.cogs_b2b, 0)) AS cogs_b2b_30d,
            SUM(IF(c.calday > DATE_SUB(DATE(d.date_gen_c), 30), c.quantity, 0)) AS cogs_qty_30d,
            SUM(IF(c.calday > DATE_SUB(DATE(d.date_gen_c), 30), c.quantity_b2b, 0)) AS cogs_qty_b2b_30d
        FROM table_date d
        INNER JOIN calculate_table c
            ON c.calday > DATE_SUB(DATE(d.date_gen_c), 90)
                AND d.date_gen_c >= c.calday
        GROUP BY
            d.date_gen_c,
            c.product_id,
            c.store_id
    )

    SELECT
    *,
    IFNULL(
        try_divide(cogs_30d, ROUND(cogs_qty_30d, 3)),
        IFNULL(
        try_divide(cogs_60d, ROUND(cogs_qty_60d, 3)),
        try_divide(cogs_90d, ROUND(cogs_qty_90d, 3))
        )
    ) AS unit_soh_value
    FROM main
    """)

In [0]:
if CALDAY_IN_STR != '':
    # Collect the distinct caldays present in the staging view as one string
    # of single-quoted dates separated by commas, for use in an IN (...) list.
    calday_query = """
    SELECT
        CONCAT("'", CONCAT_WS("','", COLLECT_SET(CAST(TO_DATE(calday, 'yyyy-MM-dd') AS STRING))), "'") AS calday_in_str
    FROM temp_f_cogs_90d
    """

    calday_df = spark.sql(calday_query)
    first_row = calday_df.collect()[0]
    calday_in_str = first_row[0]

    # With no dates the string is just a pair of quotes; stripping them leaves
    # an empty (falsy) string, in which case there is nothing to delete.
    dates_without_quotes = calday_in_str.strip("'")
    if not dates_without_quotes:
        print("No valid CALDAYs found to delete.")
    else:
        # Remove the target rows for exactly the caldays about to be reloaded.
        delete_statement = f"""
        DELETE FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_cogs_90d
        WHERE calday IN ({calday_in_str})
        """
        spark.sql(delete_statement)

In [0]:
if CALDAY_IN_STR != "":
    # Load the staged rows into the target table.
    # The column list is taken from the target table itself, so the INSERT
    # names every target column explicitly and selects the same-named columns
    # from the staging view; it does not rely on the view's column order.
    INSERT_ROW = ", ".join(
        spark.table(f"{catalog_name}.udp_wcm_gold_vcm_dwh.f_cogs_90d").columns
    )
    # Read from the literal view name `temp_f_cogs_90d`, which is the name the
    # view is created under and the name the DELETE step reads. Deriving the
    # name from the `target_table` widget would point at a non-existent view
    # whenever the widget differs from the default, failing the INSERT after
    # the DELETE had already removed the target rows.
    spark.sql(
        f"""
        INSERT INTO {catalog_name}.udp_wcm_gold_vcm_dwh.f_cogs_90d ({INSERT_ROW})
        SELECT {INSERT_ROW} FROM temp_f_cogs_90d
    """
    ).display()
else:
    # Nothing was staged for this run; report why the load is skipped.
    print(
        f"temp_{target_table} could't be created as no valid calday's found In common-etl so {catalog_name}.udp_wcm_gold_vcm_dwh.f_cogs_90d can't be inserted using calday's from temp_{target_table}"
    )

In [0]:
# End the notebook here when there is nothing to load, so that the cells
# below (the common ETL update include and the view cleanup) do not run.
if CALDAY_IN_STR == "":
    _exit_message = "No valid calday's found In common-etl to insert data into Table: ETL_LOG_DEPENDENCIES and ETL_DELTA_TABLE and "
    dbutils.notebook.exit(_exit_message)

In [0]:
%run "../common/common_etl_update"

In [0]:
# Clean up the staging view. Drop it by the literal name it was created under
# (`temp_f_cogs_90d`); a name derived from the `target_table` widget would
# leave the view behind whenever the widget differs from the default.
spark.sql("DROP VIEW IF EXISTS temp_f_cogs_90d")