In [0]:
%run ../utils/common

In [0]:
# Declare the job-level text widgets together with their default values.
# A dict keeps insertion order, so the widgets are created in the same
# sequence as before.
_widget_defaults = {
    "environment": "DEV",
    "target_dataset": "VCM_DWH_PRD",
    "target_table": "f_inventory_00",
    "metadata_schema": "udp_wcm_metadata_dev",
    "dependency_table": "VCM_DWH_PRD.F_STAGING_INVENTORY",
}
for _widget_name, _widget_default in _widget_defaults.items():
    dbutils.widgets.text(_widget_name, _widget_default)

In [0]:
# Text widgets holding the calendar-day and id field names
# (defaults: CALDAY and HASH_ID).
for _field_widget, _field_default in (
    ("field_calday", "CALDAY"),
    ("field_id", "HASH_ID"),
):
    dbutils.widgets.text(_field_widget, _field_default)

In [0]:
# Declare the "environment" widget (empty default, empty label) and read its
# current value into `environment`.
# NOTE(review): an "environment" widget is already declared earlier in this
# notebook with the default "DEV"; this second declaration uses "" instead.
# Which default wins on a fresh run depends on Databricks widget state —
# confirm the intended default and consider dropping one declaration.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")

In [0]:
# Look up the catalog name configured for the selected environment.
# `settings` is not defined in this notebook — presumably it is provided by
# the `%run ../utils/common` include; verify. An `environment` value that is
# not a key of `settings` fails here with a lookup error.
catalog_name = settings[environment]['catalog_name']

In [0]:
# Read the job parameters from the widgets declared earlier in the notebook.
environment = dbutils.widgets.get("environment")
target_dataset = dbutils.widgets.get("target_dataset")
target_table = dbutils.widgets.get("target_table")
metadata_schema = dbutils.widgets.get("metadata_schema")

# Turn the comma-separated widget value into a quoted, comma-separated list:
#   "A, B"  ->  "'A','B'"
# Blank entries (stray or trailing commas) are dropped so they do not end up
# as '' in the list. An entirely empty widget still yields "''", as before.
# The parsed names live in their own variable so `dependency_table` is only
# ever bound to the final string.
_dependency_names = [
    name.strip()
    for name in dbutils.widgets.get("dependency_table").split(",")
    if name.strip()
]
dependency_table = "'" + "','".join(_dependency_names) + "'"

# Echo the effective parameters so they are visible in the run output.
print(f"environment: {environment}")
print(f"target_dataset: {target_dataset}")
print(f"target_table: {target_table}")
print(f"catalog_name: {catalog_name}")
print(f"metadata_schema: {metadata_schema}")
print(f"dependency_table: {dependency_table}")

In [0]:
# Fetch both field-name parameters from their widgets in one pass.
field_calday, field_id = (
    dbutils.widgets.get(_name) for _name in ("field_calday", "field_id")
)

# Echo them, one per line, for the run output.
print(
    f"field_calday: {field_calday}",
    f"field_id: {field_id}",
    sep="\n",
)

In [0]:
%run "../common/common_etl_load"

In [0]:
# DDL for the target Delta table. IF NOT EXISTS makes the statement a no-op
# when the table is already present. The table properties set the
# auto-optimize "optimize write" and "auto compact" flags.
create_table_sql = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_gold_vcm_dwh.f_inventory_00 (
  hash_id BIGINT,
  calday DATE,
  store_id STRING,
  product_id STRING,
  closing_stock_quantity DOUBLE,
  closing_stock_value DOUBLE)
USING delta
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)"""

spark.sql(create_table_sql)

In [0]:
# Build the temp view `temp_f_inventory_00` that feeds the load into
# f_inventory_00. The view is only (re)created when CALDAY_IN_STR is non-empty.
# CALDAY_IN_STR, START_DATE and END_DATE are not defined in this notebook —
# presumably they come from the `%run "../common/common_etl_load"` include;
# verify. START_DATE / END_DATE are interpolated verbatim into the SQL, so they
# must already be valid SQL date expressions (e.g. quoted literals).
#
# What the query does, inside-out:
#   1. Source rows (`a`): staging rows with calday in [START_DATE, END_DATE]
#      and not after today, plus the rows for the day before START_DATE taken
#      from both f_inventory and f_inventory_00.
#   2. Subquery `i`: per (store_id, product_id), a running SUM of quantity and
#      value ordered by calday, and LEAD(calday) as `date_to` (the next calday
#      for that store/product, NULL for the latest one).
#   3. `table_date`: one row per day from START_DATE through current_date().
#   4. The join repeats each `i` row for every date_gen in [calday, date_to),
#      open-ended when date_to is NULL; `calday` in the output is date_gen.
#   5. `rn` numbers the generated days within each
#      (store_id, product_id, quantity, value) group, earliest day first.
#   6. The final filter keeps only rows for which the whole OR is FALSE, i.e.
#      quantity = 0 AND value = 0 AND rn <> 1. Rows where the condition
#      evaluates to NULL are dropped as well.
#
# NOTE(review): `farm_fingerprint` is not a built-in Spark SQL function;
# presumably it is a UDF registered by one of the included notebooks — confirm.
if CALDAY_IN_STR != "":
    spark.sql(f"""
CREATE OR REPLACE TEMP VIEW temp_f_inventory_00 AS
WITH table_date AS (
  SELECT date_gen
  FROM (
    SELECT explode(sequence(to_date({START_DATE}), current_date(), interval 1 day)) AS date_gen
  ) AS seq_dates
  WHERE date_gen <= current_date()
)
SELECT 
    hash_id,
    calday,
    store_id,
    product_id,
    closing_stock_quantity,
    closing_stock_value
FROM (
    SELECT 
        farm_fingerprint(CONCAT(
            IFNULL(CAST(date_gen AS STRING), ""), 
            IFNULL(CAST(store_id AS STRING), ""), 
            IFNULL(CAST(product_id AS STRING), "")
        )) hash_id,
        date_gen calday,
        store_id,
        product_id,
        closing_stock_quantity,
        closing_stock_value,
        ROW_NUMBER() OVER (
            PARTITION BY store_id, product_id, CAST(closing_stock_quantity AS DOUBLE), CAST(closing_stock_value AS DOUBLE) 
            ORDER BY date_gen ASC
        ) AS rn,
        date_from,
        DATE_SUB(IFNULL(date_to, '2400-01-01'), 1) date_to
    FROM table_date 
    INNER JOIN (
        SELECT
            calday,
            store_id,
            product_id,
            SUM(closing_stock_quantity) OVER (PARTITION BY store_id, product_id ORDER BY calday ASC) closing_stock_quantity,
            SUM(closing_stock_value) OVER (PARTITION BY store_id, product_id ORDER BY calday ASC) closing_stock_value,
            calday date_from,
            LEAD(calday) OVER (PARTITION BY store_id, product_id ORDER BY calday ASC) date_to
        FROM (
            SELECT calday, store_id, product_id, closing_stock_quantity, closing_stock_value 
            FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_staging_inventory 
            WHERE calday >= {START_DATE} AND calday <= {END_DATE} AND calday <= CURRENT_DATE()
            UNION ALL
            SELECT calday, store_id, product_id, closing_stock_quantity, closing_stock_value 
            FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_inventory 
            WHERE calday = DATE_SUB({START_DATE}, 1)
            UNION ALL
            SELECT calday, store_id, product_id, closing_stock_quantity, closing_stock_value 
            FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_inventory_00 
            WHERE calday = DATE_SUB({START_DATE}, 1)
        ) a
    ) i 
    ON date_gen >= calday AND (date_gen < date_to OR date_to IS NULL)
) a 
WHERE (closing_stock_quantity != 0 OR closing_stock_value != 0 OR rn = 1) = FALSE
""")

In [0]:
# Refresh f_inventory_00 from the temp view: delete every calday present in the
# view from the target table, then insert the view's rows.
# Skipped entirely when CALDAY_IN_STR is empty, because the temp view is only
# created under that same condition.
if CALDAY_IN_STR != '':
    # Fully qualified target table, written once so the DELETE, the INSERT and
    # the column lookup cannot drift apart.
    target_fqn = f"{catalog_name}.udp_wcm_gold_vcm_dwh.f_inventory_00"
    # Same hard-coded name the view is created under. Using the `target_table`
    # widget here would point at a non-existent view whenever the widget value
    # differs from "f_inventory_00".
    temp_view = "temp_f_inventory_00"

    # Distinct caldays of the view as a quoted, comma-separated list, e.g.
    # "'2024-01-01','2024-01-02'". An empty view yields "''".
    calday_query = f"""
    SELECT
        CONCAT("'", CONCAT_WS("','", COLLECT_SET(CAST(TO_DATE(calday, 'yyyy-MM-dd') AS STRING))), "'") AS calday_in_str
    FROM {temp_view}
    """

    # `or ''` guards against a NULL result so the checks below cannot fail
    # on None.
    calday_in_str_new = spark.sql(calday_query).collect()[0][0] or ''

    if calday_in_str_new != '':
        calday_in_str = calday_in_str_new

    # Stripping the quotes leaves an empty string when the view has no rows.
    if calday_in_str_new.strip("'"):
        # NOTE(review): DELETE and INSERT are two separate statements; if the
        # INSERT fails, the deleted caldays stay missing until the next
        # successful run. Consider an atomic replace if that matters.
        spark.sql(f"""
        DELETE FROM {target_fqn}
        WHERE calday IN ({calday_in_str_new})
        """)

        # Use the target table's own column list so the INSERT is explicit
        # about column order.
        INSERT_ROW = ", ".join(spark.table(target_fqn).columns)
        spark.sql(f"""
            INSERT INTO {target_fqn} ({INSERT_ROW})
            SELECT {INSERT_ROW} FROM {temp_view}
        """).display()
    else:
        print("No valid CALDAYs found to delete.")
        print(f"{temp_view} returned no rows, so nothing was deleted from or inserted into {target_fqn}")
else:
    print("CALDAY_IN_STR is empty: temp_f_inventory_00 was not created, so the load into f_inventory_00 was skipped.")