In [0]:
%run ../utils/common

In [0]:
# Declare the job-level text widgets with their default values.
# The dependency list is a comma-separated string; it is split, trimmed and
# upper-cased further down in the notebook, so irregular spacing is tolerated.
_job_widget_defaults = (
    ("target_dataset", "VCM_DMT_PRD"),
    ("metadata_schema", "udp_wcm_metadata_dev"),
    ("target_table", "a_dr_dio_rp_store_sku"),
    (
        "dependency_table",
        "VCM_DWH_PRD.F_DR_DIO_RAWDATA,VCM_DWH_PRD.F_DR_DIO_RAWDATA_00, VCM_DWH_PRD.D_STORE, VCM_DWH_PRD.D_PRODUCT, VCM_DWH_PRD.D_VENDOR_MASTER",
    ),
)
for _widget_name, _widget_default in _job_widget_defaults:
    dbutils.widgets.text(_widget_name, _widget_default)

In [0]:
# Declare the two column-name widgets, then read their current values back.
for _field_widget, _field_default in (("field_calday", "CALDAY"), ("field_id", "HASH_ID")):
    dbutils.widgets.text(_field_widget, _field_default)

field_calday, field_id = (
    dbutils.widgets.get(_field_widget) for _field_widget in ("field_calday", "field_id")
)

# Echo the resolved values so they show up in the job run output.
print(f"field_calday: {field_calday}")
print(f"field_id: {field_id}")

In [0]:
# Resolve the environment in this cell so the lookup below does not depend on
# `environment` having been bound by an earlier %run notebook: the notebook's
# own assignment of `environment` only happens in the next cell.
# NOTE(review): `settings` is not defined in this notebook; presumably it is
# provided by ../utils/common - confirm.
environment = dbutils.widgets.get("environment")
catalog_name = settings[environment]['catalog_name']

In [0]:
# Read the job parameters from their widgets.
environment = dbutils.widgets.get("environment")
target_dataset = dbutils.widgets.get("target_dataset")
target_table = dbutils.widgets.get("target_table")
metadata_schema = dbutils.widgets.get("metadata_schema")

# Turn the comma-separated dependency list into a quoted, comma-joined string
# ('A','B',...): each entry is trimmed and upper-cased first.
_dependency_names = [
    raw_name.strip().upper()
    for raw_name in dbutils.widgets.get("dependency_table").split(",")
]
dependency_table = "'{}'".format("','".join(_dependency_names))

# Echo the resolved parameters so they show up in the job run output.
print(f"environment: {environment}")
print(f"target_dataset: {target_dataset}")
print(f"target_table: {target_table}")
print(f"catalog_name: {catalog_name}")
print(f"metadata_schema: {metadata_schema}")
print(f"dependency_table: {dependency_table}")

In [0]:
%run "../common/common_etl_load"

In [0]:
# Make sure the target table exists before any delete/insert runs against it.
# CREATE TABLE IF NOT EXISTS leaves an already existing table untouched.
create_table_ddl = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_gold_vcm_dmt.a_dr_dio_rp_store_sku
(
  calday DATE,
  business_unit STRING,
  tinh_tp STRING,
  qlkv STRING,
  mien STRING,
  gdv STRING,
  gdm STRING,
  mch5_id STRING,
  mch5_name STRING,
  store_id STRING,
  product_id STRING,
  product_name STRING,
  vendor_id STRING,
  vendor STRING,
  cluster STRING,
  order_method STRING,
  delivery_method STRING,
  reporting_region STRING,
  manufacturer STRING,
  dr_type BIGINT,
  dio_type BIGINT,
  material_type STRING,
  issue_sku BIGINT,
  dr_sum BIGINT,
  dr_count BIGINT,
  closing_stock_value DOUBLE,
  closing_stock_quantity DOUBLE,
  cogs_90d DOUBLE,
  cogs_b2b_90d DOUBLE,
  sale_qty_90d DOUBLE,
  sale_qty_b2b_90d DOUBLE,
  cogs_30d DOUBLE,
  cogs_b2b_30d DOUBLE,
  sale_qty_30d DOUBLE,
  sale_qty_b2b_30d DOUBLE
)  
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
);
"""
spark.sql(create_table_ddl)

In [0]:
# Build the temp view temp_{target_table} holding the rows to load for the
# requested days. Skipped entirely when CALDAY_IN_STR is empty.
# NOTE(review): CALDAY_IN_STR is not defined in this notebook; presumably it is
# set by ../common/common_etl_load as a quoted, comma-separated date list - confirm.
#
# Query stages:
#   temp - UNION ALL of f_dr_dio_rawdata (stt = 1) and f_dr_dio_rawdata_00
#          (stt = 2), restricted to the requested date_gen values and to rows
#          with a non-zero stock/COGS/dr_type value or a 'Meat Deli'/'Fresh'
#          material type. ROW_NUMBER keeps one row per
#          (date_gen, store_id, product_id), preferring the stt = 1 source.
#   FDD  - adds store attributes from d_store (only stores whose
#          store_status_id is 'OPEN') and inner-joins d_product. mch2_id is
#          selected here but not referenced by the aggregation below, so this
#          join effectively only drops rows with no matching d_product entry.
#   main - aggregates FDD over the 18 dimension columns, then LEFT JOINs
#          d_product and d_vendor_master for descriptive attributes.
#
# The store-status literal is single-quoted like every other literal in the
# query, so it cannot be parsed as a column reference when double-quoted
# identifiers are enabled in the SQL session.
if CALDAY_IN_STR != "":
    spark.sql(
        f"""
    CREATE OR REPLACE TEMP VIEW temp_{target_table} AS
    WITH
    temp AS (
        SELECT * EXCEPT(stt,rn)
        FROM (
            SELECT *, ROW_NUMBER() OVER (PARTITION BY date_gen,store_id,product_id ORDER BY stt) rn
            FROM (
                SELECT
                    date_gen,
                    store_id,
                    product_id,
                    vendor_id,
                    cluster,
                    order_method,
                    delivery_method,
                    issue_sku,
                    material_type,
                    dio_type,
                    dr_type,
                    dr,
                    closing_stock_value,
                    closing_stock_quantity,
                    cogs_90d,
                    cogs_b2b_90d,
                    sale_qty_90d,
                    sale_qty_b2b_90d,
                    cogs_30d,
                    cogs_b2b_30d,
                    sale_qty_30d,
                    sale_qty_b2b_30d,
                    1 stt
                FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_dr_dio_rawdata
                WHERE date_gen IN ({CALDAY_IN_STR}) AND (closing_stock_value != 0 OR cogs_b2b_90d != 0 OR cogs_90d != 0 OR dr_type != 0 OR material_type IN ('Meat Deli','Fresh'))

                UNION ALL
                SELECT
                    date_gen,
                    store_id,
                    product_id,
                    vendor_id,
                    cluster,
                    order_method,
                    delivery_method,
                    issue_sku,
                    material_type,
                    dio_type,
                    dr_type,
                    dr,
                    closing_stock_value,
                    closing_stock_quantity,
                    cogs_90d,
                    cogs_b2b_90d,
                    sale_qty_90d,
                    sale_qty_b2b_90d,
                    cogs_30d,
                    cogs_b2b_30d,
                    sale_qty_30d,
                    sale_qty_b2b_30d,
                    2 stt
                from {catalog_name}.udp_wcm_gold_vcm_dwh.f_dr_dio_rawdata_00
                    WHERE date_gen IN ({CALDAY_IN_STR}) AND (closing_stock_value != 0 OR cogs_b2b_90d != 0 OR cogs_90d != 0 OR dr_type != 0 OR material_type IN ('Meat Deli','Fresh'))
            ) a
        ) a WHERE rn = 1
    ),
    FDD AS (
        SELECT
            a.*,
            mch2_id,
            ds.city tinh_tp,
            ds.mien,
            ds.qlkv,
            ds.gdv,
            ds.gdm,
            ds.reporting_region,
            ds.business_unit
        FROM temp a
            INNER JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_store ds ON a.store_id = ds.store_id AND ds.store_status_id = 'OPEN'
            INNER JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product dp ON dp.product_id = a.product_id
    )
    SELECT
        rp.date_gen calday,
        rp.business_unit,
        rp.tinh_tp,
        rp.qlkv,
        rp.mien,
        rp.gdv,
        rp.gdm,
        dp.mch5_id,
        dp.mch5_name,
        rp.store_id,
        rp.product_id,
        dp.product_name,
        rp.vendor_id,
        vd.vendor_name vendor,
        rp.cluster,
        rp.order_method,
        rp.delivery_method,
        rp.reporting_region,
        dp.manufacturer,
        rp.dr_type,
        rp.dio_type,
        rp.material_type,
        rp.issue_sku,
        rp.dr_sum,
        rp.dr_count,
        rp.closing_stock_value,
        rp.closing_stock_quantity,
        rp.cogs_90d,
        rp.cogs_b2b_90d,
        rp.sale_qty_90d,
        rp.sale_qty_b2b_90d,
        rp.cogs_30d,
        rp.cogs_b2b_30d,
        rp.sale_qty_30d,
        rp.sale_qty_b2b_30d
    FROM (
        SELECT
            date_gen,
            tinh_tp,
            mien,
            qlkv,
            gdv,
            gdm,
            reporting_region,
            business_unit,
            product_id,
            store_id,
            cluster,
            order_method,
            delivery_method,
            dr_type,
            dio_type,
            vendor_id,
            issue_sku,
            material_type,
            COUNT(dr_type) dr_count,
            SUM(IF(dr_type = 1,dr,0)) dr_sum,
            SUM(closing_stock_value) closing_stock_value,
            SUM(closing_stock_quantity) closing_stock_quantity,
            SUM(cogs_90d) cogs_90d,
            SUM(cogs_b2b_90d) cogs_b2b_90d,
            SUM(sale_qty_90d) sale_qty_90d,
            SUM(sale_qty_b2b_90d) sale_qty_b2b_90d,
            SUM(cogs_30d) cogs_30d,
            SUM(cogs_b2b_30d) cogs_b2b_30d,
            SUM(sale_qty_30d) sale_qty_30d,
            SUM(sale_qty_b2b_30d) sale_qty_b2b_30d
        FROM FDD
        GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
    ) rp
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product dp ON rp.product_id = dp.product_id
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_vendor_master vd ON rp.vendor_id = vd.vendor_id
""")

In [0]:
# Remove rows from the target table for every calday present in the temp view,
# so the insert that follows does not duplicate them.
if CALDAY_IN_STR == "":
    # The temp view is only built when CALDAY_IN_STR is non-empty, so there is
    # nothing to derive the delete list from.
    print(
        f"temp_{target_table} couldn't be created as no valid calday's found in common-etl so {catalog_name}.udp_wcm_gold_vcm_dmt.a_dr_dio_rp_store_sku can't be deleted using calday's from temp_{target_table}"
    )
else:
    # Collapse the distinct caldays of the temp view into one quoted,
    # comma-separated string ('d1','d2',...) usable inside an IN (...) list.
    calday_query = f"""
    SELECT
        CONCAT("'", CONCAT_WS("','", COLLECT_SET(CAST(TO_DATE(calday, 'yyyy-MM-dd') AS STRING))), "'") AS calday_in_str
    FROM temp_{target_table}
    """

    calday_df = spark.sql(calday_query)
    calday_in_str = calday_df.collect()[0][0]

    # Stripping the quote characters leaves an empty string when the view
    # produced no dates; in that case there is nothing to delete.
    if not calday_in_str.strip("'"):
        print("no valid calday's found to delete.")
    else:
        spark.sql(
            f"""
        DELETE FROM {catalog_name}.udp_wcm_gold_vcm_dmt.a_dr_dio_rp_store_sku
        WHERE calday IN ({calday_in_str})
        """
        ).display()

In [0]:
# Append the freshly built rows from the temp view to the target table.
if CALDAY_IN_STR == "":
    # The temp view is only built when CALDAY_IN_STR is non-empty.
    print(
        f"temp_{target_table} couldn't be created as no valid calday's found in common-etl so {catalog_name}.udp_wcm_gold_vcm_dmt.a_dr_dio_rp_store_sku can't be inserted using calday's from temp_{target_table}"
    )
else:
    # Use the target table's own column list for both the INSERT column list
    # and the SELECT, so columns are matched by name rather than by position.
    target_columns = spark.table(
        f"{catalog_name}.udp_wcm_gold_vcm_dmt.a_dr_dio_rp_store_sku"
    ).columns
    INSERT_ROW = ", ".join(target_columns)

    insert_result = spark.sql(
        f"""
        INSERT INTO {catalog_name}.udp_wcm_gold_vcm_dmt.a_dr_dio_rp_store_sku ({INSERT_ROW})
        SELECT {INSERT_ROW} FROM temp_{target_table}
    """
    )
    insert_result.display()

In [0]:
# Clean up the temp view. IF EXISTS keeps this safe when the view was never
# created because CALDAY_IN_STR was empty.
drop_view_stmt = f"DROP VIEW IF EXISTS temp_{target_table}"
spark.sql(drop_view_stmt)