In [0]:
%run ../utils/common

In [0]:
# Declare a free-text notebook widget called "environment" (empty default value,
# empty label) and read the value it holds for this run.
_environment_widget = "environment"
dbutils.widgets.text(_environment_widget, "", "")
environment = dbutils.widgets.get(_environment_widget)

In [0]:
# Resolve the catalog name that the SQL in this notebook is qualified with.
# `settings` is not defined in this notebook; presumably it comes from the
# `%run ../utils/common` cell -- verify there.
#
# The "environment" widget defaults to an empty string, so a run that never sets
# it (or sets a typo) would otherwise stop with a bare `KeyError: ''`. Re-raise
# the same exception type with a message that says what to fix.
try:
    environment_settings = settings[environment]
except KeyError as exc:
    raise KeyError(
        f"Unknown environment {environment!r}: set the 'environment' widget to a "
        "key present in `settings`."
    ) from exc
catalog_name = environment_settings['catalog_name']

In [0]:
# Make sure the d_product table exists in the target catalog before it is merged
# into. CREATE TABLE IF NOT EXISTS leaves an already-existing table untouched.
# The two table properties set the auto-optimize "optimizeWrite" and
# "autoCompact" options to TRUE.
create_d_product_ddl = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_gold_vcm_dwh.d_product
(
  product_id STRING,
  product_name STRING,
  base_uom STRING,
  createdon DATE,
  eanupc STRING,
  gross_content FLOAT,
  gross_weight FLOAT,
  height FLOAT,
  length FLOAT,
  width FLOAT,
  product_cat STRING,
  product_group STRING,
  product_type STRING,
  net_content FLOAT,
  net_weight FLOAT,
  order_unit STRING,
  unit_dim STRING,
  unit_of_wt STRING,
  vendor STRING,
  volume FLOAT,
  volumeunit STRING,
  external_product_group STRING,
  mch1_id STRING,
  mch2_id STRING,
  mch3_id STRING,
  mch4_id STRING,
  mch5_id STRING,
  mch6_id STRING,
  mch1_name STRING,
  mch2_name STRING,
  mch3_name STRING,
  mch4_name STRING,
  mch5_name STRING,
  mch6_name STRING,
  mch1_desc STRING,
  mch2_desc STRING,
  mch3_desc STRING,
  mch4_desc STRING,
  mch5_desc STRING,
  mch6_desc STRING,
  total_shelf_life FLOAT,
  remaining_shelf_life STRING,
  box_size STRING,
  model STRING,
  attribute1 STRING,
  eligible STRING,
  warranty STRING,
  valuation_class STRING,
  promotion_upper_qty FLOAT,
  manufacturer_id STRING,
  manufacturer_name STRING,
  manufacturer STRING,
  sub_manufacturer_id STRING,
  sub_manufacturer_name STRING,
  brand_id STRING,
  brand STRING,
  brand_name STRING,
  sub_brand_id STRING,
  sub_brand_name STRING,
  abc_indicator STRING,
  changed_on DATE,
  name STRING,
  product_status STRING,
  category_vmp_1 STRING,
  category_vmp_2 STRING,
  category_vmp_3 STRING,
  temperature STRING,
  planogram_char_001 STRING,
  attribute3 STRING,
  attribute4 STRING,
  attribute12 STRING,
  origin STRING,
  country_of_origin STRING,
  model_desc STRING,
  vat_code STRING,
  vat_desc STRING
)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)"""
spark.sql(create_d_product_ddl)

In [0]:
# Stage the rows for d_product in the temp view `temp_d_product`.
#
# CTEs, in order:
#   material   - one row per `material` from sap_0material: the row with the
#                greatest end_date.
#   t_material - same de-duplication applied to sap_tmaterial.
#   product    - inner join of the two on `material`; the original `name` column
#                is replaced by t_material.txtmd, which is also exposed as
#                `material_desc`.
#   main       - renames source columns to the d_product column names, LEFT JOINs
#                the mch1..mch6, abc_indicator, manufacturer, sub-manufacturer,
#                brand and sub-brand lookups, and decodes the numeric model and
#                VAT codes into text labels (model_desc, vat_desc).
#   rank       - numbers rows per product_id by changed_on descending.
# The final SELECT keeps only the first-ranked row, so the view holds at most one
# row per product_id.
temp_d_product_sql = f"""
CREATE OR REPLACE TEMP VIEW temp_d_product
AS
WITH
material AS (
    SELECT * EXCEPT(rn) 
    FROM (
        SELECT *, 
                ROW_NUMBER() OVER (PARTITION BY material ORDER BY end_date DESC) rn 
        FROM {catalog_name}.udp_wcm_silver_sap_bw.sap_0material
    ) WHERE rn = 1
),
t_material AS (
    SELECT * EXCEPT(rn) 
    FROM (
        SELECT *, ROW_NUMBER() OVER (PARTITION BY material ORDER BY end_date DESC) rn 
        FROM {catalog_name}.udp_wcm_silver_sap_bw.sap_tmaterial
    ) a WHERE rn = 1
),
product AS (
    SELECT 
        a.* EXCEPT(name),
        b.txtmd name, 
        b.txtmd material_desc 
    FROM material a INNER JOIN t_material b ON a.material = b.material
),
main AS (
    SELECT
        m1.material product_id,
        {catalog_name}.default.fn_normalize_name(material_desc) product_name,
        m1.base_uom,
        m1.createdon,
        m1.eanupc,
        m1.gross_cont gross_content,
        m1.gross_wt gross_weight,
        m1.height,
        m1.lenght length,
        m1.width,
        m1.matl_cat product_cat,
        m1.matl_group product_group,
        m1.matl_type product_type,
        m1.net_cont net_content,
        m1.net_weight,
        IFNULL(m1.po_unit, m1.base_uom) order_unit,
        m1.unit_dim,
        m1.unit_of_wt,
        m1.vendor vendor,
        m1.volume,
        m1.volumeunit,
        m1.extmatlgrp external_product_group,
        m1._bic_zc_mchlv1 mch1_id,
        m1._bic_zc_mchlv2 mch2_id,
        m1._bic_zc_mchlv3 mch3_id,
        m1._bic_zc_mchlv4 mch4_id,
        m1._bic_zc_mchlv5 mch5_id,
        m1.extmatlgrp mch6_id,
        mch1_name, 
        mch2_name,
        mch3_name, 
        mch4_name,
        mch5_name, 
        mch6_name, 
        mch1_desc,
        mch2_desc, 
        mch3_desc,    
        mch4_desc,  
        mch5_desc,
        mch6_desc,
        m1._bic_zc_shllif total_shelf_life,
        m1._bic_zc_attr5 remaining_shelf_life,
        m1._bic_zc_size box_size,
        m1._bic_zc_model model,
        m1._bic_zc_attr1 attribute1,
        m1._bic_zc_eligbl eligible,
        m1._bic_zc_warnty warranty,
        val_class valuation_class,
        m1._bic_zprtupqty promotion_upper_qty,
        m1._bic_zc_manftr manufacturer_id,
        dm.manufacturer_name,
        dm.manufacturer_name manufacturer, 
        m1._bic_zc_subman sub_manufacturer_id,
        dms.sub_manufacturer_name sub_manufacturer_name,
        m1._bic_zc_brand brand_id,
        m1._bic_zc_brand brand,
        brand.brand_name,
        m1._bic_zc_subbrd sub_brand_id,
        sub_brand.sub_brand_name,
        ai.abc_indicator_desc abc_indicator,
        m1.aedat changed_on,
        m1.name name,
        m1._bic_zc_artsts product_status,
        m1._bic_zc_cvmp1 category_vmp_1,
        m1._bic_zc_cvmp2 category_vmp_2,
        m1._bic_zc_cvmp3 category_vmp_3,
        m1._bic_zc_tempre temperature,
        m1._bic_zc_pla001 planogram_char_001,
        m1._bic_zc_attr3 attribute3,
        m1._bic_zc_attr4 attribute4,
        m1._bic_zc_attr12 attribute12,
        m1._bic_zc_origin origin,
        m1.wm_coo country_of_origin,
        CASE CAST(m1._bic_zc_model AS BIGINT)
            WHEN 1 THEN 'NHR1'
            WHEN 2 THEN 'NHR2'
            WHEN 3 THEN 'NKTT'
            WHEN 4 THEN 'Hàng thường'
        END model_desc,
        m1._bic_zc_attr11 vat_code,
        CASE CAST(m1._bic_zc_attr11 AS BIGINT)
            WHEN 1 THEN 'Không chịu thuế'
            WHEN 2 THEN 'Thuế suất 0%'
            WHEN 3 THEN 'Thuế suất 5%'
            WHEN 4 THEN 'Thuế suất 10%'
            WHEN 5 THEN 'Thuế suất 5% hàng tươi sống'
            WHEN 6 THEN 'Thuế suất 8%'
            WHEN 9 THEN 'Thuế suất 0% hàng tươi sống'
        END vat_desc
    FROM product m1
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_mch1 mch1 ON m1._bic_zc_mchlv1 = mch1.mch1_id
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_mch2 mch2 ON m1._bic_zc_mchlv2 = mch2.mch2_id
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_mch3 mch3 ON m1._bic_zc_mchlv3 = mch3.mch3_id
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_mch4 mch4 ON m1._bic_zc_mchlv4 = mch4.mch4_id
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_mch5 mch5 ON m1._bic_zc_mchlv5 = mch5.mch5_id
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_mch6 mch6 ON m1.extmatlgrp = mch6.mch6_id
        LEFT JOIN {catalog_name}.udp_wcm_silver_dim.abc_indicator ai ON m1.abckey = ai.abc_indicator
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_manufacturer dm ON m1._bic_zc_manftr = dm.manufacturer_id
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_sub_manufacturer dms ON m1._bic_zc_subman = dms.sub_manufacturer_id
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_brand brand ON m1._bic_zc_brand = brand.brand_id
        LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_sub_brand sub_brand ON m1._bic_zc_subbrd = sub_brand.sub_brand_id
),
rank AS (
    SELECT *,
        ROW_NUMBER() OVER (PARTITION BY product_id ORDER BY changed_on DESC) rn
    FROM main
)
SELECT * EXCEPT(rn)
FROM rank
WHERE rn = 1 
          """
spark.sql(temp_d_product_sql)

In [0]:
# Upsert the staged rows from the temp_d_product view into d_product, matching on
# product_id: matched rows have every column overwritten, unmatched rows are
# inserted.
#
# The column list is read from the *target* table's schema (not from the temp
# view), so the view has to expose a column for each column of the target table.
target_table = f"{catalog_name}.udp_wcm_gold_vcm_dwh.d_product"
df = spark.table(target_table)
columns = df.columns

# Column list for INSERT (...), the matching temp_tbl-qualified list for
# VALUES (...), and the "main_tbl.c = temp_tbl.c" assignments for UPDATE SET.
insert_columns = ", ".join(columns)
insert_values = ", ".join(f"temp_tbl.{column}" for column in columns)
update_set_clause = ", ".join(
    f"main_tbl.{column} = temp_tbl.{column}" for column in columns
)

# Assemble the MERGE statement from the generated clauses.
merge_sql = f"""
MERGE INTO {target_table} AS main_tbl
USING temp_d_product AS temp_tbl
ON main_tbl.product_id = temp_tbl.product_id
WHEN MATCHED THEN 
  UPDATE SET {update_set_clause}
WHEN NOT MATCHED THEN 
  INSERT ({insert_columns}) 
  VALUES ({insert_values})
"""

# Execute the upsert.
spark.sql(merge_sql)

In [0]:
# Clean up: remove the staging view now that the merge has run.
spark.sql("drop view if exists temp_d_product")