In [0]:
%run ../utils/common

In [0]:
# Notebook parameter "environment": its value is used as the key into `settings`
# when the catalog name is resolved. The widget defaults to an empty string,
# so fail fast here with an actionable message instead of letting an empty key
# surface later as an opaque KeyError.
dbutils.widgets.text("environment", "", "")
environment = dbutils.widgets.get("environment")
if not environment:
    raise ValueError(
        "Widget 'environment' is empty; set it to one of the keys of `settings` "
        "before running this notebook."
    )

In [0]:
# Resolve the catalog that prefixes every table referenced by this notebook.
# `settings` is not defined in this notebook (presumably provided by
# `%run ../utils/common` - confirm). The lookup still raises KeyError on a bad
# environment, but now says which environment value was rejected.
try:
    catalog_name = settings[environment]['catalog_name']
except KeyError as exc:
    raise KeyError(
        f"No 'catalog_name' configured for environment {environment!r}; "
        "check the 'environment' widget value and the `settings` mapping."
    ) from exc

In [0]:
# DDL for the output table of this notebook. IF NOT EXISTS makes this a no-op
# once the table is present, so an existing table keeps whatever schema and
# properties it already has. The table properties request Delta auto-optimize
# (optimized writes and auto compaction).
a_finance_bill_sll_ddl = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_gold_vcm_dmt.a_finance_bill_sll 
(
   calday DATE, 
   store_id STRING, 
   transaction_no STRING, 
   vinid_card_no STRING, 
   product_id STRING, 
   total_prd_of_bill BIGINT, 
   total_bill_of_store_sku BIGINT, 
   pre_dataset STRING, 
   is_voucher STRING, 
   is_vinid STRING, 
   is_promo_wcm STRING, 
   base_sale_qty DOUBLE, 
   sale_amt DOUBLE, 
   sale_excl_vat DOUBLE, 
   sale_per_qty DOUBLE, 
   dataset STRING
   ) 
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
"""
spark.sql(a_finance_bill_sll_ddl)

In [0]:
# Builds the temp view that feeds a_finance_bill_sll.
#
# Pipeline (one CTE per step):
#   raw_data        sale lines in the reporting window for the selected stores/products
#   bill_01         measures per (calday, store, product, transaction, card)
#   bill_00         bill_01 rows passing the value thresholds, tagged with pre_dataset
#   bill_02         distinct products per transaction
#   bill_03         distinct transactions per (calday, store, product, pre_dataset)
#   use_voucher     transactions paid with one of the listed voucher payment methods
#   bill_wcm_promo  transaction/product pairs sold under a promotion with zbbprtven < 100
#   bill_04         bill_00 enriched with the measures, counts and YES/NO flags
#   main            final `dataset` classification; 'NA' rows are dropped
#
# Reporting window: the two calendar days before "today", where "today" is
# CURRENT_TIMESTAMP() shifted by +7 hours (presumably UTC -> UTC+7; confirm the
# session time zone). The bounds are kept as SQL fragments, not Python dates,
# so Spark still resolves them when the view is queried, as before.
sll_window_start = "(DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) - 2)"
sll_window_end = "(DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) - 1)"

spark.sql(f"""
CREATE OR REPLACE TEMP VIEW temp_a_finance_bill_sll AS
WITH raw_data AS (
  SELECT
    a.*
  FROM
    {catalog_name}.udp_wcm_gold_vcm_dwh.f_sale_prod a
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_store b ON a.store_id = b.store_id
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product c ON a.product_id = c.product_id
  WHERE
    calday BETWEEN {sll_window_start} AND {sll_window_end}
    AND b.business_unit IN ('1500','2000')
    AND mch2_id IN ('101','102','201','202','203')
    AND (is_sll IS FALSE OR is_sll IS NULL)
    AND (sale_amount IS NOT NULL OR discount_type IN ('ZRTA', 'ZCRT'))
),
-- Measures per (calday, store, product, transaction, card).
-- raw_data is already restricted to the reporting window, so no calday filter is repeated here.
-- sale_excl_vat = sale amount - tax amount + discount value of ZRTA/ZCRT lines.
bill_01 AS (
  SELECT
    calday,
    store_id,
    a.product_id,
    a.transaction_no,
    vinid_card_no,
    SUM(base_sale_qty) base_sale_qty,
    SUM(sale_amount) sale_amt,
    IFNULL(SUM(sale_amount),0) - IFNULL(SUM(tax_amount),0) + SUM(CASE WHEN discount_type IN ('ZRTA', 'ZCRT') THEN discount_value ELSE 0 END) sale_excl_vat,
    TRY_DIVIDE(IFNULL(SUM(sale_amount),0) - IFNULL(SUM(tax_amount),0) + SUM(CASE WHEN discount_type IN ('ZRTA', 'ZCRT') THEN discount_value ELSE 0 END), SUM(base_sale_qty)) sale_perunit
  FROM
    raw_data a
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product b ON a.product_id = b.product_id
  WHERE
    -- these predicates reject NULLs, so unmatched products are dropped
    b.mch2_id IN ('102','202')
    AND b.mch3_id NOT IN ('10204','10205')
  GROUP BY 1,2,3,4,5
),
-- Candidate lines: value thresholds plus the coarse pre_dataset bucket.
bill_00 AS (
  SELECT DISTINCT
    calday,
    store_id,
    transaction_no,
    product_id,
    vinid_card_no,
    IF(sale_excl_vat >= 800000, 'DATASET 1234', 'DATASET5') pre_dataset
  FROM bill_01
  WHERE sale_excl_vat >= 300000 AND sale_perunit <= 1000000
),
-- count prd per bill (GROUP BY already yields one row per transaction)
bill_02 AS (
  SELECT
    a.transaction_no,
    COUNT(DISTINCT b.product_id) cnt_prd
  FROM
    bill_01 a
    LEFT JOIN raw_data b ON a.transaction_no = b.transaction_no
  GROUP BY 1
),
-- count bills per (calday, store, product, pre_dataset)
bill_03 AS (
  SELECT
    calday,
    store_id,
    product_id,
    pre_dataset,
    COUNT(DISTINCT transaction_no) cnt_bill
  FROM bill_00
  GROUP BY 1,2,3,4
),
use_voucher AS (
  SELECT DISTINCT transaction_no
  FROM {catalog_name}.udp_wcm_gold_vcm_dwh.f_sale_prod
  WHERE
    payment_method IN ('E-ZIVO','ZFIV','ZIVO','ZPVO','ZUFV','ZUVO','ZVIV')
    AND tender_value > 0
    AND calday BETWEEN {sll_window_start} AND {sll_window_end}
),
promo_tyledautu AS (
  SELECT DISTINCT
    bonus_buy_id,
    store_id,
    zbbprtven
  FROM {catalog_name}.udp_wcm_gold_vcm_dwh.d_promotion
  WHERE promo_end_date >= {sll_window_start} AND zbbprtven < 100
),
bill_wcm_promo AS (
  SELECT DISTINCT
    transaction_no,
    a.store_id,
    a.product_id
  FROM
    {catalog_name}.udp_wcm_gold_vcm_dwh.f_sale_prod a
    INNER JOIN promo_tyledautu b ON a.promotion = b.bonus_buy_id AND a.store_id = b.store_id
    LEFT JOIN {catalog_name}.udp_wcm_gold_vcm_dwh.d_product c ON a.product_id = c.product_id
  WHERE
    a.calday BETWEEN {sll_window_start} AND {sll_window_end}
    AND c.mch2_id IN ('102','202')
    AND c.mch3_id NOT IN ('10204','10205')
),
bill_04 AS (
  SELECT DISTINCT
    a.calday,
    a.store_id,
    a.transaction_no,
    a.vinid_card_no,
    a.product_id,
    c.cnt_prd total_prd_of_bill,
    d.cnt_bill total_bill_of_store_sku,
    d.pre_dataset,
    CASE WHEN v1.transaction_no IS NOT NULL THEN 'YES' ELSE 'NO' END is_voucher,
    CASE WHEN a.vinid_card_no IS NOT NULL THEN 'YES' ELSE 'NO' END is_vinid,
    CASE WHEN d1.transaction_no IS NOT NULL THEN 'YES' ELSE 'NO' END is_promo_wcm,
    b.base_sale_qty,
    b.sale_amt,
    b.sale_excl_vat,
    TRY_DIVIDE(b.sale_excl_vat, b.base_sale_qty) sale_per_qty
  FROM
    bill_00 a
    -- bill_00 and bill_01 share the grain (calday, store, transaction, product, card).
    -- Join on all five keys so each row gets only its own measures; <=> is the
    -- null-safe equality, needed because vinid_card_no may be NULL.
    LEFT JOIN bill_01 b
      ON a.calday = b.calday
      AND a.store_id = b.store_id
      AND a.transaction_no = b.transaction_no
      AND a.product_id = b.product_id
      AND a.vinid_card_no <=> b.vinid_card_no
    LEFT JOIN bill_02 c ON a.transaction_no = c.transaction_no
    LEFT JOIN bill_03 d ON a.store_id = d.store_id AND a.calday = d.calday AND a.product_id = d.product_id AND a.pre_dataset = d.pre_dataset
    LEFT JOIN use_voucher v1 ON a.transaction_no = v1.transaction_no
    LEFT JOIN (SELECT DISTINCT transaction_no, product_id FROM bill_wcm_promo) d1 ON a.transaction_no = d1.transaction_no AND a.product_id = d1.product_id
),
main AS (
  SELECT
    a.*,
    CASE
    WHEN (sale_excl_vat >= 2000000 AND pre_dataset = 'DATASET 1234' 
          AND total_prd_of_bill <= 4 
          AND (is_voucher = 'YES' OR is_vinid = 'YES' OR is_promo_wcm = 'YES'))
         THEN 'DATASET1'
         
    WHEN (sale_excl_vat >= 2000000 AND pre_dataset = 'DATASET 1234' 
          AND total_prd_of_bill <= 4 
          AND (is_voucher = 'NO' AND is_vinid = 'NO' AND is_promo_wcm = 'NO'))
         THEN 'DATASET3'
         
    WHEN (sale_excl_vat < 2000000 
          AND pre_dataset = 'DATASET 1234' 
          AND total_prd_of_bill <= 4 
          AND total_bill_of_store_sku >= 4 
          AND (is_voucher = 'YES' OR is_vinid = 'YES' OR is_promo_wcm = 'YES'))
         THEN 'DATASET2'
         
    WHEN (sale_excl_vat < 2000000 
          AND pre_dataset = 'DATASET 1234' 
          AND total_prd_of_bill <= 4 
          AND total_bill_of_store_sku >= 4 
          AND (is_voucher = 'NO' AND is_vinid = 'NO' AND is_promo_wcm = 'NO'))
         THEN 'DATASET4'
         
    WHEN (a.sale_excl_vat < 800000 
          AND pre_dataset = 'DATASET5' 
          AND total_bill_of_store_sku >= 15 
          AND total_prd_of_bill <= 4)
         THEN 'DATASET5' 
    ELSE 'NA'
  END AS dataset
  FROM bill_04 a
)
SELECT * FROM main WHERE dataset <> 'NA'
""")

In [0]:
# Remove the rows of the reporting window (the two days before "today", with
# CURRENT_TIMESTAMP() shifted by +7 hours) from the target table so the window
# can be reloaded by the INSERT in the following cell.
# NOTE(review): this DELETE and that INSERT are separate statements, and each
# evaluates CURRENT_TIMESTAMP() on its own. The pair is therefore not atomic,
# and a run that straddles the day boundary would delete one window and load
# another - confirm the job schedule keeps clear of that boundary.
purge_window_sql = f"""
  DELETE FROM {catalog_name}.udp_wcm_gold_vcm_dmt.a_finance_bill_sll WHERE calday BETWEEN DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) - 2 AND DATE(CURRENT_TIMESTAMP() + INTERVAL 7 HOURS) - 1;
"""
spark.sql(purge_window_sql)

In [0]:
# Load the classified rows from the temp view into the target table.
# The target columns are named explicitly rather than matched by position:
# the table is created with IF NOT EXISTS, so an existing table is not
# guaranteed to have exactly the DDL's column order. One Python list feeds
# both the target column list and the SELECT so the two cannot drift apart.
a_finance_bill_sll_columns = [
    "calday",
    "store_id",
    "transaction_no",
    "vinid_card_no",
    "product_id",
    "total_prd_of_bill",
    "total_bill_of_store_sku",
    "pre_dataset",
    "is_voucher",
    "is_vinid",
    "is_promo_wcm",
    "base_sale_qty",
    "sale_amt",
    "sale_excl_vat",
    "sale_per_qty",
    "dataset",
]
a_finance_bill_sll_column_list = ", ".join(a_finance_bill_sll_columns)

spark.sql(f"""
INSERT INTO {catalog_name}.udp_wcm_gold_vcm_dmt.a_finance_bill_sll ({a_finance_bill_sll_column_list})
SELECT
  {a_finance_bill_sll_column_list}
FROM temp_a_finance_bill_sll
""")