In [0]:
import pyspark.sql.functions as F
from pyspark.sql.window import Window

In [0]:
# Incremental load for silver orders:
#   1. read the stored watermark for the 'orders' table,
#   2. select bronze rows ingested after that watermark,
#   3. keep only the newest row per order_id,
#   4. publish the result as the temp view `eligible_orders_records`.
watermark_rows = (
    spark.table("otc.silver.watermark")
    .filter(F.col("table_name") == "orders")
    .select("watermark_ts")
    .take(1)
)
if not watermark_rows:
    # Fail with an explicit message instead of an IndexError on an empty result.
    raise ValueError(
        "No row found in otc.silver.watermark for table_name='orders'; "
        "seed the watermark table before running this notebook."
    )
watermark_ts = watermark_rows[0][0]

bronze_orders = spark.table("otc.bronze.src_orders")
if watermark_ts is None:
    # The watermark row exists but holds NULL: treat it as "nothing loaded yet"
    # and take every bronze row rather than building an invalid comparison.
    new_bronze_orders = bronze_orders
else:
    # Compare against a typed literal so the timestamp is never formatted
    # into (and re-parsed from) SQL text.
    new_bronze_orders = bronze_orders.filter(F.col("ingest_ts") > F.lit(watermark_ts))

# Dedup in silver: newest ingest_ts wins per order_id. updated_at is a
# tiebreaker so rows sharing the same ingest_ts are not picked arbitrarily.
newest_first = Window.partitionBy("order_id").orderBy(
    F.desc("ingest_ts"), F.desc("updated_at")
)
eligible_orders_records = (
    new_bronze_orders
    .withColumn("SequenceOfRecord", F.row_number().over(newest_first))
    .filter(F.col("SequenceOfRecord") == 1)
    .drop("SequenceOfRecord")
)

eligible_orders_records.display()
eligible_orders_records.createOrReplaceTempView("eligible_orders_records")

In [0]:
%sql
-- Upsert the eligible_orders_records view into silver orders, keyed on order_id:
-- matching orders are overwritten with the incoming values, new orders are inserted.
merge into otc.silver.orders as tgt
using eligible_orders_records as src
  on tgt.order_id = src.order_id
when matched then update set
  order_id     = src.order_id,
  customer_id  = src.customer_id,
  order_ts     = src.order_ts,
  order_status = src.order_status,
  updated_at   = src.updated_at,
  ingest_ts    = src.ingest_ts
when not matched then insert (
  order_id,
  customer_id,
  order_ts,
  order_status,
  updated_at,
  ingest_ts
) values (
  src.order_id,
  src.customer_id,
  src.order_ts,
  src.order_status,
  src.updated_at,
  src.ingest_ts
)

In [0]:
%sql
-- Advance the 'orders' watermark to the newest ingest_ts present in silver orders.
-- MAX() returns NULL when silver orders has no rows (or no non-NULL ingest_ts);
-- COALESCE then keeps the current watermark instead of overwriting it with NULL.
UPDATE otc.silver.watermark
  SET watermark_ts = COALESCE(
        (SELECT MAX(ingest_ts) FROM otc.silver.orders),
        watermark_ts
      )
  WHERE table_name = 'orders'

In [0]:
%sql
-- Show the current contents of silver orders.
SELECT *
FROM otc.silver.orders

In [0]:
%sql
-- Show the current contents of the silver watermark table.
SELECT *
FROM otc.silver.watermark