In [0]:
import pyspark.sql.functions as F
from pyspark.sql.window import Window

In [0]:
# Incremental extract of bronze payments for the silver load.
#
# Steps:
#   1. Read the current high-water mark for 'payments' from otc.silver.watermark.
#   2. Keep only bronze rows ingested after that mark.
#   3. Keep one row per payment_event_id (the one with the latest ingest_ts).
#   4. Cast `amount` to decimal(10,2).
#   5. Expose the result as the temp view `eligible_payments_records` for the SQL cells below.

# .first() returns None when no row matches, so a missing watermark entry can be
# reported explicitly instead of surfacing as an IndexError from collect()[0][0].
watermark_row = spark.sql(
    "select * from otc.silver.watermark where table_name='payments' "
).select('watermark_ts').first()

if watermark_row is None:
    raise ValueError(
        "No row with table_name='payments' found in otc.silver.watermark; "
        "seed the watermark row before running this load."
    )

watermark_ts = watermark_row[0]

payments_bronze = spark.table("otc.bronze.src_payments")

if watermark_ts is None:
    # A NULL watermark is treated as "nothing loaded yet": every bronze row is eligible.
    # (Interpolating None would otherwise produce the invalid literal TIMESTAMP 'None'.)
    payments_new = payments_bronze
else:
    payments_new = payments_bronze.filter(f"ingest_ts > TIMESTAMP '{watermark_ts}'")

# Dedup in silver: rank the rows of each payment_event_id newest-first and keep rank 1.
# NOTE(review): rows of one payment_event_id that share the same ingest_ts are ranked in
# an unspecified order; add a tie-breaker column if that can happen in bronze.
newest_first = Window.partitionBy('payment_event_id').orderBy(F.desc('ingest_ts'))
payments_deduped = (
    payments_new
    .withColumn('SequenceOfRecord', F.row_number().over(newest_first))
    .filter(F.col('SequenceOfRecord') == 1)
    .drop('SequenceOfRecord')
)

eligible_payments_records = payments_deduped.withColumn(
    'amount', F.col('amount').cast('decimal(10,2)')
)
eligible_payments_records.display()

# The temp view name is referenced by the SQL load cell further down.
eligible_payments_records.createOrReplaceTempView("eligible_payments_records")

In [0]:
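# NOTE: the code below is a disabled draft and does not run. It parses `payment_payload` as JSON,
# expands the `attempts` array, and uses the `result` of the highest `attempt_no` as `payment_status`.
# Caveat before re-enabling: `attempt_no` is declared as a string in the schema below, so F.max
# compares it lexicographically ("9" > "10"); cast it to an integer first.
# from pyspark.sql.window import Window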

# schema = "struct< attempts:array< struct<attempt_no:string, attempt_ts:string, failure_reason:string, result:string>>, currency:string, gateway:string, method:string>"

# eligible_payments_records\
#   .withColumn(
#       "payment_payload",
#           F.regexp_replace(F.col("payment_payload"), "None", "null"),
#       )\
#     .withColumn('intermediate_status', F.from_json('payment_payload', schema))\
#       .select("*", F.inline( F.col('intermediate_status').attempts ))\
#         .withColumn('MaxAttempt', F.max('attempt_no').over( Window.partitionBy('payment_event_id') ))\
#           .filter( F.col('attempt_no') == F.col('MaxAttempt') )\
#             .withColumn('payment_status', F.col('result'))\
#               .withColumn('amount', F.col('amount').cast("decimal(10,2)") )\
#                 .display()

## Ingestion

In [0]:
%sql
-- Show the column names and types of the silver payments table.
DESCRIBE otc.silver.payments

In [0]:
%sql
-- Upsert the deduplicated batch into silver, keyed on payment_event_id.
-- A plain INSERT appends blindly: re-running this cell before the watermark is advanced,
-- or receiving a newer version of an already-loaded payment_event_id, would create
-- duplicate rows in silver. MERGE makes the load safe to re-run.
MERGE INTO otc.silver.payments AS tgt
USING eligible_payments_records AS src
  ON tgt.payment_event_id = src.payment_event_id
-- Only overwrite when the incoming row is strictly newer than what silver already holds.
WHEN MATCHED AND src.ingest_ts > tgt.ingest_ts THEN
  UPDATE SET
    tgt.order_id       = src.order_id,
    tgt.payment_ts     = src.payment_ts,
    tgt.payment_status = src.payment_status,
    tgt.amount         = src.amount,
    tgt.ingest_ts      = src.ingest_ts
WHEN NOT MATCHED THEN
  INSERT (payment_event_id, order_id, payment_ts, payment_status, amount, ingest_ts)
  VALUES (src.payment_event_id, src.order_id, src.payment_ts, src.payment_status, src.amount, src.ingest_ts)

In [0]:
%sql
-- Advance the 'payments' watermark to the newest ingest_ts present in silver.
-- MAX() over an empty table yields NULL; COALESCE keeps the current watermark in that
-- case instead of overwriting it with NULL (in SET, a bare column reference reads the
-- row's value from before the update).
UPDATE otc.silver.watermark
  SET watermark_ts = COALESCE(
        (SELECT MAX(ingest_ts) FROM otc.silver.payments),
        watermark_ts
      )
  WHERE table_name = 'payments'

In [0]:
%sql
-- Inspect the silver payments table after the load.
SELECT *
FROM otc.silver.payments

In [0]:
%sql
-- Inspect the watermark table to confirm the stored high-water marks.
SELECT *
FROM otc.silver.watermark