In [0]:
%sql
-- Inspect the rows of the multiplexed bronze Delta table whose source is 'payments'.
SELECT *
FROM delta.`/mnt/bronze/multiplexbronzetable`
WHERE source = 'payments';

In [0]:
# Open a streaming read on the multiplexed bronze Delta table and keep only
# the rows whose `source` column equals 'payments'.
df = (
    spark.readStream
    .format("delta")
    .load("/mnt/bronze/multiplexbronzetable")
    .filter("source=='payments'")
)

In [0]:
from pyspark.sql.types import StructType, StructField, StringType, TimestampType, DoubleType, BooleanType
# Schema used to parse the payment JSON payload. Fields are declared in the
# order they should appear after parsing; every field is nullable.
payment_schema = (
    StructType()
    .add("payment_id", StringType(), True)
    .add("order_id", StringType(), True)
    .add("customer_id", StringType(), True)
    .add("payment_date", TimestampType(), True)
    .add("payment_method", StringType(), True)
    .add("payment_status", StringType(), True)
    .add("transaction_amount", DoubleType(), True)
    .add("currency", StringType(), True)
    .add("payment_provider", StringType(), True)
    .add("fraud_check_passed", BooleanType(), True)
)

In [0]:
from pyspark.sql.functions import from_json, col, cast

In [0]:
# Parse the payload: cast `raw_payload` to a string, decode it as JSON against
# payment_schema, then flatten the parsed struct next to the ingest timestamp.
parsed_df = (
    df
    .withColumn("parsed", from_json(col('raw_payload').cast('string'), payment_schema))
    .select('parsed.*', 'ingesttimestamp')
)

# Deduplicate on payment_id with a 10-minute watermark on ingesttimestamp.
#
# Why not plain dropDuplicates(['payment_id']): a watermark can only evict
# deduplication state when the event-time column is part of the dedup key.
# With payment_id alone, every key ever seen is kept in state indefinitely.
# dropDuplicatesWithinWatermark (Spark 3.5+) deduplicates on the key alone
# while still letting the watermark expire old state.
#
# NOTE(review): switching the dedup operator changes the query's state layout;
# a checkpoint written by the previous dropDuplicates version presumably cannot
# be reused — confirm and reset the checkpoint location if needed.
watermarked_df = parsed_df.withWatermark('ingesttimestamp', '10 minutes')
if hasattr(watermarked_df, 'dropDuplicatesWithinWatermark'):
    deduped_df = watermarked_df.dropDuplicatesWithinWatermark(['payment_id'])
else:
    # Older runtimes: keep the original behaviour (state is NOT bounded here).
    deduped_df = watermarked_df.dropDuplicates(['payment_id'])

In [0]:
# Preview the parsed, deduplicated payments. deduped_df is derived from
# spark.readStream, so this renders a streaming DataFrame.
# NOTE(review): presumably a development-time check — confirm it should remain
# in the pipeline notebook.
display(deduped_df)

In [0]:
%sql
-- Register the silver payments table at its storage location, if it is not
-- already defined. Columns are the parsed payment fields plus ingesttimestamp.
-- NOTE(review): the partition column is a raw timestamp, which looks very
-- high-cardinality — confirm this is intended before relying on it at scale.
CREATE TABLE IF NOT EXISTS payments_silver (
  payment_id STRING,
  order_id STRING,
  customer_id STRING,
  payment_date TIMESTAMP,
  payment_method STRING,
  payment_status STRING,
  transaction_amount DOUBLE,
  currency STRING,
  payment_provider STRING,
  fraud_check_passed BOOLEAN,
  ingesttimestamp TIMESTAMP
)
PARTITIONED BY (ingesttimestamp)
LOCATION "/mnt/silver/payments_silver";

In [0]:
# Start the streaming write of the deduplicated payments into the silver Delta
# location: append mode, schema merging enabled, partitioned by ingesttimestamp,
# with progress tracked under the given checkpoint directory.
(
    deduped_df.writeStream
    .format("delta")
    .option("checkpointLocation", "/mnt/checkpoint/silver/payments2")
    .outputMode("append")
    .option("mergeSchema", "true")
    .partitionBy("ingesttimestamp")
    .start("/mnt/silver/payments_silver")
)

In [0]:
%sql
-- Read back the silver payments table to inspect what has been written.
SELECT *
FROM payments_silver;