In [0]:
%run ../../config/config 

In [0]:
%run ../../config/sqlconfig

In [0]:
from pyspark.sql.types import StructType, StringType, IntegerType, StructField, DecimalType
from pyspark.sql.functions import col, lit,filter
import os

In [0]:
class SalesSilverStream:
    """Incremental load of ``fact_sales`` into ``silver_fact_sales``.

    The methods read the notebook-level names ``catalog``, ``schema`` and
    ``base_path``; they must be defined before the class is used
    (presumably by the ``%run`` config cells -- TODO confirm).
    """

    def __init__(self, spark):
        """Keep the Spark session used to build the streaming read.

        Args:
            spark: Active SparkSession.
        """
        self.spark = spark

    def upsert_to_silver(self, microBatchDF, batchId):
        """``foreachBatch`` sink: insert sales rows not yet present in silver.

        Despite the name, the MERGE below only has a ``WHEN NOT MATCHED``
        clause, so rows whose ``sales_id`` already exists in the target are
        left untouched (insert-only, no update).

        Args:
            microBatchDF: DataFrame with the rows of the current micro-batch.
            batchId: Micro-batch id passed in by the streaming engine; unused.
        """
        # Keep a single row per sales_id so the MERGE source has no duplicate keys.
        dedup_df = microBatchDF.dropDuplicates(["sales_id"])

        # Expose the deduplicated batch to SQL under a fixed view name.
        dedup_df.createOrReplaceTempView("v_sales_batch")

        # Run the MERGE on the micro-batch's own session, which is where the
        # temp view above was registered.
        microBatchDF.sparkSession.sql(f"""
            MERGE INTO {catalog}.{schema}.silver_fact_sales t
            USING v_sales_batch s
            ON t.sales_id = s.sales_id
            WHEN NOT MATCHED THEN
              INSERT *
        """)

    def run(self, await_termination=False):
        """Start the silver load and hand back the streaming query.

        The query uses an ``availableNow`` trigger and checkpoints under
        ``{base_path}/_checkpoints/sales_silver``.

        Args:
            await_termination: When True, block until the query has stopped
                before returning, so a failure inside a batch is raised here
                instead of going unnoticed. Defaults to False, which keeps the
                previous fire-and-forget behaviour.

        Returns:
            The StreamingQuery returned by ``start()``; callers can use it to
            wait for completion or inspect progress.
        """
        query = (
            self.spark.readStream
            .table(f"{catalog}.{schema}.fact_sales")
            .writeStream
            .foreachBatch(self.upsert_to_silver)
            .option("checkpointLocation", f"{base_path}/_checkpoints/sales_silver")
            .trigger(availableNow=True)
            .start()
        )

        print("Running Sales Silver Stream")

        if await_termination:
            query.awaitTermination()

        return query

In [0]:
# Wrap the notebook's Spark session and kick off the silver load; run() starts
# the streaming query (availableNow trigger) and prints a status line.
obj = SalesSilverStream(spark)
obj.run()

In [0]:
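# Maintenance only: uncomment to delete the stream's checkpoint directory so the
# next run starts without saved progress and reprocesses fact_sales.
#dbutils.fs.rm(f"{base_path}/_checkpoints/sales_silver", recurse=True)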
