In [0]:
from pyspark.sql.functions import *
from delta.tables import DeltaTable

In [0]:
# Storage coordinates are read from the "scope-mbc" secret scope so that no
# environment-specific value is hardcoded in the notebook.
container = dbutils.secrets.get(scope="scope-mbc", key="secret-env-container")
storage_account = dbutils.secrets.get(
    scope="scope-mbc",
    key="secret-env-storage-account",
)

# Root ABFSS URI built from the container and storage account above.
path_base = f"abfss://{container}@{storage_account}.dfs.core.windows.net"

# Checkpoint directory passed to the silver.products streaming write.
path_checkpoint = f"{path_base}/checkpoints/silver/silver.products/"

In [0]:
def upsert_to_silver(batch_df, batch_id):
    """Cast, deduplicate and MERGE one micro-batch into ``silver.products``.

    Written to be used as a ``foreachBatch`` callback.

    Args:
        batch_df: Micro-batch DataFrame. It must expose the columns
            ``product_id``, ``product_name``, ``brand_id``, ``category_id``,
            ``model_year`` and ``list_price``.
        batch_id: Micro-batch identifier supplied by the caller. It is not
            used by this function.

    Returns:
        None. The only effect is the MERGE executed against the Delta table
        ``silver.products``.
    """
    # 1. Project the silver columns and cast them to their target types.
    products = (
        batch_df
        .select(
            col("product_id").cast("int"),
            col("product_name").cast("string"),
            col("brand_id").cast("int"),
            col("category_id").cast("int"),
            col("model_year").cast("smallint"),
            col("list_price").cast("decimal(10,2)"),
        )
        # A NULL key never satisfies the equality in the merge condition, so
        # it would be inserted again on every micro-batch that carries one.
        .filter(col("product_id").isNotNull())
        # MERGE needs at most one source row per key. Which duplicate survives
        # is not controlled here.
        # NOTE(review): if the source carries an ordering column, keep the
        # latest row per product_id instead.
        .dropDuplicates(["product_id"])
    )

    # 2. Upsert into the target Delta table. The session attached to the
    # micro-batch is used instead of the notebook-global `spark`, so the
    # callback does not depend on driver globals.
    delta_target = DeltaTable.forName(batch_df.sparkSession, "silver.products")

    (
        delta_target.alias("target")
        .merge(
            products.alias("source"),
            "target.product_id = source.product_id",
        )
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute()
    )

In [0]:
# Streaming read of the bronze products table.
bronze_products_stream = spark.readStream.table("kyndryl.bronze.products")

# Every micro-batch is handed to upsert_to_silver; progress is tracked under
# path_checkpoint. Despite its name, df_stream holds the handle returned by
# start(), not a DataFrame.
# NOTE(review): the source uses a three-part table name while the MERGE target
# in upsert_to_silver is "silver.products" -- confirm both resolve to the
# intended catalog.
df_stream = (
    bronze_products_stream.writeStream
    .outputMode("append")
    .option("checkpointLocation", path_checkpoint)
    .foreachBatch(upsert_to_silver)
    .start()
)