In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from delta.tables import *


**Define paths**

In [0]:
# Storage locations for this pipeline: the silver-layer product source and the
# gold-layer product dimension. Both containers live on the same ADLS endpoint.
STORAGE_HOST = "stretailenvdev.dfs.core.windows.net"

silver_path = f"abfss://silver@{STORAGE_HOST}/s_Products"
gold_path = f"abfss://gold@{STORAGE_HOST}/dim_products"

In [0]:
# Read the silver-layer product data (Delta format) for exploration.
df = spark.read.load(silver_path, format="delta")

In [0]:
# Preview the DataFrame loaded from silver_path in the notebook output.
df.display()

In [0]:
# Print column names and types of the silver data, for comparison with the
# dimension table definition created further down.
df.printSchema()

**Create Delta Lake table dim_product if it does not exist**

In [0]:
# DDL for the product dimension. product_key is an identity column generated by
# Delta; startdate / enddate / isActive are the SCD Type 2 tracking columns.
# The table is stored at gold_path, and the statement is a no-op when the
# table already exists.
create_dim_product_ddl = f"""
          CREATE TABLE IF NOT EXISTS retail_cata.gold.dim_product(
              product_key BIGINT GENERATED ALWAYS AS IDENTITY(START WITH 1 INCREMENT BY 1),
              ProductID  INT,
              ProductName  STRING,
              CategoryID  INT,
              SubCategoryID INT,
              Price DECIMAL(18,2),
              LaunchDate TIMESTAMP,
              startdate TIMESTAMP,
              enddate TIMESTAMP,
              isActive BOOLEAN
          )USING DELTA
          LOCATION '{gold_path}'
          """

spark.sql(create_dim_product_ddl)

**Load silver data**

In [0]:
# Source snapshot for the dimension load: the silver-layer product Delta data.
silver_df = spark.read.load(path=silver_path, format="delta")

In [0]:
# Preview the silver snapshot that will be staged for the dimension merge.
silver_df.display()

**Add SCD2 columns**

In [0]:
# Stage the silver rows as "new current versions" for the SCD Type 2 load:
#   startdate - validity start (today, at midnight); cast to TIMESTAMP so the
#               staged schema matches the dim_product DDL instead of relying on
#               an implicit DATE -> TIMESTAMP conversion at write time
#   enddate   - NULL, i.e. the version is still open
#   isActive  - True, marks the row as the current version
# The chain is wrapped in parentheses rather than ended with a dangling
# backslash continuation, which would break if any line were appended.
stg_product = (
    silver_df
    .withColumn("startdate", current_date().cast("timestamp"))
    .withColumn("enddate", lit(None).cast("timestamp"))
    .withColumn("isActive", lit(True))
)

In [0]:
# Show the first 10 staged rows to verify the added SCD2 columns.
stg_product.limit(10).display()


**Load Gold Delta Table**

In [0]:
# Bootstrap: when nothing at gold_path is a Delta table yet, seed it with the
# staged rows so the handle below always has a table to open.
if not DeltaTable.isDeltaTable(spark, gold_path):
    stg_product.write.format("delta").mode("append").save(gold_path)

# Handle on the gold dimension table, used as the merge target.
dim_product = DeltaTable.forPath(spark, gold_path)

**Merge SCD2**

In [0]:
# SCD Type 2 merge of the staged products into the dimension table.
#
# A plain merge keyed on ProductID can only do ONE thing per source row: a
# changed product matches its active row, so that row gets expired, but the
# new version is never inserted in the same run (the product is left with no
# active row until the next execution). To expire and insert in a single
# merge, every changed product is fed in twice:
#   * once with merge_key = ProductID -> matches the active row and closes it
#   * once with merge_key = NULL      -> matches nothing and is inserted as
#                                        the new active version
# New and unchanged products are fed in once with merge_key = ProductID.

# Null-safe change detection: `<=>` treats NULL as a comparable value, whereas
# `<>` evaluates to NULL (never true) when either side is NULL and would miss
# changes such as NULL -> 9.99.
change_condition = """
    NOT (t.ProductName <=> s.ProductName) OR
    NOT (t.CategoryID <=> s.CategoryID) OR
    NOT (t.SubCategoryID <=> s.SubCategoryID) OR
    NOT (t.Price <=> s.Price) OR
    NOT (t.LaunchDate <=> s.LaunchDate)
"""

# Current versions already present in the dimension.
active_dim_rows = dim_product.toDF().where("isActive = TRUE")

# Staged rows whose attributes differ from their active dimension version.
changed_rows = (
    stg_product.alias("s")
    .join(active_dim_rows.alias("t"), expr("s.ProductID = t.ProductID"))
    .where(change_condition)
    .select("s.*")
)

# Give the NULL merge key the same type as ProductID so the union is well-typed.
product_id_type = stg_product.schema["ProductID"].dataType

staged_updates = (
    changed_rows.withColumn("merge_key", lit(None).cast(product_id_type))
    .unionByName(stg_product.withColumn("merge_key", col("ProductID")))
)

(
    dim_product.alias("t")
    .merge(
        staged_updates.alias("s"),
        "t.ProductID = s.merge_key AND t.isActive = TRUE"
    )
    # Expire the active version when any tracked attribute changed. The old
    # version ends exactly where the new one starts.
    .whenMatchedUpdate(
        condition=change_condition,
        set={
            "enddate": "s.startdate",
            "isActive": "false"
        }
    )
    # Insert brand-new products and the new versions of changed products.
    # product_key is omitted on purpose: it is a GENERATED ALWAYS identity column.
    .whenNotMatchedInsert(
        values={
            "ProductID": "s.ProductID",
            "ProductName": "s.ProductName",
            "CategoryID": "s.CategoryID",
            "SubCategoryID": "s.SubCategoryID",
            "Price": "s.Price",
            "LaunchDate": "s.LaunchDate",
            "startdate": "s.startdate",
            "enddate": "s.enddate",
            "isActive": "s.isActive"
        }
    )
    .execute()
)

In [0]:
%sql
-- Spot-check after the merge: the 10 rows with the highest product_key values.
SELECT *
FROM retail_cata.gold.dim_product
ORDER BY product_key DESC
LIMIT 10;