In [0]:
from pyspark.sql.functions import *
from pyspark.sql.window import *
from delta.tables import DeltaTable

**Define paths**

In [0]:
# abfss (ADLS Gen2) location of the silver-layer employee data; read below as the merge source.
silver_path  = "abfss://silver@stretailenvdev.dfs.core.windows.net/s_Employee"
# abfss (ADLS Gen2) location that backs the gold dim_employee Delta table (the merge target).
gold_path = "abfss://gold@stretailenvdev.dfs.core.windows.net/dim_employee"

**Create the Delta Lake table dim_employee if it does not exist**

In [0]:
# DDL for the gold employee dimension, stored as Delta at gold_path.
# Employee_key is GENERATED ALWAYS AS IDENTITY, so writers must not supply it.
# NOTE(review): the column is declared as "Enddate" here while the other cells
# use "EndDate"; this relies on Spark's default case-insensitive column
# resolution - confirm spark.sql.caseSensitive is not enabled.
create_dim_employee_sql = f"""
          CREATE TABLE IF NOT EXISTS retail_cata.gold.dim_employee(
              Employee_key BIGINT GENERATED ALWAYS AS IDENTITY(START WITH 1 INCREMENT BY 1), 
              EmployeeID   INT,
              FirstName    STRING,
              LastName     STRING,
              Role         STRING,
              StoreID      STRING,
              HireDate     TIMESTAMP,
              StartDate    TIMESTAMP,
              Enddate      TIMESTAMP,
              IsActive     BOOLEAN 
          )USING DELTA
          LOCATION '{gold_path}'
          """

# IF NOT EXISTS makes this a no-op once the table is registered.
spark.sql(create_dim_employee_sql)

**Load silver data**

In [0]:
# Read the silver employee Delta data found at silver_path.
silver_df = spark.read.load(silver_path, format="delta")

**Add SCD2 Columns**

In [0]:
# Stamp every incoming row as a fresh, open-ended SCD2 version:
#   StartDate = today, EndDate = NULL (typed as timestamp), IsActive = True.
stg_employee = (
    silver_df
    .withColumn("StartDate", current_date())
    .withColumn("EndDate", lit(None).cast("timestamp"))
    .withColumn("IsActive", lit(True))
)

In [0]:
# Preview a few staged rows to sanity-check the added SCD2 columns.
stg_employee_preview = stg_employee.limit(5)
stg_employee_preview.display()

**Load Gold Delta Table**

In [0]:
# First-time load: when nothing at gold_path is a Delta table yet, seed it with
# the staged rows. Either way, finish with a DeltaTable handle for the merge.
# NOTE(review): rows written by this fallback carry no Employee_key column;
# presumably the CREATE TABLE cell normally makes this branch unreachable - confirm.
if not DeltaTable.isDeltaTable(spark, gold_path):
    (
        stg_employee.write
        .format("delta")
        .mode("append")
        .save(gold_path)
    )

dim_employee = DeltaTable.forPath(spark, gold_path)




**Merge for SCD2**

In [0]:
# SCD Type 2 merge of the staged employees into dim_employee.
#
# A single MERGE can take only one action per source row, so a changed employee
# needs TWO source rows: one that matches the active dimension row (to expire
# it) and one that matches nothing (to insert the new version). Without the
# second row the old version is closed but the new one is not inserted until
# the next run, leaving the employee with no active row in between.

# Null-safe change detection: `<=>` treats NULL as a comparable value, whereas
# `<>` evaluates to NULL when either side is NULL and would silently skip a
# tracked attribute changing to or from NULL.
attributes_changed = """
    NOT (t.FirstName <=> s.FirstName) OR
    NOT (t.LastName  <=> s.LastName)  OR
    NOT (t.Role      <=> s.Role)      OR
    NOT (t.StoreID   <=> s.StoreID)   OR
    NOT (t.HireDate  <=> s.HireDate)
"""

# Staged rows whose currently active dimension row differs in a tracked
# attribute. They get a NULL merge key so they can never match and therefore
# always fall through to the insert clause as the new version.
changed_employees = (
    stg_employee.alias("s")
    .join(
        dim_employee.toDF().alias("t"),
        expr("s.EmployeeID = t.EmployeeID AND t.IsActive = true"),
    )
    .where(attributes_changed)
    .selectExpr("NULL AS merge_key", "s.*")
)

# Every staged row keyed by its EmployeeID: expires the active row when it
# changed, inserts brand-new employees, and is a no-op for unchanged ones.
# The union is positional; both sides are (merge_key, <stg_employee columns>).
staged_updates = changed_employees.union(
    stg_employee.selectExpr("EmployeeID AS merge_key", "*")
)

(
    dim_employee.alias("t")
    .merge(
        staged_updates.alias("s"),
        "t.EmployeeID = s.merge_key AND t.IsActive = true"
    )
    .whenMatchedUpdate(
        condition=attributes_changed,
        set={
            # Close the old version at the moment the new version starts.
            "EndDate": "s.StartDate",
            "IsActive": "false"
        }
    )
    .whenNotMatchedInsert(
        # Employee_key is omitted on purpose: it is an identity column.
        values={
            "EmployeeID": "s.EmployeeID",
            "FirstName": "s.FirstName",
            "LastName": "s.LastName",
            "Role": "s.Role",
            "StoreID": "s.StoreID",
            "HireDate": "s.HireDate",
            "StartDate": "s.StartDate",
            "EndDate": "s.EndDate",
            "IsActive": "s.IsActive"
        }
    )
    .execute()
)

In [0]:
%sql
-- Spot-check the merge result: the 10 dimension rows with the highest Employee_key values.
SELECT *
FROM retail_cata.gold.dim_employee
ORDER BY employee_key DESC
LIMIT 10