### **Import packages**

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
from delta.tables import DeltaTable

### **Define paths**

In [0]:
# Storage locations (abfss URIs): the gold dimension this notebook writes to,
# and the silver Delta dataset it reads from.
gold_path = "abfss://gold@stfinancedev.dfs.core.windows.net/dim_employees"
silver_path = "abfss://silver@stfinancedev.dfs.core.windows.net/s_employees"

### **Create delta table schema**

In [0]:
# DDL for the gold dimension table, stored at gold_path. IF NOT EXISTS makes the
# statement a no-op on re-runs. Employeekey is declared GENERATED ALWAYS AS
# IDENTITY, so the surrogate key is assigned by Delta and is never supplied by
# the writer.
create_dim_employees_sql = f"""
          CREATE TABLE IF NOT EXISTS finance_cata.gold.dim_employees(
              Employeekey BIGINT GENERATED ALWAYS AS IDENTITY(START WITH 1 INCREMENT BY 1),
              EmployeeID STRING,
              FirstName STRING,
              LastName STRING,
              DateOfBirth DATE,
              Email STRING,
              PhoneNumber STRING,
              JobTitle STRING,
              BranchID STRING,
              DateOfJoining TIMESTAMP,
              PAN STRING,
              Aadhaar STRING,
              StartDate TIMESTAMP,
              EndDate TIMESTAMP,
              IsActive BOOLEAN    
        )USING DELTA
        LOCATION '{gold_path}'
        """
spark.sql(create_dim_employees_sql)

### **Load silver data**

In [0]:
# Read the silver employees dataset (Delta format) from its storage path.
silver_df = spark.read.load(silver_path, format="delta")

### **Add SCD 2 columns**

In [0]:
# Cast the two date columns to the types declared on the gold table, then stamp
# every incoming row as a current SCD2 version: StartDate = now, open-ended
# (NULL) EndDate, IsActive = true.
silver_df = (
    silver_df
    .withColumn("DateOfBirth", col("DateOfBirth").cast(DateType()))
    .withColumn("DateOfJoining", col("DateOfJoining").cast(TimestampType()))
    .withColumn("StartDate", current_timestamp())
    .withColumn("EndDate", lit(None).cast(TimestampType()))
    .withColumn("IsActive", lit(True))
)

In [0]:
# Preview the prepared silver rows.
# NOTE(review): the rendered output presumably includes Email / PAN / Aadhaar
# values -- consider clearing cell outputs before sharing the notebook.
display(silver_df)

### **Load gold data**

In [0]:
# Obtain a DeltaTable handle on the gold dimension. If nothing Delta-formatted
# exists at gold_path yet (first-time load), seed it with the silver rows first.
# NOTE(review): a table seeded this way takes its schema from silver_df, so it
# presumably lacks the Employeekey identity column declared in the CREATE TABLE
# cell -- confirm this fallback is still wanted.
if not DeltaTable.isDeltaTable(spark, gold_path):
    silver_df.write.format("delta").mode("append").save(gold_path)

dim_employees = DeltaTable.forPath(spark, gold_path)

### **Delta merge & Load data in gold layer**

In [0]:
# SCD Type 2 upsert of silver_df into the gold dimension.
#
# A single MERGE applies at most one clause per source row. If a changed
# employee's source row is joined directly to the active gold row, it is
# "matched": the old version gets closed, but the new version is never inserted
# in that run. To close AND insert in one merge, each changed employee is
# staged twice:
#   * merge_key = EmployeeID -> matches the active gold row and closes it
#   * merge_key = NULL       -> matches nothing and is inserted as the new
#                               active version
# Employees with no active gold row only have the first copy, which matches
# nothing and is inserted.

# Attributes whose change opens a new version of the employee.
tracked_columns = [
    "FirstName",
    "LastName",
    "DateOfBirth",
    "Email",
    "PhoneNumber",
    "JobTitle",
    "BranchID",
    "DateOfJoining",
    "PAN",
    "Aadhaar",
]

# Null-safe inequality. With plain `<>`, a NULL on either side evaluates to
# NULL (not true), so a change to or from NULL would go undetected.
change_condition = " OR ".join(
    f"NOT (t.{name} <=> s.{name})" for name in tracked_columns
)

# Incoming rows whose currently-active gold version differs in a tracked column.
active_gold = dim_employees.toDF().where("IsActive = true").alias("t")
changed_rows = (
    silver_df.alias("s")
    .join(active_gold, col("s.EmployeeID") == col("t.EmployeeID"))
    .where(expr(change_condition))
    .select("s.*")
)

# Stage the source: every silver row keyed by its EmployeeID, plus one extra
# NULL-keyed copy per changed row (the copy that becomes the new version).
key_type = silver_df.schema["EmployeeID"].dataType
staged_updates = (
    changed_rows.withColumn("merge_key", lit(None).cast(key_type))
    .unionByName(silver_df.withColumn("merge_key", col("EmployeeID")))
)

# Employeekey is deliberately absent: it is a GENERATED ALWAYS identity column.
insert_columns = ["EmployeeID", *tracked_columns, "StartDate", "EndDate", "IsActive"]

(
    dim_employees.alias("t")
    .merge(
        staged_updates.alias("s"),
        "t.EmployeeID = s.merge_key AND t.IsActive = true"
    )
    # Close the active version only when a tracked attribute actually changed.
    .whenMatchedUpdate(
        condition=change_condition,
        set={
            "EndDate": current_timestamp(),
            "IsActive": "false"
        }
    )
    # New employees and the NULL-keyed copies of changed employees land here.
    .whenNotMatchedInsert(
        values={name: f"s.{name}" for name in insert_columns}
    )
    .execute()
)

In [0]:
%sql
-- Inspect the gold employee dimension.
SELECT *
FROM finance_cata.gold.dim_employees