### **Import packages**

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
from delta.tables import DeltaTable

### **Define paths**

In [0]:
# Storage locations (abfss:// URIs) used throughout this notebook:
#   gold_path   - where the dim_accounts Delta table is stored / merged into
#   silver_path - the s_accounts Delta table that is read as the source
gold_path = "abfss://gold@stfinancedev.dfs.core.windows.net/dim_accounts"
silver_path = "abfss://silver@stfinancedev.dfs.core.windows.net/s_accounts"

### **Create Delta Lake table schema**

In [0]:
# DDL for the gold dimension table. The statement is a no-op when the table
# already exists (IF NOT EXISTS). Accountkey is an identity column that Delta
# populates itself, and the table data lives at `gold_path`.
dim_accounts_ddl = f"""
    CREATE TABLE IF NOT EXISTS finance_cata.gold.dim_accounts(
        Accountkey BIGINT GENERATED ALWAYS AS IDENTITY(START WITH 1 INCREMENT BY 1),
        AccountID  STRING,
        CustomerID  STRING,
        BranchID   STRING,
        AccountType STRING,
        Balance DECIMAL(10,2),
        Currency STRING,
        AccountStatus STRING,
        DateOpened TIMESTAMP,
        HandledByEmployeeID STRING,
        IFSC STRING,
        StartDate TIMESTAMP,
        EndDate TIMESTAMP,
        IsActive BOOLEAN
    )USING DELTA
    LOCATION '{gold_path}'
    """
spark.sql(dim_accounts_ddl)

In [0]:
# Print the column names and types of the silver Delta table at `silver_path`.
(
    spark.read
    .format("delta")
    .load(silver_path)
    .printSchema()
)

### **Load silver data**

In [0]:
# Read the silver accounts Delta table; this is the source side of the merge.
silver_df = (
    spark.read
    .format("delta")
    .load(silver_path)
)

### **Add SCD Type 2 columns**

In [0]:
# Attach the SCD2 bookkeeping columns to every incoming row:
#   StartDate - timestamp at which this version becomes effective
#   EndDate   - NULL (typed as timestamp) because the version is still open
#   IsActive  - True, marking the row as the current version
silver_df = (
    silver_df
    .withColumn("StartDate", current_timestamp())
    .withColumn("EndDate", lit(None).cast("timestamp"))
    .withColumn("IsActive", lit(True))
)

In [0]:
# Render the prepared source rows (including the SCD2 columns) for inspection.
display(silver_df)

### **Load gold data**

In [0]:
# Make sure a Delta table exists at `gold_path` before taking a handle on it.
gold_table_exists = DeltaTable.isDeltaTable(spark, gold_path)

if not gold_table_exists:
    # First-time load: seed the gold location with the prepared silver rows.
    (
        silver_df.write
        .format("delta")
        .mode("append")
        .save(gold_path)
    )

# Handle used by the merge step below.
dim_accounts = DeltaTable.forPath(spark, gold_path)

### **Delta merge & upsert data in gold layer**

In [0]:
# SCD Type 2 upsert of the silver snapshot (`silver_df`) into `dim_accounts`.
#
# For every incoming account:
#   * new AccountID            -> insert as the active version
#   * unchanged active version -> nothing happens
#   * changed active version   -> expire the old row (EndDate / IsActive) AND
#                                 insert the incoming row as the new active
#                                 version in the same MERGE
#
# A single MERGE can apply only one action per source row, so changed accounts
# are staged twice: once keyed on AccountID (matches and expires the old row)
# and once with a NULL merge key (never matches, so it is inserted).
# NOTE(review): assumes AccountID is unique within silver_df; duplicate source
# keys would make the MERGE fail on multiple matches - confirm upstream.

# Business attributes whose change produces a new dimension version.
tracked_columns = [
    "CustomerID",
    "BranchID",
    "AccountType",
    "Balance",
    "Currency",
    "AccountStatus",
    "DateOpened",
    "HandledByEmployeeID",
    "IFSC",
]

# `<=>` is Spark SQL's null-safe equality: plain `<>` evaluates to NULL when
# either side is NULL, which would silently hide changes to or from NULL.
# Building the expression with join() also guarantees there is no dangling
# `OR` at the end of the condition.
change_condition = " OR ".join(
    f"NOT (t.{name} <=> s.{name})" for name in tracked_columns
)

# Incoming rows whose currently active version in gold differs.
changed_accounts = (
    silver_df.alias("s")
    .join(
        dim_accounts.toDF().where("IsActive = true").alias("t"),
        col("s.AccountID") == col("t.AccountID"),
    )
    .where(change_condition)
    .select("s.*")
)

# merge_key = NULL       -> can never match, so the row is inserted
# merge_key = AccountID  -> matches the active row (if any) for update/no-op
account_id_type = silver_df.schema["AccountID"].dataType
staged_updates = (
    changed_accounts
    .withColumn("merge_key", lit(None).cast(account_id_type))
    .unionByName(silver_df.withColumn("merge_key", col("AccountID")))
)

# Accountkey is deliberately absent: it is a GENERATED ALWAYS identity column.
insert_values = {
    name: f"s.{name}"
    for name in ["AccountID", *tracked_columns, "StartDate", "EndDate", "IsActive"]
}

(
    dim_accounts.alias("t")
    .merge(
        staged_updates.alias("s"),
        "t.AccountID = s.merge_key AND t.IsActive = true"
    )
    .whenMatchedUpdate(
        condition=change_condition,
        set={
            "EndDate": "current_timestamp()",
            "IsActive": "false"
        }
    )
    .whenNotMatchedInsert(values=insert_values)
    .execute()
)

In [0]:
%sql
-- Inspect the gold dimension table after the merge.
SELECT *
FROM finance_cata.gold.dim_accounts