### **Import packages**

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
from delta.tables import DeltaTable

### **Define paths**

In [0]:
# ADLS Gen2 (abfss) locations: the silver customers table that is read as the
# source, and the gold dim_customers table that is created and merged into.
silver_path, gold_path = (
    "abfss://silver@stfinancedev.dfs.core.windows.net/s_customers",
    "abfss://gold@stfinancedev.dfs.core.windows.net/dim_customers",
)


### **Create dim_customers delta table schema**

In [0]:
%sql
-- Presumably one-time setup statements, intentionally left commented out.
-- The table created later in this notebook is finance_cata.gold.dim_customers,
-- so this catalog and schema are expected to exist already.
--create catalog finance_cata
--create schema finance_cata.gold

In [0]:
# DDL for the gold customer dimension, registered at gold_path.
# - CustomerKey is an identity column generated by Delta (surrogate key).
# - StartDate / EndDate / IsActive are the SCD Type 2 bookkeeping columns.
# IF NOT EXISTS keeps this cell safe to re-run once the table is in place.
dim_customers_ddl = f"""
          CREATE TABLE IF NOT EXISTS finance_cata.gold.dim_customers(
                CustomerKey BIGINT GENERATED ALWAYS AS IDENTITY(START WITH 1 INCREMENT BY 1),
                CustomerID INTEGER,
                FirstName STRING,
                LastName  STRING,
                DateOfBirth DATE,
                Gender  STRING,
                Email STRING,
                PhoneNumber STRING,
                Address STRING,
                City STRING,
                State STRING,
                Pincode STRING,
                DateOfRegistration TIMESTAMP,
                CustomerType STRING,
                PAN STRING,
                Aadhaar STRING,
                StartDate TIMESTAMP,
                EndDate TIMESTAMP,
                IsActive BOOLEAN
          )USING DELTA
          LOCATION '{gold_path}'
          """
spark.sql(dim_customers_ddl)

### **Load silver data**

In [0]:
# Read the silver customers Delta table; this frame is the merge source below.
silver_df = spark.read.load(silver_path, format="delta")

### **Add SCD 2 columns**

In [0]:
# Stamp every incoming row as a fresh, open-ended, active version:
# StartDate = load time, EndDate = NULL (typed as timestamp), IsActive = true.
# withColumn replaces a same-named column, so re-running this cell is harmless.
silver_df = (
    silver_df
    .withColumn("StartDate", current_timestamp())
    .withColumn("EndDate", lit(None).cast(TimestampType()))
    .withColumn("IsActive", lit(True))
)

In [0]:
# Visual check of the silver rows after the SCD2 columns were added.
# NOTE(review): the gold schema lists Email, PhoneNumber, PAN and Aadhaar, so this
# output presumably contains personal data — clear it before sharing the notebook.
silver_df.display()

### **Load gold data**

In [0]:
# First-time load: when nothing at gold_path is a Delta table yet, seed it
# with the current silver frame so that a table handle can be opened below.
if not DeltaTable.isDeltaTable(spark, gold_path):
    silver_df.write.format("delta").mode("append").save(gold_path)

# Handle used as the merge target in the next cell.
dim_customer = DeltaTable.forPath(spark, gold_path)

### **Delta merge & Load data in gold layer**

In [0]:
# SCD Type 2 merge of the silver snapshot into dim_customers.
#
# Outcome per incoming customer:
#   * unknown CustomerID                 -> insert as the active version
#   * known and unchanged                -> nothing happens
#   * known with a changed attribute     -> expire the active row AND insert
#                                           the new values as the active version
#
# A single MERGE can apply only one action per source row, so a changed
# customer is staged twice: once with mergeKey = CustomerID (matches the active
# row and closes it) and once with mergeKey = NULL (never matches, so it falls
# through to the insert clause). Without the second copy, a changed customer
# would be left with no active row until the next run.
# NOTE(review): assumes silver holds at most one row per CustomerID; MERGE
# fails when several source rows match the same target row — confirm upstream.

# Attributes whose change opens a new version.
tracked_columns = [
    "FirstName", "LastName", "DateOfBirth", "Gender", "Email", "PhoneNumber",
    "Address", "City", "State", "Pincode", "DateOfRegistration",
    "CustomerType", "PAN", "Aadhaar",
]

# `<=>` is null-safe equality. A plain `<>` evaluates to NULL when either side
# is NULL, so a NULL -> value (or value -> NULL) change would go undetected.
change_condition = " OR ".join(
    [f"NOT (t.{name} <=> s.{name})" for name in tracked_columns]
)

# Incoming rows whose currently active dimension row differs.
changed_customers = (
    silver_df.alias("s")
    .join(
        dim_customer.toDF().alias("t"),
        expr("s.CustomerID = t.CustomerID AND t.IsActive = true"),
        "inner",
    )
    .where(change_condition)
    .select("s.*")
)

# NULL-keyed copies (new versions to insert) + CustomerID-keyed full snapshot.
merge_key_type = silver_df.schema["CustomerID"].dataType
staged_updates = changed_customers.withColumn(
    "mergeKey", lit(None).cast(merge_key_type)
).unionByName(silver_df.withColumn("mergeKey", col("CustomerID")))

# Every dimension column except the generated CustomerKey is taken from source.
insert_columns = ["CustomerID", *tracked_columns, "StartDate", "EndDate", "IsActive"]

(
    dim_customer.alias("t")
    .merge(
        staged_updates.alias("s"),
        "t.CustomerID = s.mergeKey AND t.IsActive = true"
    )
    .whenMatchedUpdate(
        condition=change_condition,
        # Close the superseded version; its attributes are left untouched.
        set={
            "EndDate" : "current_timestamp()",
            "IsActive" : "False"
        }
    )
    .whenNotMatchedInsert(
        values={name: f"s.{name}" for name in insert_columns}
    )
    .execute()
)

In [0]:
%sql
-- Inspect the gold dimension table after the merge.
SELECT *
FROM finance_cata.gold.dim_customers;