In [0]:
from pyspark.sql.functions import *
from pyspark.sql import *
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

In [0]:
# One sample employee row, in schema order:
# (emp_id, name, city, country, contact_no).
data = [(1000, "Michael", "Columbus", "USA", 68945682)]

# Explicit schema for the source rows; each entry is (column name, Spark type).
mySchema = StructType(
    [
        StructField(column_name, column_type())
        for column_name, column_type in (
            ("emp_id", IntegerType),
            ("name", StringType),
            ("city", StringType),
            ("country", StringType),
            ("contact_no", IntegerType),
        )
    ]
)

# Source DataFrame that will later be merged into the Delta table.
source_df = spark.createDataFrame(data, schema=mySchema)

source_df.show()

Now let's create a Delta table.

In [0]:
%sql
-- Target dimension table for the SCD Type 1 merge; columns mirror the source schema.
CREATE OR REPLACE TABLE dim_employee (
  emp_id     INT,
  name       STRING,
  city       STRING,
  country    STRING,
  contact_no INT
) USING DELTA;

#### SQL Approach: Perform a MERGE Operation to Implement SCD Type 1

First, we need to create a table or view from the source DataFrame so that SCD Type 1 can be performed with the SQL approach.

In [0]:
# Register the source DataFrame as a temp view so the %sql cells can query it by name.
source_df.createOrReplaceTempView(name="source_view")

In [0]:
%sql
-- Preview the rows exposed through the temp view before merging.
SELECT *
FROM source_view

Now let's perform the MERGE operation in SQL.

In [0]:
%sql
-- SCD Type 1 upsert: rows are matched on emp_id.
--   * match    -> overwrite the existing attributes with the source values
--   * no match -> insert the source row as a new employee
MERGE INTO dim_employee AS tgt
USING source_view AS src
  ON tgt.emp_id = src.emp_id
WHEN MATCHED THEN
  UPDATE SET
    tgt.name       = src.name,
    tgt.city       = src.city,
    tgt.country    = src.country,
    tgt.contact_no = src.contact_no
WHEN NOT MATCHED THEN
  INSERT (emp_id, name, city, country, contact_no)
  VALUES (src.emp_id, src.name, src.city, src.country, src.contact_no)

For SCD Type 1 with the PySpark (DataFrame API) approach, we first have to load the Delta table as a `DeltaTable` object.

In [0]:
from delta.tables import DeltaTable

In [0]:
# Load the target Delta table by its catalog name so the merge below operates on
# the same dim_employee table that this notebook creates and queries.
# For a path-based (external) Delta table, use
# DeltaTable.forPath(spark, "<storage path>") with the real location instead.
delta_df = DeltaTable.forName(spark, "dim_employee")

Let's perform the merge operation.

In [0]:
# SCD Type 1 upsert through the Delta Lake Python API.
# - Both sides are aliased so the SQL-string expressions below can refer to
#   "target_df.<col>" and "source_df.<col>" unambiguously.
# - The join condition is a single SQL expression string.
# - The whole chain is wrapped in parentheses so it can span multiple lines.
(
    delta_df.alias("target_df")
    .merge(
        source_df.alias("source_df"),
        condition="target_df.emp_id = source_df.emp_id",
    )
    # Existing employee: overwrite the attributes with the source values.
    .whenMatchedUpdate(
        set={
            "name": "source_df.name",
            "city": "source_df.city",
            "country": "source_df.country",
            "contact_no": "source_df.contact_no",
        }
    )
    # New employee: insert the full source row.
    .whenNotMatchedInsert(
        values={
            "emp_id": "source_df.emp_id",
            "name": "source_df.name",
            "city": "source_df.city",
            "country": "source_df.country",
            "contact_no": "source_df.contact_no",
        }
    )
    .execute()
)

In [0]:
%sql
-- Inspect the target table after the merge.
SELECT *
FROM dim_employee