### **Reading raw ingested data from the bronze layer**

In [0]:
from delta.tables import DeltaTable
from pyspark.sql.functions import *
from pyspark.sql.window import *

In [0]:
# Load the employee dataset stored as Parquet in the bronze container.
bronze_employee_path = "abfss://bronze@stretailenvdev.dfs.core.windows.net/f_Employee"
df_employee = spark.read.parquet(bronze_employee_path)

In [0]:
# Show the contents of the freshly loaded DataFrame, before any cleaning is applied.
df_employee.display()

In [0]:
# Print the column names and data types of the loaded DataFrame.
df_employee.printSchema()

**Handling Duplicates**

In [0]:
# Keep exactly one row per EmployeeID: number the rows inside each EmployeeID
# partition by HireDate (latest first) and retain only row number 1.
#
# The window spec is named `dedup_window` rather than `window` so that it does
# not shadow the `window` function brought into scope by
# `from pyspark.sql.functions import *`.
#
# NOTE(review): rows that tie on HireDate within an EmployeeID are numbered in
# an arbitrary order by row_number(); add a tie-breaker column to orderBy if a
# deterministic survivor is required.
dedup_window = Window.partitionBy("EmployeeID").orderBy(col("HireDate").desc())
df_employee = (
    df_employee
    .withColumn("rn", row_number().over(dedup_window))
    .filter(col("rn") == 1)
    .drop("rn")  # helper column is only needed for the filter
)


**Handling Nulls**

In [0]:
# Replace missing values in the listed columns with fixed placeholders;
# columns not named in the mapping are left untouched.
null_defaults = {
    "FirstName": "N/A",
    "LastName": "N/A",
    "Role": "N/A",
    "StoreID": "0",
}
df_employee = df_employee.na.fill(null_defaults)

### **Upsert (Merge) logic into Silver Delta Table**

In [0]:
# Upsert the cleaned employee rows into the silver Delta table, keyed on EmployeeID.
# `DeltaTable` is imported in the notebook's import cell at the top.
silver_path = "abfss://silver@stretailenvdev.dfs.core.windows.net/s_Employee"

if DeltaTable.isDeltaTable(spark, silver_path):
    # The target already exists as a Delta table: update every column of rows
    # whose EmployeeID matches, and insert rows whose EmployeeID is new.
    # NOTE(review): a NULL EmployeeID never satisfies the `=` condition, so such
    # a source row would be inserted again on every run — confirm the key is
    # never NULL upstream.
    silver_table = DeltaTable.forPath(spark, silver_path)
    (
        silver_table.alias("tgt")
        .merge(
            df_employee.alias("src"),
            "tgt.EmployeeID = src.EmployeeID",
        )
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute()
    )
else:
    # No Delta table at the target path yet: create it from the current batch.
    df_employee.write.format("delta").mode("overwrite").save(silver_path)