### **Reading raw ingested data from the Bronze layer**

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import *

In [0]:
# Load the raw orders dataset from the Bronze container (stored as Parquet).
df_orders = (
    spark.read
    .format("parquet")
    .load("abfss://bronze@stretailenvdev.dfs.core.windows.net/f_Orders")
)
                  

In [0]:
# Show a sample of at most five rows of the raw orders data for a quick visual check.
df_orders.limit(5).display()

### **Incremental Loading in Silver layer**

In [0]:
silver_path = "abfss://silver@stretailenvdev.dfs.core.windows.net/s_Orders"

from delta.tables import DeltaTable

# Determine the high-water mark for the incremental load: the latest OrderDate
# already present in the Silver Delta table, or None when the table does not
# exist yet (first run -> full load).
if DeltaTable.isDeltaTable(spark, silver_path):
    # `max` here is the Spark aggregate brought in by the wildcard import of
    # pyspark.sql.functions, not Python's builtin.
    # first() returns a single Row; read the aggregate by its alias. The value
    # is None when the table has no non-null OrderDate values.
    last_max_ts = (
        spark.read.format("delta")
        .load(silver_path)
        .agg(max(col("OrderDate")).alias("max_ts"))
        .first()["max_ts"]
    )
else:
    last_max_ts = None

In [0]:
# Incremental slice: only orders newer than the Silver high-water mark.
# When no watermark is available (falsy last_max_ts), take the full Bronze dataset.
df_inc = (
    df_orders.filter(col("OrderDate") > lit(last_max_ts))
    if last_max_ts
    else df_orders
)

### **Data transformations**

In [0]:
# Print the column names and types of the incremental slice before transforming it.
df_inc.printSchema()

**Schema alignment**

In [0]:
# Schema alignment: remove the TotalAmount column from the incremental slice.
# NOTE(review): the reason this column is excluded from Silver is not stated here — confirm.
df_ord = df_inc.drop("TotalAmount")
# Show a sample of at most five rows to check the resulting columns.
df_ord.limit(5).display()

**Handling Duplicates**

In [0]:
# Deduplicate on OrderID: rank the rows of each OrderID by OrderDate, newest
# first, and keep only the top-ranked row. Rows that tie on OrderDate are
# ordered arbitrarily by row_number().
window = (
    Window
    .partitionBy("OrderID")
    .orderBy(col("OrderDate").desc())
)

df_ord = (
    df_ord
    .withColumn("rn", row_number().over(window))
    .where(col("rn") == 1)
    .drop("rn")  # helper ranking column is not part of the output
)

**Handling Nulls**

In [0]:
# Discard rows that have no OrderID.
df_ord = df_ord.where(col("OrderID").isNotNull())



**Standardization**

In [0]:
# Standardise the Status column by lower-casing its values.
silver_ready = df_ord.withColumn(
    "Status",
    lower(col("Status")),
)

In [0]:
# Show a sample of at most five rows of the transformed data before writing it to Silver.
silver_ready.limit(5).display()

### **Upsert (Merge) logic into Silver Delta Table**

In [0]:
# Upsert the transformed orders into the Silver Delta table.
# - Table already exists: MERGE on OrderID, updating matched rows and
#   inserting unmatched ones.
# - Table does not exist yet: create it with an initial Delta write.
if DeltaTable.isDeltaTable(spark, silver_path):
    silver_tbl = DeltaTable.forPath(spark, silver_path)
    (
        silver_tbl.alias("tgt")
        .merge(
            silver_ready.alias("src"),
            # The alias in the condition must match the target alias above.
            "tgt.OrderID = src.OrderID",
        )
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute()
    )
else:
    silver_ready.write.format("delta").save(silver_path)