### **Reading raw ingested data from the Bronze layer**

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import *

In [0]:
# Load the returns data that was ingested as Parquet files into the bronze container.
df_return = (
    spark.read
    .format("parquet")
    .load("abfss://bronze@stretailenvdev.dfs.core.windows.net/f_returns")
)

In [0]:
# Preview up to 10 rows of the bronze returns data in the notebook output.
df_return.limit(10).display()

In [0]:
# Print the column names and data types Spark resolved for the bronze returns data.
df_return.printSchema()

### **Incremental loading into the Silver layer**

In [0]:
from delta.tables import DeltaTable

# Location of the silver-layer returns Delta table.
silver_path = "abfss://silver@stretailenvdev.dfs.core.windows.net/s_returns"

# Watermark for incremental loading: the greatest ReturnDate already stored in
# silver, or None when no Delta table exists at silver_path yet.
if not DeltaTable.isDeltaTable(spark, silver_path):
    last_max_ts = None
else:
    last_max_ts = (
        spark.read.format("delta")
        .load(silver_path)
        .agg(max(col("ReturnDate")).alias("max_ts"))
        .first()["max_ts"]
    )

In [0]:
# Restrict this run to bronze rows newer than the silver watermark.
# The check is an explicit "is not None" so that only a missing watermark
# triggers a full load; a falsy-but-valid watermark value still filters.
# NOTE(review): the strict ">" skips rows whose ReturnDate equals the
# watermark -- confirm late-arriving rows can never share that value.
if last_max_ts is not None:
    df_inc = df_return.filter(col("ReturnDate") > lit(last_max_ts))
else:
    # No watermark available: process the complete bronze dataset.
    df_inc = df_return

### **Data Transformation**

**Remove duplicates**

In [0]:
# Keep a single row per ReturnID in the incremental batch.
# NOTE(review): this call does not specify which of the duplicate rows is
# kept -- confirm that any surviving row is acceptable.
df_trn = df_inc.dropDuplicates(["ReturnID"])

**Handling null values**

In [0]:
# Fill nulls with per-column defaults: 0 for Quantity, "Unknown" for Reason.
df_trn = df_trn.na.fill({"Quantity": 0, "Reason": "Unknown"})

**Standardization**

In [0]:
# Standardise the Reason text by converting it to lower case in place.
df_trn = df_trn.withColumn(
    "Reason",
    lower(df_trn["Reason"]),
)

In [0]:
# Spot-check up to 5 rows of the transformed batch in the notebook output.
df_trn.limit(5).display()

### **Upsert (Merge) logic into Silver Delta Table**

In [0]:
# Upsert the transformed batch into the silver Delta table, keyed on ReturnID.
if DeltaTable.isDeltaTable(spark, silver_path):
    silver_table = DeltaTable.forPath(spark, silver_path)
    (
        silver_table.alias("tgt")
        .merge(
            df_trn.alias("src"),
            # The condition must reference the "tgt" alias declared above;
            # any other prefix cannot be resolved by the merge.
            "tgt.ReturnID = src.ReturnID",
        )
        # ReturnID already in silver: overwrite every column from the source row.
        .whenMatchedUpdateAll()
        # ReturnID not yet in silver: insert the source row.
        .whenNotMatchedInsertAll()
        # The merge builder only describes the operation; it must be called
        # with execute() for anything to be written.
        .execute()
    )
else:
    # No Delta table at silver_path yet: create it from this batch.
    df_trn.write.format("delta").mode("overwrite").save(silver_path)

    