In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType

# Source of this notebook: the raw CRM sales-detail table in the bronze layer.
# Every later cell reassigns `df`, so this read is the start of the lineage.
BRONZE_TABLE = "workspace.bronze.crm_sales_details"
df = spark.read.table(BRONZE_TABLE)


Transform Data

Trimming

In [0]:
# Strip leading/trailing whitespace from every string-typed column.
#
# This is done in ONE select instead of one withColumn call per column: each
# withColumn adds its own projection to the query plan, so calling it in a loop
# over the schema makes the plan grow with the number of string columns.
# Column order and names are preserved; non-string columns pass through untouched.
string_columns = {
    field.name
    for field in df.schema.fields
    if isinstance(field.dataType, StringType)
}

df = df.select(
    [
        F.trim(F.col(name)).alias(name) if name in string_columns else F.col(name)
        for name in df.columns
    ]
)

**Date Correction**

In [0]:
# Columns whose values are parsed with the "yyyyMMdd" pattern below.
DATE_COLUMNS = ["sls_order_dt", "sls_ship_dt", "sls_due_dt"]


def parse_yyyymmdd(column_name):
    """Build a Column that converts ``column_name`` to a date.

    Args:
        column_name: Name of the column to convert.

    Returns:
        A Column that is NULL when the value equals 0 or its string form is not
        exactly 8 characters long, and otherwise the result of parsing the
        string form with the ``yyyyMMdd`` pattern.
    """
    raw_value = F.col(column_name)
    # One explicit string form shared by the length check and the parser, so
    # both look at the same text whatever the column's storage type is.
    as_text = raw_value.cast("string")
    is_placeholder = (raw_value == 0) | (F.length(as_text) != 8)
    # NOTE(review): an 8-character value that is not a real calendar date is left
    # to to_date; whether that yields NULL or an error presumably depends on the
    # session's ANSI setting - confirm for this workspace.
    return F.when(is_placeholder, None).otherwise(F.to_date(as_text, "yyyyMMdd"))


# Same rule for all three date columns; each is replaced in place under its
# original name.
for date_column in DATE_COLUMNS:
    df = df.withColumn(date_column, parse_yyyymmdd(date_column))

**Price Corrections**

In [0]:
# Repair unusable prices: when sls_price is NULL, zero or negative it is replaced
# by sls_sales / sls_quantity; all other prices are kept as they are.
unit_price = F.col("sls_price")
units_sold = F.col("sls_quantity")

price_is_unusable = unit_price.isNull() | (unit_price <= 0)

# A `when` without `otherwise` evaluates to NULL for rows that do not match, so a
# quantity of 0 produces a NULL price instead of a division by zero.
price_from_sales = F.when(units_sold != 0, F.col("sls_sales") / units_sold)

df = df.withColumn(
    "sls_price",
    F.when(price_is_unusable, price_from_sales).otherwise(unit_price),
)

**Rename columns**

In [0]:
# Source column name -> name used in the table written by this notebook.
RENAME_MAP = dict(
    sls_ord_num="order_number",
    sls_prd_key="product_number",
    sls_cust_id="customer_id",
    sls_order_dt="order_date",
    sls_ship_dt="ship_date",
    sls_due_dt="due_date",
    sls_sales="sales_amount",
    sls_quantity="quantity",
    sls_price="price",
)

# Rename positionally in a single step: every column keeps its place, mapped
# columns get their new name and columns missing from RENAME_MAP keep theirs.
df = df.toDF(*(RENAME_MAP.get(current_name, current_name) for current_name in df.columns))


**Write into Silver Table**

In [0]:
# Publish the transformed DataFrame as the Delta table silver.crm_sales_detail.
#
# The table is replaced by a single overwrite rather than DROP TABLE + write:
# dropping first leaves a window in which the table does not exist (and no table
# at all if the write then fails) and discards the Delta table history.
# overwriteSchema lets the same overwrite replace the stored schema when the
# DataFrame's columns or types differ from what is already there.
#
# NOTE(review): the source is read from workspace.bronze.crm_sales_details
# (three-part name, plural) while this target is silver.crm_sales_detail
# (two-part name, singular) - confirm the catalog and spelling are intended.
# NOTE(review): assumes any existing table at this name is already a Delta table.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("silver.crm_sales_detail")
)