In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType

# Load the raw ERP customer-info records from the bronze layer.
df = spark.table("workspace.bronze.erp_customerinfo")

**Trimming**

In [0]:
# Strip leading/trailing whitespace from every string-typed column; columns of
# any other type are left untouched.
string_columns = [
    field.name
    for field in df.schema.fields
    if isinstance(field.dataType, StringType)
]
for column_name in string_columns:
    df = df.withColumn(column_name, F.trim(F.col(column_name)))


In [0]:
%sql
-- Preview of the raw bronze table (this queries the stored table, not the
-- in-memory DataFrame `df`).
SELECT *
FROM workspace.bronze.erp_customerinfo

**Normalisation**

In [0]:
# Normalise gender codes to full labels. Matching is case-insensitive and also
# accepts the spelled-out forms, so "m", "M", "male" and "MALE" all become
# "Male" (likewise for "Female"). Any other value, including null and the
# empty string, is passed through unchanged.
gen_key = F.upper(F.col("GEN"))
df = df.withColumn("GEN",
                   F.when(gen_key.isin("M", "MALE"), "Male")
                   .when(gen_key.isin("F", "FEMALE"), "Female")
                   .otherwise(F.col("GEN")))


**Birthdate Correction**

In [0]:
# Null out birth dates that lie after today's date; every other value
# (including existing nulls) is kept as-is.
bdate = F.col("BDATE")
is_future_bdate = bdate > F.current_date()
df = df.withColumn(
    "BDATE",
    F.when(is_future_bdate, F.lit(None)).otherwise(bdate),
)

**Customer ID correction**

In [0]:
# Remove a leading "NAS" prefix from customer IDs; IDs without the prefix (and
# nulls) are left unchanged.
# An anchored regexp_replace is used instead of
# F.substring(col, 4, F.length(col)): older PySpark releases require integer
# pos/len arguments for F.substring and reject a Column length, whereas
# regexp_replace with a string pattern behaves the same across versions.
df = df.withColumn("CID", F.regexp_replace(F.col("CID"), r"^NAS", ""))

**Renaming Columns**

In [0]:
# Source column name -> silver column name. Keys are matched against the
# DataFrame's columns case-insensitively (see below).
RENAME_MAP = {
    "cid": "customer_number",
    "bdate": "birth_date",
    "gen": "gender"
}

# withColumnRenamed is a silent no-op when the source name does not resolve,
# which happens for a lowercase key against an uppercase column if
# spark.sql.caseSensitive is enabled. Resolve each key to the column's actual
# spelling first so the rename does not depend on that setting.
actual_names = {name.lower(): name for name in df.columns}
for old_name, new_name in RENAME_MAP.items():
    source_name = actual_names.get(old_name.lower(), old_name)
    df = df.withColumnRenamed(source_name, new_name)
     

**Writing to the Silver Table**

In [0]:
# Replace the silver table with a single overwrite instead of DROP + write.
# overwriteSchema lets the overwrite replace the stored schema as well, so the
# prior DROP TABLE is unnecessary; skipping it keeps the existing table (and its
# Delta history) in place if the write fails part-way.
(
    df.write
    .mode("overwrite")
    .format("delta")
    .option("overwriteSchema", "true")
    .saveAsTable("workspace.silver.erp_customers")
)