**Transform Data**

**Trim extra space**

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType
# Load the bronze CRM product table and strip leading/trailing whitespace
# from every string-typed column; non-string columns pass through untouched.
df = spark.read.table("workspace.bronze.crm_product_info")

# One select() applies all trims in a single projection. Calling withColumn()
# once per column in a loop adds a projection per call and can bloat the
# query plan on wide tables. Column order is preserved because the list is
# built in schema order.
df = df.select(
    [
        F.trim(F.col(field.name)).alias(field.name)
        if isinstance(field.dataType, StringType)
        else F.col(field.name)
        for field in df.schema.fields
    ]
)

**Normalisation**

In [0]:
# Expand the single-letter product-line codes into descriptive labels.
# The comparison is done on the upper-cased value, so "m" and "M" both match;
# anything else (including null) falls through to "n/a".
line_code = F.upper(F.col("prd_line"))

line_label = F.when(line_code == "M", "Mountain")
for code, label in (("R", "Road"), ("S", "Other Sales"), ("T", "Touring")):
    line_label = line_label.when(line_code == code, label)

df = df.withColumn("prd_line", line_label.otherwise("n/a"))

**Handling null**

In [0]:
# Replace a missing product cost with 0; non-null costs are left as they are.
cost_or_zero = F.coalesce(F.col("prd_cost"), F.lit(0))
df = df.withColumn("prd_cost", cost_or_zero)

display(df)

**Product Key Parsing**

In [0]:
# Split the raw prd_key into two columns (per the original note, so the
# values line up with the gold table):
#   cat_id  - characters 1-5 of prd_key, with "-" replaced by "_"
#   prd_key - everything from character 7 onwards
raw_key = F.col("prd_key")

# cat_id must be derived first: the next statement overwrites prd_key.
df = df.withColumn(
    "cat_id",
    F.regexp_replace(F.substring(raw_key, 1, 5), "-", "_"),
)

# Column.substr accepts Column arguments for both start and length, whereas
# F.substring only takes a Column for `len` on newer PySpark releases.
# Using the full string length as `len` simply means "to the end of the string".
df = df.withColumn("prd_key", raw_key.substr(F.lit(7), F.length(raw_key)))

display(df)

**Renaming columns**

In [0]:
# Bronze column name -> silver column name.
Rename_Map = {
    "prd_id": "product_id",
    "cat_id": "category_id",
    "prd_key": "product_number",
    "prd_nm": "product_name",
    "prd_cost": "product_cost",
    "prd_line": "product_line",
    "prd_start_dt": "start_date",
    "prd_end_dt": "end_date",
}

# Apply the renames one at a time; withColumnRenamed is a no-op when the
# source column is absent, so missing columns do not raise here.
for source_col in Rename_Map:
    target_col = Rename_Map[source_col]
    df = df.withColumnRenamed(source_col, target_col)

display(df)

**Writing into silver table**

In [0]:
# Replace the silver table with the transformed frame in a single Delta
# overwrite. Dropping the table first is unnecessary and leaves a window in
# which the table does not exist (and stays missing if the write then fails);
# overwriteSchema lets the overwrite also replace a changed schema, which is
# what the DROP was compensating for.
# NOTE(review): the table name has no catalog prefix, so it resolves against
# the session's current catalog, unlike the fully qualified bronze read.
# Confirm this is intended.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("silver.crm_Product_info")
)