In [0]:

import pyspark.sql.functions as F
from pyspark.sql.types import StringType
from pyspark.sql.functions import trim, col

In [0]:
# Load the bronze-layer ERP customer table (erp_cust_az12) into a DataFrame.
df = spark.read.table("workspace.bronze.erp_cust_az12")

Silver Transformation

In [0]:
# Trim leading/trailing whitespace from every string column.
# This is done in a single select() so Spark adds one projection in total;
# calling withColumn() once per column inside a loop adds a projection per
# call and can produce very large query plans on wide tables.
# Column order and names are preserved; non-string columns pass through as-is.
df = df.select(
    [
        trim(col(field.name)).alias(field.name)
        if isinstance(field.dataType, StringType)
        else col(field.name)
        for field in df.schema.fields
    ]
)


In [0]:
# Render the current state of the DataFrame to inspect it.
display(df)

In [0]:
# --- Birthdate validation ---
# A birthdate later than today is replaced with NULL; every other value
# (including NULL) is kept unchanged.
is_future_birthdate = F.col("bdate") > F.current_date()
df = df.withColumn(
    "bdate",
    F.when(is_future_birthdate, F.lit(None)).otherwise(F.col("bdate")),
)

# --- Gender validation ---
# Compare case-insensitively by upper-casing first, then map the accepted
# codes to "Female" / "Male". Anything else (including NULL) becomes "n/a".
gender_upper = F.upper(F.col("gen"))
gender_normalized = (
    F.when(gender_upper.isin("F", "FEMALE"), "Female")
    .when(gender_upper.isin("M", "MALE"), "Male")
    .otherwise("n/a")
)
df = df.withColumn("gen", gender_normalized)


In [0]:
# Render the DataFrame to inspect it at this point in the notebook.
display(df)

In [0]:
# Mapping of current column name -> new column name.
Rename_map = dict(
    cid="customer_number",
    bdate="birth_date",
    gen="gender",
)

# Apply the renames one column at a time, in the order declared above.
for source_column in Rename_map:
    df = df.withColumnRenamed(source_column, Rename_map[source_column])

# Sanity check: show the DataFrame with its new column names.
df.display()

Write to silver table 

In [0]:
# Save the DataFrame as a Delta table, overwriting any existing contents
# of workspace.silver.erp_customers.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("workspace.silver.erp_customers")
)

In [0]:
%sql
-- Spot-check: return up to five rows from the silver table.
SELECT *
FROM workspace.silver.erp_customers
LIMIT 5