In [0]:
import pyspark.sql.functions as F
from pyspark.sql.functions import col, trim
from pyspark.sql.types import StringType

## Reading from Bronze


In [0]:
# Load the raw CRM customer table from the bronze layer and preview it.
df = spark.read.table("workspace.bronze.crm_cust_info")
display(df)


### Renaming


In [0]:

# Bronze column name -> silver column name.
RENAME_MAP = dict(
    cst_id="customer_id",
    cst_key="customer_number",
    cst_firstname="first_name",
    cst_lastname="last_name",
    cst_marital_status="marital_status",
    cst_gndr="gender",
    cst_create_date="created_date",
)

# Apply the renames one at a time, in the order they are declared above.
for source_col in RENAME_MAP:
    df = df.withColumnRenamed(source_col, RENAME_MAP[source_col])

## Transformation
#### 1. Trim


In [0]:

# Trim leading/trailing spaces from every string-typed column.
# All trims go through a single withColumns() call (Spark 3.3+) instead of one
# withColumn() per column: each withColumn() adds its own projection to the
# query plan, and the PySpark docs advise against calling it in a loop.
string_columns = [
    field.name
    for field in df.schema.fields
    if isinstance(field.dataType, StringType)
]
if string_columns:  # skip the call entirely when there is nothing to trim
    df = df.withColumns({name: trim(col(name)) for name in string_columns})


In [0]:
# Preview the DataFrame after the trim step.
display(df)

#### 2. Normalization

In [0]:

def _decode_flag(column_name, labels):
    """Build the expression that expands a one-letter code into its label.

    Nulls stay null, codes are matched case-insensitively against the keys of
    ``labels`` (in insertion order), and any other value becomes "n/a".
    """
    raw = F.col(column_name)
    code = F.upper(raw)
    decoded = F.when(raw.isNull(), None)
    for letter, label in labels.items():
        decoded = decoded.when(code == letter, label)
    return decoded.otherwise("n/a")


# Replace the single-letter codes with readable labels.
df = df.withColumn(
    "marital_status",
    _decode_flag("marital_status", {"S": "Single", "M": "Married"}),
).withColumn(
    "gender",
    _decode_flag("gender", {"F": "Female", "M": "Male"}),
)


In [0]:
# Preview the DataFrame after normalization.
display(df)

## Writing to Silver


In [0]:
# Save the transformed customers as the silver table, overwriting existing data.
df.write.saveAsTable("workspace.silver.crm_customers", mode="overwrite")

In [0]:
%sql
-- Spot-check a few rows of the silver table written above.
SELECT *
FROM workspace.silver.crm_customers
LIMIT 10