# Init

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType
from pyspark.sql.functions import trim, col

# Reading from Bronze

In [0]:
# Load the raw CRM customer records from the Bronze layer.
bronze_table_name = "workspace.bronze.crm_cust_info"
df = spark.read.table(bronze_table_name)

# Data transformation

## Trim the string

In [0]:
# Trim leading/trailing whitespace from every string column.
# One select() builds a single projection; calling withColumn() once per
# column inside a loop stacks a projection per call and bloats the query plan.
# Non-string columns pass through unchanged and column order is preserved.
df = df.select(
  [
    trim(col(field.name)).alias(field.name)
    if isinstance(field.dataType, StringType)  # idiomatic type check
    else col(field.name)
    for field in df.schema.fields
  ]
)

display(df)

## Normalizing the marital status and gender

In [0]:
# Expand the single-letter codes into readable labels.
# Any value that matches none of the codes (including NULL) is mapped to
# "n/a" in BOTH columns, so the two normalized columns follow the same rule
# instead of marital status silently turning unmatched values into NULL.
df = (
  df.withColumn(
    "cst_marital_status",
    F.when(F.col("cst_marital_status") == "M", "Married")
      .when(F.col("cst_marital_status") == "S", "Single")
      .otherwise("n/a"),
  )
  .withColumn(
    "cst_gndr",
    F.when(F.col("cst_gndr") == "M", "Male")
      .when(F.col("cst_gndr") == "F", "Female")
      .otherwise("n/a"),
  )
)

display(df)

## Renaming the column names

In [0]:
# Bronze (source-system) column name -> Silver column name.
# Keyword order is kept, so the renames are applied in this order.
rename_map = dict(
    cst_id="customer_id",
    cst_key="customer_key",
    cst_firstname="firstname",
    cst_lastname="lastname",
    cst_marital_status="marital_status",
    cst_gndr="gender",
    cst_create_date="created_date",
)

In [0]:
# Apply each Bronze -> Silver column rename from rename_map, in map order.
for source_column in rename_map:
    target_column = rename_map[source_column]
    df = df.withColumnRenamed(source_column, target_column)

display(df)

# Write to Silver Table

In [0]:
# Persist the cleaned customers as a Delta table in the Silver layer,
# replacing both the data and the schema of any existing table.
# The target is fully qualified with the same catalog used for the Bronze
# read, so the write does not depend on the session's current catalog.
(df.write
  .format("delta")
  .mode("overwrite")
  .option("overwriteSchema", "true")
  .saveAsTable("workspace.silver.crm_customers")
)