## INITIALIZATION

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType
from pyspark.sql.functions import trim, col

In [0]:
# Mapping of raw Bronze column names to their standardized Silver names.
RENAME_MAP = dict(
    GEN="Gender",
    CID="Customer_ID",
    BDATE="Birth_Date",
)

# Read the Bronze Layer

In [0]:
# Load the raw ERP customer table from the Bronze layer.
BRONZE_TABLE = "workspace.bronze.erp_cust_az12_raw"
df = spark.table(BRONZE_TABLE)

# Silver Transformation

## Trimming the Values

In [0]:
# Trim leading/trailing spaces on every string column so that stray padding
# does not break filters and joins downstream. Non-string columns pass through
# unchanged and the original column order is kept.
#
# A single select() is used instead of withColumn() in a loop: each
# withColumn() call adds another projection to the query plan, which the Spark
# documentation warns can produce very large plans on wide tables.
df = df.select(
    [
        trim(col(field.name)).alias(field.name)
        if isinstance(field.dataType, StringType)
        else col(field.name)
        for field in df.schema.fields
    ]
)

## Normalization

In [0]:
# Normalize the coded gender values into readable labels.
# Matching is done on a trimmed, upper-cased copy of GEN so that:
#   - the single-letter codes F / M map to "Female" / "Male",
#   - values that are already spelled out ("Female", "male", ...) are kept
#     instead of being collapsed to "n/a",
#   - this cell gives the same result even if the trimming cell was skipped.
# Everything else (NULL, empty string, unknown codes) becomes "n/a".
gender_upper = F.upper(F.trim(F.col("GEN")))

df = df.withColumn(
    "GEN",
    F.when(gender_upper.isin("F", "FEMALE"), "Female")
     .when(gender_upper.isin("M", "MALE"), "Male")
     .otherwise("n/a"),
)

## Renaming the Columns

In [0]:
# Apply the raw -> standardized column names defined in RENAME_MAP,
# one column at a time.
for raw_col in RENAME_MAP:
    df = df.withColumnRenamed(raw_col, RENAME_MAP[raw_col])

# Write the Silver Layer

In [0]:
# Persist the cleaned and standardized customer data as the Silver-layer
# Delta table, replacing whatever the table currently contains.
SILVER_TABLE = "workspace.silver.erp_customers"

silver_writer = df.write.format("delta").mode("overwrite")
silver_writer.saveAsTable(SILVER_TABLE)

In [0]:
%sql
-- Sanity check: inspect the rows that were just written to the Silver table.
SELECT
  *
FROM
  workspace.silver.erp_customers