# INITIALIZATION

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType
from pyspark.sql.functions import trim, col


In [0]:
# Raw CRM (Bronze) column name -> standardized Silver column name.
# Consumed by the rename step after all value-level cleaning is done.
RENAME_MAP = dict(
    cst_id="customer_id",
    cst_key="customer_key",
    cst_firstname="first_name",
    cst_lastname="last_name",
    cst_marital_status="marital_status",
    cst_gndr="gender",
    cst_create_date="created_date",
)

# Read from Bronze Table

In [0]:
# Load the raw CRM customer records from the Bronze layer into a DataFrame.
BRONZE_TABLE = "workspace.bronze.crm_cust_info_raw"
df = spark.table(BRONZE_TABLE)

# Silver Data Transformations

## Trimming the values

In [0]:
# Trim surrounding whitespace from every string column so that messy text does
# not break later filters and joins.
#
# This is done in a single select() rather than withColumn() inside a loop:
# each withColumn() call adds another projection to the query plan, which the
# PySpark docs warn can produce very large plans on wide tables. The select
# keeps every column, in its original order and under its original name;
# non-string columns pass through untouched.
df = df.select(
    [
        trim(col(field.name)).alias(field.name)
        if isinstance(field.dataType, StringType)
        else col(field.name)
        for field in df.schema.fields
    ]
)


## Normalization

In [0]:
# Expand the single-letter codes into readable labels:
#   marital status: S -> Single, M -> Married
#   gender:         F -> Female, M -> Male
# Matching is case-insensitive (codes are upper-cased before comparison); any
# other value, including NULL, falls through to "n/a".
marital_code = F.upper(F.col("cst_marital_status"))
marital_label = (
    F.when(marital_code == "S", "Single")
    .when(marital_code == "M", "Married")
    .otherwise("n/a")
)

gender_code = F.upper(F.col("cst_gndr"))
gender_label = (
    F.when(gender_code == "F", "Female")
    .when(gender_code == "M", "Male")
    .otherwise("n/a")
)

df = df.withColumn("cst_marital_status", marital_label)
df = df.withColumn("cst_gndr", gender_label)

## Renaming the Columns

In [0]:
# Switch from the raw CRM column names to the standardized Silver names, one
# mapping entry at a time. withColumnRenamed() is a no-op for a name that is
# not in the schema, so a missing raw column is skipped silently.
for raw_name in RENAME_MAP:
    df = df.withColumnRenamed(raw_name, RENAME_MAP[raw_name])

# Write into Silver Table

In [0]:
# Persist the cleaned, standardized customer data as the Silver Delta table.
# "overwrite" mode replaces the table's existing contents on every run.
silver_writer = df.write.format("delta").mode("overwrite")
silver_writer.saveAsTable("workspace.silver.crm_customers")

In [0]:
%sql
-- Quick check: preview a handful of rows from the freshly written Silver table.
-- LIMIT keeps the rendered output small and avoids dumping every customer
-- record (names included) into the notebook output.
SELECT *
FROM workspace.silver.crm_customers
LIMIT 10