# INITIALIZE

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType
from pyspark.sql.functions import col, trim

In [0]:
# Column renames applied on the way from Raw to Silver.
# Keys are the source column names, values are the Silver column names.
RENAME_MAP = dict(
    cid="company_id",
    cntry="country",
)
     

# Read the Bronze Table

In [0]:
# Load the raw ERP location table from the Bronze layer.
df = spark.read.table("workspace.bronze.erp_loc_a101_raw")

# Silver Transformations

## Trimming

In [0]:
# Trim leading/trailing whitespace on every string column so that messy text
# does not break later filters and joins. Non-string columns pass through as-is.
# A single select() builds one projection; calling withColumn() once per column
# in a loop stacks a projection per call and can produce very large query plans.
df = df.select(
    *[
        trim(col(field.name)).alias(field.name)
        if isinstance(field.dataType, StringType)
        else col(field.name)
        for field in df.schema.fields
    ]
)

## Derivations and Normalization

In [0]:
# Standardize the company id and make country values human-readable.
# The column expressions are named first, then applied in the same order as before.

# Company id with every "-" removed.
cid_without_dashes = F.regexp_replace(F.col("cid"), "-", "")

# Country mapping; branches are evaluated top to bottom:
#   "DE"          -> "Germany"
#   "US" / "USA"  -> "United States"
#   "" or NULL    -> "n/a"
#   anything else -> kept unchanged
country = F.col("cntry")
country_readable = (
    F.when(country == "DE", "Germany")
    .when(country.isin("US", "USA"), "United States")
    .when((country == "") | country.isNull(), "n/a")
    .otherwise(country)
)

df = df.withColumn("cid", cid_without_dashes).withColumn("cntry", country_readable)

## Renaming

In [0]:
# Apply the Raw -> Silver column names defined in RENAME_MAP, one column at a time.
for source_name in RENAME_MAP:
    df = df.withColumnRenamed(source_name, RENAME_MAP[source_name])

# Writing Silver Table

In [0]:
# Persist the cleaned + standardized frame as the Silver Delta table,
# replacing whatever the table held before.
# NOTE(review): the target name has no catalog prefix, unlike the Bronze read
# (workspace.bronze...), so it resolves against the session's current catalog — confirm.
silver_writer = df.write.format("delta").mode("overwrite")
silver_writer.saveAsTable("silver.erp_customer_location")

In [0]:
%sql
-- Sanity check: look at the rows just written to the Silver table
SELECT *
FROM silver.erp_customer_location