In [0]:
from pyspark.sql import functions as F

In [0]:
# Load the raw customer records from the Bronze layer.
bronze_customers = spark.read.table("vsqproject.bronze.raw_customers")
# Uncomment to preview the input: display(bronze_customers)

In [0]:
# Transform / clean (columns: customerid, firstname, lastname, gender, dob, city, state)
#
# Steps, in order:
#   1. drop rows without a customer key, then keep one row per key
#   2. rename the key so it matches the sales foreign key
#   3. normalise the text columns (trim, then title-case or upper-case)
#   4. default missing/blank gender to "Unknown"
#   5. parse dob using the MM/dd/yyyy pattern
silver_customers = (
    bronze_customers
      .filter(F.col("customerid").isNotNull())             # required key
      # NOTE: when several rows share a customerid, which one survives is not
      # controlled here (no ordering is applied before de-duplication).
      .dropDuplicates(["customerid"])
      .withColumnRenamed("customerid", "customer_id")      # rename to match sales FK
      .withColumn("firstname", F.initcap(F.trim(F.col("firstname"))))
      .withColumn("lastname", F.initcap(F.trim(F.col("lastname"))))
      # NULL *and* blank (empty or spaces-only) gender values become "Unknown";
      # previously blank values were trimmed to "" and kept as a separate category.
      .withColumn(
          "gender",
          F.when(
              F.col("gender").isNull() | (F.trim(F.col("gender")) == ""),
              F.lit("Unknown"),
          ).otherwise(F.trim(F.col("gender"))),
      )
      .withColumn("city", F.initcap(F.trim(F.col("city"))))
      .withColumn("state", F.upper(F.trim(F.col("state"))))
      # Parse dob with the MM/dd/yyyy pattern.
      # NOTE(review): an earlier comment here claimed dob is already DATE in the
      # bronze schema — confirm the actual bronze type; if it really is DATE this
      # parse step is redundant and can be removed.
      .withColumn("dob", F.to_date(F.col("dob"), "MM/dd/yyyy"))
)

In [0]:
# display(silver_customers)

In [0]:
# Persist the cleaned customers to the Silver layer as a Delta table.
# The write uses overwrite mode with the mergeSchema option enabled.
(
    silver_customers.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
    .saveAsTable("vsqproject.silver.customers_silver")
)