# In [0]:
# Databricks notebook: MAS610_Silver_Transform
# Purpose: clean & standardize raw datasets (Exercise 2)
from pyspark.sql import SparkSession, functions as F

# Obtain the Spark session for this notebook under the application name
# "MAS610_Silver_Transform".
session_builder = SparkSession.builder.appName("MAS610_Silver_Transform")
spark = session_builder.getOrCreate()

# Declare the "silver_dir" text widget with its default location, then read
# back the value the widget currently holds. Every path below is built from it.
SILVER_DIR_WIDGET = "silver_dir"
dbutils.widgets.text(SILVER_DIR_WIDGET, "/mnt/silver/mas610_ex2")
silver_dir = dbutils.widgets.get(SILVER_DIR_WIDGET)

# Read Bronze snapshots
# The three raw inputs are Delta tables stored under silver_dir with a
# "_bronze_" name prefix.
# NOTE(review): Bronze data is read from the Silver directory - confirm this
# layout is intentional.
bronze_paths = {
    "accounts": f"{silver_dir}/_bronze_accounts.delta",
    "loans": f"{silver_dir}/_bronze_loans.delta",
    "collateral": f"{silver_dir}/_bronze_collateral.delta",
}

accounts_raw = spark.read.load(bronze_paths["accounts"], format="delta")
loans_raw = spark.read.load(bronze_paths["loans"], format="delta")
collateral_raw = spark.read.load(bronze_paths["collateral"], format="delta")

# Cleanse & Standardize
# Accounts: each entry rewrites the column of the same name, applied in the
# listed order (customer_id -> string, account_type/region -> upper case,
# branch_code -> surrounding whitespace trimmed).
account_rewrites = [
    ("customer_id", F.col("customer_id").cast("string")),
    ("account_type", F.upper(F.col("account_type"))),
    ("branch_code", F.trim(F.col("branch_code"))),
    ("region", F.upper(F.col("region"))),
]

accounts_silver = accounts_raw
for target_column, rewrite_expr in account_rewrites:
    accounts_silver = accounts_silver.withColumn(target_column, rewrite_expr)

# Drop fully identical rows before stamping the load time, so the timestamp
# column does not take part in the duplicate comparison.
accounts_silver = accounts_silver.dropDuplicates()
accounts_silver = accounts_silver.withColumn("load_timestamp", F.current_timestamp())

# Loans: column expressions used to normalise identifiers, amount and currency.
loan_id_as_text = F.col("loan_id").cast("string")
customer_id_as_text = F.col("customer_id").cast("string")
notional_as_double = F.col("notional").cast("double")
currency_upper = F.upper(F.col("currency"))

# Apply the rewrites one at a time, in the same order as the expressions above.
loans_typed = loans_raw.withColumn("loan_id", loan_id_as_text)
loans_typed = loans_typed.withColumn("customer_id", customer_id_as_text)
loans_typed = loans_typed.withColumn("notional", notional_as_double)
loans_typed = loans_typed.withColumn("currency", currency_upper)

# Null currencies default to "USD"; empty strings are left as they are.
loans_filled = loans_typed.na.fill({"currency": "USD"})

# Keep a single row per loan_id.
# NOTE(review): which of several rows sharing a loan_id survives is not
# controlled here - confirm that is acceptable.
loans_unique = loans_filled.dropDuplicates(["loan_id"])

loans_silver = loans_unique.withColumn("load_timestamp", F.current_timestamp())

# Collateral: normalise the loan key, the collateral type and its value.
collateral_silver = collateral_raw.withColumn(
    "loan_id", F.col("loan_id").cast("string")
)
collateral_silver = collateral_silver.withColumn(
    "collateral_type", F.upper(F.col("collateral_type"))
)
collateral_silver = collateral_silver.withColumn(
    "collateral_value", F.col("collateral_value").cast("double")
)

# Null collateral values (after the cast to double) are replaced with 0.
collateral_silver = collateral_silver.na.fill({"collateral_value": 0})

# Keep a single row per loan_id.
# NOTE(review): if a loan can carry several collateral items, all but one are
# dropped here and the surviving row is not chosen explicitly - confirm.
collateral_silver = collateral_silver.drop_duplicates(["loan_id"])

collateral_silver = collateral_silver.withColumn(
    "load_timestamp", F.current_timestamp()
)

# Persist to Silver zone
# Each cleaned DataFrame overwrites its own Delta location under silver_dir.
# Writes run in this order: accounts, loans, collateral.
silver_outputs = (
    (accounts_silver, "accounts_silver.delta"),
    (loans_silver, "loans_silver.delta"),
    (collateral_silver, "collateral_silver.delta"),
)
for silver_df, delta_name in silver_outputs:
    silver_df.write.mode("overwrite").format("delta").save(f"{silver_dir}/{delta_name}")

# The status message starts with the floppy-disk emoji (U+1F4BE). It is written
# as an escape sequence so the source stays ASCII-only and the character cannot
# be corrupted by a wrong file encoding.
print("\U0001F4BE Silver transformation complete.")
