# 10 – Build Silver

In [0]:
# ──────────────────────────────────────────────────────────────
# Silver build – session setup
# ──────────────────────────────────────────────────────────────

# Notebook parameter: the catalog to build into (text widget "CATALOG",
# default "reporting_factory_risk_profile").
dbutils.widgets.text("CATALOG", "reporting_factory_risk_profile")
catalog = dbutils.widgets.get("CATALOG")

# Select the catalog, make sure the silver schema exists, then make it the
# current schema. The statements run in exactly this order.
for _setup_stmt in (
    f"USE CATALOG {catalog}",
    "CREATE SCHEMA IF NOT EXISTS silver",
    "USE SCHEMA silver",
):
    spark.sql(_setup_stmt)

# SQL expression for borrower_id: member_id as a string when it is present,
# otherwise 'M_' followed by the SHA-256 hex digest of the loan id, so the
# fallback value is the same on every run for the same id.
borrower_id_expr = (
    "COALESCE("
    "CAST(member_id AS STRING), "
    "CONCAT('M_', sha2(CAST(id AS STRING), 256))"
    ")"
)

# --- silver.loans -------------------------------------------------------------
# Distinct, typed projection of bronze.lending_raw (rows with a NULL id are
# dropped):
#   loan_id       - id as STRING
#   borrower_id   - see borrower_id_expr
#   loan_amount   - loan_amnt as DOUBLE
#   interest_rate - int_rate with '%' and spaces removed, as DOUBLE
#   term_months   - first run of digits found in term, as INT
#   grade         - grade as STRING
#   issue_date    - issue_d parsed with the pattern 'MMM-yyyy'
#
# The digit pattern is deliberately written as [0-9]+ instead of \d+.
# Spark's SQL parser unescapes string literals by default, so a '\d+' literal
# reaches the regex engine as 'd+', matches nothing, and term_months ends up
# NULL. A backslash-free character class behaves the same whether or not
# spark.sql.parser.escapedStringLiterals is enabled.
spark.sql(f"""
CREATE OR REPLACE TABLE silver.loans AS
SELECT DISTINCT
  CAST(id AS STRING) AS loan_id,
  {borrower_id_expr} AS borrower_id,
  CAST(loan_amnt AS DOUBLE) AS loan_amount,
  CAST(regexp_replace(COALESCE(int_rate,''),'[% ]','') AS DOUBLE) AS interest_rate,
  CAST(regexp_extract(COALESCE(term,''),'[0-9]+',0) AS INT) AS term_months,
  CAST(grade AS STRING) AS grade,
  to_date(CAST(issue_d AS STRING),'MMM-yyyy') AS issue_date
FROM bronze.lending_raw
WHERE id IS NOT NULL
""")

# --- silver.borrowers ---------------------------------------------------------
# Borrower attributes from bronze.lending_raw (rows with a NULL id are dropped)
# with bad/null DTI, utilization, income and FICO replaced by the default 0.
#
# Built with a single CTAS:
#   * inner query  - casts each raw value to STRING, strips every character
#                    that is not a digit, '.' or '-', maps an empty/NULL result
#                    to '0', and de-duplicates the cleaned rows;
#   * outer query  - casts the cleaned strings to their numeric types and
#                    falls back to 0 when a cast yields NULL (e.g. a leftover
#                    "-" or "1.2.3"), so the table really has no NULL metrics.
#
# Doing this in one statement avoids materialising a string-typed
# silver.borrowers first and then replacing the table from a query that reads
# that same table; a failure between the two steps would have left the
# intermediate string schema under the final table name.
#
# The character class needs no backslash: '.' is literal inside [...], and a
# trailing '-' is literal as well.
spark.sql(f"""
CREATE OR REPLACE TABLE silver.borrowers AS
SELECT
  borrower_id,
  COALESCE(CAST(dti_str AS DOUBLE), 0)   AS dti,
  COALESCE(CAST(inc_str AS DOUBLE), 0)   AS annual_income,
  COALESCE(CAST(util_str AS DOUBLE), 0)  AS utilization,
  COALESCE(CAST(fico_str AS INT), 0)     AS fico_score
FROM (
  SELECT DISTINCT
    {borrower_id_expr} AS borrower_id,
    -- replace null or NaN with 0
    COALESCE(NULLIF(regexp_replace(CAST(dti AS STRING), '[^0-9.-]', ''), ''), '0') AS dti_str,
    COALESCE(NULLIF(regexp_replace(CAST(revol_util AS STRING), '[^0-9.-]', ''), ''), '0') AS util_str,
    COALESCE(NULLIF(regexp_replace(CAST(annual_inc AS STRING), '[^0-9.-]', ''), ''), '0') AS inc_str,
    COALESCE(NULLIF(regexp_replace(CAST(fico_range_high AS STRING), '[^0-9.-]', ''), ''), '0') AS fico_str
  FROM bronze.lending_raw
  WHERE id IS NOT NULL
) cleaned
""")

print("✅ Silver build complete — NULLs replaced with defaults (0).")