# 00 – Ingest Bronze
Set the `CATALOG` and `RAW_PATH` parameters via notebook widgets or Job parameters.

In [0]:
# ──────────────────────────────────────────────────────────────
# Bronze Ingestion Notebook  (10_bronze_ingest)
# ──────────────────────────────────────────────────────────────

# Notebook parameters: register each text widget with its default value, then
# read back the current value of both widgets.
for widget_name, widget_default in (
    ("CATALOG", "reporting_factory_risk_profile"),
    ("RAW_PATH", "abfss://raw@riskprofilestorageacct.dfs.core.windows.net/lending_club/"),
):
    dbutils.widgets.text(widget_name, widget_default)

catalog, raw_path = (
    dbutils.widgets.get(widget_name) for widget_name in ("CATALOG", "RAW_PATH")
)

# ── ADLS key setup ────────────────────────────────────────────
# Fetch the storage account key from the secret scope and register it on the
# Spark session configuration for the riskprofilestorageacct account.
# NOTE(review): the account name is hard-coded here while RAW_PATH is a
# widget; a RAW_PATH on a different account would not be covered — confirm.
key = dbutils.secrets.get(
    scope="adls-riskprofile-secrets",
    key="adls-riskprofile-key",
)
spark.conf.set("fs.azure.account.key.riskprofilestorageacct.dfs.core.windows.net", key)

# ── Context ──────────────────────────────────────────────────
# Select the target catalog, make sure the bronze schema exists, and make it
# the current schema.
# CATALOG comes from a widget, so it is validated and emitted as a
# backtick-delimited identifier instead of being spliced into the SQL raw:
# a name containing characters such as "-" would otherwise fail to parse.
# Surrounding backticks supplied by the caller are tolerated.
catalog_name = catalog.strip().strip("`")
if not catalog_name:
    raise ValueError("CATALOG widget is empty; set it to the target catalog name.")
quoted_catalog = "`" + catalog_name.replace("`", "``") + "`"

spark.sql(f"USE CATALOG {quoted_catalog}")
spark.sql("CREATE SCHEMA IF NOT EXISTS bronze")
spark.sql("USE SCHEMA bronze")

# ── Read raw CSVs ─────────────────────────────────────────────
# IMPORTANT: force all columns to STRING to avoid schema merge issues later.
# RAW_PATH is user-supplied, so normalize it to exactly one trailing "/" before
# appending the glob; a value without the slash would otherwise become
# ".../lending_club*.csv" and match the wrong files (or none).
csv_glob = f"{raw_path.strip().rstrip('/')}/*.csv"
df = (
    spark.read
        .option("header", True)            # first line of each file holds the column names
        .option("inferSchema", False)      # ← no type inference
        .option("escape", "\"")            # use '"' as the escape character inside quoted fields
        .csv(csv_glob)
)

# Sanitize column names so every header is usable as a Delta column name:
#   * trim surrounding whitespace,
#   * spell out "%" as "pct",
#   * replace each character Delta rejects in column names
#     (space , ; { } ( ) newline tab =) with "_",
#   * lower-case the result.
# Headers whose only problem characters were spaces or "%" come out exactly as
# they did with the previous space/"%"-only replacement.
_delta_invalid_chars = str.maketrans({ch: "_" for ch in " ,;{}()\n\t="})
sanitized_columns = [
    c.strip().replace("%", "pct").translate(_delta_invalid_chars).lower()
    for c in df.columns
]

# Two headers collapsing to the same name would make the table ambiguous;
# fail here, naming the offenders, rather than deep inside the Delta write.
seen_names = set()
duplicate_columns = set()
for name in sanitized_columns:
    if name in seen_names:
        duplicate_columns.add(name)
    seen_names.add(name)
if duplicate_columns:
    raise ValueError(
        f"Column names collide after sanitizing: {sorted(duplicate_columns)}"
    )

# Rename positionally in one projection instead of issuing one
# withColumnRenamed call per column.
df = df.toDF(*sanitized_columns)

# ── Write to Delta ────────────────────────────────────────────
# Full refresh: every load overwrites the bronze table, including its schema
# (or use append if you partition by date).
df.write.saveAsTable(
    "bronze.lending_raw",
    format="delta",
    mode="overwrite",
    overwriteSchema="true",
)

# Report on what was actually written. Counting the saved table avoids
# df.count(), which would re-read and re-parse every raw CSV a second time
# just to print a number.
written = spark.table("bronze.lending_raw")
print(f"✅ Bronze loaded successfully → {catalog}.bronze.lending_raw")
print(f"Rows: {written.count()}, Columns: {len(written.columns)}")