In [0]:
from pyspark.sql.functions import col, regexp_replace, trim

# 🔹 Add ADF parameters (input/output paths)
dbutils.widgets.text("input", "")
dbutils.widgets.text("output", "")

input_path = dbutils.widgets.get("input")
output_path = dbutils.widgets.get("output")

# Read Bronze Delta table (from ADF input parameter)
bronze_df = spark.read.format("delta").load(input_path)

# Transformations for Silver
silver_df = (
    bronze_df
    # Remove commas from Loan_Amount & cast to integer
    .withColumn("Loan_Amount", regexp_replace(col("Loan_Amount"), ",", "").cast("int"))
    
    # Remove commas from Debt_Record & cast to integer
    .withColumn("Debt_Record", regexp_replace(col("Debt_Record"), ",", "").cast("int"))
    
    # Trim spaces in string columns
    .withColumn("Customer_ID", trim(col("Customer_ID")))
    .withColumn("Gender", trim(col("Gender")))
    .withColumn("Occupation", trim(col("Occupation")))
    .withColumn("Marital_Status", trim(col("Marital_Status")))
    .withColumn("Loan_Category", trim(col("Loan_Category")))
    
    # Cast numeric fields properly
    .withColumn("Age", col("Age").cast("int"))
    .withColumn("Family_Size", col("Family_Size").cast("int"))
    .withColumn("Income", col("Income").cast("int"))
    .withColumn("Expenditure", col("Expenditure").cast("int"))
    .withColumn("Use_Frequency", col("Use_Frequency").cast("int"))
    .withColumn("Overdue", col("Overdue").cast("int"))
    .withColumn("Returned_Cheque", col("Returned_Cheque").cast("int"))
    .withColumn("Dishonour_of_Bill", col("Dishonour_of_Bill").cast("int"))
)

# Write Silver Delta table (to ADF output parameter)
silver_df.write.format("delta").mode("overwrite").save(output_path)


[0;31m---------------------------------------------------------------------------[0m
[0;31mIllegalArgumentException[0m                  Traceback (most recent call last)
File [0;32m<command-4569668154633621>, line 11[0m
[1;32m      8[0m output_path [38;5;241m=[39m dbutils[38;5;241m.[39mwidgets[38;5;241m.[39mget([38;5;124m"[39m[38;5;124moutput[39m[38;5;124m"[39m)
[1;32m     10[0m [38;5;66;03m# Read Bronze Delta table (from ADF input parameter)[39;00m
[0;32m---> 11[0m bronze_df [38;5;241m=[39m spark[38;5;241m.[39mread[38;5;241m.[39mformat([38;5;124m"[39m[38;5;124mdelta[39m[38;5;124m"[39m)[38;5;241m.[39mload(input_path)
[1;32m     13[0m [38;5;66;03m# Transformations for Silver[39;00m
[1;32m     14[0m silver_df [38;5;241m=[39m (
[1;32m     15[0m     bronze_df
[1;32m     16[0m     [38;5;66;03m# Remove commas from Loan_Amount & cast to integer[39;00m
[0;32m   (...)[0m
[1;32m     37[0m     [38;5;241m.[39mwithColumn([38;5;124m"[39m