In [0]:
from pyspark.sql.functions import sum, avg, count, round, col

# 🔹 Add ADF parameters (input/output paths)
# Declare the notebook widgets that the ADF pipeline fills in. Both default to
# an empty string when the caller does not supply a value.
dbutils.widgets.text("input", "")
dbutils.widgets.text("output", "")

input_path = dbutils.widgets.get("input")
output_path = dbutils.widgets.get("output")

# Fail fast with a message that names the missing parameter. Without this, an
# empty path is handed to the Delta reader/writer and the failure surfaces as
# a path error that does not point back to the widget.
if not input_path.strip():
    raise ValueError(
        "Widget 'input' is empty: pass the path of the Silver Delta table."
    )
if not output_path.strip():
    raise ValueError(
        "Widget 'output' is empty: pass the path for the Gold Delta table."
    )

# Read the Silver Delta table from the path given by the "input" widget.
silver_df = spark.read.format("delta").load(input_path)

# (source column, output column) pairs that are averaged per Loan_Category.
# The list order defines the order of the averaged columns in the output.
averaged_columns = [
    ("Income", "Avg_Income"),
    ("Expenditure", "Avg_Expenditure"),
    ("Overdue", "Avg_Overdue_Days"),
    ("Debt_Record", "Avg_Debt_Record"),
    ("Returned_Cheque", "Avg_Returned_Cheques"),
    ("Dishonour_of_Bill", "Avg_Dishonour_Bills"),
]

# Gold aggregation: one row per Loan_Category.
# NOTE: `sum` and `round` are the pyspark.sql.functions versions imported at
# the top of the file, not the Python builtins.
# Rounding to 2 decimals is applied inside the aggregate expressions, so no
# follow-up withColumn pass is needed; output column names and order are:
# Loan_Category, Total_Customers, Total_Loan_Amount, then the averages above.
gold_df = silver_df.groupBy("Loan_Category").agg(
    # count() counts rows whose Customer_ID is non-null; it does not
    # de-duplicate customers.
    count("Customer_ID").alias("Total_Customers"),
    round(sum("Loan_Amount"), 2).alias("Total_Loan_Amount"),
    *[
        round(avg(source_name), 2).alias(output_name)
        for source_name, output_name in averaged_columns
    ],
)

# Write the Gold Delta table to the path given by the "output" widget,
# replacing whatever data is already stored there.
gold_df.write.format("delta").mode("overwrite").save(output_path)
