In [0]:
from pyspark.sql.functions import col, when, concat, lit

# Load Silver Table
df_silver = spark.table("churn_catalog.processed.customer_profiles")

# Bucket boundaries and risk thresholds, named so they can be tuned in one place.
# Every boundary is an exclusive upper bound for the bucket it names.
AGE_YOUNG_MAX = 25            # age < 25   -> "Young"
AGE_ADULT_MAX = 45            # age < 45   -> "Adult"
AGE_MIDDLE_MAX = 65           # age < 65   -> "Middle Age", otherwise "Senior"
BALANCE_LOW_MAX = 50000       # balance < 50000  -> "Low"
BALANCE_MEDIUM_MAX = 150000   # balance < 150000 -> "Medium", otherwise "High"
TENURE_NEW_MAX = 3            # tenure < 3 -> "New"
TENURE_MEDIUM_MAX = 7         # tenure < 7 -> "Medium", otherwise "Loyal"
HIGH_RISK_BALANCE_MAX = 30000  # high risk requires balance below this value
HIGH_RISK_AGE_MIN = 45         # high risk requires age strictly above this value

# GOLD FEATURE ENGINEERING
#
# Bucket columns: when() branches are tested in order, so each branch only
# needs its upper bound. The last bucket is guarded by isNotNull() instead of
# otherwise(): a row whose input is NULL matches no branch and gets a NULL
# category rather than being silently counted in the top bucket.
df_gold = (
    df_silver

    # AGE GROUPS
    .withColumn(
        "age_group",
        when(col("age") < AGE_YOUNG_MAX, "Young")
        .when(col("age") < AGE_ADULT_MAX, "Adult")
        .when(col("age") < AGE_MIDDLE_MAX, "Middle Age")
        .when(col("age").isNotNull(), "Senior")
    )

    # BALANCE CATEGORY
    .withColumn(
        "balance_category",
        when(col("balance") < BALANCE_LOW_MAX, "Low")
        .when(col("balance") < BALANCE_MEDIUM_MAX, "Medium")
        .when(col("balance").isNotNull(), "High")
    )

    # TENURE BUCKET
    .withColumn(
        "tenure_bucket",
        when(col("tenure") < TENURE_NEW_MAX, "New")
        .when(col("tenure") < TENURE_MEDIUM_MAX, "Medium")
        .when(col("tenure").isNotNull(), "Loyal")
    )

    # ACTIVITY SCORE: active membership is weighted 2, holding a credit card 1
    .withColumn(
        "activity_score",
        (col("active_member") * 2) + (col("credit_card") * 1)
    )

    # HIGH RISK FLAG: 1 only when all three conditions hold, otherwise 0
    # (a NULL in any condition also results in 0)
    .withColumn(
        "is_high_risk",
        when(
            (col("balance") < HIGH_RISK_BALANCE_MAX) &
            (col("active_member") == 0) &
            (col("age") > HIGH_RISK_AGE_MIN),
            1
        ).otherwise(0)
    )

    # RISK EXPLANATION: text built from the same criteria as the flag above
    .withColumn(
        "risk_reason",
        when(col("is_high_risk") == 1,
            concat(
                lit("Low balance ("), col("balance"), lit("), "),
                lit("Inactive member, "),
                lit(f"Age above {HIGH_RISK_AGE_MIN}")
            )
        ).otherwise("Customer in safe zone")
    )

    # BUSINESS RECOMMENDATION
    .withColumn(
        "recommendation",
        when(col("is_high_risk") == 1,
             "Offer retention plan, extra benefits, and engagement calls"
        ).otherwise("No action required")
    )
)

# Quick visual check of the first ten engineered rows
df_gold.limit(10).display()

# Persist the Gold features as a Delta table, partitioned by country.
# mode("overwrite") replaces the existing table contents; the
# overwriteSchema option is passed so the stored schema may be replaced too.
(
    df_gold.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("country")
    .saveAsTable("churn_catalog.analytics.churn_features")
)


# Gold Layer Summary

In [0]:
from pyspark.sql.functions import col, count

print("GOLD LAYER SUMMARY")

# Read the Gold table back from the catalog so the summary is computed
# from what was actually saved
df_gold = spark.table("churn_catalog.analytics.churn_features")

# Overall row count
total_rows = df_gold.count()
print(f"Total rows in GOLD: {total_rows}")

# Rows flagged as high risk
high_risk_count = df_gold.where(col("is_high_risk") == 1).count()
print(f"High-risk customers: {high_risk_count}")

# Share of high-risk rows; an empty table reports 0 instead of dividing by zero
if total_rows > 0:
    risk_percent = (high_risk_count / total_rows) * 100
else:
    risk_percent = 0
print(f"High-risk percentage: {risk_percent:.2f}%")

def _show_counts(frame, group_column, heading):
    """Print `heading`, then display the row count per `group_column` value.

    Groups are shown with the largest count first.
    """
    print(heading)
    display(
        frame.groupBy(group_column)
             .count()
             .sort(col("count").desc())
    )


# Distributions over the whole Gold table
_show_counts(df_gold, "balance_category", "\nBalance Category Distribution:")
_show_counts(df_gold, "age_group", "\nAge Group Distribution:")
_show_counts(df_gold, "recommendation", "\nRecommendation Summary:")

# Same breakdown per country, restricted to rows flagged as high risk
_show_counts(
    df_gold.where(col("is_high_risk") == 1),
    "country",
    "\nHigh-Risk Customers by Country:",
)

# A few example rows from the Gold table
print("\nSample GOLD Records:")
display(df_gold.limit(10))


Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

# Recommendations for High-Risk Customers

In [0]:
# The 20 high-risk customers with the lowest balances, together with the
# reason they were flagged and the suggested action.
top_customers = (
    df_gold.filter(col("is_high_risk") == 1)
           .select("customer_id", "balance", "age", "risk_reason", "recommendation")
           # Sorting on balance alone leaves rows with equal balances in an
           # arbitrary order, so limit(20) could return a different set on each
           # run; customer_id is added as a tie-breaker to keep the result stable.
           .orderBy(col("balance").asc(), col("customer_id").asc())
           .limit(20)
)

display(top_customers)
