In [0]:
from pyspark.sql.functions import avg, count, when

# Gold-layer KPIs: one row per distinct Churn value, holding the row count and
# the mean of each numeric measure read from the silver_telco_churn table.
gold_df = spark.table("silver_telco_churn").groupBy("Churn").agg(
    count("*").alias("total_customers"),
    # (source column, output alias) pairs; order here fixes the output column order.
    *[
        avg(source_col).alias(kpi_name)
        for source_col, kpi_name in (
            ("tenure", "avg_tenure"),
            ("MonthlyCharges", "avg_monthly_charges"),
            ("TotalCharges", "avg_total_charges"),
        )
    ],
)

# Bare last expression: the cell output is the DataFrame's repr (its schema).
gold_df

DataFrame[Churn: string, total_customers: bigint, avg_tenure: double, avg_monthly_charges: double, avg_total_charges: double]

In [0]:
# Save the KPI frame as the Delta table gold_telco_churn_kpis; "overwrite"
# mode replaces whatever data the table already holds.
(
    gold_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold_telco_churn_kpis")
)

Churn rate by contract type

In [0]:
%sql
-- Churn by contract type: customer count, churned count, and the churned
-- share as a percentage rounded to 2 decimals, highest rate first.
WITH flagged AS (
  SELECT
    Contract,
    -- 1 only when Churn is exactly 'Yes'; every other value (or NULL) counts as 0.
    CASE WHEN Churn = 'Yes' THEN 1 ELSE 0 END AS is_churned
  FROM silver_telco_churn
)
SELECT
  Contract,
  COUNT(*)        AS total,
  SUM(is_churned) AS churned,
  ROUND(100.0 * SUM(is_churned) / COUNT(*), 2) AS churn_rate
FROM flagged
GROUP BY Contract
ORDER BY churn_rate DESC;

Contract,total,churned,churn_rate
Month-to-month,3875,1655,42.71
One year,1473,166,11.27
Two year,1695,48,2.83


Churn rate by payment method

In [0]:
%sql
-- Churn rate (percentage, 2 decimals) per payment method, highest rate first.
WITH flagged AS (
  SELECT
    PaymentMethod,
    -- 1 only when Churn is exactly 'Yes'; every other value (or NULL) counts as 0.
    CASE WHEN Churn = 'Yes' THEN 1 ELSE 0 END AS is_churned
  FROM silver_telco_churn
)
SELECT
  PaymentMethod,
  ROUND(100.0 * SUM(is_churned) / COUNT(*), 2) AS churn_rate
FROM flagged
GROUP BY PaymentMethod
ORDER BY churn_rate DESC;

PaymentMethod,churn_rate
Electronic check,45.29
Mailed check,19.11
Bank transfer (automatic),16.71
Credit card (automatic),15.24


Churn rate by tenure bucket

In [0]:
%sql
-- Customer count and churn rate (percentage, 2 decimals) per tenure bucket,
-- highest churn rate first.
-- NOTE(review): the bucket labels presume tenure is measured in months — confirm.
WITH bucketed AS (
  SELECT
    -- Branches are tested top-down, so each bucket's lower bound is implied by
    -- the branch above it. Rows matching no WHEN (including NULL tenure) fall
    -- into '4+ years'.
    CASE
      WHEN tenure < 12 THEN '0–1 year'
      WHEN tenure < 24 THEN '1–2 years'
      WHEN tenure < 48 THEN '2–4 years'
      ELSE '4+ years'
    END AS tenure_group,
    -- 1 only when Churn is exactly 'Yes'; every other value (or NULL) counts as 0.
    CASE WHEN Churn = 'Yes' THEN 1 ELSE 0 END AS is_churned
  FROM silver_telco_churn
)
SELECT
  tenure_group,
  COUNT(*) AS customers,
  ROUND(100.0 * SUM(is_churned) / COUNT(*), 2) AS churn_rate
FROM bucketed
GROUP BY tenure_group
ORDER BY churn_rate DESC;

tenure_group,customers,churn_rate
0–1 year,2069,48.28
1–2 years,1047,29.51
2–4 years,1624,20.87
4+ years,2303,9.64
