In [1]:
import pandas as pd

df = pd.read_csv("Churn cleaned.csv")
df.head()


Unnamed: 0,customerid,gender,seniorcitizen,partner,dependents,tenure,phoneservice,multiplelines,internetservice,onlinesecurity,...,streamingmovies,contract,paperlessbilling,paymentmethod,monthlycharges,totalcharges,churn,tenure_group,service_count,customer_value
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,0-1 year,1,29.85
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,No,One year,No,Mailed check,56.95,1889.5,No,2-4 years,3,1936.3
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,0-1 year,3,107.7
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,No,One year,No,Bank transfer (automatic),42.3,1840.75,No,2-4 years,3,1903.5
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,0-1 year,1,141.4


In [2]:
df['value_segment'] = pd.qcut(
    df['customer_value'],
    q=3,
    labels=['Low Value', 'Medium Value', 'High Value']
)


In [4]:
df['engagement_level'] = pd.cut(
    df['service_count'],
    bins=[0, 2, 5, 10],
    labels=['Low Engagement', 'Medium Engagement', 'High Engagement']
)


In [5]:
df[['service_count', 'engagement_level']].head(10)


Unnamed: 0,service_count,engagement_level
0,1,Low Engagement
1,3,Medium Engagement
2,3,Medium Engagement
3,3,Medium Engagement
4,1,Low Engagement
5,4,Medium Engagement
6,3,Medium Engagement
7,1,Low Engagement
8,5,Medium Engagement
9,3,Medium Engagement


In [6]:
def retention_risk(row):
    if row['churn'] == 1:
        return 'Already Churned'
    elif row['tenure'] <= 12 and row['monthlycharges'] > df['monthlycharges'].median():
        return 'High Risk'
    elif row['service_count'] <= 2:
        return 'Medium Risk'
    else:
        return 'Low Risk'

df['retention_risk'] = df.apply(retention_risk, axis=1)


In [7]:
df['retention_risk'].value_counts(normalize=True) * 100


retention_risk
Low Risk       47.280988
Medium Risk    40.934261
High Risk      11.784751
Name: proportion, dtype: float64

In [8]:
segment_summary = (
    df.groupby(['retention_risk', 'value_segment'])
    .agg(
        customer_count=('customerid', 'count'),
        avg_monthly_charges=('monthlycharges', 'mean'),
        avg_tenure=('tenure', 'mean')
    )
    .reset_index()
)

segment_summary


  df.groupby(['retention_risk', 'value_segment'])


Unnamed: 0,retention_risk,value_segment,customer_count,avg_monthly_charges,avg_tenure
0,High Risk,Low Value,612,82.286111,3.380719
1,High Risk,Medium Value,218,87.962156,10.105505
2,High Risk,High Value,0,,
3,Low Risk,Low Value,194,57.453093,5.907216
4,Low Risk,Medium Value,924,74.049838,25.87013
5,Low Risk,High Value,2212,90.408883,57.406872
6,Medium Risk,Low Value,1543,32.077285,8.513934
7,Medium Risk,Medium Value,1204,39.257807,42.813123
8,Medium Risk,High Value,136,75.497794,51.470588


In [9]:
segment_summary = (
    df.groupby(['retention_risk', 'value_segment'], observed=False)
    .agg(
        customer_count=('customerid', 'count'),
        avg_monthly_charges=('monthlycharges', 'mean'),
        avg_tenure=('tenure', 'mean')
    )
    .reset_index()
)


Customer Segmentation & Retention Risk Analysis

Objective:
The objective of this analysis is to segment customers into meaningful groups based on
customer value, engagement, and churn risk to support targeted retention strategies.

Segmentation Approach:
Customers were segmented using rule-based logic instead of machine learning models.
The segmentation was based on tenure, monthly charges, number of services used,
and churn status.

Retention Risk Summary:
Low Risk customers account for approximately 47% of the base.
Medium Risk customers account for around 41%.
High Risk customers represent about 12%.

Key Findings:
High-risk customers are mostly new customers with high monthly charges.
Medium-risk customers show moderate engagement and value.
Low-risk customers have long tenure and high engagement.

Business Impact:
High-value customers in medium and high-risk segments contribute significantly to
potential revenue loss if churned.

Recommendations:
Provide boarding support and discounts to high-risk customers.
Introduce loyalty programs
