In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv("Data.csv")

print("Dataset loaded successfully ✅")
print("Dataset shape (rows, columns):", df.shape)

Dataset loaded successfully ✅
Dataset shape (rows, columns): (7043, 21)


In [2]:
print("Step 3: Cleaning TotalCharges column...")

df["TotalCharges"] = pd.to_numeric(df["TotalCharges"], errors="coerce")

print("Missing values in TotalCharges before filling:")
print(df["TotalCharges"].isnull().sum())

df["TotalCharges"] = df["TotalCharges"].fillna(df["TotalCharges"].median())

print("Missing values in TotalCharges after filling:")
print(df["TotalCharges"].isnull().sum())


Step 3: Cleaning TotalCharges column...
Missing values in TotalCharges before filling:
11
Missing values in TotalCharges after filling:
0


In [3]:
print("Step 4: Segmenting customers based on tenure...")

df["TenureSegment"] = pd.cut(
    df["tenure"],
    bins=[0, 12, 24, 48, 72],
    labels=["0-1 year", "1-2 years", "2-4 years", "4-6 years"]
)

print("Tenure segmentation completed.")
print(df["TenureSegment"].value_counts())


Step 4: Segmenting customers based on tenure...
Tenure segmentation completed.
TenureSegment
4-6 years    2239
0-1 year     2175
2-4 years    1594
1-2 years    1024
Name: count, dtype: int64


In [4]:
print("Step 5: Segmenting customers based on Monthly Charges...")

df["ChargesSegment"] = pd.qcut(
    df["MonthlyCharges"],
    q=3,
    labels=["Low Charges", "Medium Charges", "High Charges"]
)

print("Monthly charge segmentation completed.")
print(df["ChargesSegment"].value_counts())


Step 5: Segmenting customers based on Monthly Charges...
Monthly charge segmentation completed.
ChargesSegment
Low Charges       2351
High Charges      2347
Medium Charges    2345
Name: count, dtype: int64


In [5]:
print("Step 6: Reviewing contract type distribution...")

print(df["Contract"].value_counts())


Step 6: Reviewing contract type distribution...
Contract
Month-to-month    3875
Two year          1695
One year          1473
Name: count, dtype: int64


In [6]:
print("Step 7: Calculating churn rate by tenure segment...")

tenure_churn = pd.crosstab(
    df["TenureSegment"],
    df["Churn"],
    normalize="index"
) * 100

print("Churn rate (%) by Tenure Segment:")
print(tenure_churn)


Step 7: Calculating churn rate by tenure segment...
Churn rate (%) by Tenure Segment:
Churn                 No        Yes
TenureSegment                      
0-1 year       52.321839  47.678161
1-2 years      71.289062  28.710938
2-4 years      79.611041  20.388959
4-6 years      90.486824   9.513176


In [7]:
print("Step 8: Calculating churn rate by monthly charges segment...")

charges_churn = pd.crosstab(
    df["ChargesSegment"],
    df["Churn"],
    normalize="index"
) * 100

print("Churn rate (%) by Charges Segment:")
print(charges_churn)


Step 8: Calculating churn rate by monthly charges segment...
Churn rate (%) by Charges Segment:
Churn                  No        Yes
ChargesSegment                      
Low Charges     84.134411  15.865589
Medium Charges  70.319829  29.680171
High Charges    65.913933  34.086067


In [8]:
print("Step 9: Calculating churn rate by contract type...")

contract_churn = pd.crosstab(
    df["Contract"],
    df["Churn"],
    normalize="index"
) * 100

print("Churn rate (%) by Contract Type:")
print(contract_churn)


Step 9: Calculating churn rate by contract type...
Churn rate (%) by Contract Type:
Churn                  No        Yes
Contract                            
Month-to-month  57.290323  42.709677
One year        88.730482  11.269518
Two year        97.168142   2.831858


In [9]:
print("Step 10: Identifying high-value customers at risk of churn...")

high_value_customers = df[
    (df["ChargesSegment"] == "High Charges") &
    (df["TenureSegment"].isin(["2-4 years", "4-6 years"])) &
    (df["Churn"] == "Yes")
]

print("Number of high-value customers at risk:", high_value_customers.shape[0])
print("Sample of high-value customers at risk:")
print(high_value_customers.head())


Step 10: Identifying high-value customers at risk of churn...
Number of high-value customers at risk: 371
Sample of high-value customers at risk:
    customerID  gender  SeniorCitizen Partner Dependents  tenure PhoneService  \
8   7892-POOKP  Female              0     Yes         No      28          Yes   
13  0280-XJGEX    Male              0      No         No      49          Yes   
26  6467-CHFZW    Male              0     Yes        Yes      47          Yes   
38  5380-WJKOV    Male              0      No         No      34          Yes   
99  4598-XLKNJ  Female              1     Yes         No      25          Yes   

   MultipleLines InternetService OnlineSecurity  ... StreamingTV  \
8            Yes     Fiber optic             No  ...         Yes   
13           Yes     Fiber optic             No  ...         Yes   
26           Yes     Fiber optic             No  ...         Yes   
38           Yes     Fiber optic             No  ...         Yes   
99            No     Fiber 

In [10]:
print("Step 11: Key Insights Summary")

print("- Customers with short tenure show higher churn rates.")
print("- High monthly charge customers are more likely to churn.")
print("- Month-to-month contracts have the highest churn risk.")
print("- Long-tenure, high-paying customers who churn are critical revenue risks.")
print("- These customers should be prioritized for retention campaigns.")


Step 11: Key Insights Summary
- Customers with short tenure show higher churn rates.
- High monthly charge customers are more likely to churn.
- Month-to-month contracts have the highest churn risk.
- Long-tenure, high-paying customers who churn are critical revenue risks.
- These customers should be prioritized for retention campaigns.
