In [40]:
import pandas as pd
import numpy as np
import plotly.express as px
import nbformat
import plotly.figure_factory as ff
import plotly.io as pio
# Use the "vscode" renderer as the default for every fig.show() call below.
pio.renderers.default = "vscode"

In [22]:
# Print the installed nbformat version.
# NOTE(review): presumably a sanity check that the version is recent enough
# for Plotly's notebook rendering - confirm, or drop this cell.
print(nbformat.__version__)


5.10.4


In [23]:
# Read the customer dataset (path is relative to the notebook's directory)
# and preview its first rows.
data_path = '../data/data.csv'
df = pd.read_csv(data_path)
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [24]:
# (rows, columns) of the loaded frame.
df.shape

(7043, 21)

In [25]:

# Column dtypes and non-null counts; info() prints its report directly.
df.info()

# A notebook cell only renders its *last* expression, so a bare
# `df.describe()` in the middle of the cell is computed and then thrown away.
# Display it explicitly (`display` is provided by the IPython kernel).
display(df.describe())

# Missing values per column (rendered as the cell's result).
df.isnull().sum()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


customerID          0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

In [26]:
# Convert TotalCharges to a numeric dtype. With errors='coerce', any value
# that cannot be parsed becomes NaN instead of raising.
total_charges_raw = df['TotalCharges']
df['TotalCharges'] = pd.to_numeric(total_charges_raw, errors='coerce')

# The null check earlier in the notebook ran before this conversion, so it
# cannot see values that only become missing here. Report how many were
# coerced so the data loss is visible rather than silent.
n_coerced = int(df['TotalCharges'].isna().sum() - total_charges_raw.isna().sum())
print(f"TotalCharges: {n_coerced} value(s) could not be parsed and are now NaN")

In [27]:
# Share of customers in each Churn class (fractions summing to 1).
churn_share = df['Churn'].value_counts(normalize=True)
print(churn_share)


Churn
No     0.73463
Yes    0.26537
Name: proportion, dtype: float64


In [28]:
# Shared palette for the Churn classes, reused by the charts below.
color_discrete_map = dict(
    Yes='#FF6F61',  # soft coral
    No='#6B9AC4',   # muted blue
)

In [29]:
# Donut chart of the overall churn split.
# `color='Churn'` is required for the palette to apply: Plotly Express looks
# up `color_discrete_map` keys in the column passed as `color`, so with
# `names` alone the custom colours were silently ignored.
fig = px.pie(df, names='Churn', color='Churn', title='Churn Distribution',
             hole=0.3, color_discrete_map=color_discrete_map)
fig.update_traces(
    # Plotly Express sets a hovertemplate on the trace, and a hovertemplate
    # takes precedence over `hoverinfo`; define the hover text as a template
    # so label, count and percentage are actually shown.
    hovertemplate='%{label}<br>%{value} customers<br>%{percent}<extra></extra>',
    # Pull the 'Yes' slice out of the ring to emphasise churned customers.
    pull=[0.1 if val == 'Yes' else 0 for val in fig.data[0].labels],
    textinfo='percent+label', textfont_size=10
)

# Centre the title and place a horizontal legend underneath the chart.
fig.update_layout(title_x=0.5,
                  legend_title_text='Churn',
                  legend=dict(orientation='h', yanchor='bottom',
                              y=-0.1, xanchor='center', x=0.5),
                  margin=dict(t=50, b=0, l=0, r=0)
                  )
fig.show()

In [33]:
# Grouped customer counts per gender, split by churn status.
fig = px.histogram(df, x='gender', color='Churn', barmode='group',
                   color_discrete_map=color_discrete_map,
                   title='Churn by Gender')
fig.update_layout(title_x=0.5,
                  xaxis_title='Gender',
                  yaxis_title='Count',
                  legend_title_text='Churn',
                  bargap=0.2,
)
# White outlines keep adjacent bars visually separated.
fig.update_traces(marker_line_width=1.5, marker_line_color='white')
# `%{color}` is not a hovertemplate variable for histogram traces, so the
# churn label never resolved. Plotly Express creates one trace per Churn
# value and names it after that value, so bake each trace's name into its
# own template instead.
fig.for_each_trace(
    lambda trace: trace.update(
        hovertemplate=f'%{{x}}<br>%{{y}} customers<br>Churn: {trace.name}<extra></extra>'
    )
)
fig.show()


In [35]:
# Tenure distribution for each churn class, with every observation drawn
# next to its box.
fig = px.box(
    df,
    x='Churn',
    y='tenure',
    color='Churn',
    points='all',
    hover_data={'tenure': True, 'Churn': True},
    color_discrete_map=color_discrete_map,
    title='Tenure Distribution by Churn',
)

# Axis/legend labelling and centred title.
box_layout = dict(
    title_x=0.5,
    xaxis_title='Churn',
    yaxis_title='Tenure (months)',
    legend_title_text='Churn',
)
# Show the mean line, spread the raw points horizontally and keep them small.
box_trace_style = dict(
    boxmean=True,
    jitter=0.5,
    marker_size=3,
    hovertemplate='Churn: %{x}<br>Tenure: %{y} months',
)

fig.update_layout(**box_layout)
fig.update_traces(**box_trace_style)
fig.show()

In [39]:
# Monthly charges per contract type, split by churn status; each violin also
# shows an inner box plot and all individual points.
fig = px.violin(df, x='Contract', y='MonthlyCharges',
                color='Churn', box=True, points='all',
                title='Monthly Charges By contract Type and Churn',
                color_discrete_map=color_discrete_map,
                hover_data={'MonthlyCharges':True,'Contract':True, 'Churn':True}
              )
fig.update_layout(
    title_x=0.5,
    xaxis_title='Contract Type',
    yaxis_title='Monthly Charges ($)',
    legend_title_text='Churn',
    margin=dict(l=40, r=40, t=60, b=40),
)

# Mean line plus small, semi-transparent points so dense regions stay readable.
fig.update_traces(
  meanline_visible=True,
  marker_size=4,
  marker_opacity=0.6,
)
# `%{color}` is not a hovertemplate variable for violin traces, so the churn
# label never resolved. Plotly Express creates one trace per Churn value and
# names it after that value; insert the trace name into each template.
fig.for_each_trace(
    lambda trace: trace.update(
        hovertemplate=f'Contract: %{{x}}<br>Monthly Charges: %{{y}}$<br>Churn: {trace.name}<extra></extra>'
    )
)
fig.show()

In [48]:
# Pairwise correlations between the numeric customer attributes.
numeric_cols = ['tenure', 'MonthlyCharges', 'TotalCharges', 'SeniorCitizen']
corr = df[numeric_cols].corr()

# Round once; the same matrix feeds both the cell colours and the labels.
corr_rounded = np.round(corr.values, 2)

# Two-stop scale running from the notebook's blue to its coral.
custom_colorscale = [[0.0, '#6B9AC4'], [1.0, '#FF6F61']]

fig = ff.create_annotated_heatmap(
    z=corr_rounded,
    x=corr.columns.tolist(),
    y=corr.index.tolist(),
    annotation_text=corr_rounded.astype(str),
    colorscale=custom_colorscale,
    showscale=True,
)
fig.update_layout(title_text='Correlation Heatmap')
fig.show()

In [49]:
# Nested breakdown of customers: contract -> payment method -> churn status.
hierarchy = ['Contract', 'PaymentMethod', 'Churn']
fig = px.sunburst(
    df,
    path=hierarchy,
    title="Customer distribution by Contract, Payment Method and Churn",
)
fig.show()

In [None]:
# Mean tenure for every (Churn, Contract) combination, kept as ordinary
# columns so Plotly Express can reference them by name.
avg_tenure = df.groupby(['Churn', 'Contract'], as_index=False)['tenure'].mean()

fig = px.bar(
    avg_tenure,
    x='Contract',
    y='tenure',
    color='Churn',
    barmode='group',
    color_discrete_map=color_discrete_map,
    title='Average Tenure by Churn Status and Contract Type',
)
fig.show()


In [52]:
# Within each payment method, the fraction of customers per Churn value.
churn_share_by_pm = df.groupby('PaymentMethod')['Churn'].value_counts(normalize=True)

# Pivot Churn values into columns and keep only the 'Yes' share, i.e. the
# churn rate of each payment method.
churn_rate_pm = churn_share_by_pm.unstack()['Yes'].reset_index()

fig = px.bar(
    churn_rate_pm,
    x='PaymentMethod',
    y='Yes',
    title='Churn Rate by Payment Method',
)
fig.update_layout(yaxis_title='Churn Rate')
fig.show()


Key Insights:
-> About 26% of customers churn, i.e. roughly 1 in 4 customers discontinues the service.
-> Gender is not a major factor: churn rates are similar for male and female customers.
-> Customers with shorter tenure tend to churn, and most churn happens within the first year of service.
-> Customers on month-to-month contracts pay a wide range of monthly charges and tend to churn more.
-> Longer contracts show lower churn rates.
-> Customers on month-to-month contracts who pay by electronic check churn the most.
-> Customers with longer contracts and automatic payments show lower churn.