In [None]:
# import library
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn as sns

In [None]:
# read the Telco customer churn CSV from the relative input directory
csv_path = '../input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv'
df = pd.read_csv(csv_path)

# preview the first rows
df.head()

# Payment Method with the Highest Monthly Charges per Customer

In [None]:
# mean of 'MonthlyCharges' per customer, grouped by 'PaymentMethod'
# the column is selected BEFORE aggregating: calling .mean() on the whole
# grouped frame also tries to average the text columns, which raises a
# TypeError on pandas >= 2.0 and is wasted work on older versions
df1 = (
    df.groupby('PaymentMethod')[['MonthlyCharges']]
    .mean()
    .reset_index()
)
df1.head()

In [None]:
# set plot style
sns.set_style('whitegrid')

# palette color to highlight the highest amount: one color per row of df1,
# 'blue' for the row holding the maximum and 'grey' for the others
# (the maximum is computed once instead of once per row)
highest_charge = df1['MonthlyCharges'].max()
color_focus1 = ['blue' if charge == highest_charge else 'grey' for charge in df1['MonthlyCharges']]

# create bar plot
plt.figure(figsize = (10, 8))
ax = sns.barplot(
    x = 'PaymentMethod',
    y = 'MonthlyCharges',
    data = df1,
    palette = color_focus1
)

# adjust yticks of 'MonthlyCharges': 0 to 80 in steps of 5
monthly_c = list(range(0, 85, 5))
plt.yticks(monthly_c)

# add title and rename xlabel & ylabel
plt.title('Electronic Check is Payment Method with the Highest Monthly Charges per Customer', fontsize = 18, fontweight = 'bold', x = 0.625, y = 1.05)
plt.xlabel('Payment Method', fontsize = 12)
plt.ylabel('Monthly Charges', fontsize = 12);

# add data points: print each bar's height (2 decimals) centered above the bar
for p in ax.patches:
    ax.annotate(
            format(p.get_height(), '.2f'),
            (p.get_x() + p.get_width() / 2., p.get_height()), 
            ha = 'center',
            va = 'center',
            fontweight = 'semibold',
            size = 13,
            xytext = (0, 7), # shift the text 7 points up from the bar top
            textcoords = 'offset points'
    )

# Number of Customers Group by Tenure

In [None]:
# create new column 'TenureGroup'
# bands: tenure below 21 -> 'low_tenure', 21 through 41 -> 'medium_tenure',
# anything else -> 'high_tenure'
tenure = df['tenure']
df['TenureGroup'] = np.select(
    [tenure < 21, tenure <= 41], # evaluated in order, the first match wins
    ['low_tenure', 'medium_tenure'],
    default = 'high_tenure'
)
df.head()

In [None]:
# number of distinct 'customerID' values in each 'TenureGroup'
df2 = (
    df.groupby(['TenureGroup'])[['customerID']]
    .nunique()
    .reset_index()
)
df2.head()

In [None]:
# draw the customer count of each tenure group as a bar chart
plt.figure(figsize = (10, 8))
ax = sns.barplot(
    data = df2,
    x = 'TenureGroup',
    y = 'customerID',
    order = ['low_tenure','medium_tenure','high_tenure'], # bars run from lowest to highest tenure level
    palette = ['#85a5fb', '#1d4cce', '#001c66']
)

# remove only the top spine; right, left and bottom stay visible
sns.despine(top = True, right = False, left = False, bottom = False)

# title on the axes, subtitle on the figure
plt.title('Number of Customers Group by Tenure', fontsize = 18, fontweight = 'bold', y = 1.13)
plt.suptitle('Most of the customers are in the "low_tenure" group.', fontsize = 13, y = 0.97)

# axis labels
ax.set_xlabel('Tenure Group', fontsize = 12)
ax.set_ylabel("# of Customers", fontsize = 12)

# write each bar's value (no decimals) centered just above its top edge
for bar in ax.patches:
    bar_top = bar.get_height()
    bar_center = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f'{bar_top:.0f}',
        (bar_center, bar_top),
        xytext = (0, 6), # 6 points above the bar top
        textcoords = 'offset points',
        ha = 'center',
        va = 'center',
        fontweight = 'semibold',
        size = 13
    )

# Customer Churn Tendency Based on Their Monthly Charges

In [None]:
# box plot of 'MonthlyCharges', one box per 'Churn' value
plt.figure(figsize = (10, 8))
sns.boxplot(
    data = df,
    x = 'Churn',
    y = 'MonthlyCharges',
    palette = ['grey', 'red']
)

# title on the axes, subtitle on the figure
plt.suptitle('Customer with high monthly charges tend to churn.', fontsize = 13, y = 0.97)
plt.title('Customer Churn Tendency Based on Their Monthly Charges', fontsize = 18, fontweight = 'bold', y = 1.13)

# axis labels (the trailing ';' keeps the cell from echoing the Text object)
plt.xlabel('Churn', fontsize = 12)
plt.ylabel('Monthly Charges', fontsize = 12);

# Customer Churn Rate Based on Having a Partner & Dependents

In [None]:
# combine 'Partner' and 'Dependents' into a single '<Partner> - <Dependents>' label
separator = ' - '
partner_flag = df['Partner']
dependents_flag = df['Dependents']
df['Partner & Dependents'] = partner_flag + separator + dependents_flag
df.head()

In [None]:
# number of distinct 'customerID' values per ('Partner & Dependents', 'Churn') pair
df4_in_numbers = (
    df.groupby(['Partner & Dependents', 'Churn'])[['customerID']]
    .nunique()
    .reset_index()
)
df4_in_numbers

In [None]:
# palette color to highlight Churn = 'Yes'
# built as a dict with one entry per 'Churn' level (not a list with one entry
# per row of df4_in_numbers), so each hue level is tied to its color explicitly
color_focus2 = {
    level: 'red' if level == 'Yes' else 'grey'
    for level in df4_in_numbers['Churn'].unique()
}

# create bar plot number of unique customers group by 'Partner & Dependents' and 'Churn'
plt.figure(figsize = (10, 8))
ax = sns.barplot(
    x = 'Partner & Dependents',
    y = 'customerID',
    hue = 'Churn',
    data = df4_in_numbers,
    palette = color_focus2
)

# highlight important part
# a reference to the frame is kept so it can be skipped by identity when the
# bars are annotated below, rather than by comparing heights against 2350
highlight_box = Rectangle(
    (-0.48, 0),
    1, 2350,
    fc = 'none',
    ec = 'yellow',
    lw = 3,
    linestyle = '--'
)
ax.add_patch(highlight_box)

# adjust yticks of number of unique customers: 0 to 2500 in steps of 500
num_of_customers = list(range(0, 3000, 500))
plt.yticks(num_of_customers)

# add title and subtitle
plt.title('Customer Churn Rate Based on Having a Partner & Dependents', fontsize = 18, fontweight = 'bold', y = 1.13)
plt.suptitle('Customer who doesn\'t have a partner & dependents tend to churn.', fontsize = 13, y = 0.97)

# add xlabel & ylabel
plt.xlabel('Partner & Dependents', fontsize = 12)
plt.ylabel("# of Customers", fontsize = 12);

# add data points: print each bar's height (no decimals) centered above the bar
for p in ax.patches:
    if p is highlight_box: # the highlight frame is not a data bar
        continue
    ax.annotate(
        format(p.get_height(), '.0f'),
        (p.get_x() + p.get_width() / 2., p.get_height()), 
        ha = 'center',
        va = 'center',
        fontweight = 'semibold',
        size = 13,
        xytext = (0, 10), # shift the text 10 points up from the bar top
        textcoords = 'offset points'
    )

# Customer Churn Rate Based on Services Used

In [None]:
# combine 'PhoneService' and 'InternetService' into a single '<Phone> - <Internet>' label
phone_service = df['PhoneService']
internet_service = df['InternetService']
df['Phone & Internet Services'] = phone_service + ' - ' + internet_service
df.head()

In [None]:
# share (in %) of distinct customers per 'Churn' value within each
# 'Phone & Internet Services' combination

# numerator: distinct customers per (service combination, churn) pair
df5_percentage_of_customers = (
    df.groupby(['Phone & Internet Services', 'Churn'])[['customerID']].nunique()
)

# denominator: distinct customers per service combination
services = df.groupby(['Phone & Internet Services'])[['customerID']].nunique()

# divide along the shared 'Phone & Internet Services' index level, scale to percent
df5_percentage_of_customers = (
    df5_percentage_of_customers.div(services, level = 'Phone & Internet Services') * 100
).reset_index()
df5_percentage_of_customers

In [None]:
# palette color to highlight Churn = 'Yes'
# built as a dict with one entry per 'Churn' level (not a list with one entry
# per row of df5_percentage_of_customers), so each hue level is tied to its
# color explicitly
color_focus3 = {
    level: 'red' if level == 'Yes' else 'grey'
    for level in df5_percentage_of_customers['Churn'].unique()
}

# create bar plot of the percentage of unique customers group by 'Phone & Internet Services' and 'Churn'
plt.figure(figsize = (10, 8))
ax = sns.barplot(
    x = 'Phone & Internet Services',
    y = 'customerID',
    hue = 'Churn',
    data = df5_percentage_of_customers,
    palette = color_focus3
)

# highlight important part
# a reference to the frame is kept so it can be skipped by identity when the
# bars are annotated below, rather than by comparing heights against 65
highlight_box = Rectangle(
    (1.5, 0),
    1, 65,
    fc = 'none',
    ec = 'yellow',
    lw = 3,
    linestyle = '--'
)
ax.add_patch(highlight_box)

# adjust yticks of percentage of number of unique customers: 0 to 100 in
# steps of 10, each labelled with a trailing '%'
percentage_ = list(range(0, 110, 10))
ylabels = [format(y) + '%' for y in percentage_]
plt.yticks(percentage_, ylabels)

# add title and subtitle
plt.title('Almost Half of the Customers who Used Phone & Fiber Optic Internet Services are Churned', fontsize = 18, fontweight = 'bold', x = -0.04, y = 1.13, loc = 'left')
plt.suptitle('Around 42% of Phone & Fiber Optic Internet Services customers are churned, need to check the complaint on the related services,\nmeanwhile we could give special discount/promo for customers who use the services.', fontsize = 13, x = 0.095, y = 0.97, ha = 'left')

# add xlabel & ylabel (the y axis holds percentages, not raw counts)
plt.xlabel('Phone & Internet Services', fontsize = 12)
plt.ylabel('% of Customers', fontsize = 12);

# add data points: print each bar's height (2 decimals + '%') centered above the bar
for bar in ax.patches:
    if bar is highlight_box: # the highlight frame is not a data bar
        continue
    ax.annotate(
        format(bar.get_height(), '.2f') + '%',
        (bar.get_x() + bar.get_width() / 2., bar.get_height()), 
        ha = 'center',
        va = 'center',
        fontweight = 'semibold',
        size = 13,
        xytext = (0, 7), # shift the text 7 points up from the bar top
        textcoords = 'offset points'
    )