In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    full_paths = [os.path.join(root, name) for name in names]
    for path in full_paths:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Filtering out the warnings
# NOTE(review): filterwarnings('ignore') with no category silences every warning
# for the rest of the session, including deprecation warnings raised by the
# plotting and data libraries used below. Consider narrowing it to a specific
# category once the notebook is stable.

import warnings

warnings.filterwarnings('ignore')

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

### **Loading the Data Set**

In [None]:
# Location of the Telco churn CSV inside the Kaggle input mount.
telco_csv_path = "/kaggle/input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv"

# Read the raw data and preview the first rows.
tele = pd.read_csv(telco_csv_path)
tele.head()

#### The customerID column carries no information for predicting which customers churn, so it is dropped.

In [None]:
# customerID is a pure identifier, so remove it before the analysis.
# Re-assigning the result (instead of inplace=True) together with
# errors="ignore" keeps this cell idempotent: running it a second time, when the
# column is already gone, no longer raises a KeyError.
tele = tele.drop(columns="customerID", errors="ignore")
tele.head()

#### To Check the Unique Values:-

In [None]:
# Number of distinct values in each column.
tele.nunique()

#### Check whether the data set contains any null values:

In [None]:
# True if at least one cell in the frame is null (NaN/None).
# NOTE(review): placeholder strings such as a blank ' ' are ordinary values and
# are not reported as null by this check.
tele.isnull().values.any()

### Visualization:-
- To have better understanding of the data.

In [None]:
# Share of customers (in %) per category for four demographic columns,
# drawn as one pie chart per column.
demographic_cols = ['SeniorCitizen', 'gender', 'Partner', 'Dependents']

# A regular 1 x N grid keeps every pie inside the figure canvas, so the chart
# renders the same way on any backend and when the figure is saved to a file
# (hand-placed axes rectangles beyond the 0-1 figure range only show up when the
# output is cropped to a tight bounding box).
fig, axes = plt.subplots(1, len(demographic_cols), figsize=(20, 5), squeeze=False)

for ax, col in zip(axes.ravel(), demographic_cols):
    shares = tele[col].value_counts(normalize=True) * 100
    # Take the wedge labels from the index of the Series that is being plotted.
    # value_counts() orders by frequency while unique() orders by first
    # appearance, so pairing the two by position can silently swap the labels.
    wedge_labels = shares.index.astype(str).tolist()
    ax.pie(shares, labels=wedge_labels, autopct='%1.1f%%', textprops={'size': 14})
    ax.set_title(col, fontdict={'fontsize': 18})

fig.tight_layout()
plt.show()


### The pie charts above show that:
- Senior citizens are few in number, around 16% of the entire data set; most customers are younger.
- The gender split is almost even.
- Around 49% of customers have partners, but only 30% of all customers have dependents.
- Customers with no dependents are the large majority.

In [None]:
# Per-column dtype and non-null count summary of the frame.
tele.info()

As we can see, the column **TotalCharges** holds numeric values but its data type is object ('O'), so it needs to be converted to float.

In [None]:
# TotalCharges is stored as object and has to become float.
# (tenure * MonthlyCharges = TotalCharges)
# List the rows where TotalCharges holds a single blank instead of a number.
blank_total_mask = tele["TotalCharges"].eq(' ')
tele.loc[blank_total_mask]

In [None]:
# Swap the blank placeholder in TotalCharges for 0, then show the affected rows.
total_charges_filled = tele["TotalCharges"].replace({' ': 0})
tele["TotalCharges"] = total_charges_filled
tele.loc[tele["TotalCharges"].eq(0)]

In [None]:
# Cast TotalCharges from object to float.
total_charges = tele["TotalCharges"].astype(float)
tele["TotalCharges"] = total_charges

# Confirm the new dtype ...
print(total_charges.dtype)

# ... and count any nulls left in the column after the cast.
print(total_charges.isna().sum())

### To Check for Outliers:-

In [None]:
# One box plot per numeric column, side by side, to eyeball outliers.
numeric_cols = ('tenure', 'MonthlyCharges', 'TotalCharges')
fig, axs = plt.subplots(nrows=1, ncols=len(numeric_cols))

for panel, column in zip(axs, numeric_cols):
    panel.boxplot(tele[column])
    panel.set_title(column)

# Spread the three panels out horizontally.
fig.subplots_adjust(left=0.5, right=2, wspace=1)

plt.show()

#### No Outliers.

### To get better insights from the data, three more columns are added to the data set:
- Churn_flag : the "Yes"/"No" values of the "Churn" column encoded as 1/0.
- tenure_group : as the "tenure" column is in months, this groups the months into year ranges.
- Senior_flag : the 0/1 values of the "SeniorCitizen" column converted to "No"/"Yes" (the reverse of what is done for "Churn").

In [None]:
# Churn_flag: numeric version of Churn (1 where Churn == "Yes", otherwise 0).
tele["Churn_flag"] = np.where(tele.Churn == "Yes", 1, 0)

# tenure_group: tenure (in months) bucketed into year ranges.
# include_lowest=True closes the first bin on the left, i.e. [0, 12]. With the
# default half-open (0, 12] a tenure of exactly 0 months falls outside every bin
# and becomes NaN, which silently drops those customers from every
# groupby / pivot table built on tenure_group.
tele["tenure_group"] = pd.cut(
    tele.tenure,
    [0, 12, 60, 120, 9999],
    labels=["<1", "1-5", "5-10", "10+"],
    include_lowest=True,
)

# Senior_flag: readable "Yes"/"No" version of the 0/1 SeniorCitizen column.
tele["Senior_flag"] = np.where(tele["SeniorCitizen"] == 1, "Yes", "No")

In [None]:
# Total churn rate: bar height = number of customers per Churn value,
# text inside each bar = that value's share of all customers in %.
churn_counts = tele.Churn.value_counts()
churn_pct = (churn_counts / churn_counts.sum() * 100).round(2)

plt.figure(figsize=[8, 5])
plt.title("Churn Rate\n")
# The x labels come from the index of the counted Series, so label and bar height
# always belong together (unique() and value_counts() are ordered differently).
# x / y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.barplot(x=churn_counts.index, y=churn_counts.values, orient="v")
plt.xlabel("Churn labels")
plt.ylabel("Churn Value Counts")

# Iterate by position over values instead of label[i]: integer lookups on a
# string-indexed Series rely on a deprecated positional fallback.
for pos, (count, pct) in enumerate(zip(churn_counts.values, churn_pct.values)):
    # The y axis is in customer counts, so anchor the text at half the bar height
    # (the percentage itself is not a valid y coordinate on this axis).
    plt.text(x=pos, y=count / 2, s=str(pct), horizontalalignment='center',
             verticalalignment='center', color="white", weight="bold", fontsize=30)

plt.show()

#### Observation -
- About 73% of the customers do not churn.
- The churn rate is about 27%.

In [None]:
# The same churn rate, computed directly as the mean of the 0/1 flag.
churn_rate_pct = tele["Churn_flag"].mean() * 100
print(churn_rate_pct, "%")

In [None]:
# Left: share of customers in each tenure group. Right: churn rate per group.
fig, (ax_share, ax_churn) = plt.subplots(1, 2, figsize=(10, 4))

# value_counts(normalize=True) yields proportions, not counts, so the title and
# y label say "share". sort_index() puts the bars in tenure order (the category
# order) rather than frequency order, matching the right-hand panel.
tenure_share = tele["tenure_group"].value_counts(normalize=True).sort_index()
tenure_share.plot.bar(ax=ax_share)
ax_share.set_title("Share of customers w.r.t year group\n")
ax_share.set_xlabel("Year Group")
ax_share.set_ylabel("Share of Customers")

# observed=False is spelled out so empty tenure categories keep their slot on
# the axis regardless of the pandas default.
churn_by_tenure = tele.groupby("tenure_group", observed=False)["Churn_flag"].mean()
churn_by_tenure.plot.bar(ax=ax_churn)
ax_churn.set_title("Churn Frequency w.r.t year group\n")
ax_churn.set_xlabel("Year Group")
ax_churn.set_ylabel("Churn Frequency")

fig.tight_layout()
plt.show()

### Observations:

- The total count is high for customers who have been with the company for at least 5 years.
- Customers who joined the company less than 1 year ago have a very high churn rate. They are more likely to switch.

In [None]:
def _print_category_summary(column, echo_order=()):
    """Print and return the categories and percentage shares of one column.

    Prints, in this order: the distinct values of ``tele[column]`` (order of
    first appearance), the percentage share of each value (``value_counts``
    order), each ``labels[i]`` for ``i`` in ``echo_order``, and finally an
    empty line.

    Parameters
    ----------
    column : str
        Name of the column of ``tele`` to summarise.
    echo_order : tuple of int, optional
        Positions into the distinct-value array to print one per line.

    Returns
    -------
    tuple
        ``(labels, shares)`` - the array from ``unique()`` and the Series from
        ``value_counts(normalize=True) * 100``.
    """
    labels = tele[column].unique()
    print(labels)
    shares = tele[column].value_counts(normalize=True)*100
    print(shares)
    for position in echo_order:
        print(labels[position])
    print()
    return labels, shares


# Demographics
label_SenCit, value_SenCit = _print_category_summary('Senior_flag')
label_gender, value_gender = _print_category_summary('gender', (1, 0))
label_partner, value_partner = _print_category_summary('Partner', (1, 0))
label_dependents, value_dependents = _print_category_summary('Dependents')

# Phone and internet services
label_phone, value_phone = _print_category_summary('PhoneService', (1, 0))
label_ml, value_ml = _print_category_summary('MultipleLines', (1, 2, 0))
label_is, value_is = _print_category_summary('InternetService', (1, 0, 2))
label_os, value_os = _print_category_summary('OnlineSecurity')
label_ob, value_ob = _print_category_summary('OnlineBackup', (1, 0, 2))
label_dp, value_dp = _print_category_summary('DeviceProtection')
label_ts, value_ts = _print_category_summary('TechSupport')
label_stv, value_stv = _print_category_summary('StreamingTV')
label_sm, value_sm = _print_category_summary('StreamingMovies')

# Contract and billing
label_c, value_c = _print_category_summary('Contract', (0, 1, 2))
label_pb, value_pb = _print_category_summary('PaperlessBilling')
label_pm, value_pm = _print_category_summary('PaymentMethod')

In [None]:
# Churn rate (mean of Churn_flag) per category, one pie chart per feature.
# Maps the column to group by -> the title shown above its chart.
churn_features = {
    "Senior_flag": "SeniorCitizen",
    "gender": "Gender",
    "Partner": "Partner",
    "Dependents": "Dependents",
    "PhoneService": "PhoneService",
    "MultipleLines": "MultipleLines",
    "InternetService": "InternetService",
    "OnlineSecurity": "OnlineSecurity",
    "OnlineBackup": "OnlineBackup",
    "DeviceProtection": "DeviceProtection",
    "TechSupport": "TechSupport",
    "StreamingTV": "StreamingTV",
    "StreamingMovies": "StreamingMovies",
    "Contract": "Contract",
    "PaperlessBilling": "PaperlessBilling",
    "PaymentMethod": "PaymentMethod",
}

# A regular grid keeps every chart inside the figure canvas; axes rectangles
# placed far outside the 0-1 figure range are only visible when the output is
# cropped to a tight bounding box.
n_cols = 4
n_rows = -(-len(churn_features) // n_cols)  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(6 * n_cols, 6 * n_rows), squeeze=False)
panels = axes.ravel()

for ax, (column, title) in zip(panels, churn_features.items()):
    churn_rate = tele.groupby(column)["Churn_flag"].mean()
    # Label each wedge from the index of the grouped result itself. groupby sorts
    # its keys, whereas unique() returns them in order of first appearance, so
    # labels taken from unique() (even when permuted by hand) can end up on the
    # wrong wedge.
    wedge_labels = churn_rate.index.astype(str).tolist()
    # NOTE: autopct prints each wedge's share of the summed group rates, not the
    # churn rate itself; read the wedges as a relative comparison only.
    ax.pie(churn_rate, labels=wedge_labels, autopct='%1.1f%%', textprops={'size': 12})
    ax.set_title(title, fontdict={'fontsize': 18})

# Hide grid cells that have no feature assigned.
for ax in panels[len(churn_features):]:
    ax.set_visible(False)

fig.tight_layout()
plt.show()

##### Observations w.r.t Churn rate

**Senior Citizens** have a higher churn rate, meaning that older people are more likely to switch.

**Customers with no partner** that is **singles** are more likely to churn.

**Customers with no dependent** are more likely to churn.

#### Internet Service
**Customers with Fibre Optic as internet service** are more likely to churn as compared to DSL.

**Customers with Online Security** are very less likely to churn.

**Customers with Online Backup** are very less likely to churn.

**Customers with no Device Protection** are more likely to churn.

**Customers with Streaming TV** are likely to churn but slightly less as compared to one who do not have streaming tv.

**Customers with Streaming Movies** are likely to churn but slightly less as compared to one who do not have streaming movies.

Customers with **Month-to-Month** contract are likely to churn more.

Customers opted for **Paperless Billing** are likely to churn more.

Customers opted for **Electronic Check** as Payment Method are likely to churn more as compared to other payment methods.


In [None]:
# Summed TotalCharges for every (tenure_group, Churn) pair, shown twice:
# as bars on the left and as a line on the right.
total_by_group = tele.groupby(["tenure_group","Churn"])["TotalCharges"].sum()

fig, (bar_ax, line_ax) = plt.subplots(1, 2, figsize=[15,3])

bar_ax.set_title("Total Charges Vs. TenureGroup_Churn_Rate\n")
total_by_group.plot.bar(ax=bar_ax)
bar_ax.set_xlabel("Tenuregroup and its Churn rate")
bar_ax.set_ylabel("Total Charges")

line_ax.set_title("Total Charges Vs. TenureGroup_Churn_Rate\n")
total_by_group.plot(ax=line_ax)
line_ax.set_xlabel("\nTenuregroup and its Churn rate")

plt.show()

#### Observation:

Customers who have been with the company for at least 5 years have very high Total Charges and are not likely to churn; hence they are the most **Loyal** customers.

Customers who are more likely to churn can be forecast on the basis of their total charges: as we can see, in each tenure_group the total charges of the churned customers are always much lower than those of the loyal customers.

The tenure_group of less than a year has equal total charges for both churned and loyal customers. Hence, if their total charges do not increase after a few months, they are more likely to churn.

In [None]:
# Summed MonthlyCharges for every (tenure_group, Churn) pair, shown twice:
# as bars on the left and as a line on the right.
monthly_by_group = tele.groupby(["tenure_group","Churn"])["MonthlyCharges"].sum()

fig, (bar_ax, line_ax) = plt.subplots(1, 2, figsize=[15,3])

bar_ax.set_title("Monthly Charges Vs. TenureGroup_Churn_Rate\n")
monthly_by_group.plot.bar(ax=bar_ax)
bar_ax.set_xlabel("Tenuregroup and its Churn rate")
bar_ax.set_ylabel("Monthly Charges")

line_ax.set_title("Monthly Charges Vs. TenureGroup_Churn_Rate\n")
monthly_by_group.plot(ax=line_ax)
line_ax.set_xlabel("\nTenuregroup and its Churn rate")

plt.show()

#### Observation:

Customers monthly charges are very high for loyal customers of tenure group (1-5 and 5-10).

Customers who are likely to churn have much lower monthly charges than loyal customers.

The tenure group of **<1** is highly unpredictable; the company has to keep a close watch on these customers and see whether their monthly charges increase along with tenure. If they do, those customers are unlikely to churn.

In [None]:
# Mean Churn_flag (churn rate) for every tenure_group x SeniorCitizen combination.
res = pd.pivot_table(data=tele, index="tenure_group", columns="SeniorCitizen", values="Churn_flag")
# Centre the colour scale on the overall churn rate, computed from the data
# rather than typed in as a literal, so it stays correct if the data changes.
overall_churn_rate = tele["Churn_flag"].mean()
sns.heatmap(res, annot=True, cmap="Purples", center=overall_churn_rate)
plt.show()

#### Churn Rate of Senior Citizen whose tenure is less than a year is very high

In [None]:
# Mean Churn_flag (churn rate) for every tenure_group x gender combination.
res = pd.pivot_table(data=tele, index="tenure_group", columns=["gender"], values="Churn_flag")
# Centre the colour scale on the overall churn rate, computed from the data
# rather than typed in as a literal, so it stays correct if the data changes.
overall_churn_rate = tele["Churn_flag"].mean()
sns.heatmap(res, annot=True, cmap="Purples", center=overall_churn_rate)
plt.show()

#### Churn rate of females a bit higher as compared to males in tenure_group less than a year

In [None]:
# Mean Churn_flag (churn rate) per tenure_group, split by Partner and PhoneService.
res = pd.pivot_table(data=tele, index="tenure_group", columns=["Partner","PhoneService"], values="Churn_flag")
# Centre the colour scale on the overall churn rate, computed from the data
# rather than typed in as a literal, so it stays correct if the data changes.
overall_churn_rate = tele["Churn_flag"].mean()
sns.heatmap(res, annot=True, cmap="Purples", center=overall_churn_rate)
plt.show()

#### Customers less than a year with company plus having no partners but have phone service are more likely to churn.

In [None]:
# Mean Churn_flag (churn rate) per tenure_group, split by gender and PaymentMethod.
res = pd.pivot_table(data=tele, index="tenure_group", columns=["gender","PaymentMethod"], values="Churn_flag")
# Centre the colour scale on the overall churn rate, computed from the data
# rather than typed in as a literal, so it stays correct if the data changes.
overall_churn_rate = tele["Churn_flag"].mean()
sns.heatmap(res, annot=True, cmap="Purples", center=overall_churn_rate)
plt.show()

#### Among customers with less than a year of tenure, those paying by electronic check are more likely to churn, regardless of gender.

In [None]:
# Mean Churn_flag (churn rate) per tenure_group, split by gender and Contract.
res = pd.pivot_table(data=tele, index="tenure_group", columns=["gender","Contract"], values="Churn_flag")
# Centre the colour scale on the overall churn rate, computed from the data
# rather than typed in as a literal, so it stays correct if the data changes.
overall_churn_rate = tele["Churn_flag"].mean()
sns.heatmap(res, annot=True, cmap="Purples", center=overall_churn_rate)
plt.show()

#### Among customers with less than a year of tenure, those on a month-to-month contract are more likely to churn, regardless of gender; the rate is a bit higher for females.

In [None]:
# Mean Churn_flag (churn rate) per tenure_group, split by Contract and PaymentMethod.
# The wider figure leaves room for the many Contract x PaymentMethod columns.
plt.figure(figsize=[15,6])
res = pd.pivot_table(data=tele, index="tenure_group", columns=["Contract","PaymentMethod"], values="Churn_flag")
# Centre the colour scale on the overall churn rate, computed from the data
# rather than typed in as a literal, so it stays correct if the data changes.
overall_churn_rate = tele["Churn_flag"].mean()
sns.heatmap(res, annot=True, cmap="Purples", center=overall_churn_rate)
plt.show()

#### Customers less than a year with contract of Month-to-Month and payment mode:Electronic Check are more likely to churn.

In [None]:
# Pairwise relationships between the three numeric columns, coloured by contract type.
# pairplot builds its own figure, so a preceding plt.figure(figsize=...) call has
# no effect on it and only leaves an extra empty figure in the output; use the
# height / aspect arguments of pairplot if the panel size needs changing.
sns.pairplot(data=tele, vars=["tenure","MonthlyCharges","TotalCharges"], hue="Contract")
plt.show()

#### The above pairplot -  shows that:

- There is no linear relation between monthly charges and tenure.
- Customers with a Two Year contract have very high Total Charges, which grow as their tenure increases.
- As tenure increases, customers are more likely to switch from a Month-to-Month contract to a Two Year contract.