In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the 80% split of the telecom churn data into the working DataFrame
df = pd.read_csv(filepath_or_buffer='../input/telecom-churn-datasets/churn-bigml-80.csv')

In [None]:
# Preview the first rows of the loaded data
df.head(n=5)

In [None]:
# Checking for null values: review the per-column summary printed by df.info()

df.info()

#### We can clearly see that no null values are present in our dataset, so we can proceed with univariate analysis.

# 1.Basic visualisation of the data 

In [None]:
# Count of customers for each 'International plan' value
plt.figure(figsize=(12, 6))
ax = sns.countplot(x='International plan', data=df)
ax.set_xlabel('International plan', fontsize=15)
ax.set_title('How many users have opted for an International Plan?', fontsize=20)
plt.show()

## Inference:
- We can see that not many users opt for International plans.

In [None]:
# Count of customers for each 'Voice mail plan' value
plt.figure(figsize=(12, 6))
ax = sns.countplot(x='Voice mail plan', data=df)
ax.set_xlabel('Voice mail plan', fontsize=15)
ax.set_title('How many users opt for a voice mail plan?', fontsize=20)
plt.show()

## Inference:
- Many users do not opt for a voice mail plan

In [None]:
# Side-by-side box plots of the call counts for day, evening and night.
# Each entry is (column to plot, y-axis label, box colour).
call_panels = [
    ('Total day calls', 'Total Day Calls', 'lightblue'),
    ('Total eve calls', 'Total Eve Calls', 'lightgreen'),
    ('Total night calls', 'Total Night Calls', 'white'),
]

plt.figure(figsize=(18, 15))
for position, (column, label, shade) in enumerate(call_panels, start=1):
    plt.subplot(1, 3, position)
    sns.boxplot(data=df, y=column, color=shade)
    plt.ylabel(label, fontsize=15)

plt.show()

## Inference:
- We can see that more calls are made in the morning than in the evening or at night, with the night having the fewest calls.

In [None]:
# Side-by-side box plots of the minutes used during day, evening and night.
fig = plt.figure(figsize=(18, 15))

# Left panel: day minutes
ax_day = fig.add_subplot(1, 3, 1)
sns.boxplot(data=df, y='Total day minutes', color='lightblue', ax=ax_day)
ax_day.set_ylabel('Total Day Minutes', fontsize=15)

# Middle panel: evening minutes
ax_eve = fig.add_subplot(1, 3, 2)
sns.boxplot(data=df, y='Total eve minutes', color='lightgreen', ax=ax_eve)
ax_eve.set_ylabel('Total Eve Minutes', fontsize=15)

# Right panel: night minutes
ax_night = fig.add_subplot(1, 3, 3)
sns.boxplot(data=df, y='Total night minutes', color='white', ax=ax_night)
ax_night.set_ylabel('Total Night Minutes', fontsize=15)

plt.show()

## Inference:
- Despite having more calls made during the day, we can see that users do not spend longer time on the calls in the morning.
- Users tend to talk for longer time in the afternoon than other times.

In [None]:
# Side-by-side box plots of the charges for day, evening and night.
charge_columns = ('Total day charge', 'Total eve charge', 'Total night charge')
charge_labels = ('Total Day Charges', 'Total Eve Charges', 'Total Night Charges')
charge_colors = ('lightblue', 'lightgreen', 'white')

plt.figure(figsize=(18, 15))
for slot, column, label, shade in zip((1, 2, 3), charge_columns, charge_labels, charge_colors):
    plt.subplot(1, 3, slot)
    sns.boxplot(data=df, y=column, color=shade)
    plt.ylabel(label, fontsize=15)

plt.show()

## Inference:
- The charges are maximum in the evening time and lowest in the night time.
- These plots are in conjunction with the minutes spoken.

In [None]:
# Class balance of the target: number of customers per 'Churn' value
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='Churn', data=df, ax=ax)
plt.show()

## Inference:
- Most people do not leave the service

In [None]:
# Look at the first rows again before deriving the aggregate columns
df.head(n=5)

In [None]:
# Start the aggregate frame dftotal from the 'Churn' column alone
dftotal = df['Churn'].to_frame()

In [None]:
# Add three aggregate columns (minutes, calls, charges), each the sum of the
# matching day, evening and night columns of df.
period_columns = {
    'Total minutes': ('Total day minutes', 'Total eve minutes', 'Total night minutes'),
    'Total calls': ('Total day calls', 'Total eve calls', 'Total night calls'),
    'Total charges': ('Total day charge', 'Total eve charge', 'Total night charge'),
}
for total_name, (day_col, eve_col, night_col) in period_columns.items():
    dftotal[total_name] = df[day_col] + df[eve_col] + df[night_col]

In [None]:
# Preview the aggregate frame
dftotal.head(n=5)

In [None]:
# Side-by-side box plots of the aggregate minutes, calls and charges in dftotal.
plt.figure(figsize=(18, 15))

# Left panel: total minutes
plt.subplot(1, 3, 1)
sns.boxplot(data=dftotal, y='Total minutes', color='lightblue')
plt.ylabel('Total minutes', fontsize=15)

# Middle panel: total calls
plt.subplot(1, 3, 2)
sns.boxplot(data=dftotal, y='Total calls', color='lightgreen')
plt.ylabel('Total calls', fontsize=15)

# Right panel: total charges
plt.subplot(1, 3, 3)
sns.boxplot(data=dftotal, y='Total charges', color='royalblue')
plt.ylabel('Total Charges', fontsize=15)

# Render explicitly, like the other plotting cells; without this the cell's
# last expression (the Text object returned by plt.ylabel) is echoed as output.
plt.show()

## Inference:
- Each attribute is consistent with each other suggesting normal pricing for calls made.

# 2. Bi-Variate Analysis

In [None]:
# Correlation matrix that feeds the heatmap.
# Only numeric and boolean columns are kept: pandas >= 2.0 raises when
# DataFrame.corr() meets non-numeric (e.g. text) columns instead of silently
# dropping them. Boolean columns stay in so that 'Churn' is still correlated
# against the other variables.

corr = df.select_dtypes(include=['number', 'bool']).corr()

In [None]:
# Annotated heatmap of the correlation matrix
fig, ax = plt.subplots(figsize=(15, 8))
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)
plt.show()

## Observation:
#### With regard to the heatmap we can see high correlation between the following variables:
##### With respect to churn
- Total day minutes to Churn at 0.2 
- Total day charge to Churn at 0.2
- Customer service calls to Churn at 0.2

##### Other relations:
- Total day charge to Total day minutes at 1
- Total evening charge to total eve minutes at 1
- total night charge to total night minutes at 1 

In [None]:
# How do customer service calls and total day charge relate to churn?
# Bars show the mean 'Total day charge' per number of customer service calls,
# split by 'Churn'.
plt.figure(figsize=(12, 6))
# ci=None is the documented way to switch the error bars off; ci=False is not
# a documented value. (seaborn >= 0.12 spells this errorbar=None; `ci` is kept
# here to stay compatible with the API this notebook already uses.)
sns.barplot(data=df, x='Customer service calls', y='Total day charge', hue='Churn', ci=None)
plt.xlabel('Customer service calls', fontsize=15)
plt.ylabel('Total day charges', fontsize=15)
plt.show()



## Inference:
- We can see that if customer service calls are made more than 7 times, then the service is bound to be cancelled. This also comes with high charges imposed on the user.
- At the same time, we can see that many users leave the service after making 1 to 3 customer service calls when they are charged more. Analysing the total day charge against the minutes spoken, split by churn, could give us a better understanding of this.

In [None]:
# Joint distribution of day minutes and day charge, coloured by churn.
# sns.jointplot is a figure-level function: it creates its own figure, so a
# preceding plt.figure(...) only produces an extra empty figure and its
# figsize is ignored. The returned JointGrid is used to label the joint axes
# directly rather than relying on pyplot's "current axes".
day_joint = sns.jointplot(data=df, x='Total day minutes', y='Total day charge',
                          hue='Churn', alpha=0.3)
day_joint.set_axis_labels('Total Day Minutes', 'Total Day Charges', fontsize=13)
plt.show()

## Inference:
- A linear relationship can be observed between the two Variables.

- Visible Churn can be seen with higher charges imposed when spoken for longer times.

In [None]:
# Charge vs. minutes scatter plots for day, evening and night, coloured by churn.
# Each entry is (x column, y column, x-axis label, y-axis label).
scatter_panels = [
    ('Total day charge', 'Total day minutes', 'Total Day Charge', 'Total Day Minutes'),
    ('Total eve charge', 'Total eve minutes', 'Total Eve Charge', 'Total Eve Minutes'),
    ('Total night charge', 'Total night minutes', 'Total Night Charge', 'Total Night Minutes'),
]

plt.figure(figsize=(15, 8))
for position, (x_col, y_col, x_label, y_label) in enumerate(scatter_panels, start=1):
    plt.subplot(1, 3, position)
    sns.scatterplot(data=df, x=x_col, y=y_col, hue='Churn', alpha=0.6)
    plt.xlabel(x_label, fontsize=9)
    plt.ylabel(y_label, fontsize=9)

plt.show()

## Inference:
- We can observe from the three scatterplots that churn is most often True for daytime usage.
- In the evening and night we can see that the churn rate is not so relevant.

## How do Users with international plan relate with the churn?

In [None]:
# Customer counts per 'International plan' value, split by 'Churn'.
# sns.catplot is a figure-level function: it creates its own figure, so a
# preceding plt.figure(...) only produces an extra empty figure and its
# figsize is ignored. Labels are set through the returned FacetGrid.
churn_by_plan = sns.catplot(data=df, x='International plan', hue='Churn',
                            kind='count')
churn_by_plan.set_axis_labels('International plan status', 'Count', fontsize=12)
# Render explicitly, like the other plotting cells.
plt.show()

## Inference:
- Far fewer users have an international plan than do not.
- We can also observe that people who have an international plan have almost equal churn rate.

# Conclusion
- Based on my observation I can see that most people who leave the service are the ones who use the service in the day/morning.
- It can also be observed that most people who use the service in the morning speak for shorter amounts of time but make more calls.
- International plan users are more consistent with their churn w.r.t the ones who do not have the service.

#### Prescription:
- Plans that minimize costs for users who make a higher number of calls can be introduced.
- Decreasing the prices as the talk-time increases can be an effective way to reduce the churn.
- Improvement in the customer service can be done to reduce the number of calls which cause the churn.