## Data Overview

In [None]:
#importing libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
import matplotlib.ticker as mtick
import seaborn as sns

import os
print(os.listdir("../input"))

In [None]:
#reading data
df = pd.read_csv('../input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv')

In [None]:
#first few rows
df.head()

In [None]:
# total rows and columns
df.shape

In [None]:
#column names
df.columns

In [None]:
#getting data summary
df.describe()

In [None]:
#column types
df.dtypes

## Data Preparation

In [None]:
# "TotalCharges" is numeric, but the data type is 'object'
# It needs to be converted to numeric
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df.dtypes

In [None]:
# Recode SeniorCitizen from its numeric flags to labels: 1 -> "Yes", 0 -> "No".
# As strings, the column is dummy-encoded by pd.get_dummies later on instead of
# being treated as a number.
df["SeniorCitizen"] = df["SeniorCitizen"].replace({1:"Yes",0:"No"})

In [None]:
# Checking the number of missing values
df.isnull().sum()

In [None]:
# There are 11 missing records in 'TotalCharges'. We will drop these missing records.
# Note: dropna() is called without `subset`, so a row with a missing value in
# ANY column is removed, not only rows missing 'TotalCharges'.
df.dropna(inplace=True)

In [None]:
df.describe()

In [None]:
# Dropping 'customerID' column
df.drop('customerID', axis=1, inplace=True)

In [None]:
df.head()

In [None]:
# Encode the target as a binary integer: 'Yes' -> 1, 'No' -> 0.
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on df['Churn']: that chained in-place call operates
# on a temporary Series, triggers a pandas warning, and leaves df unchanged when
# copy-on-write is active.
# The 1/0 keys keep the cell safe to re-run once the column is already encoded;
# any unexpected value maps to NaN and makes astype(int) fail loudly.
df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0, 1: 1, 0: 0}).astype(int)

df.head()

## EDA

In [None]:
#converting all the categorical variables into dummy variables
df_dummies = pd.get_dummies(df)
df_dummies.head()

### 1. Correlation

In [None]:
# Correlation of every dummy-encoded column with Churn, strongest positive first
churn_corr = df_dummies.corr()['Churn'].sort_values(ascending=False)
plt.figure(figsize=(20, 10))
churn_corr.plot.bar()

In [None]:
#correlation matrix
corr = df_dummies.corr()
corr.style.background_gradient(cmap='coolwarm')

### 2. Churns Distribution

In [None]:
# Share of customers in each Churn class, as a percentage of all rows.
colors = ['blue','green']
churn_pct = df['Churn'].value_counts() * 100.0 / len(df)
val = churn_pct.plot(kind='bar', stacked=True, rot=0, color=colors)
val.yaxis.set_major_formatter(mtick.PercentFormatter())
val.set_ylabel('% Customers')
val.set_xlabel('Churn')
val.set_title('Churn Distribution')

# The bar heights are already percentages, so each bar is labelled with its own
# height. Do not normalise by a "total" built from patch widths: that is only
# correct while the widths happen to sum to 1.0.
for bar in val.patches:
    val.text(bar.get_x() + .05, bar.get_height() - 5,
             '{:.1f}%'.format(bar.get_height()),
             fontsize=14,
             color='white',
             weight='bold')

Looking at the distribution of the target variable, roughly 74% of the records belong to non-churn customers and roughly 26% belong to churn customers.

### 3. Gender

In [None]:
# Share of customers of each gender, as a percentage of all rows.
colors = ['blue','green']
gender_pct = df['gender'].value_counts() * 100.0 / len(df)
val = gender_pct.plot(kind='bar', stacked=True, rot=0, color=colors)
val.yaxis.set_major_formatter(mtick.PercentFormatter())
val.set_xlabel('Gender')
val.set_ylabel('% Customers')
val.set_title('Gender Distribution')

# The bar heights are already percentages, so each bar is labelled with its own
# height. Do not normalise by a "total" built from patch widths: that is only
# correct while the widths happen to sum to 1.0.
for bar in val.patches:
    val.text(bar.get_x() + .05, bar.get_height() - 5,
             '{:.1f}%'.format(bar.get_height()),
             fontsize=14,
             color='white',
             weight='bold')

### 4. Senior Citizens

In [None]:
# Pie chart of the share of senior citizens.
# Slice labels are taken from the value_counts() index rather than a hard-coded
# ['No', 'Yes'] list, so a label can never be attached to the wrong slice if the
# ordering of the counts changes. No y-axis tick formatter is set because a pie
# chart has no tick axis to format.
senior_pct = df['SeniorCitizen'].value_counts() * 100.0 / len(df)
val = senior_pct.plot.pie(autopct='%.1f%%', figsize=(5,5), fontsize=12)
val.set_ylabel('Senior Citizens',fontsize = 12)
val.set_title('% of Senior Citizens', fontsize = 12)

In [None]:
colors = ['blue','green']
# Customer counts per (SeniorCitizen, Churn) pair: one row per SeniorCitizen value,
# one column per Churn value.
senior_churn = df.groupby(['SeniorCitizen','Churn']).size().unstack()
# Row-normalise so that each SeniorCitizen group sums to 100%.
senior_churn_pct = senior_churn.mul(100.0).div(senior_churn.sum(axis=1), axis=0)

val = senior_churn_pct.plot(kind='bar', width=0.2, stacked=True, rot=0, figsize=(8,6), color=colors)
val.yaxis.set_major_formatter(mtick.PercentFormatter())
val.legend(loc='center',prop={'size':14},title = 'Churn')
val.set_ylabel('% Customers')
val.set_title('Churn by Seniority Level',size = 14)

# Write each segment's percentage inside the segment. Only the patch geometry
# that is actually used is read, so no stray module-level `x` / `y` names are
# created next to the model data `X` / `y`.
for bar in val.patches:
    width, height = bar.get_width(), bar.get_height()
    val.annotate('{:.0f}%'.format(height),
                 (bar.get_x() + .25*width, bar.get_y() + .4*height),
                 color='white',
                 weight='bold', size=14)

### 5. Dependents and Partners

#### 5.1 Dependents & Churn

In [None]:
pd.crosstab(df['Churn'], df['Dependents'],margins = True)

In [None]:
plt.figure(figsize = (5,5))
sns.set(style = 'whitegrid')
# Columns are passed by name with data=df. The first positional parameter of
# sns.countplot is `data` in current seaborn, so a positional Series would not
# be interpreted as the x variable.
sns.countplot(x='Dependents', hue='Churn', data=df)
plt.show()

#### Churn rate is higher in customers who don't have dependents

#### 5.2 Dependents & Partner

In [None]:
pd.crosstab(df['Dependents'], df['Partner'],margins = True)

In [None]:
plt.figure(figsize = (5,5))
sns.set(style = 'whitegrid')
# Columns are passed by name with data=df. The first positional parameter of
# sns.countplot is `data` in current seaborn, so a positional Series would not
# be interpreted as the x variable.
sns.countplot(x='Partner', hue='Dependents', data=df)
plt.show()

#### 5.3. Partner and Churn

In [None]:
pd.crosstab(df['Partner'], df['Churn'],margins = True)

In [None]:
plt.figure(figsize = (5,5))
sns.set(style = 'whitegrid')
# Columns are passed by name with data=df. The first positional parameter of
# sns.countplot is `data` in current seaborn, so a positional Series would not
# be interpreted as the x variable.
sns.countplot(x='Partner', hue='Churn', data=df)
plt.show()

Churn rate is higher for customers having no partners

### 7. Billing

In [None]:
pd.crosstab(df['PaperlessBilling'], df['Churn'],margins = True)

In [None]:
plt.figure(figsize = (5,5))
sns.set(style = 'whitegrid')
# Columns are passed by name with data=df. The first positional parameter of
# sns.countplot is `data` in current seaborn, so a positional Series would not
# be interpreted as the x variable.
sns.countplot(x='PaperlessBilling', hue='Churn', data=df)
plt.show()

### 8. Contract Type

In [None]:
df['Contract'].value_counts().plot(kind='bar')

In [None]:
pd.crosstab(df['Contract'], df['Churn'],margins = True)

In [None]:
plt.figure(figsize = (5,5))
sns.set(style = 'whitegrid')
# Columns are passed by name with data=df. The first positional parameter of
# sns.countplot is `data` in current seaborn, so a positional Series would not
# be interpreted as the x variable.
sns.countplot(x='Contract', hue='Churn', data=df)
plt.show()

#### 8.1 Contract & Dependent

In [None]:
pd.crosstab(df['Contract'], df['Dependents'],margins = True)

In [None]:
plt.figure(figsize = (5,5))
sns.set(style = 'whitegrid')
# Columns are passed by name with data=df. The first positional parameter of
# sns.countplot is `data` in current seaborn, so a positional Series would not
# be interpreted as the x variable.
sns.countplot(x='Contract', hue='Dependents', data=df)
plt.show()

In [None]:
fig, (ax1,ax2,ax3) = plt.subplots(nrows=1, ncols=3, sharey = True, figsize = (20,10))

# One tenure histogram per contract type, drawn left to right. A single loop
# keeps the three panels identical in everything except data and title
# (including label and title font sizes).
panels = [('Month-to-month', 'Month to Month Contract'),
          ('One year', 'One Year Contract'),
          ('Two year', 'Two Year Contract')]
for ax, (contract, title) in zip((ax1, ax2, ax3), panels):
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
    # sns.histplot is the documented replacement once the target environment
    # is known to provide it.
    sns.distplot(df[df['Contract']==contract]['tenure'],
                 hist=True, kde=False, ax=ax)
    ax.set_xlabel('months', size=14)
    ax.set_title(title, size=14)

# The y-axis is shared, so only the left-most panel carries the label.
ax1.set_ylabel('# of Customers', size=14)

## Modeling

In [None]:
# Feature matrix: every dummy-encoded column except the target
X = df_dummies.drop('Churn', axis=1)
# Target vector as a plain NumPy array
y = df_dummies['Churn'].to_numpy()

In [None]:
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 30% of the rows as a test set (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.3)

# Standardise the features; the scaler's statistics are learned from the
# training split only and then applied to both splits
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

### Perceptron

In [None]:
from sklearn.linear_model import Perceptron

In [None]:
# Perceptron hyper-parameter grid, searched with 10-fold CV scored on accuracy
ppn_para = {
    'max_iter': [100, 300, 500, 1000, 1500, 2000],
    'eta0': [0.001, 0.01, 0.05, 0.1, 0.5],
    'random_state': [0],
    'tol': [0.001, 0.01, 0.05, 0.1, 0.5],
}
ppn = GridSearchCV(Perceptron(), ppn_para, scoring='accuracy', cv=10, n_jobs=-1)
ppn.fit(X_train_std, y_train)

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Score the tuned perceptron on the held-out test set
y_ppn_pred = ppn.predict(X_test_std)
print('Accuracy: %.4f' % accuracy_score(y_test, y_ppn_pred),
      confusion_matrix(y_test, y_ppn_pred),
      classification_report(y_test, y_ppn_pred),
      sep='\n')
# Mean cross-validated accuracy of the best grid candidate
print('Best score:', ppn.best_score_)

In [None]:
ppn.best_estimator_.get_params()

### Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Grid over regularisation strength C (1e-3 .. 1e3) and penalty type.
# solver='liblinear' is set explicitly because it supports both 'l1' and 'l2'.
# scikit-learn's current default solver (lbfgs) rejects penalty='l1', which
# would make every l1 candidate in the grid fail to fit.
lr_para = {"C":np.logspace(-3,3,7), "penalty":["l1","l2"]}
lr = GridSearchCV(estimator=LogisticRegression(solver='liblinear'), param_grid=lr_para,
                  scoring='accuracy', n_jobs=-1, cv=10)
lr.fit(X_train_std, y_train)

In [None]:
# Score the tuned logistic regression on the held-out test set
y_lr_pred = lr.predict(X_test_std)
print('Accuracy: %.4f' % accuracy_score(y_test, y_lr_pred),
      confusion_matrix(y_test, y_lr_pred),
      classification_report(y_test, y_lr_pred),
      sep='\n')
# Winning grid candidate and its mean cross-validated accuracy
print('Best Params:', lr.best_params_)
print('Best score:', lr.best_score_)

In [None]:
lr.best_estimator_.get_params()

In [None]:
# Stand-alone L1-regularised logistic regression with a fixed C.
# 'warn' is not a valid value for `solver` / `multi_class` in current
# scikit-learn, so the solver is named explicitly (liblinear supports the l1
# penalty). Every other argument is left at its library default.
lr_plot = LogisticRegression(penalty='l1', C=10.0, solver='liblinear')

In [None]:
lr_plot.fit(X_train_std, y_train)

In [None]:
y_lr_plot_pred = lr_plot.predict(X_test_std)

In [None]:
print('Accuracy: %.4f' % accuracy_score(y_test, y_lr_plot_pred))

In [None]:
# lr_plot coefficients labelled by feature name
weights = pd.Series(lr_plot.coef_[0], index=X.columns.values)

# Bar chart of the ten largest coefficients
plt.figure(figsize=(10,10))
plt.title('Top 10 Weights of Variables')
weights.sort_values(ascending=False).head(10).plot(kind='bar')


In [None]:
# Bar chart of the ten smallest coefficients (end of the descending ordering)
weights_desc = weights.sort_values(ascending=False)
plt.figure(figsize=(8,8))
plt.title('Least 10 Weights of Variables')
weights_desc.tail(10).plot(kind='bar')

In [None]:
# `scorer` is not used by this notebook and is no longer importable from
# sklearn.metrics, so it is dropped; each metric is imported once.
from sklearn.metrics import roc_auc_score, roc_curve

In [None]:

def plot_roc_curve(fpr, tpr):
    """Draw a ROC curve on the current axes and show the figure.

    fpr, tpr: false / true positive rates, plotted on the x and y axes.
    A dashed diagonal from (0, 0) to (1, 1) is drawn as a reference line.
    """
    axes = plt.gca()
    axes.plot(fpr, tpr, color='orange', label='ROC')
    axes.plot([0, 1], [0, 1], color='darkblue', linestyle='--')
    axes.set_xlabel('False Positive Rate')
    axes.set_ylabel('True Positive Rate')
    axes.set_title('Receiver Operating Characteristic (ROC) Curve')
    axes.legend()
    plt.show()

# Predicted probability of class 1 (churn) from the tuned logistic regression
probs = lr.predict_proba(X_test_std)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, probs)
auc = roc_auc_score(y_test, probs)
print('AUC: %.2f' % auc)
plot_roc_curve(fpr, tpr)

### Support Vector Machine

In [None]:
from sklearn.svm import SVC

In [None]:
svm = SVC(kernel='linear') 
svm.fit(X_train_std,y_train)

In [None]:
# Score the linear SVM on the held-out test set
y_svm_pred = svm.predict(X_test_std)
print('Accuracy: %.4f' % accuracy_score(y_test, y_svm_pred),
      confusion_matrix(y_test, y_svm_pred),
      classification_report(y_test, y_svm_pred),
      sep='\n')


### Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Search tree depths 1..48 under both split criteria, 10-fold CV scored on accuracy
clf_param = dict(max_depth=np.arange(1, 49), criterion=['gini', 'entropy'])
clf = GridSearchCV(DecisionTreeClassifier(), clf_param,
                   scoring='accuracy', cv=10, n_jobs=-1)

In [None]:
clf.fit(X_train_std, y_train)

In [None]:
# Score the tuned decision tree on the held-out test set.
# Predictions must come from `clf` (the decision-tree grid search); predicting
# with `svm` here would just repeat the SVM results under the Decision Tree heading.
y_clf_pred = clf.predict(X_test_std)
print('Accuracy: %.4f' % accuracy_score(y_test, y_clf_pred))
print(confusion_matrix(y_test, y_clf_pred))
print(classification_report(y_test,y_clf_pred))
# Mean cross-validated accuracy of the best grid candidate
print('Best score:', clf.best_score_)

In [None]:
clf.best_estimator_.get_params()

### Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# 1000-tree forest with at most 30 leaves per tree and a fixed seed.
# max_features="sqrt" is what "auto" meant for classifiers; the "auto" alias is
# no longer accepted by current scikit-learn.
rfl = RandomForestClassifier(n_estimators=1000 , oob_score = True, n_jobs = -1,
                                  random_state =50, max_features = "sqrt",
                                  max_leaf_nodes = 30)

In [None]:
rfl.fit(X_train_std, y_train)

In [None]:
# Score the random forest on the held-out test set
y_rfl_pred = rfl.predict(X_test_std)
print('Accuracy: %.4f' % accuracy_score(y_test, y_rfl_pred),
      confusion_matrix(y_test, y_rfl_pred),
      classification_report(y_test, y_rfl_pred),
      sep='\n')

In [None]:
# Smaller forest used only to read feature importances.
# - `min_impurity_split` is not passed: current scikit-learn no longer accepts it.
# - Arguments that merely restated library defaults are omitted.
# - random_state is fixed (same seed as `rfl`) so the importances are
#   reproducible from run to run.
rfl_plot = RandomForestClassifier(n_estimators=80, criterion='gini', max_depth=1500,
                                  max_features=3, min_samples_leaf=3, min_samples_split=8,
                                  random_state=50)
rfl_plot.fit(X_train_std, y_train)

In [None]:
importances = rfl_plot.feature_importances_ 
# Feature positions ordered from most to least important
indices = np.argsort(importances)[::-1]
# Label importances with the columns of X, the frame the model was trained on.
# df_dummies.columns[1:] still contains 'Churn' and skips the first feature,
# which attaches the wrong names to the leading features.
feat_labels = X.columns

# Ranked listing: rank, feature name (padded to 30 characters), importance
for rank, idx in enumerate(indices, start=1):
    print("%2d) %-*s %f" % (rank, 30,
                            feat_labels[idx],
                            importances[idx]))

In [None]:
plt.figure(figsize=(10,8))
plt.title('Top 10 Important Variables (Random Forest)')
# Random-forest importances labelled by feature name
weights = pd.Series(importances,
                 index=X.columns.values)
# Keep exactly ten bars so the plot matches its title. Ascending order puts the
# most important feature at the top of the horizontal bar chart.
weights.sort_values()[-10:].plot(kind = 'barh')

### ADA Boost

In [None]:
from sklearn.ensemble import AdaBoostClassifier

In [None]:
ada = AdaBoostClassifier()
ada.fit(X_train_std,y_train)


In [None]:
# Score the AdaBoost classifier on the held-out test set
y_ada_pred = ada.predict(X_test_std)
print('Accuracy: %.4f' % accuracy_score(y_test, y_ada_pred),
      confusion_matrix(y_test, y_ada_pred),
      classification_report(y_test, y_ada_pred),
      sep='\n')

### Gradient Boosting

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
gbc = GradientBoostingClassifier()
gbc.fit(X_train_std, y_train)

In [None]:
# Score the gradient boosting classifier on the held-out test set
y_gbc_pred = gbc.predict(X_test_std)
print('Accuracy: %.4f' % accuracy_score(y_test, y_gbc_pred),
      confusion_matrix(y_test, y_gbc_pred),
      classification_report(y_test, y_gbc_pred),
      sep='\n')


### Neural Network

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
# Multi-layer perceptron with library-default architecture.
# The seed is fixed so the random weight initialisation, and therefore the
# reported scores, are reproducible across runs (same seed as the train/test split).
nn = MLPClassifier(random_state=0)
nn.fit(X_train_std, y_train)

In [None]:
# Score the neural network on the held-out test set
y_nn_pred = nn.predict(X_test_std)
print('Accuracy: %.4f' % accuracy_score(y_test, y_nn_pred),
      confusion_matrix(y_test, y_nn_pred),
      classification_report(y_test, y_nn_pred),
      sep='\n')


### XGBoost

In [None]:
from xgboost import XGBClassifier

In [None]:
xgb = XGBClassifier()

In [None]:
xgb.fit(X_train_std, y_train)

In [None]:
# Score the XGBoost classifier on the held-out test set
y_xgb_pred = xgb.predict(X_test_std)
print('Accuracy: %.4f' % accuracy_score(y_test, y_xgb_pred),
      confusion_matrix(y_test, y_xgb_pred),
      classification_report(y_test, y_xgb_pred),
      sep='\n')


### Conclusion