In [None]:
import warnings
warnings.filterwarnings('ignore')

import eli5
from eli5.sklearn import PermutationImportance

import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')
import seaborn as sns
sns.set(palette='viridis_r',context='notebook',
        font='ubuntu', style='white')

from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split, StratifiedKFold
from sklearn.ensemble import StackingClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import plot_precision_recall_curve
from sklearn.metrics import plot_roc_curve
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [None]:
# Load the Telco churn table and discard the customerID column.
train = (
    pd.read_csv('../input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv')
    .drop(columns=['customerID'])
)
train.head()

## A Brief Exploratory Analysis
-----------

In [None]:
# Print the column dtypes and non-null counts of the loaded frame.
train.info()

In [None]:
# Show the distinct values of every column except the three continuous ones.
continuous = ['tenure', 'MonthlyCharges', 'TotalCharges']
for column in train.columns.drop(continuous):
    print(column, '-', train[column].unique())

In [None]:
# Summary statistics of the object-dtype columns, one row per column.
train.describe(include='object').transpose()

In [None]:
# Coerce TotalCharges to numeric in a single vectorised call (entries that cannot
# be parsed become NaN), then fill those gaps with the column median.
total_charges = pd.to_numeric(train['TotalCharges'], errors='coerce')
train['TotalCharges'] = total_charges.fillna(total_charges.median())

In [None]:
# Bar chart of the target: one bar per Churn value.
fig, ax = plt.subplots(figsize=(2, 6))
sns.countplot(x=train['Churn'], edgecolor='darkgray', alpha=.95, ax=ax)
sns.despine(fig=fig)

In [None]:
# NOTE(review): this cell repeats the TotalCharges clean-up already done in an
# earlier cell. It is idempotent (a numeric column with no NaN passes through
# unchanged), so it is kept, but written as one vectorised conversion.
total_charges = pd.to_numeric(train['TotalCharges'], errors='coerce')
train['TotalCharges'] = total_charges.fillna(total_charges.median())

In [None]:
# Pairwise relationships between the columns of `train`, coloured by Churn.
pair_grid = sns.pairplot(data=train, hue='Churn', markers='x')
plt.show()

In [None]:
# One box plot per continuous column, split by Churn.
sample = train[['tenure', 'MonthlyCharges', 'TotalCharges']]

fig, axes = plt.subplots(ncols=3, figsize=(8, 3))
for ax, column in zip(axes.flat, sample.columns):
    sns.boxplot(x='Churn', y=column, data=train, ax=ax)

sns.despine()
plt.tight_layout()

Unsurprisingly, **churned customers have a lower median tenure than non-churned ones**. However, their `MonthlyCharges` are noticeably higher, and they spend less money in total.

So, there could be some insights from tenure & monthly charges.

In [None]:
# NOTE(review): warnings are already imported and silenced in the first cell;
# these two lines are redundant but kept so the cell also works on its own.
import warnings
warnings.filterwarnings('ignore')

# Long format: one row per (customer, categorical variable), keeping Churn as id.
melted = (
    train
    .melt(
        id_vars=['Churn'],
        value_vars=['gender', 'SeniorCitizen', 'Contract',
                    'PhoneService', 'MultipleLines', 'TechSupport'],
    )
    .sort_values(['value', 'variable'])
    .rename(columns={'variable': 'var.'})
)

# Grid of count plots: one row per variable, one column per Churn value.
g = sns.FacetGrid(
    melted,
    row='var.',
    col='Churn',
    hue='Churn',
    aspect=1.15,
    sharex=False,
)
g.map(sns.countplot, 'value')

g.set_xticklabels(rotation=25)
plt.tight_layout()

It also looks like customers whose Tech Support value is "No internet service" belong to a risk group.

The type of contract could also be significant for Churn. It is interesting to look at month-to-month contracts more closely.

## Preprocessing the Data
---

To prepare the data for prediction, I convert the categorical values into encoded, scaled features in the following steps:

1. Scale the non-boolean values (`tenure`, `TotalCharges`, `MonthlyCharges`) with the help of `MinMaxScaler`
2. Encode categorical values with $>2$ choices as integer codes (with `LabelEncoder`).
3. Transform categorical values with a binary choice to `[0,1]`

In [None]:
train2 = train.copy()

lec = LabelEncoder()

# Integer-encode every categorical column. Each column is assigned back by name
# (not through a `.loc[:, a:b] = ...` slice) so that it takes the encoder's
# integer dtype rather than being written into the existing object-dtype column.
categorical_columns = [
    *train2.loc[:, 'gender':'Dependents'].columns,
    *train2.loc[:, 'PhoneService':'PaymentMethod'].columns,
    'Churn',  # encoded last so `lec` ends up fitted on the target
]
for column in categorical_columns:
    train2[column] = lec.fit_transform(train2[column])

# Rescale the continuous columns with MinMaxScaler.
# NOTE(review): the scaler is fitted on the full table, i.e. before the
# train/validation split made in a later cell, so validation rows influence
# the scaling. Consider fitting it on the training rows only.
mms = MinMaxScaler()
numeric_columns = ['tenure', 'MonthlyCharges', 'TotalCharges']
train2[numeric_columns] = mms.fit_transform(train2[numeric_columns])

In [None]:
# Model inputs (every column from gender through TotalCharges) and the target.
features = train2.loc[:, 'gender':'TotalCharges']
target = train2['Churn']

# Compute the correlation matrix once and take the tick labels from the matrix
# that is actually drawn, so labels always line up with the plotted cells even
# if corr() leaves a column out.
corr = train2.corr()

fig = plt.figure(figsize=(24, 12))
ax = sns.heatmap(corr, cmap='viridis_r',
      linecolor='black', lw=.65, annot=True, alpha=.95)
# Column names are truncated to 7 characters to keep the ticks readable.
ax.set_xticklabels([x[:7] for x in corr.columns])
ax.set_yticklabels([y[:7] for y in corr.index])

plt.show()

The dataset is imbalanced. That is why I wouldn't use the ROC AUC score as the primary metric (although I will still calculate it as an additional one).

For this task of churn classification **accuracy score** is used.

## Model Selection & Prediction
---------

In [None]:
# Hold out 20% of the rows for validation; the seed keeps the split repeatable.
X_train, X_valid, y_train, y_valid = train_test_split(
    features,
    target,
    test_size=.2,
    random_state=42,
)

# Base learners.
knn = KNeighborsClassifier(n_jobs=5)
lgc = LogisticRegression(random_state=42)
svc = SVC(degree=3, random_state=42)
rfc = RandomForestClassifier(min_samples_leaf=30, random_state=42)
gbc = GradientBoostingClassifier(random_state=42)

# (display name, estimator) pairs, in the order they are evaluated and stacked.
estimators = list({
    'Random Forest Classifier': rfc,
    'Support Vector Machines': svc,
    'Logistic Regression': lgc,
    'Gradient Boosting Classifier': gbc,
    'KNN Classifier': knn,
}.items())

In [None]:
def confusion_plot(label, y_valid, y_pred, ax=None):
    """Draw an annotated 2x2 confusion matrix as a heatmap.

    Each cell shows its group name (True Neg / False Pos / False Neg /
    True Pos), the raw count and the share of all samples.

    Parameters
    ----------
    label : str
        Name used in the plot title.
    y_valid, y_pred : array-like
        True and predicted labels, passed to ``confusion_matrix``. The
        annotations are reshaped to 2x2, so a binary problem is expected.
    ax : matplotlib Axes, optional
        Axes to draw on; defaults to the current axes.

    Returns
    -------
    The axes that was drawn on.
    """
    # Resolve the target axes once so that the heatmap, the axis switch-off and
    # the title all act on the same axes rather than on whichever is "current".
    if ax is None:
        ax = plt.gca()

    co_ma = confusion_matrix(y_valid, y_pred)
    flat = co_ma.flatten()

    groups = ['True Neg','False Pos','False Neg','True Pos']
    counts = [int(value) for value in flat]
    shares = ['{0:.2%}'.format(value) for value in flat / np.sum(co_ma)]
    labels = [f'{v1}\n{v2}\n{v3}' for v1, v2, v3 in
              zip(groups, counts, shares)]
    labels = np.asarray(labels).reshape(2,2)

    # fmt='' because the annotations are pre-formatted strings.
    sns.heatmap(co_ma, annot=labels, cmap='binary', alpha=.55, ax=ax,
             cbar=True, fmt='', linewidth=1, linecolor='black')
    ax.axis('off')
    ax.set_title(f'Confusion Matrix for {label}')
    return ax

                                            
def show_metrics(metrics):
    """Return ``metrics`` as a DataFrame.

    A list of per-model score dicts becomes one row per dict. A single dict of
    scalar scores, which pandas cannot turn into a frame directly, is wrapped
    in a list and becomes a one-row frame.
    """
    try:
        return pd.DataFrame(metrics)
    except (ValueError, TypeError):
        # Only constructor failures trigger the fallback (e.g. a dict of
        # scalars); anything else, such as KeyboardInterrupt, propagates.
        return pd.DataFrame([metrics])

In [None]:
# Fit each base model, plot its PR curve, ROC curve and confusion matrix on the
# validation split, and collect its scores in `metrics` (one dict per model).
metrics = []

for name, estimator in estimators:

    fig, axes = plt.subplots(ncols=3, figsize=(15,4))

    mod = estimator.fit(X_train, y_train)
    y_pred = mod.predict(X_valid)

    # ROC AUC needs continuous scores, not hard labels: use the positive-class
    # probability when the model offers it, otherwise the decision function
    # (e.g. SVC without probability=True).
    if hasattr(mod, 'predict_proba'):
        y_score = mod.predict_proba(X_valid)[:, 1]
    else:
        y_score = mod.decision_function(X_valid)

    # The curve helpers compute their own scores from the estimator. Predictions
    # must not be passed: the argument after `y` is `sample_weight`.
    # NOTE(review): plot_precision_recall_curve / plot_roc_curve were removed in
    # scikit-learn 1.2; PrecisionRecallDisplay / RocCurveDisplay.from_estimator
    # are the replacements if the environment is upgraded.
    plot_precision_recall_curve(mod, X_valid, y_valid,
                                ax=axes[0], color='black')
    plot_roc_curve(mod, X_valid, y_valid, ax=axes[1], color='black')

    axes[0].set_title(f'Precision-Recall Curve for {name}')
    axes[1].set_title(f'ROC Curve for {name}')
    axes[1].plot([1,0],[1,0], c='green',ls='--')  # chance diagonal
    confusion_plot(name, y_valid, y_pred, axes[2])
    # Only the two curve axes have labelled artists to put in a legend.
    for ax in axes[:2]:
        ax.legend(frameon=False)

    scores = {}
    scores['classifier'] = name
    scores['accuracy_score'] = accuracy_score(y_valid, y_pred)
    scores['roc_auc_score'] = roc_auc_score(y_valid, y_score)
    scores['f1_score'] = f1_score(y_valid, y_pred)

    plt.tight_layout()
    metrics.append(scores)

show_metrics(metrics)

In [None]:
# Hyper-parameters for the XGBoost meta-learner.
params = {'colsample_bytree': 0.6, 'gamma': 5, 'max_depth': 3,
          'min_child_weight': 5.0, 'subsample': 1.0}

# `cv` is not an XGBClassifier hyper-parameter, so it is not passed here; the
# 5-fold cross-validation is set on the stacking ensemble below.
xgb = XGBClassifier(random_state=42, **params, verbosity=0)

stacked_metrics = {}

# Stack the base models; the meta-learner is trained on their 5-fold
# cross-validated predictions.
reg = StackingClassifier(estimators=estimators,
                         final_estimator=xgb, cv=5)
cls_name = 'Stacking Classifier'

regmodel = reg.fit(X_train, y_train)
y_pred = reg.predict(X_valid)
# Positive-class probability, needed for a meaningful ROC AUC.
y_score = regmodel.predict_proba(X_valid)[:, 1]

fig, axes = plt.subplots(ncols=3, figsize=(14,4))

# The curve helpers compute their own scores from the estimator. Predictions
# must not be passed: the argument after `y` is `sample_weight`.
plot_precision_recall_curve(regmodel, X_valid, y_valid,
                            color='black', ax=axes[0])
plot_roc_curve(regmodel, X_valid, y_valid,
               color='black', ax=axes[1])

axes[0].set_title(f'Precision-Recall Curve for {cls_name}')
axes[1].plot([1,0],[1,0], c='green',ls='--')  # chance diagonal
axes[1].set_title(f'ROC Curve for {cls_name}')
confusion_plot(cls_name, y_valid, y_pred, axes[2])
# Only the two curve axes have labelled artists to put in a legend.
for ax in axes[:2]:
    ax.legend(frameon=False)

# Same metric keys as the per-model table so the two frames line up.
stacked_metrics['classifier'] = cls_name
stacked_metrics['accuracy_score'] = accuracy_score(y_valid, y_pred)
stacked_metrics['roc_auc_score'] = roc_auc_score(y_valid, y_score)
stacked_metrics['f1_score'] = f1_score(y_valid, y_pred)

plt.tight_layout()

pd.DataFrame([stacked_metrics])

<sub>1: these parameters for the XGBoost classifier were obtained from a grid search</sub>

In [None]:
# Permutation importance of each feature for the fitted stacked model,
# measured on the validation split.
perm = PermutationImportance(regmodel, random_state=17)
perm.fit(X_valid, y_valid)
eli5.show_weights(perm, feature_names=X_valid.columns.values)

$\implies$ The stacked model achieves an accuracy score of $0.811923$... after a grid search and can be used for predictions.