# Loading Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import roc_curve, classification_report

# Reading the data

In [2]:
churn_data = pd.read_csv('telecom_churn.csv')
churn_data.head()

Unnamed: 0,Churn,AccountWeeks,ContractRenewal,DataPlan,DataUsage,CustServCalls,DayMins,DayCalls,MonthlyCharge,OverageFee,RoamMins
0,0,128,1,1,2.7,1,265.1,110,89.0,9.87,10.0
1,0,107,1,1,3.7,1,161.6,123,82.0,9.78,13.7
2,0,137,1,0,0.0,0,243.4,114,52.0,6.06,12.2
3,0,84,0,0,0.0,2,299.4,71,57.0,3.1,6.6
4,0,75,0,0,0.0,3,166.7,113,41.0,7.42,10.1


In [4]:
churn_data['Churn'].value_counts(normalize = True)

0    0.855086
1    0.144914
Name: Churn, dtype: float64

In [5]:
X = churn_data[['AccountWeeks', 'ContractRenewal', 'CustServCalls', 'DayMins', 'MonthlyCharge']]
Y = churn_data['Churn']

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, stratify = Y)

# Random Forest

In [9]:
RF_md = RandomForestClassifier(n_estimators = 500, 
                               max_depth = 3).fit(X_train, Y_train)

RF_pred = RF_md.predict_proba(X_test)[:, 1]

## Computing the ROC-curve
fpr, tpr, thresholds = roc_curve(Y_test, RF_pred)

## Finding the optimal threshold
RF_cutoff = pd.DataFrame({'False_Positive': fpr,
                          'True_Positive': tpr,
                          'Cutoff': thresholds})

RF_cutoff['True_Positive_minus_1'] = RF_cutoff['True_Positive'] - 1
RF_cutoff['Distance_to_perfect_model'] = np.sqrt(RF_cutoff['False_Positive']**2 + RF_cutoff['True_Positive_minus_1']**2)

## Sorting based on distance to perfect model 
RF_cutoff = RF_cutoff.sort_values(by = 'Distance_to_perfect_model').reset_index(drop = True)
RF_cutoff.head()

Unnamed: 0,False_Positive,True_Positive,Cutoff,True_Positive_minus_1,Distance_to_perfect_model
0,0.138596,0.886598,0.150428,-0.113402,0.179078
1,0.136842,0.865979,0.155983,-0.134021,0.191539
2,0.138596,0.865979,0.155069,-0.134021,0.192797
3,0.177193,0.896907,0.11676,-0.103093,0.205001
4,0.177193,0.886598,0.118149,-0.113402,0.210374


In [10]:
# Changing likelihoods to labels
RF_pred_label = np.where(RF_pred < 0.150428, 0, 1)

## Classification report
print(classification_report(Y_test, RF_pred_label))

              precision    recall  f1-score   support

           0       0.98      0.86      0.92       570
           1       0.52      0.89      0.66        97

    accuracy                           0.87       667
   macro avg       0.75      0.87      0.79       667
weighted avg       0.91      0.87      0.88       667



# Gradient Boosting

In [11]:
GB_md = GradientBoostingClassifier(n_estimators = 500, 
                                   max_depth = 3,
                                   learning_rate = 0.01).fit(X_train, Y_train)

GB_pred = GB_md.predict_proba(X_test)[:, 1]

## Computing the ROC-curve
fpr, tpr, thresholds = roc_curve(Y_test, GB_pred)

## Finding the optimal threshold
GB_cutoff = pd.DataFrame({'False_Positive': fpr,
                          'True_Positive': tpr,
                          'Cutoff': thresholds})

GB_cutoff['True_Positive_minus_1'] = GB_cutoff['True_Positive'] - 1
GB_cutoff['Distance_to_perfect_model'] = np.sqrt(GB_cutoff['False_Positive']**2 + GB_cutoff['True_Positive_minus_1']**2)

## Sorting based on distance to perfect model 
GB_cutoff = GB_cutoff.sort_values(by = 'Distance_to_perfect_model').reset_index(drop = True)
GB_cutoff.head()

Unnamed: 0,False_Positive,True_Positive,Cutoff,True_Positive_minus_1,Distance_to_perfect_model
0,0.098246,0.876289,0.230591,-0.123711,0.157977
1,0.087719,0.865979,0.247903,-0.134021,0.160176
2,0.115789,0.886598,0.116273,-0.113402,0.162072
3,0.096491,0.865979,0.235602,-0.134021,0.165143
4,0.110526,0.876289,0.132474,-0.123711,0.165893


In [12]:
# Changing likelihoods to labels
GB_pred_label = np.where(GB_pred < 0.230591, 0, 1)

## Classification report
print(classification_report(Y_test, GB_pred_label))

              precision    recall  f1-score   support

           0       0.98      0.90      0.94       570
           1       0.60      0.87      0.71        97

    accuracy                           0.90       667
   macro avg       0.79      0.88      0.82       667
weighted avg       0.92      0.90      0.91       667



In [None]:
# From the above results, I would use Gradient Boosting to predict Churn.