# Loading Libraries

In [1]:
import pandas as pd
import numpy as np

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import roc_curve, classification_report

# Reading the data

In [2]:
churn_data = pd.read_csv('telecom_churn.csv')
churn_data.head()

Unnamed: 0,Churn,AccountWeeks,ContractRenewal,DataPlan,DataUsage,CustServCalls,DayMins,DayCalls,MonthlyCharge,OverageFee,RoamMins
0,0,128,1,1,2.7,1,265.1,110,89.0,9.87,10.0
1,0,107,1,1,3.7,1,161.6,123,82.0,9.78,13.7
2,0,137,1,0,0.0,0,243.4,114,52.0,6.06,12.2
3,0,84,0,0,0.0,2,299.4,71,57.0,3.1,6.6
4,0,75,0,0,0.0,3,166.7,113,41.0,7.42,10.1


# Defining X and Y 

In [3]:
X = churn_data[['AccountWeeks', 'ContractRenewal', 'CustServCalls', 'DayMins', 'MonthlyCharge']]
Y = churn_data['Churn']

# Splitting the data

In [4]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, stratify = Y)

# SMOTE

In [5]:
X_SMOTE, Y_SMOTE = SMOTE().fit_resample(X_train, Y_train)

In [6]:
Y_SMOTE.value_counts(normalize = True)

1    0.5
0    0.5
Name: Churn, dtype: float64

# SVM

In [8]:
svm_md = SVC(C = 0.01, kernel = 'rbf', probability = True).fit(X_SMOTE, Y_SMOTE)

svm_pred = svm_md.predict_proba(X_test)[:, 1]

## Computing the ROC-curve
fpr, tpr, thresholds = roc_curve(Y_test, svm_pred)

## Finding the optimal threshold
svm_cutoff = pd.DataFrame({'False_Positive': fpr,
                           'True_Positive': tpr,
                           'Cutoff': thresholds})

svm_cutoff['True_Positive_minus_1'] = svm_cutoff['True_Positive'] - 1
svm_cutoff['Distance_to_perfect_model'] = np.sqrt(svm_cutoff['False_Positive']**2 + svm_cutoff['True_Positive_minus_1']**2)

## Sorting based on distance to perfect model 
svm_cutoff = svm_cutoff.sort_values(by = 'Distance_to_perfect_model').reset_index(drop = True)
svm_cutoff.head()

Unnamed: 0,False_Positive,True_Positive,Cutoff,True_Positive_minus_1,Distance_to_perfect_model
0,0.2,0.56701,0.542995,-0.43299,0.476949
1,0.184211,0.556701,0.550674,-0.443299,0.480049
2,0.173684,0.546392,0.556568,-0.453608,0.485723
3,0.2,0.556701,0.544665,-0.443299,0.486327
4,0.15614,0.536082,0.566286,-0.463918,0.489489


In [9]:
# Changing likelihoods to labels
svm_label = np.where(svm_pred < 0.542995, 0, 1)

# Computing the classification_report
print(classification_report(Y_test, svm_label))

              precision    recall  f1-score   support

           0       0.91      0.80      0.85       570
           1       0.32      0.56      0.41        97

    accuracy                           0.76       667
   macro avg       0.62      0.68      0.63       667
weighted avg       0.83      0.76      0.79       667



# MLP

In [13]:
mlp_md = MLPClassifier(hidden_layer_sizes = (8, 16, 16, 8), 
                       activation = 'logistic',
                       max_iter = 1000).fit(X_SMOTE, Y_SMOTE)

mlp_pred = mlp_md.predict_proba(X_test)[:, 1]

## Computing the ROC-curve
fpr, tpr, thresholds = roc_curve(Y_test, mlp_pred)

## Finding the optimal threshold
mlp_cutoff = pd.DataFrame({'False_Positive': fpr,
                           'True_Positive': tpr,
                           'Cutoff': thresholds})

mlp_cutoff['True_Positive_minus_1'] = mlp_cutoff['True_Positive'] - 1
mlp_cutoff['Distance_to_perfect_model'] = np.sqrt(mlp_cutoff['False_Positive']**2 + mlp_cutoff['True_Positive_minus_1']**2)

## Sorting based on distance to perfect model 
mlp_cutoff = mlp_cutoff.sort_values(by = 'Distance_to_perfect_model').reset_index(drop = True)
mlp_cutoff.head()

Unnamed: 0,False_Positive,True_Positive,Cutoff,True_Positive_minus_1,Distance_to_perfect_model
0,0.136842,0.835052,0.637269,-0.164948,0.214322
1,0.159649,0.85567,0.586778,-0.14433,0.215218
2,0.12807,0.824742,0.670881,-0.175258,0.217065
3,0.152632,0.845361,0.608696,-0.154639,0.217278
4,0.159649,0.845361,0.591877,-0.154639,0.222264


In [14]:
# Changing likelihoods to labels
mlp_label = np.where(mlp_pred < 0.637269, 0, 1)

# Computing the classification_report
print(classification_report(Y_test, mlp_label))

              precision    recall  f1-score   support

           0       0.97      0.86      0.91       570
           1       0.51      0.84      0.63        97

    accuracy                           0.86       667
   macro avg       0.74      0.85      0.77       667
weighted avg       0.90      0.86      0.87       667



In [None]:
# From the above results, I would use the MLPClassifier model to predict Churn.