# Telco Customer Churn - Multilayer Perceptron Model & Random Forest Classification

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import OrdinalEncoder

In [None]:
# Read the Telco churn CSV into the working DataFrame and display it.
Dados = pd.read_csv(
    r'../input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv',
    sep=',',
    encoding="ISO-8859-1",
)
Dados

# Data Evaluation, Preparation and Analysis

In [None]:
# Show pandas' descriptive statistics for the freshly loaded data.
Dados.describe()

In [None]:
# List the column labels of the loaded data.
Dados.columns

In [None]:
# Columns inspected level by level (this list is reused by the plotting cell below).
columns = [
    'SeniorCitizen', 'gender', 'Partner', 'Dependents',
    'PhoneService', 'MultipleLines', 'InternetService',
    'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
    'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
    'PaymentMethod', 'Churn',
]
# Print, for every listed column, how many customerID values fall in each level.
for col in columns:
    print(Dados.groupby(col)['customerID'].count().to_frame('Count'))

In [None]:
# Numeric target: 1 when Churn is 'Yes', 0 otherwise.
Dados['Churn_cat'] = Dados['Churn'].eq('Yes').astype(int)

In [None]:
# Sum of the 0/1 churn flag, i.e. the number of rows flagged as churned.
Dados['Churn_cat'].sum()

In [None]:
# Grid of bar charts, one panel per entry of `columns`: each bar is the
# aggregated Churn_cat value (seaborn's default estimator is the mean, so the
# bar height is the churn rate) for one level of that column.
fig = plt.figure(figsize=(20, 20))
for j, i in enumerate(columns, start=1):
    # Panels are placed in a 5x5 grid, filled in the order of `columns`.
    ax = fig.add_subplot(5, 5, j)
    # Draw directly on this panel's axes. No extra plt.figure() call is made
    # here: opening a new figure per iteration only produced empty figures.
    sns.barplot(x=i, y='Churn_cat', data=Dados, ci=None, ax=ax)
# Adjust spacing once, after every panel has been created.
fig.tight_layout()
plt.show()

In [None]:
# Aggregated churn flag per payment method, without error bars.
sns.barplot(data=Dados, x='PaymentMethod', y='Churn_cat', ci=None)

In [None]:
# Histogram of tenure with bin edges every 5 units, coloured by churn flag.
sns.histplot(data=Dados, x='tenure', hue='Churn_cat', bins=range(0, 80, 5))

In [None]:
# Histogram of MonthlyCharges with bin edges every 5 units, coloured by churn flag.
sns.histplot(data=Dados, x='MonthlyCharges', hue='Churn_cat', bins=range(0, 130, 5))

In [None]:
# Derive 0/1 indicator columns from selected categorical variables.
# Each entry maps the new column name to (source column, value encoded as 1);
# every other value of the source column is encoded as 0.
binary_flag_specs = {
    'Partner_cat': ('Partner', 'Yes'),
    'Dependents_cat': ('Dependents', 'Yes'),
    'MultipleLines_cat': ('MultipleLines', 'Yes'),
    'InternetService_cat': ('InternetService', 'Fiber optic'),
    'NoOnlineSecurity_cat': ('OnlineSecurity', 'No'),
    'OnlineBackup_cat': ('OnlineBackup', 'Yes'),
    'NoDeviceProtection_cat': ('DeviceProtection', 'No'),
    'NoTechSupport_cat': ('TechSupport', 'No'),
    'MtoMContract_cat': ('Contract', 'Month-to-month'),
    'PaperlessBilling_cat': ('PaperlessBilling', 'Yes'),
    'PaymMethElectCheck_cat': ('PaymentMethod', 'Electronic check'),
}
# Dict order is insertion order, so the new columns are appended in the order above.
for flag_name, (source_col, positive_value) in binary_flag_specs.items():
    Dados[flag_name] = Dados[source_col].eq(positive_value).astype(int)

In [None]:
# Column labels as a plain list, now including the derived *_cat indicators.
list(Dados.columns)

In [None]:
# Descriptive statistics again, after the indicator columns were added.
Dados.describe()

In [None]:
# Keep only the numeric columns used for correlation analysis and modelling
# (the target Churn_cat included).
Dados2 = Dados[[
    'SeniorCitizen', 'tenure', 'MonthlyCharges', 'Churn_cat', 'Partner_cat',
    'Dependents_cat', 'MultipleLines_cat', 'InternetService_cat', 'NoOnlineSecurity_cat',
    'OnlineBackup_cat', 'NoDeviceProtection_cat', 'NoTechSupport_cat', 'MtoMContract_cat',
    'PaperlessBilling_cat', 'PaymMethElectCheck_cat',
]]

In [None]:
# Annotated heatmap of the pairwise correlations between the Dados2 columns,
# with the colour scale fixed to the [-1, 1] range.
plt.figure(figsize=(18, 8))
sns.heatmap(data=Dados2.corr(), annot=True, cmap='vlag', vmin=-1, vmax=1)
plt.title('Correlation Map', pad=12, fontdict={'fontsize': 12});

In [None]:
# Scatter of monthly charges against tenure, coloured by churn flag.
plt.figure(figsize=(10, 6))
sns.scatterplot(data=Dados, x='tenure', y='MonthlyCharges', hue='Churn_cat')

# Balancing dataset for modeling

In [None]:
# Balance the classes by undersampling:
#   Dados3Y - every row with Churn_cat == 1
#   Dados3X - a random sample of Churn_cat == 0 rows, as many as Dados3Y has
#             (fixed seed so the sample is reproducible)
Dados3Y = Dados2.loc[Dados2['Churn_cat'].eq(1)]
Dados3X = Dados2.loc[Dados2['Churn_cat'].eq(0)].sample(random_state=1, n=len(Dados3Y))

In [None]:
# Number of rows in the Churn_cat == 1 subset.
len(Dados3Y)

In [None]:
# Number of rows in the sampled Churn_cat == 0 subset.
len(Dados3X)

In [None]:
# Stack the two subsets row-wise (Dados3X rows first, then Dados3Y); original
# index labels are kept.
Dados3 = pd.concat([Dados3X,Dados3Y])

In [None]:
# Display the balanced dataset.
Dados3

In [None]:
# Descriptive statistics of the balanced dataset.
Dados3.describe()

# Preparing dependent / independent variables and splitting training / testing data

In [None]:
# Feature matrix: every modelling column of the balanced data except the
# target Churn_cat.
DadosX = Dados3[[
    'SeniorCitizen', 'tenure', 'MonthlyCharges', 'Partner_cat',
    'Dependents_cat', 'MultipleLines_cat', 'InternetService_cat', 'NoOnlineSecurity_cat',
    'OnlineBackup_cat', 'NoDeviceProtection_cat', 'NoTechSupport_cat', 'MtoMContract_cat',
    'PaperlessBilling_cat', 'PaymMethElectCheck_cat',
]]

In [None]:
# Target vector: the 0/1 churn flag of the balanced data.
DadosY = Dados3['Churn_cat']

In [None]:
# Display the feature matrix.
DadosX

In [None]:
# Display the target vector.
DadosY

In [None]:
# Hold out 30% of the balanced data for testing; the seed is fixed so the
# partition is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    DadosX,
    DadosY,
    test_size=0.3,
    random_state=101,
)

In [None]:
# Descriptive statistics of the training features.
X_train.describe()

In [None]:
# Descriptive statistics of the test features, for comparison with the training split.
X_test.describe()

# Model Generation and Evaluation

In [None]:
# Multi MLP Generator - 1 Layer
# Sweep the width of a single hidden layer. For every candidate width the
# network is fitted on the training split and its accuracy is recorded on both
# splits; the best test accuracy seen so far is tracked and printed each step.
# NOTE(review): the "best" width is chosen by test-split accuracy, so that
# accuracy is an optimistic estimate - consider a validation split or
# cross-validation for the selection.
training_accuracy = []
testing_accuracy = []
Layer1 = range(10, 110, 10)
LayersComb = len(Layer1)
Step = 0
score = 0
# Bound up front so the progress print cannot hit an undefined name.
best_Layer1 = None
best_Step = 0

for i in Layer1:
    # The target is a 0/1 label, so a classifier is used: its predict() returns
    # class labels directly, whereas rounding a regressor's unbounded output
    # can produce values outside {0, 1}.
    mlp = MLPClassifier(hidden_layer_sizes=(i,), activation="logistic", random_state=1, max_iter=2000).fit(X_train, Y_train)
    Y_pred_train = mlp.predict(X_train)
    training_accuracy.append(accuracy_score(Y_train, Y_pred_train))
    Y_pred_test = mlp.predict(X_test)
    acc_score = accuracy_score(Y_test, Y_pred_test)
    testing_accuracy.append(acc_score)
    Step = Step + 1
    if score < acc_score:
        score = acc_score
        best_Layer1 = i
        best_Step = Step

    # Built-in round() works for both NumPy scalars and plain Python numbers;
    # the .round() method only exists on the former.
    print('Step ', Step, ' of ', LayersComb, ' Layer1: ', i,' Accuracy', round(acc_score, 4))
    print('Best Accuracy Score', round(score, 4), ' Best Layer1: ', best_Layer1, ' do Step:', best_Step)

In [None]:
# Plot the two accuracy lists (training and testing) against sweep step.
sns.lineplot(data=[training_accuracy,testing_accuracy])

In [None]:
# Selected Model - 1 Layer
# Width 40 is hard-coded here - presumably picked from the sweep; TODO confirm.
# A classifier is used because the target is a 0/1 label.
mlp_selected = MLPClassifier(hidden_layer_sizes=(40,), activation="logistic", random_state=1, max_iter=2000)
# Fit on the training split only: fitting on the test split would make every
# test-set metric computed afterwards meaningless.
mlp_selected.fit(X_train, Y_train)

In [None]:
# Predict with the selected 1-layer model on both splits (rounded to whole
# numbers), then record the accuracies in the running lists.
Y_pred_train, Y_pred_test = (
    mlp_selected.predict(features).round() for features in (X_train, X_test)
)
acc_score = accuracy_score(Y_test, Y_pred_test)
training_accuracy.append(accuracy_score(Y_train, Y_pred_train))
testing_accuracy.append(acc_score)

In [None]:
# Confusion matrix of the selected 1-layer model on the test split,
# shown as an annotated heatmap (rows: actual class, columns: predicted class).
cm = confusion_matrix(Y_test, Y_pred_test)
conf_matrix = pd.DataFrame(
    cm,
    index=['Actual:0', 'Actual:1'],
    columns=['Predicted:0', 'Predicted:1'],
)
plt.figure(figsize=(8, 5))
sns.heatmap(conf_matrix, cmap="YlGnBu", fmt='d', annot=True);

In [None]:
# 1 Layer
# Text report of the per-class metrics for the current test predictions.
print(classification_report(Y_test,Y_pred_test)) 

In [None]:
# Multi MLP Generator - 2 Layers
# Grid sweep over the widths of two hidden layers. Every (Layer1, Layer2)
# combination is fitted on the training split and its accuracy is recorded on
# both splits; the best test accuracy seen so far is tracked and printed.
# NOTE(review): the "best" combination is chosen by test-split accuracy, so
# that accuracy is an optimistic estimate - consider a validation split or
# cross-validation for the selection.
training_accuracy = []
testing_accuracy = []
Layer1 = range(10, 90, 20)
Layer2 = range(10, 90, 20)
LayersComb = len(Layer1) * len(Layer2)
Step = 0
score = 0
# Bound up front so the progress print cannot hit an undefined name.
best_Layer1 = None
best_Layer2 = None
best_Step = 0

for i in Layer1:
    for j in Layer2:
        # The target is a 0/1 label, so a classifier is used: its predict()
        # returns class labels directly, whereas rounding a regressor's
        # unbounded output can produce values outside {0, 1}.
        mlp = MLPClassifier(hidden_layer_sizes=(i, j), activation="logistic", random_state=1, max_iter=2000).fit(X_train, Y_train)
        Y_pred_train = mlp.predict(X_train)
        training_accuracy.append(accuracy_score(Y_train, Y_pred_train))
        Y_pred_test = mlp.predict(X_test)
        acc_score = accuracy_score(Y_test, Y_pred_test)
        testing_accuracy.append(acc_score)
        Step = Step + 1
        if score < acc_score:
            score = acc_score
            best_Layer1 = i
            best_Layer2 = j
            best_Step = Step

        # Built-in round() works for both NumPy scalars and plain Python
        # numbers; the .round() method only exists on the former.
        print('Step ', Step, ' of ', LayersComb, ' Layer1: ', i, ' Layer2: ', j, ' Accuracy', round(acc_score, 4))
        print('Best Accuracy Score', round(score, 4), ' Best Layer1: ', best_Layer1, ' Best Layer2: ', best_Layer2, ' do Step:', best_Step)

In [None]:
# 2 Layers
# Plot the two accuracy lists (training and testing) against sweep step.
sns.lineplot(data=[training_accuracy,testing_accuracy])

In [None]:
# Selected Model - 2 Layers
# Widths (30, 30) are hard-coded here - presumably picked from the sweep; TODO confirm.
# A classifier is used because the target is a 0/1 label.
mlp_selected = MLPClassifier(hidden_layer_sizes=(30, 30), activation="logistic", random_state=1, max_iter=2000)
# Fit on the training split only: fitting on the test split would make every
# test-set metric computed afterwards meaningless.
mlp_selected.fit(X_train, Y_train)

In [None]:
# Predict with the selected 2-layer model on both splits (rounded to whole
# numbers), then record the accuracies in the running lists.
Y_pred_train, Y_pred_test = (
    mlp_selected.predict(features).round() for features in (X_train, X_test)
)
acc_score = accuracy_score(Y_test, Y_pred_test)
training_accuracy.append(accuracy_score(Y_train, Y_pred_train))
testing_accuracy.append(acc_score)

In [None]:
# Confusion matrix of the selected 2-layer model on the test split,
# shown as an annotated heatmap (rows: actual class, columns: predicted class).
cm = confusion_matrix(Y_test, Y_pred_test)
conf_matrix = pd.DataFrame(
    cm,
    index=['Actual:0', 'Actual:1'],
    columns=['Predicted:0', 'Predicted:1'],
)
plt.figure(figsize=(8, 5))
sns.heatmap(conf_matrix, cmap="YlGnBu", fmt='d', annot=True);

In [None]:
# 2 Layers
# Text report of the per-class metrics for the current test predictions.
print(classification_report(Y_test,Y_pred_test))

# Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest with the entropy split criterion, fitted on the training split.
rfc = RandomForestClassifier(
    random_state=42,
    criterion='entropy',
)
rfc.fit(X_train, Y_train)

In [None]:
# Predict with the fitted forest on both splits, then record the accuracies
# in the running lists.
Y_pred_train, Y_pred_test = (
    rfc.predict(features).round() for features in (X_train, X_test)
)
acc_score = accuracy_score(Y_test, Y_pred_test)
training_accuracy.append(accuracy_score(Y_train, Y_pred_train))
testing_accuracy.append(acc_score)

In [None]:
# Confusion matrix of the random forest on the test split,
# shown as an annotated heatmap (rows: actual class, columns: predicted class).
cm = confusion_matrix(Y_test, Y_pred_test)
conf_matrix = pd.DataFrame(
    cm,
    index=['Actual:0', 'Actual:1'],
    columns=['Predicted:0', 'Predicted:1'],
)
plt.figure(figsize=(8, 5))
sns.heatmap(conf_matrix, cmap="YlGnBu", fmt='d', annot=True);

In [None]:
# Random Forest Classifier
# Text report of the per-class metrics for the current test predictions.
print(classification_report(Y_test,Y_pred_test))

In [None]:
# Random forest with the Gini split criterion, fitted on the training split.
rfc = RandomForestClassifier(
    random_state=42,
    criterion='gini',
)
rfc.fit(X_train, Y_train)

In [None]:
# Predict with the fitted forest on both splits, then record the accuracies
# in the running lists.
Y_pred_train, Y_pred_test = (
    rfc.predict(features).round() for features in (X_train, X_test)
)
acc_score = accuracy_score(Y_test, Y_pred_test)
training_accuracy.append(accuracy_score(Y_train, Y_pred_train))
testing_accuracy.append(acc_score)

In [None]:
# Confusion matrix of the random forest on the test split,
# shown as an annotated heatmap (rows: actual class, columns: predicted class).
cm = confusion_matrix(Y_test, Y_pred_test)
conf_matrix = pd.DataFrame(
    cm,
    index=['Actual:0', 'Actual:1'],
    columns=['Predicted:0', 'Predicted:1'],
)
plt.figure(figsize=(8, 5))
sns.heatmap(conf_matrix, cmap="YlGnBu", fmt='d', annot=True);

In [None]:
# Random Forest Classifier
# Text report of the per-class metrics for the current test predictions.
print(classification_report(Y_test,Y_pred_test))