In [191]:
random_state = 456

import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import math

from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from tensorflow import keras
from sklearn.metrics import confusion_matrix, classification_report

# Dataset

In [2]:
pd.set_option('display.max_column',None)

In [4]:
# loading dataset
df = pd.read_csv('./datasets/Telco_Customer_Churn.csv')
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [5]:
# checking for null values
df.isnull().sum().sum()

0

In [6]:
# info
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [7]:
# droping unwanted columns
df2 = df.drop('customerID', axis = 1)

# dropping records not having TotalCharges
df2 = df2.drop(df2[df2.TotalCharges == ' '].index)

df2 = df2.reset_index().drop('index', axis = 1)
df2.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [9]:
obj_cols = df2.select_dtypes('O').columns

binary_cols = []
multiclass_cols = []

for i in obj_cols:
    if len(df[i].unique())>2:
        multiclass_cols.append(i)
    else:
        binary_cols.append(i)

# converting catagorical variables to numerical
gender_map = {'Female':0, 'Male':1}
yes_no_map = {'Yes':1,'No':0}
MultipleLines_map = {'Yes':1,'No':0,'No phone service': -1}
yes_no_n_internetservice_map = {'Yes':1,'No':0,'No internet service': -1}
contract_map = {'Month-to-month':0,'One year':1,'Two year':2}

df3 = df2.copy()

df3.gender = df2.gender.map(gender_map)
df3.MultipleLines = df2.MultipleLines.map(MultipleLines_map)
df3.Contract = df2.Contract.map(contract_map)
df3.TotalCharges  = [float(i) for i in df2.TotalCharges]

for i in binary_cols[1:]:
    df3[i] = df2[i].map(yes_no_map)

for i in multiclass_cols[2:8]:
    df3[i] = df2[i].map(yes_no_n_internetservice_map)

In [10]:
cols = ['InternetService_FiberOptic','InternetService_No','PaymentMethod_CreditCard','PaymentMethod_ElectronicCheck','PaymentMethod_MailedCheck']

df4 = df3.drop(['InternetService','PaymentMethod'],axis = 1)

ohe = OneHotEncoder(sparse=False, drop='first')
dummy_df = pd.DataFrame(ohe.fit_transform(df3[['InternetService','PaymentMethod']]),columns=cols)
df4 = df4.join(dummy_df)
df4.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,InternetService_FiberOptic,InternetService_No,PaymentMethod_CreditCard,PaymentMethod_ElectronicCheck,PaymentMethod_MailedCheck
0,0,0,1,0,1,0,-1,0,1,0,0,0,0,0,1,29.85,29.85,0,0.0,0.0,0.0,1.0,0.0
1,1,0,0,0,34,1,0,1,0,1,0,0,0,1,0,56.95,1889.5,0,0.0,0.0,0.0,0.0,1.0
2,1,0,0,0,2,1,0,1,1,0,0,0,0,0,1,53.85,108.15,1,0.0,0.0,0.0,0.0,1.0
3,1,0,0,0,45,0,-1,1,0,1,1,0,0,1,0,42.3,1840.75,0,0.0,0.0,0.0,0.0,0.0
4,0,0,0,0,2,1,0,0,0,0,0,0,0,0,1,70.7,151.65,1,1.0,0.0,0.0,1.0,0.0


In [13]:
df4.Churn.value_counts()

0    5163
1    1869
Name: Churn, dtype: int64

In [15]:
#  We have thrice the people staying than those who are leaving
# The dataset is imbalanced
5163/1869

2.7624398073836276

# Model

In [37]:
def ANN(X_train,y_train,epochs,batch_size,tb_callback):
    model = keras.Sequential([
        keras.layers.Dense(22,input_shape = (22,),activation = 'relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(12,input_shape = (22,),activation = 'relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(22,input_shape = (10,),activation = 'relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(22,input_shape = (10,),activation = 'relu'),
        keras.layers.Dense(1,activation = 'sigmoid')
    ])

    model.compile(
        optimizer='adam',
        loss = 'binary_crossentropy',
        metrics = ['accuracy']
    )
    
    model.fit(X_train,y_train,epochs=epochs,batch_size = batch_size,callbacks=tb_callback)
    
    return model

# Training without resolving imbalance

In [20]:
# train test split

X = df4.drop('Churn', axis = 1)
y = df4.Churn
X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.8,random_state=random_state)

# Normalization

scaler = MinMaxScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_train.columns)

In [39]:
# training model
tb_callback = keras.callbacks.TensorBoard(log_dir = './logs/customer_churn_unbalanced')

model1 = ANN(X_train_scaled,y_train,epochs=100,batch_size=1000,tb_callback=tb_callback)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [41]:
y_pred = [1 if i>0.5 else 0 for i in model1.predict(X_test_scaled).reshape((1407,))]
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.86      0.87      0.86      1032
           1       0.63      0.61      0.62       375

    accuracy                           0.80      1407
   macro avg       0.74      0.74      0.74      1407
weighted avg       0.80      0.80      0.80      1407



- accuracy = 0.80
- f1_churn_0 = 0.86
- f1_churn_1 = 0.62 -- matters most, we need to increase its f1 score

# Undersampling Majority class

In [60]:
df4.Churn.value_counts()

0    5163
1    1869
Name: Churn, dtype: int64

In [61]:
df4_churn_0 = df4[df4.Churn == 0].sample(1869) # undersampling randomly
df4_churn_1 = df4[df4.Churn == 1]

In [62]:
df4_churn_0.shape, df4_churn_1.shape

((1869, 23), (1869, 23))

In [63]:
df5 = df4_churn_0.append(df4_churn_1)
df5.shape

  df5 = df4_churn_0.append(df4_churn_1)


(3738, 23)

In [64]:
# train test split

X = df5.drop('Churn', axis = 1)
y = df5.Churn
X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.8,random_state=random_state)

# Normalization

scaler = MinMaxScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_train.columns)

In [65]:
# training model
tb_callback = keras.callbacks.TensorBoard(log_dir = './logs/customer_churn_undersampling')

model2 = ANN(X_train_scaled,y_train,epochs=100,batch_size=1000,tb_callback=tb_callback)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [66]:
y_pred = [1 if i>0.5 else 0 for i in model2.predict(X_test_scaled).reshape((748,))]
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.79      0.69      0.74       392
           1       0.70      0.80      0.75       356

    accuracy                           0.74       748
   macro avg       0.75      0.74      0.74       748
weighted avg       0.75      0.74      0.74       748



- accuracy = 0.74
- f1_churn_0 = 0.78
- f1_churn_1 = 0.75 -- increase in f1 score by 0.13 in expense of class 0

# Oversampling minority class

In [67]:
df4.Churn.value_counts()

0    5163
1    1869
Name: Churn, dtype: int64

In [68]:
df4_churn_0 = df4[df4.Churn == 0]
df4_churn_1 = df4[df4.Churn == 1].sample(5163, replace=True)  # Oversampling randomly

In [69]:
df4_churn_0.shape, df4_churn_1.shape

((5163, 23), (5163, 23))

In [70]:
df5 = df4_churn_0.append(df4_churn_1)
df5.shape

  df5 = df4_churn_0.append(df4_churn_1)


(10326, 23)

In [71]:
# train test split

X = df5.drop('Churn', axis = 1)
y = df5.Churn
X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.8,random_state=random_state)

# Normalization

scaler = MinMaxScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_train.columns)

In [72]:
# training model
tb_callback = keras.callbacks.TensorBoard(log_dir = './logs/customer_churn_oversampling')

model3 = ANN(X_train_scaled,y_train,epochs=100,batch_size=1000,tb_callback=tb_callback)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [74]:
y_pred = [1 if i>0.5 else 0 for i in model3.predict(X_test_scaled).reshape((2066,))]
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.78      0.76      0.77      1062
           1       0.75      0.77      0.76      1004

    accuracy                           0.77      2066
   macro avg       0.77      0.77      0.77      2066
weighted avg       0.77      0.77      0.77      2066



- accuracy = 0.77
- f1_churn_0 = 0.77
- f1_churn_1 = 0.76 -- both accuracy and f1 score of class 1 increased

# Oversampling : SMOTE

In [93]:
# importing SMOTE

from imblearn.over_sampling import SMOTE

In [94]:
df4.Churn.value_counts()

0    5163
1    1869
Name: Churn, dtype: int64

In [95]:
X = df4.drop('Churn', axis = 1)
y = df4.Churn

sampling = SMOTE(sampling_strategy='minority')
X_sampled, y_sampled = sampling.fit_sample(X,y)

In [96]:
y_sampled.value_counts()

0    5163
1    5163
Name: Churn, dtype: int64

In [97]:
# train test split

X_train, X_test, y_train, y_test = train_test_split(X_sampled,y_sampled,train_size=0.8,random_state=random_state)

# Normalization

scaler = MinMaxScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_train.columns)

In [98]:
# training model
tb_callback = keras.callbacks.TensorBoard(log_dir = './logs/customer_churn_SMOTE')

model4 = ANN(X_train_scaled,y_train,epochs=100,batch_size=1000,tb_callback=tb_callback)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [83]:
y_pred = [1 if i>0.5 else 0 for i in model4.predict(X_test_scaled).reshape((2066,))]
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.81      0.79      0.80      1024
           1       0.80      0.82      0.81      1042

    accuracy                           0.81      2066
   macro avg       0.81      0.81      0.81      2066
weighted avg       0.81      0.81      0.81      2066



- accuracy = 0.81
- f1_churn_0 = 0.80
- f1_churn_1 = 0.81 -- everyone's f1 score increased

# Ensemble Technique

In [180]:
# Normalization

scaler = MinMaxScaler()
df5 = pd.DataFrame(scaler.fit_transform(df4),columns=df4.columns)
df5.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,InternetService_FiberOptic,InternetService_No,PaymentMethod_CreditCard,PaymentMethod_ElectronicCheck,PaymentMethod_MailedCheck
0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,0.5,0.5,0.5,0.0,1.0,0.115423,0.001275,0.0,0.0,0.0,0.0,1.0,0.0
1,1.0,0.0,0.0,0.0,0.464789,1.0,0.5,1.0,0.5,1.0,0.5,0.5,0.5,0.5,0.0,0.385075,0.215867,0.0,0.0,0.0,0.0,0.0,1.0
2,1.0,0.0,0.0,0.0,0.014085,1.0,0.5,1.0,1.0,0.5,0.5,0.5,0.5,0.0,1.0,0.354229,0.01031,1.0,0.0,0.0,0.0,0.0,1.0
3,1.0,0.0,0.0,0.0,0.619718,0.0,0.0,1.0,0.5,1.0,1.0,0.5,0.5,0.5,0.0,0.239303,0.210241,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.014085,1.0,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.0,1.0,0.521891,0.01533,1.0,1.0,0.0,0.0,1.0,0.0


In [181]:
X = df5.drop('Churn', axis = 1)
y = df5.Churn

# train test split
X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.8,random_state=random_state)

In [186]:
df6 = X_train.join(y_train)
df6.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,MonthlyCharges,TotalCharges,InternetService_FiberOptic,InternetService_No,PaymentMethod_CreditCard,PaymentMethod_ElectronicCheck,PaymentMethod_MailedCheck,Churn
3576,0.0,0.0,0.0,0.0,0.028169,1.0,0.5,0.5,0.5,0.5,0.5,1.0,0.5,0.0,1.0,0.619403,0.024642,1.0,0.0,0.0,1.0,0.0,0.0
2646,1.0,0.0,0.0,0.0,0.323944,1.0,0.5,0.5,0.5,0.5,0.5,1.0,1.0,0.0,1.0,0.717413,0.256139,1.0,0.0,0.0,1.0,0.0,1.0
3032,1.0,0.0,0.0,0.0,0.43662,1.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.020896,0.079471,0.0,1.0,1.0,0.0,0.0,0.0
5595,1.0,0.0,0.0,0.0,0.225352,1.0,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.0,1.0,0.522886,0.137111,1.0,0.0,0.0,1.0,0.0,0.0
3087,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.5,0.5,0.5,0.5,0.0,1.0,0.068159,0.000727,0.0,0.0,0.0,0.0,1.0,1.0


In [187]:
df6.Churn.value_counts()

0.0    4131
1.0    1494
Name: Churn, dtype: int64

In [188]:
# we can make 3 batches here
4131/1494

2.7650602409638556

In [195]:
def train_model(majority_df,minority_df,target_variable):
    models = []
    batches = majority_df.shape[0]//minority_df.shape[0]
    
    if minority_df.shape[0]*batches < majority_df.shape[0]:
        batches += 1
    
    for i in range(batches):
        df = minority_df.append(majority_df.sample(minority_df.shape[0]))
        model = ANN(df.drop(target_variable,axis = 1),df[target_variable],epochs = 100,batch_size=1000,tb_callback=None)
        models.append(model)
    return models

In [196]:
df6_majority = df6[df6.Churn == 0]
df6_minority = df6[df6.Churn == 1]

models = train_model(df6_majority,df6_minority,'Churn')

  df = minority_df.append(majority_df.sample(minority_df.shape[0]))


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Epoch 1/100


  df = minority_df.append(majority_df.sample(minority_df.shape[0]))


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Epoch 1/100


  df = minority_df.append(majority_df.sample(minority_df.shape[0]))


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [198]:
X_test.shape

(1407, 22)

In [202]:
temp = np.zeros(X_test.shape[0], dtype = 'int64')
for i in models:
    temp += np.array([1 if j>=0.5 else 0 for j in i.predict(X_test).reshape(X_test.shape[0],)])

# considering majority vote
y_pred = [1 if i>1 else 0 for i in temp]

print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

         0.0       0.91      0.74      0.82      1032
         1.0       0.53      0.80      0.63       375

    accuracy                           0.75      1407
   macro avg       0.72      0.77      0.72      1407
weighted avg       0.81      0.75      0.77      1407

