### Import Data Preprocesing Modules

In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

### Reading the Data

In [3]:
df = pd.read_csv(r"./churn.csv")
df.head()

Unnamed: 0,Customer ID,Gender,Senior Citizen,Partner,Dependents,tenure,Phone Service,Multiple Lines,Internet Service,Online Security,...,Device Protection,Tech Support,Streaming TV,Streaming Movies,Contract,Paperless Billing,Payment Method,Monthly Charges,Total Charges,Churn
0,7590-VHVEA,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
2,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
3,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
4,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7044 entries, 0 to 7043
Data columns (total 21 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Customer ID        7044 non-null   object 
 1   Gender             7044 non-null   object 
 2   Senior Citizen     7044 non-null   int64  
 3   Partner            7044 non-null   object 
 4   Dependents         7044 non-null   object 
 5   tenure             7044 non-null   int64  
 6   Phone Service      7044 non-null   object 
 7   Multiple Lines     7044 non-null   object 
 8   Internet Service   7044 non-null   object 
 9   Online Security    7044 non-null   object 
 10  Online Backup      7044 non-null   object 
 11  Device Protection  7044 non-null   object 
 12  Tech Support       7044 non-null   object 
 13  Streaming TV       7044 non-null   object 
 14  Streaming Movies   7044 non-null   object 
 15  Contract           7044 non-null   object 
 16  Paperless Billing  7044 

In [7]:
df.isna().sum()

Customer ID          0
Gender               0
Senior Citizen       0
Partner              0
Dependents           0
tenure               0
Phone Service        0
Multiple Lines       0
Internet Service     0
Online Security      0
Online Backup        0
Device Protection    0
Tech Support         0
Streaming TV         0
Streaming Movies     0
Contract             0
Paperless Billing    0
Payment Method       0
Monthly Charges      0
Total Charges        0
Churn                0
dtype: int64

### Data Preprocessing 

In [8]:
X = pd.get_dummies(df.drop(['Churn' , 'Customer ID'] , axis = 1))

In [9]:
X.head()

Unnamed: 0,Senior Citizen,tenure,Monthly Charges,Gender_Female,Gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,Phone Service_No,...,Total Charges_995.35,Total Charges_996.45,Total Charges_996.85,Total Charges_996.95,Total Charges_997.65,Total Charges_997.75,Total Charges_998.1,Total Charges_999.45,Total Charges_999.8,Total Charges_999.9
0,0,1,29.85,1,0,0,1,1,0,1,...,0,0,0,0,0,0,0,0,0,0
1,0,1,29.85,1,0,0,1,1,0,1,...,0,0,0,0,0,0,0,0,0,0
2,0,34,56.95,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,2,53.85,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,45,42.3,0,1,1,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0


In [10]:
y = df['Churn'].apply(lambda x : 1 if x=="Yes" else 0)

In [11]:
y.head()

0    0
1    0
2    0
3    1
4    0
Name: Churn, dtype: int64

In [12]:
TRAIN_TEST_RATIO = 0.8
X_train,X_test,y_train,y_test = train_test_split(X , y , train_size= TRAIN_TEST_RATIO)

In [13]:
X_train.head()

Unnamed: 0,Senior Citizen,tenure,Monthly Charges,Gender_Female,Gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,Phone Service_No,...,Total Charges_995.35,Total Charges_996.45,Total Charges_996.85,Total Charges_996.95,Total Charges_997.65,Total Charges_997.75,Total Charges_998.1,Total Charges_999.45,Total Charges_999.8,Total Charges_999.9
496,0,22,43.75,0,1,1,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
6997,0,41,66.5,1,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3312,0,48,44.8,1,0,1,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
2751,0,3,19.45,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
6038,1,63,109.4,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
y_train.head()

496     1
6997    1
3312    0
2751    0
6038    0
Name: Churn, dtype: int64

In [16]:
from tensorflow.keras.models import Sequential , load_model
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score , mean_squared_error , mean_absolute_error , precision_recall_fscore_support

In [23]:
model = Sequential()


In [24]:
# includes 32 neurons with "relu" as the activation function , input size will be the number of columns in the training dataset
layer1 = Dense(units = 32 , activation = "relu" , input_dim = len(X_train.columns))
model.add(layer1)

In [25]:
# hidden layer with 64 neurons
layer2= Dense(units = 64 , activation = "relu")
model.add(layer2)

In [26]:
# output layer with only one neuron and "sigmoid" activation function so as to convert the decimals into 0 or 1
layer3 = Dense(units = 1 , activation = "sigmoid")
model.add(layer3)

In [29]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 32)                210432    
                                                                 
 dense_4 (Dense)             (None, 64)                2112      
                                                                 
 dense_5 (Dense)             (None, 1)                 65        
                                                                 
Total params: 212,609
Trainable params: 212,609
Non-trainable params: 0
_________________________________________________________________


In [30]:
model.layers

[<keras.layers.core.dense.Dense at 0x28b53e1d0>,
 <keras.layers.core.dense.Dense at 0x28c88b0a0>,
 <keras.layers.core.dense.Dense at 0x28c88ae90>]

In [31]:
model.compile(loss = "binary_crossentropy" , optimizer= 'sgd' , metrics = 'accuracy')

In [34]:
model.fit(X_train , y_train , epochs = 200 , batch_size = 32)

Epoch 1/200


2023-02-20 01:24:17.019442: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-02-20 01:24:17.178137: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 7

<keras.callbacks.History at 0x28dbe7c70>

In [48]:
y_pred = model.predict(X_test)




2023-02-20 01:32:43.079198: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


In [37]:
y_pred[0:5]

array([[0.34674653],
       [0.19676842],
       [0.01725991],
       [0.01380257],
       [0.3682982 ]], dtype=float32)

In [38]:
y_pred = [0 if val < 0.5 else 1 for val in y_pred]

In [39]:
y_pred[0:5]

[0, 0, 0, 0, 0]

In [41]:
round(accuracy_score(y_test , y_pred),2)

0.78

In [42]:
mean_squared_error(y_test,y_pred)

0.21859474804826118

In [43]:
mean_absolute_error(y_test , y_pred)

0.21859474804826118

In [44]:
model.save('churn_model')

INFO:tensorflow:Assets written to: churn_model/assets


In [45]:
del model

In [47]:
model = load_model(r"churn_model")