In [56]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import pickle

In [57]:
# Load the churn dataset from the working directory and preview the first rows.
df = pd.read_csv('Churn_Modelling.csv')
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [58]:
# Show column dtypes and non-null counts for the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [59]:
# Drop the columns that are not used as model features.
# `df` is reassigned from itself, so without errors='ignore' a second run of this
# cell would raise KeyError because the columns are already gone. With it the
# cell is idempotent and safe to re-execute.
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [60]:
# Count missing values per column.
df.isnull().sum()

CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [61]:
# Count fully duplicated rows.
df.duplicated().sum()

0

In [62]:
# Separate the 'Exited' label (y) from the remaining predictor columns (X).
y = df['Exited']
X = df.drop(columns=['Exited'])

In [63]:
# Partition the predictor columns by dtype in a single pass: object-dtype
# columns are treated as categorical, every other column as numerical.
numerical_features, categorical_features = [], []
for column_name, column_dtype in X.dtypes.items():
    if column_dtype == 'O':
        categorical_features.append(column_name)
    else:
        numerical_features.append(column_name)
numerical_features, categorical_features

(['CreditScore',
  'Age',
  'Tenure',
  'Balance',
  'NumOfProducts',
  'HasCrCard',
  'IsActiveMember',
  'EstimatedSalary'],
 ['Geography', 'Gender'])

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Numerical columns are standardised; categorical columns are one-hot encoded
# with the first level of each feature dropped and a dense array returned.
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(sparse_output=False, drop='first')

# (name, transformer, columns) triples consumed by the ColumnTransformer.
transformer_steps = [
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
]

# Any column not listed above is passed through unchanged.
preprocessor = ColumnTransformer(
    transformers=transformer_steps,
    remainder='passthrough',
)

In [65]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [91]:
# Count how many training rows fall into each Geography value.
X_train['Geography'].value_counts()

Geography
France     3994
Germany    2011
Spain      1995
Name: count, dtype: int64

In [67]:
# Display the raw (untransformed) test features.
X_test

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
6252,596,Germany,Male,32,3,96709.07,2,0,0,41788.37
4684,623,France,Male,43,1,0.00,2,1,1,146379.30
1731,601,Spain,Female,44,4,0.00,2,1,0,58561.31
4742,506,Germany,Male,59,8,119152.10,2,1,1,170679.74
4521,560,Spain,Female,27,7,124995.98,1,1,1,114669.79
...,...,...,...,...,...,...,...,...,...,...
6412,602,Germany,Female,53,5,98268.84,1,0,1,45038.29
8285,609,France,Male,25,10,0.00,1,0,1,109895.16
7853,730,France,Female,47,7,0.00,1,1,0,33373.26
1095,692,France,Male,29,4,0.00,1,1,0,76755.99


In [68]:
# Fit the preprocessor on the training split only, then apply the already-fitted
# transformer to the test split, so the test rows do not influence the fitted
# scaling/encoding parameters.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

In [69]:
# Display the transformed training matrix.
X_train_processed

array([[ 0.35649971, -0.6557859 ,  0.34567966, ...,  0.        ,
         0.        ,  1.        ],
       [-0.20389777,  0.29493847, -0.3483691 , ...,  1.        ,
         0.        ,  1.        ],
       [-0.96147213, -1.41636539, -0.69539349, ...,  0.        ,
         1.        ,  1.        ],
       ...,
       [ 0.86500853, -0.08535128, -1.38944225, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.15932282,  0.3900109 ,  1.03972843, ...,  0.        ,
         0.        ,  1.        ],
       [ 0.47065475,  1.15059039, -1.38944225, ...,  1.        ,
         0.        ,  1.        ]])

In [70]:
# Display the transformed test matrix.
X_test_processed

array([[-0.57749609, -0.6557859 , -0.69539349, ...,  1.        ,
         0.        ,  1.        ],
       [-0.29729735,  0.3900109 , -1.38944225, ...,  0.        ,
         0.        ,  1.        ],
       [-0.52560743,  0.48508334, -0.3483691 , ...,  0.        ,
         1.        ,  0.        ],
       ...,
       [ 0.81311987,  0.77030065,  0.69270405, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.41876609, -0.94100321, -0.3483691 , ...,  0.        ,
         0.        ,  1.        ],
       [-0.24540869,  0.00972116, -1.38944225, ...,  1.        ,
         0.        ,  1.        ]])

In [71]:
import pickle

# Serialise the fitted preprocessor to 'preprocessor.pkl' so the exact same
# transformation can be reloaded later.
# NOTE: pickle files can execute code on load; only unpickle this file from a trusted source.
with open('preprocessor.pkl', 'wb') as pkl_file:
    pickle.dump(preprocessor, pkl_file)

In [72]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime


In [73]:
# Display the raw (untransformed) training features, wrapped in a new DataFrame for display only.
pd.DataFrame(X_train)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
9254,686,France,Male,32,6,0.00,2,1,1,179093.26
1561,632,Germany,Male,42,4,119624.60,2,1,1,195978.86
1670,559,Spain,Male,24,3,114739.92,1,1,0,85891.02
6087,561,France,Female,27,9,135637.00,1,1,0,153080.40
6669,517,France,Male,56,9,142147.32,1,0,0,39488.04
...,...,...,...,...,...,...,...,...,...,...
5734,768,France,Male,54,8,69712.74,1,1,1,69381.05
5191,682,France,Female,58,1,0.00,1,1,1,706.50
5390,735,France,Female,38,1,0.00,3,0,0,92220.12
860,667,France,Male,43,8,190227.46,1,1,0,97508.04


In [74]:
# Seed Python, NumPy and TensorFlow in one call so weight initialisation (and the
# batch shuffling done later by fit) is repeatable under Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Feed-forward binary classifier: two ReLU hidden layers (64 and 32 units) and a
# single sigmoid output unit.
# The feature width is declared with an explicit Input layer, sized from the
# preprocessed training matrix, instead of passing input_shape to the first
# Dense layer; this is the form Keras recommends for Sequential models.
model = Sequential([
    tf.keras.Input(shape=(X_train_processed.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [75]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 64)                768       
                                                                 
 dense_7 (Dense)             (None, 32)                2080      
                                                                 
 dense_8 (Dense)             (None, 1)                 33        
                                                                 
Total params: 2881 (11.25 KB)
Trainable params: 2881 (11.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [84]:
# Binary cross-entropy objective, optimised with Adam at a learning rate of 0.01;
# accuracy is reported as the training metric.
loss = tf.keras.losses.BinaryCrossentropy()
opt = tf.keras.optimizers.Adam(learning_rate=0.01)

model.compile(
    loss=loss,
    optimizer=opt,
    metrics=['accuracy'],
)

In [85]:
## Configure TensorBoard logging
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard

# Each run writes to its own timestamped sub-directory under logs/fit/, so
# successive runs do not overwrite one another.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp
tensorboard_callback = TensorBoard(
    histogram_freq=1,
    log_dir=log_dir,
)

In [86]:
# Stop training once val_loss has gone 5 epochs without improving, and roll the
# model back to the weights from its best epoch.
early_stopping = EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor='val_loss',
)

In [87]:
# Train for up to 100 epochs with TensorBoard logging and early stopping.
# The validation data that drives early stopping (and restore_best_weights) is
# carved out of the training data rather than taken from the test split:
# selecting the stopping epoch on X_test_processed / y_test would leak the test
# set into model selection and bias any later test-set evaluation.
# Keras takes the last 20% of the arrays passed to fit, before shuffling.
history = model.fit(
    X_train_processed,
    y_train.to_numpy(),  # plain ndarray so the validation slice is taken positionally
    validation_split=0.2,
    epochs=100,
    callbacks=[tensorboard_callback, early_stopping],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100


In [88]:
# Persist the trained model to 'churn_model.h5'.
# NOTE(review): the '.h5' suffix presumably selects Keras's legacy HDF5 format (the
# cell emits a saving warning); consider the native '.keras' format if every
# consumer of this file can load it - confirm before renaming.
model.save('churn_model.h5')

  saving_api.save_model(


In [89]:
# Load the TensorBoard notebook extension so the %tensorboard magic is available.
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [90]:
# Open TensorBoard inline, pointed at the parent directory of the timestamped run logs.
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6011 (pid 16620), started 0:02:57 ago. (Use '!kill 16620' to kill it.)