In [119]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import pickle

In [120]:
# Read the churn dataset from the CSV file in the working directory.
churn_model_df = pd.read_csv(filepath_or_buffer='Churn_Modelling.csv')

In [121]:
# Preview the first five rows of the freshly loaded frame.
churn_model_df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


## Preprocess Data:

In [122]:
## Remove the columns treated as irrelevant features (row number, customer id, surname)
columns_to_discard = ['RowNumber', 'CustomerId', 'Surname']
churn_model_df = churn_model_df.drop(columns=columns_to_discard)
churn_model_df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [123]:
## Encode categorical variables

# Fit a label encoder on the Gender column and overwrite the column with the
# integer codes it produces; the fitted encoder is kept for later reuse.
label_encoder_gender = LabelEncoder()
gender_codes = label_encoder_gender.fit_transform(churn_model_df['Gender'])
churn_model_df['Gender'] = gender_codes
churn_model_df.head()


Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1
3,699,France,0,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0


In [124]:
# List the distinct values present in the Geography column.
churn_model_df['Geography'].unique()

array(['France', 'Spain', 'Germany'], dtype=object)

In [125]:
## One-hot encode the 'Geography' column
from sklearn.preprocessing import OneHotEncoder

In [126]:
# Fit a dense (non-sparse) one-hot encoder on the Geography column and keep
# both the fitted encoder and the encoded array.
onehotencoder_geo = OneHotEncoder(sparse_output=False)
geography_column = churn_model_df[['Geography']]
geo_encoder = onehotencoder_geo.fit_transform(geography_column)
geo_encoder

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [127]:
# Show the column names the fitted encoder assigns to its output columns.
onehotencoder_geo.get_feature_names_out()

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [128]:
# Wrap the one-hot array in a DataFrame labelled with the encoder's feature names.
# The frame reuses churn_model_df's index so that the later column-wise concat
# (which aligns on index) stays row-for-row correct even when the source frame
# does not carry a default 0..n-1 index.
geo_encoder_df = pd.DataFrame(
    geo_encoder,
    columns=onehotencoder_geo.get_feature_names_out(),
    index=churn_model_df.index,
)
geo_encoder_df.head()

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0


In [129]:
# Combine all columns: swap the raw Geography column for its one-hot columns

features_without_geo = churn_model_df.drop(columns='Geography')
churn_model_df = pd.concat([features_without_geo, geo_encoder_df], axis='columns')

In [130]:
# Preview the frame after the Geography column was replaced by its one-hot columns.
churn_model_df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [131]:
## Save the fitted encoders to pickle files
encoder_artifacts = (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehotencoder_geo.pkl', onehotencoder_geo),
)
for pickle_path, fitted_encoder in encoder_artifacts:
    with open(pickle_path, 'wb') as encoder_file:
        pickle.dump(fitted_encoder, encoder_file)


In [132]:
## Separate the independent features from the dependent target column
target_column = 'Exited'
X = churn_model_df.drop(columns=target_column)
y = churn_model_df[target_column]

## Hold out 20% of the rows as a test set (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Standardise the features: statistics are fitted on the training split only
## and the same fitted scaler is then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [133]:
# Persist the fitted scaler next to the encoder pickles.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

### ANN Implementation

In [134]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime


In [135]:
# Display the installed TensorFlow version.
tf.__version__

'2.15.0'

In [136]:
## Report the shape of the training feature matrix the network is built against
shape_label = "Input shape for the neural network: "
print(shape_label, X_train.shape)

Input shape for the neural network:  (8000, 12)


In [137]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation (and hence training) is repeatable after a kernel restart.
tf.keras.utils.set_random_seed(42)

# Feed-forward binary classifier: two ReLU hidden layers (64 and 32 units)
# followed by a single sigmoid output unit. The input width is taken from the
# number of columns in X_train.
n_features = X_train.shape[1]
model = Sequential([
    Dense(64, activation='relu', input_shape=(n_features,)),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [138]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_9 (Dense)             (None, 64)                832       
                                                                 
 dense_10 (Dense)            (None, 32)                2080      
                                                                 
 dense_11 (Dense)            (None, 1)                 33        
                                                                 
Total params: 2945 (11.50 KB)
Trainable params: 2945 (11.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [139]:
# Parameter count of the first hidden layer: inputs * neurons + one bias per neuron.
# The network is fed 12 features (the second dimension of X_train), not 9, so the
# result agrees with the 832 that model.summary() reports for the first Dense layer.
n_inputs, n_neurons = 12, 64
first_layer_params = n_inputs * n_neurons + n_neurons
first_layer_params

832

In [140]:
# Adam optimizer with an explicit learning rate of 0.01.
opt = tf.keras.optimizers.Adam(learning_rate=0.01)

In [141]:
## Compile the model: binary cross-entropy loss, accuracy tracked as a metric
model.compile(
    loss="binary_crossentropy",
    optimizer=opt,
    metrics=['accuracy'],
)

In [142]:
## Set up TensorBoard logging into a directory stamped with the current time
run_stamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
log_dir = "logs/fit_" + run_stamp
tensorflow_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [143]:
# Set up early stopping: watch the validation loss, allow 10 epochs without
# improvement, and roll back to the best weights seen when stopping.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

In [144]:
# Check the shape of the scaled training matrix before fitting.
X_train_scaled.shape

(8000, 12)

In [145]:
## Train the model for up to 100 epochs with TensorBoard logging and early stopping
# NOTE(review): the held-out test split is passed as validation data, so early
# stopping is driven by the same rows later treated as the test set.
validation_pair = (X_test_scaled, y_test)
training_callbacks = [tensorflow_callback, early_stopping]
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    validation_data=validation_pair,
    epochs=100,
    callbacks=training_callbacks,
)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100


In [146]:
# Save the trained model to 'ann_model.h5' in the working directory.
model.save('ann_model.h5')

  saving_api.save_model(


In [147]:
## Load the TensorBoard notebook extension so the %tensorboard magic is available
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [148]:
# Show the log directory that this run's TensorBoard callback writes to.
log_dir

'logs/fit_20250102-080732'

In [149]:
%tensorboard --logdir logs/fit_20250102-072016

Reusing TensorBoard on port 6009 (pid 14913), started 0:35:37 ago. (Use '!kill 14913' to kill it.)

In [150]:
# List the feature columns of the full feature matrix.
X.columns

Index(['CreditScore', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts',
       'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Geography_France',
       'Geography_Germany', 'Geography_Spain'],
      dtype='object')

In [151]:
# List the feature columns of the training split.
X_train.columns

Index(['CreditScore', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts',
       'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Geography_France',
       'Geography_Germany', 'Geography_Spain'],
      dtype='object')